X-Git-Url: http://wien.tomnetworks.com/gitweb/?a=blobdiff_plain;f=src%2Fvm%2Futf8.c;h=fe2099d0298474e6e595b6fef67a2c0e840e82ca;hb=083e777c06a7c98e62fe3170125975b17459260a;hp=eb7fb5131b4a0b2b10591206e2b36f11813f66b4;hpb=c5fa1c7dccde6effeac76c0b9388d3bb9a980ded;p=cacao.git

diff --git a/src/vm/utf8.c b/src/vm/utf8.c
index eb7fb5131..fe2099d02 100644
--- a/src/vm/utf8.c
+++ b/src/vm/utf8.c
@@ -1,9 +1,7 @@
-/* src/vm/utf.c - utf functions
+/* src/vm/utf8.c - utf8 string functions
 
-   Copyright (C) 1996-2005 R. Grafl, A. Krall, C. Kruegel, C. Oates,
-   R. Obermaisser, M. Platter, M. Probst, S. Ring, E. Steiner,
-   C. Thalinger, D. Thuernbeck, P. Tomsich, C. Ullrich, J. Wenninger,
-   Institut f. Computersprachen - TU Wien
+   Copyright (C) 1996-2005, 2006, 2007, 2008
+   CACAOVM - Verein zur Foerderung der freien virtuellen Maschine CACAO
 
    This file is part of CACAO.
 
@@ -19,54 +17,106 @@
 
    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
-   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
-   02111-1307, USA.
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+   02110-1301, USA.
 
-   Contact: cacao@complang.tuwien.ac.at
+*/
 
-   Authors: Reinhard Grafl
 
-   Changes: Mark Probst
-            Andreas Krall
-            Christian Thalinger
+#include "config.h"
 
-   $Id: utf8.c 2148 2005-03-30 16:49:40Z twisti $
+#include <string.h>
+#include <assert.h>
 
-*/
+#include "vm/types.h"
 
+#include "mm/memory.hpp"
 
-#include <string.h>
+#include "threads/mutex.hpp"
+
+#include "toolbox/hashtable.h"
 
-#include "mm/memory.h"
-#include "vm/exceptions.h"
+#include "vm/exceptions.hpp"
 #include "vm/options.h"
-#include "vm/statistics.h"
-#include "vm/tables.h"
+
+#if defined(ENABLE_STATISTICS)
+# include "vm/statistics.h"
+#endif
+
 #include "vm/utf8.h"
 
 
-hashtable utf_hash;                     /* hashtable for utf8-symbols         */
+/* global variables ***********************************************************/
+
+/* hashsize must be a power of 2 (slots are picked with key & (size - 1)) */
+
+#define HASHTABLE_UTF_SIZE    16384     /* initial size of utf-hash           */
+
+hashtable *hashtable_utf;               /* hashtable for utf8-symbols         */
 
 
 /* utf-symbols for pointer comparison of frequently used strings **************/
 
-utf *utf_java_lang_Object;              /* java/lang/Object                   */
+utf *utf_java_lang_Object;
 
 utf *utf_java_lang_Class;
 utf *utf_java_lang_ClassLoader;
+utf *utf_java_lang_ClassLoader_NativeLibrary;
 utf *utf_java_lang_Cloneable;
 utf *utf_java_lang_SecurityManager;
 utf *utf_java_lang_String;
-utf *utf_java_lang_System;
+utf *utf_java_lang_ThreadGroup;
+utf *utf_java_lang_ref_SoftReference;
+utf *utf_java_lang_ref_WeakReference;
+utf *utf_java_lang_ref_PhantomReference;
 utf *utf_java_io_Serializable;
 
 utf *utf_java_lang_Throwable;
-utf *utf_java_lang_VMThrowable;
-utf *utf_java_lang_Exception;
 utf *utf_java_lang_Error;
+
+utf *utf_java_lang_AbstractMethodError;
+utf *utf_java_lang_ClassCircularityError;
+utf *utf_java_lang_ClassFormatError;
+utf *utf_java_lang_ExceptionInInitializerError;
+utf *utf_java_lang_IncompatibleClassChangeError;
+utf *utf_java_lang_InstantiationError;
+utf *utf_java_lang_InternalError;
+utf *utf_java_lang_LinkageError;
+utf *utf_java_lang_NoClassDefFoundError;
+utf *utf_java_lang_NoSuchFieldError;
+utf *utf_java_lang_NoSuchMethodError;
 utf *utf_java_lang_OutOfMemoryError;
+utf *utf_java_lang_UnsatisfiedLinkError;
+utf *utf_java_lang_UnsupportedClassVersionError;
+utf *utf_java_lang_VerifyError;
+utf *utf_java_lang_VirtualMachineError;
+
+utf *utf_java_lang_Exception;
 
+utf *utf_java_lang_ArithmeticException;
+utf *utf_java_lang_ArrayIndexOutOfBoundsException;
+utf *utf_java_lang_ArrayStoreException;
+utf *utf_java_lang_ClassCastException;
+utf *utf_java_lang_ClassNotFoundException;
+utf *utf_java_lang_CloneNotSupportedException;
+utf *utf_java_lang_IllegalAccessException;
+utf *utf_java_lang_IllegalArgumentException;
+utf *utf_java_lang_IllegalMonitorStateException;
+utf *utf_java_lang_InstantiationException;
+utf *utf_java_lang_InterruptedException;
+utf *utf_java_lang_NegativeArraySizeException;
+utf *utf_java_lang_NullPointerException;
+utf *utf_java_lang_RuntimeException;
+utf *utf_java_lang_StringIndexOutOfBoundsException;
+
+utf *utf_java_lang_reflect_InvocationTargetException;
+
+utf *utf_java_security_PrivilegedActionException;
+
+#if defined(ENABLE_JAVASE)
 utf* utf_java_lang_Void;
+#endif
+
 utf* utf_java_lang_Boolean;
 utf* utf_java_lang_Byte;
 utf* utf_java_lang_Character;
@@ -76,7 +126,20 @@ utf* utf_java_lang_Long;
 utf* utf_java_lang_Float;
 utf* utf_java_lang_Double;
 
+#if defined(ENABLE_JAVASE)
+utf *utf_java_lang_StackTraceElement;
+utf *utf_java_lang_reflect_Constructor;
+utf *utf_java_lang_reflect_Field;
+utf *utf_java_lang_reflect_Method;
+
+# if defined(WITH_JAVA_RUNTIME_LIBRARY_GNU_CLASSPATH)
+utf *utf_java_lang_reflect_VMConstructor;
+utf *utf_java_lang_reflect_VMField;
+utf *utf_java_lang_reflect_VMMethod;
+# endif
+
 utf *utf_java_util_Vector;
+#endif
 
 utf *utf_InnerClasses;                  /* InnerClasses                       */
 utf *utf_ConstantValue;                 /* ConstantValue                      */
@@ -85,13 +148,60 @@ utf *utf_Exceptions;                    /* Exceptions                         */
 utf *utf_LineNumberTable;               /* LineNumberTable                    */
 utf *utf_SourceFile;                    /* SourceFile                         */
 
+#if defined(ENABLE_JAVASE)
+utf *utf_EnclosingMethod;
+utf *utf_Signature;
+utf *utf_StackMapTable;
+
+# if defined(ENABLE_JVMTI)
+utf *utf_LocalVariableTable;
+# endif
+
+# if defined(ENABLE_ANNOTATIONS)
+utf *utf_RuntimeVisibleAnnotations;            /* RuntimeVisibleAnnotations            */
+utf *utf_RuntimeInvisibleAnnotations;          /* RuntimeInvisibleAnnotations          */
+utf *utf_RuntimeVisibleParameterAnnotations;   /* RuntimeVisibleParameterAnnotations   */
+utf *utf_RuntimeInvisibleParameterAnnotations; /* RuntimeInvisibleParameterAnnotations */
+utf *utf_AnnotationDefault;                    /* AnnotationDefault                    */
+# endif
+#endif
+
 utf *utf_init;                          /* <init>                             */
 utf *utf_clinit;                        /* <clinit>                           */
+utf *utf_clone;                         /* clone                              */
 utf *utf_finalize;                      /* finalize                           */
+utf *utf_invoke;
+utf *utf_main;
+utf *utf_run;                           /* run                                */
+
+utf *utf_add;
+utf *utf_dispatch;
+utf *utf_remove;
+utf *utf_addThread;
+utf *utf_removeThread;
+utf *utf_put;
+utf *utf_get;
+utf *utf_uncaughtException;
+utf *utf_value;
 
-utf *utf_printStackTrace;
 utf *utf_fillInStackTrace;
+utf *utf_findNative;
+utf *utf_getSystemClassLoader;
+utf *utf_initCause;
 utf *utf_loadClass;
+utf *utf_loadClassInternal;
+utf *utf_printStackTrace;
+
+utf *utf_division_by_zero;
+
+utf *utf_Z;                             /* Z                                  */
+utf *utf_B;                             /* B                                  */
+utf *utf_C;                             /* C                                  */
+utf *utf_S;                             /* S                                  */
+utf *utf_I;                             /* I                                  */
+utf *utf_J;                             /* J                                  */
+utf *utf_F;                             /* F                                  */
+utf *utf_D;                             /* D                                  */
 
 utf *utf_void__void;                    /* ()V                                */
 utf *utf_boolean__void;                 /* (Z)V                               */
@@ -102,12 +212,23 @@ utf *utf_int__void;                     /* (I)V                               */
 utf *utf_long__void;                    /* (J)V                               */
 utf *utf_float__void;                   /* (F)V                               */
 utf *utf_double__void;                  /* (D)V                               */
+
+utf *utf_void__java_lang_ClassLoader;   /* ()Ljava/lang/ClassLoader;          */
 utf *utf_void__java_lang_Object;        /* ()Ljava/lang/Object;               */
 utf *utf_void__java_lang_Throwable;     /* ()Ljava/lang/Throwable;            */
+utf *utf_java_lang_ClassLoader_java_lang_String__J;
+utf *utf_java_lang_Exception__V;        /* (Ljava/lang/Exception;)V           */
+utf *utf_java_lang_Object__java_lang_Object;
 utf *utf_java_lang_String__void;        /* (Ljava/lang/String;)V              */
 utf *utf_java_lang_String__java_lang_Class;
+utf *utf_java_lang_Thread__V;           /* (Ljava/lang/Thread;)V              */
+utf *utf_java_lang_Thread_java_lang_Throwable__V;
+utf *utf_Ljava_lang_ThreadGroup_Ljava_lang_String__V;
 utf *utf_java_lang_Throwable__void;     /* (Ljava/lang/Throwable;)V           */
+utf *utf_java_lang_Throwable__java_lang_Throwable;
 
+utf *utf_not_named_yet;                 /* special name for unnamed classes   */
+utf *utf_null;
 utf *array_packagename;
 
 
@@ -119,6 +240,19 @@ utf *array_packagename;
 
 void utf8_init(void)
 {
+	TRACESUBSYSTEMINITIALIZATION("utf8_init");
+
+	/* create utf8 hashtable */
+
+	hashtable_utf = NEW(hashtable);
+
+	hashtable_create(hashtable_utf, HASHTABLE_UTF_SIZE);
+
+#if defined(ENABLE_STATISTICS)
+	if (opt_stat)
+		count_utf_len += sizeof(utf*) * hashtable_utf->size;
+#endif
+
 	/* create utf-symbols for pointer comparison of frequently used strings */
 
 	utf_java_lang_Object           = utf_new_char("java/lang/Object");
@@ -128,16 +262,126 @@ void utf8_init(void)
 	utf_java_lang_Cloneable        = utf_new_char("java/lang/Cloneable");
 	utf_java_lang_SecurityManager  = utf_new_char("java/lang/SecurityManager");
 	utf_java_lang_String           = utf_new_char("java/lang/String");
-	utf_java_lang_System           = utf_new_char("java/lang/System");
+	utf_java_lang_ThreadGroup      = utf_new_char("java/lang/ThreadGroup");
+
+	utf_java_lang_ClassLoader_NativeLibrary =
+		utf_new_char("java/lang/ClassLoader$NativeLibrary");
+
+	utf_java_lang_ref_SoftReference =
+		utf_new_char("java/lang/ref/SoftReference");
+
+	utf_java_lang_ref_WeakReference =
+		utf_new_char("java/lang/ref/WeakReference");
+
+	utf_java_lang_ref_PhantomReference =
+		utf_new_char("java/lang/ref/PhantomReference");
+
 	utf_java_io_Serializable       = utf_new_char("java/io/Serializable");
 
 	utf_java_lang_Throwable        = utf_new_char("java/lang/Throwable");
-	utf_java_lang_VMThrowable      = utf_new_char("java/lang/VMThrowable");
-	utf_java_lang_Exception        = utf_new_char("java/lang/Exception");
 	utf_java_lang_Error            = utf_new_char("java/lang/Error");
+
+	utf_java_lang_ClassCircularityError =
+		utf_new_char("java/lang/ClassCircularityError");
+
+	utf_java_lang_ClassFormatError = utf_new_char("java/lang/ClassFormatError");
+
+	utf_java_lang_ExceptionInInitializerError =
+		utf_new_char("java/lang/ExceptionInInitializerError");
+
+	utf_java_lang_IncompatibleClassChangeError =
+		utf_new_char("java/lang/IncompatibleClassChangeError");
+
+	utf_java_lang_InstantiationError =
+		utf_new_char("java/lang/InstantiationError");
+
+	utf_java_lang_InternalError    = utf_new_char("java/lang/InternalError");
+	utf_java_lang_LinkageError     = utf_new_char("java/lang/LinkageError");
+
+	utf_java_lang_NoClassDefFoundError =
+		utf_new_char("java/lang/NoClassDefFoundError");
+
 	utf_java_lang_OutOfMemoryError = utf_new_char("java/lang/OutOfMemoryError");
 
+	utf_java_lang_UnsatisfiedLinkError =
+		utf_new_char("java/lang/UnsatisfiedLinkError");
+
+	utf_java_lang_UnsupportedClassVersionError =
+		utf_new_char("java/lang/UnsupportedClassVersionError");
+
+	utf_java_lang_VerifyError      = utf_new_char("java/lang/VerifyError");
+
+	utf_java_lang_VirtualMachineError =
+		utf_new_char("java/lang/VirtualMachineError");
+
+#if defined(ENABLE_JAVASE)
+	utf_java_lang_AbstractMethodError =
+		utf_new_char("java/lang/AbstractMethodError");
+
+	utf_java_lang_NoSuchFieldError =
+		utf_new_char("java/lang/NoSuchFieldError");
+
+	utf_java_lang_NoSuchMethodError =
+		utf_new_char("java/lang/NoSuchMethodError");
+#endif
+
+	utf_java_lang_Exception        = utf_new_char("java/lang/Exception");
+
+	utf_java_lang_ArithmeticException =
+		utf_new_char("java/lang/ArithmeticException");
+
+	utf_java_lang_ArrayIndexOutOfBoundsException =
+		utf_new_char("java/lang/ArrayIndexOutOfBoundsException");
+
+	utf_java_lang_ArrayStoreException =
+		utf_new_char("java/lang/ArrayStoreException");
+
+	utf_java_lang_ClassCastException =
+		utf_new_char("java/lang/ClassCastException");
+
+	utf_java_lang_ClassNotFoundException =
+		utf_new_char("java/lang/ClassNotFoundException");
+
+	utf_java_lang_CloneNotSupportedException =
+		utf_new_char("java/lang/CloneNotSupportedException");
+
+	utf_java_lang_IllegalAccessException =
+		utf_new_char("java/lang/IllegalAccessException");
+
+	utf_java_lang_IllegalArgumentException =
+		utf_new_char("java/lang/IllegalArgumentException");
+
+	utf_java_lang_IllegalMonitorStateException =
+		utf_new_char("java/lang/IllegalMonitorStateException");
+
+	utf_java_lang_InstantiationException =
+		utf_new_char("java/lang/InstantiationException");
+
+	utf_java_lang_InterruptedException =
+		utf_new_char("java/lang/InterruptedException");
+
+	utf_java_lang_NegativeArraySizeException =
+		utf_new_char("java/lang/NegativeArraySizeException");
+
+	utf_java_lang_NullPointerException =
+		utf_new_char("java/lang/NullPointerException");
+
+	utf_java_lang_RuntimeException =
+		utf_new_char("java/lang/RuntimeException");
+
+	utf_java_lang_StringIndexOutOfBoundsException =
+		utf_new_char("java/lang/StringIndexOutOfBoundsException");
+
+	utf_java_lang_reflect_InvocationTargetException =
+		utf_new_char("java/lang/reflect/InvocationTargetException");
+
+	utf_java_security_PrivilegedActionException =
+		utf_new_char("java/security/PrivilegedActionException");
+ 
+#if defined(ENABLE_JAVASE)
 	utf_java_lang_Void             = utf_new_char("java/lang/Void");
+#endif
+
 	utf_java_lang_Boolean          = utf_new_char("java/lang/Boolean");
 	utf_java_lang_Byte             = utf_new_char("java/lang/Byte");
 	utf_java_lang_Character        = utf_new_char("java/lang/Character");
@@ -147,7 +391,24 @@ void utf8_init(void)
 	utf_java_lang_Float            = utf_new_char("java/lang/Float");
 	utf_java_lang_Double           = utf_new_char("java/lang/Double");
 
+#if defined(ENABLE_JAVASE)
+	utf_java_lang_StackTraceElement =
+		utf_new_char("java/lang/StackTraceElement");
+
+	utf_java_lang_reflect_Constructor =
+		utf_new_char("java/lang/reflect/Constructor");
+
+	utf_java_lang_reflect_Field    = utf_new_char("java/lang/reflect/Field");
+	utf_java_lang_reflect_Method   = utf_new_char("java/lang/reflect/Method");
+
+# if defined(WITH_JAVA_RUNTIME_LIBRARY_GNU_CLASSPATH)
+	utf_java_lang_reflect_VMConstructor = utf_new_char("java/lang/reflect/VMConstructor");
+	utf_java_lang_reflect_VMField       = utf_new_char("java/lang/reflect/VMField");
+	utf_java_lang_reflect_VMMethod      = utf_new_char("java/lang/reflect/VMMethod");
+# endif
+
 	utf_java_util_Vector           = utf_new_char("java/util/Vector");
+#endif
 
 	utf_InnerClasses               = utf_new_char("InnerClasses");
 	utf_ConstantValue              = utf_new_char("ConstantValue");
@@ -156,13 +417,60 @@ void utf8_init(void)
 	utf_LineNumberTable            = utf_new_char("LineNumberTable");
 	utf_SourceFile                 = utf_new_char("SourceFile");
 
+#if defined(ENABLE_JAVASE)
+	utf_EnclosingMethod            = utf_new_char("EnclosingMethod");
+	utf_Signature                  = utf_new_char("Signature");
+	utf_StackMapTable              = utf_new_char("StackMapTable");
+
+# if defined(ENABLE_JVMTI)
+	utf_LocalVariableTable         = utf_new_char("LocalVariableTable");
+# endif
+
+# if defined(ENABLE_ANNOTATIONS)
+	utf_RuntimeVisibleAnnotations            = utf_new_char("RuntimeVisibleAnnotations");
+	utf_RuntimeInvisibleAnnotations          = utf_new_char("RuntimeInvisibleAnnotations");
+	utf_RuntimeVisibleParameterAnnotations   = utf_new_char("RuntimeVisibleParameterAnnotations");
+	utf_RuntimeInvisibleParameterAnnotations = utf_new_char("RuntimeInvisibleParameterAnnotations");
+	utf_AnnotationDefault                    = utf_new_char("AnnotationDefault");
+# endif
+#endif
+
 	utf_init	                   = utf_new_char("<init>");
 	utf_clinit	                   = utf_new_char("<clinit>");
+	utf_clone                      = utf_new_char("clone");
 	utf_finalize	               = utf_new_char("finalize");
+	utf_invoke                     = utf_new_char("invoke");
+	utf_main                       = utf_new_char("main");
+	utf_run                        = utf_new_char("run");
+
+	utf_add                        = utf_new_char("add");
+	utf_dispatch                   = utf_new_char("dispatch");
+	utf_remove                     = utf_new_char("remove");
+	utf_addThread                  = utf_new_char("addThread");
+	utf_removeThread               = utf_new_char("removeThread");
+	utf_put                        = utf_new_char("put");
+	utf_get                        = utf_new_char("get");
+	utf_uncaughtException          = utf_new_char("uncaughtException");
+	utf_value                      = utf_new_char("value");
 
-	utf_printStackTrace            = utf_new_char("printStackTrace");
 	utf_fillInStackTrace           = utf_new_char("fillInStackTrace");
+	utf_findNative                 = utf_new_char("findNative");
+	utf_getSystemClassLoader       = utf_new_char("getSystemClassLoader");
+	utf_initCause                  = utf_new_char("initCause");
 	utf_loadClass                  = utf_new_char("loadClass");
+	utf_loadClassInternal          = utf_new_char("loadClassInternal");
+	utf_printStackTrace            = utf_new_char("printStackTrace");
+
+	utf_division_by_zero           = utf_new_char("/ by zero");
+
+	utf_Z                          = utf_new_char("Z");
+	utf_B                          = utf_new_char("B");
+	utf_C                          = utf_new_char("C");
+	utf_S                          = utf_new_char("S");
+	utf_I                          = utf_new_char("I");
+	utf_J                          = utf_new_char("J");
+	utf_F                          = utf_new_char("F");
+	utf_D                          = utf_new_char("D");
 
 	utf_void__void                 = utf_new_char("()V");
 	utf_boolean__void              = utf_new_char("(Z)V");
@@ -175,13 +483,38 @@ void utf8_init(void)
 	utf_double__void               = utf_new_char("(D)V");
 	utf_void__java_lang_Object     = utf_new_char("()Ljava/lang/Object;");
 	utf_void__java_lang_Throwable  = utf_new_char("()Ljava/lang/Throwable;");
+
+	utf_void__java_lang_ClassLoader =
+		utf_new_char("()Ljava/lang/ClassLoader;");
+
+	utf_java_lang_ClassLoader_java_lang_String__J =
+		utf_new_char("(Ljava/lang/ClassLoader;Ljava/lang/String;)J");
+
+	utf_java_lang_Exception__V     = utf_new_char("(Ljava/lang/Exception;)V");
+
+	utf_java_lang_Object__java_lang_Object =
+		utf_new_char("(Ljava/lang/Object;)Ljava/lang/Object;");
+
 	utf_java_lang_String__void     = utf_new_char("(Ljava/lang/String;)V");
 
 	utf_java_lang_String__java_lang_Class =
 		utf_new_char("(Ljava/lang/String;)Ljava/lang/Class;");
 
+	utf_java_lang_Thread__V        = utf_new_char("(Ljava/lang/Thread;)V");
+
+	utf_java_lang_Thread_java_lang_Throwable__V =
+		utf_new_char("(Ljava/lang/Thread;Ljava/lang/Throwable;)V");
+
+	utf_Ljava_lang_ThreadGroup_Ljava_lang_String__V =
+		utf_new_char("(Ljava/lang/ThreadGroup;Ljava/lang/String;)V");
+
 	utf_java_lang_Throwable__void  = utf_new_char("(Ljava/lang/Throwable;)V");
 
+	utf_java_lang_Throwable__java_lang_Throwable =
+		utf_new_char("(Ljava/lang/Throwable;)Ljava/lang/Throwable;");
+
+	utf_null                       = utf_new_char("null");
+	utf_not_named_yet              = utf_new_char("\t<not_named_yet>");
 	array_packagename              = utf_new_char("\t<the array package>");
 }
 
@@ -309,8 +642,36 @@ u4 utf_hashkey(const char *text, u4 length)
     }
 }
 
+/* utf_full_hashkey ************************************************************
 
-/* utf_hashkey *****************************************************************
+   This function computes a hash value using all bytes in the string.
+
+   The algorithm is the "One-at-a-time" algorithm as published
+   by Bob Jenkins on http://burtleburtle.net/bob/hash/doobs.html.
+
+*******************************************************************************/
+
+u4 utf_full_hashkey(const char *text, u4 length)
+{
+	register const unsigned char *p = (const unsigned char *) text; /* bytes are read as unsigned */
+	register u4 hash;                   /* running hash value                 */
+	register u4 i;                      /* number of bytes still to process   */
+
+	hash = 0;
+	for (i=length; i--;)                /* executes exactly length times      */
+	{
+	    hash += *p++;                   /* mix in the next byte               */
+	    hash += (hash << 10);
+	    hash ^= (hash >> 6);
+	}
+	hash += (hash << 3);                /* final mixing after the last byte   */
+	hash ^= (hash >> 11);
+	hash += (hash << 15);
+
+	return hash;
+}
+
+/* unicode_hashkey *************************************************************
 
    Compute the hashkey of a unicode string.
 
@@ -335,116 +696,116 @@ u4 unicode_hashkey(u2 *text, u2 len)
 
 *******************************************************************************/
 
-utf *utf_new_intern(const char *text, u2 length);
-
 utf *utf_new(const char *text, u2 length)
-{
-    utf *r;
-
-#if defined(USE_THREADS) && defined(NATIVE_THREADS)
-    tables_lock();
-#endif
-
-    r = utf_new_intern(text, length);
-
-#if defined(USE_THREADS) && defined(NATIVE_THREADS)
-    tables_unlock();
-#endif
-
-    return r;
-}
-
-
-utf *utf_new_intern(const char *text, u2 length)
 {
 	u4 key;                             /* hashkey computed from utf-text     */
 	u4 slot;                            /* slot in hashtable                  */
 	utf *u;                             /* hashtable element                  */
 	u2 i;
 
-#ifdef STATISTICS
+	Mutex_lock(hashtable_utf->mutex);   /* released again before each return  */
+
+#if defined(ENABLE_STATISTICS)
 	if (opt_stat)
 		count_utf_new++;
 #endif
 
 	key  = utf_hashkey(text, length);
-	slot = key & (utf_hash.size - 1);
-	u    = utf_hash.ptr[slot];
+	slot = key & (hashtable_utf->size - 1);
+	u    = hashtable_utf->ptr[slot];
 
 	/* search external hash chain for utf-symbol */
+
 	while (u) {
 		if (u->blength == length) {
-
 			/* compare text of hashtable elements */
+
 			for (i = 0; i < length; i++)
-				if (text[i] != u->text[i]) goto nomatch;
+				if (text[i] != u->text[i])
+					goto nomatch;
 			
-#ifdef STATISTICS
+#if defined(ENABLE_STATISTICS)
 			if (opt_stat)
 				count_utf_new_found++;
 #endif
 
 			/* symbol found in hashtable */
+
+			Mutex_unlock(hashtable_utf->mutex); /* unlock before the early return */
+
 			return u;
 		}
+
 	nomatch:
 		u = u->hashlink; /* next element in external chain */
 	}
 
-#ifdef STATISTICS
-	if (opt_stat)
-		count_utf_len += sizeof(utf) + length;
-#endif
-
 	/* location in hashtable found, create new utf element */
+
 	u = NEW(utf);
+
 	u->blength  = length;               /* length in bytes of utfstring       */
-	u->hashlink = utf_hash.ptr[slot];   /* link in external hashchain         */
+	u->hashlink = hashtable_utf->ptr[slot]; /* link in external hashchain     */
 	u->text     = mem_alloc(length + 1);/* allocate memory for utf-text       */
+
 	memcpy(u->text, text, length);      /* copy utf-text                      */
 	u->text[length] = '\0';
-	utf_hash.ptr[slot] = u;             /* insert symbol into table           */
 
-	utf_hash.entries++;                 /* update number of entries           */
+#if defined(ENABLE_STATISTICS)
+	if (opt_stat)
+		count_utf_len += sizeof(utf) + length + 1; /* + 1: '\0' terminator */
+#endif
 
-	if (utf_hash.entries > (utf_hash.size * 2)) {
+	hashtable_utf->ptr[slot] = u;       /* insert symbol into table           */
+	hashtable_utf->entries++;           /* update number of entries           */
 
-        /* reorganization of hashtable, average length of 
-           the external chains is approx. 2                */  
+	if (hashtable_utf->entries > (hashtable_utf->size * 2)) {
 
-		u4 i;
-		utf *u;
-		hashtable newhash; /* the new hashtable */
+        /* grow the table once the external chains hold more than two
+           entries on average (entries > 2 * size) */
+
+		hashtable *newhash;                              /* the new hashtable */
+		u4         i;
+		utf       *u;        /* shadows the outer u, which still holds the new symbol */
+		utf       *nextu;
+		u4         slot;
 
 		/* create new hashtable, double the size */
-		init_hashtable(&newhash, utf_hash.size * 2);
-		newhash.entries = utf_hash.entries;
 
-#ifdef STATISTICS
+		newhash = hashtable_resize(hashtable_utf, hashtable_utf->size * 2);
+
+#if defined(ENABLE_STATISTICS)
 		if (opt_stat)
-			count_utf_len += sizeof(utf*) * utf_hash.size;
+			count_utf_len += sizeof(utf*) * hashtable_utf->size;
 #endif
 
 		/* transfer elements to new hashtable */
-		for (i = 0; i < utf_hash.size; i++) {
-			u = (utf *) utf_hash.ptr[i];
+
+		for (i = 0; i < hashtable_utf->size; i++) {
+			u = hashtable_utf->ptr[i];
+
 			while (u) {
-				utf *nextu = u->hashlink;
-				u4 slot = utf_hashkey(u->text, u->blength) & (newhash.size - 1);
+				nextu = u->hashlink;
+				slot  = utf_hashkey(u->text, u->blength) & (newhash->size - 1);
 						
-				u->hashlink = (utf *) newhash.ptr[slot];
-				newhash.ptr[slot] = u;
+				u->hashlink = (utf *) newhash->ptr[slot];
+				newhash->ptr[slot] = u;
 
 				/* follow link in external hash chain */
+
 				u = nextu;
 			}
 		}
 	
 		/* dispose old table */
-		MFREE(utf_hash.ptr, void*, utf_hash.size);
-		utf_hash = newhash;
+
+		hashtable_free(hashtable_utf);
+
+		hashtable_utf = newhash;
 	}
+	/* NOTE(review): after a resize this is newhash's mutex - confirm it is the one locked above */
+	Mutex_unlock(hashtable_utf->mutex);
+
 	return u;
 }
 
@@ -463,7 +824,7 @@ utf *utf_new_u2(u2 *unicode_pos, u4 unicode_length, bool isclassname)
 	u4 left;                        /* unicode characters left                */
 	u4 buflength;                   /* utf length in bytes of the u2 array    */
 	utf *result;                    /* resulting utf-string                   */
-	int i;    	
+	int i;
 
 	/* determine utf length in bytes and allocate memory */
 
@@ -566,6 +927,9 @@ utf *utf_new_char_classname(const char *text)
    Read the next unicode character from the utf string and increment
    the utf-string pointer accordingly.
 
+   CAUTION: This function is unsafe for input that was not checked 
+            by is_valid_utf!
+
 *******************************************************************************/
 
 u2 utf_nextu2(char **utf_ptr)
@@ -614,24 +978,105 @@ u2 utf_nextu2(char **utf_ptr)
 }
 
 
-/* utf_strlen ******************************************************************
+/* utf_bytes *******************************************************************
+
+   Determine the number of bytes (a.k.a. octets) in the utf string.
+
+   IN:
+      u............utf string
 
-   Determine number of unicode characters in the utf string.
+   OUT:
+      The number of octets of this utf string.
+	  There is _no_ terminating zero included in this count.
 
 *******************************************************************************/
 
-u4 utf_strlen(utf *u)
+u4 utf_bytes(utf *u)
+{
+	return u->blength;                  /* stored byte length; u is not NULL-checked */
+}
+
+
+/* utf_get_number_of_u2s_for_buffer ********************************************
+
+   Determine number of UTF-16 u2s in the given UTF-8 buffer
+
+   CAUTION: This function is unsafe for input that was not checked 
+            by is_valid_utf!
+
+   CAUTION: Use this function *only* when you want to convert a UTF-8 buffer
+   to an array of u2s (UTF-16) and want to know how many of them you will get.
+   All other uses of this function are probably wrong.
+
+   IN:
+      buffer........points to first char in buffer
+	  blength.......number of _bytes_ in the buffer
+
+   OUT:
+      the number of u2s needed to hold this string in UTF-16 encoding.
+	  There is _no_ terminating zero included in this count.
+
+   NOTE: Unlike utf_get_number_of_u2s, this function never throws an
+   exception.
+
+*******************************************************************************/
+
+u4 utf_get_number_of_u2s_for_buffer(const char *buffer, u4 blength)
+{
+	const char *endpos;                 /* points behind utf string           */
+	const char *utf_ptr;                /* current position in utf text       */
+	u4 len = 0;                         /* number of u2s counted so far       */
+
+	utf_ptr = buffer;
+	endpos = utf_ptr + blength;
+
+	while (utf_ptr < endpos) {
+		len++;
+		/* read one u2; utf_nextu2 advances utf_ptr (cast drops const: it takes char **) */
+		utf_nextu2((char **)&utf_ptr);
+	}
+
+	assert(utf_ptr == endpos);          /* last character must end exactly at endpos */
+
+	return len;
+}
+
+
+/* utf_get_number_of_u2s *******************************************************
+
+   Determine number of UTF-16 u2s in the utf string.
+
+   CAUTION: This function is unsafe for input that was not checked 
+            by is_valid_utf!
+
+   CAUTION: Use this function *only* when you want to convert a utf string
+   to an array of u2s and want to know how many of them you will get.
+   All other uses of this function are probably wrong.
+
+   IN:
+      u............utf string
+
+   OUT:
+      the number of u2s needed to hold this string in UTF-16 encoding.
+	  There is _no_ terminating zero included in this count.
+	  XXX 0 if a NullPointerException has been thrown (see below)
+
+*******************************************************************************/
+
+u4 utf_get_number_of_u2s(utf *u)
 {
 	char *endpos;                       /* points behind utf string           */
 	char *utf_ptr;                      /* current position in utf text       */
 	u4 len = 0;                         /* number of unicode characters       */
 
-	if (!u) {
-		*exceptionptr = new_nullpointerexception();
+	/* XXX this is probably not checked by most callers! Review this after */
+	/* the invalid uses of this function have been eliminated */
+	if (u == NULL) {
+		exceptions_throw_nullpointerexception();
 		return 0;
 	}
 
-	endpos = utf_end(u);
+	endpos = UTF_END(u);
 	utf_ptr = u->text;
 
 	while (utf_ptr < endpos) {
@@ -640,15 +1085,319 @@ u4 utf_strlen(utf *u)
 		utf_nextu2(&utf_ptr);
 	}
 
-	if (utf_ptr != endpos)
+	if (utf_ptr != endpos) {
 		/* string ended abruptly */
-		throw_cacao_exception_exit(string_java_lang_InternalError,
-								   "Illegal utf8 string");
+		exceptions_throw_internalerror("Illegal utf8 string");
+		return 0;
+	}
+
+	return len;
+}
+
+
+/* utf8_safe_number_of_u2s *****************************************************
+
+   Determine number of UTF-16 u2s needed for decoding the given UTF-8 string.
+   (For invalid UTF-8 the U+fffd replacement character will be counted.)
+
+   This function is safe even for invalid UTF-8 strings.
+
+   IN:
+      text..........zero-terminated(!) UTF-8 string (may be invalid)
+	                must NOT be NULL
+	  nbytes........strlen(text). (This is needed to completely emulate
+	                the RI).
+
+   OUT:
+      the number of u2s needed to hold this string in UTF-16 encoding.
+	  There is _no_ terminating zero included in this count.
+
+*******************************************************************************/
+
+s4 utf8_safe_number_of_u2s(const char *text, s4 nbytes) {
+	register const unsigned char *t;
+	register s4 byte;
+	register s4 len;
+	register const unsigned char *tlimit;
+	s4 byte1;
+	s4 byte2;
+	s4 byte3;
+	s4 value;
+	s4 skip;
+
+	assert(text);
+	assert(nbytes >= 0);
+
+	len = 0;
+	t = (const unsigned char *) text;
+	tlimit = t + nbytes;
+
+	/* CAUTION: Keep this code in sync with utf8_safe_convert_to_u2s! */
+
+	while (1) {
+		byte = *t++;
+
+		if (byte & 0x80) {
+			/* highest bit set, non-ASCII character */
+
+			if ((byte & 0xe0) == 0xc0) {
+				/* 2-byte: should be 110..... 10...... ? */
+
+				if ((*t++ & 0xc0) == 0x80)
+					; /* valid 2-byte */
+				else
+					t--; /* invalid */
+			}
+			else if ((byte & 0xf0) == 0xe0) {
+				/* 3-byte: should be 1110.... 10...... 10...... */
+				/*                            ^t                */
+
+				if (t + 2 > tlimit)
+					return len + 1; /* invalid, stop here */
+
+				if ((*t++ & 0xc0) == 0x80) {
+					if ((*t++ & 0xc0) == 0x80)
+						; /* valid 3-byte */
+					else
+						t--; /* invalid */
+				}
+				else
+					t--; /* invalid */
+			}
+			else if ((byte & 0xf8) == 0xf0) {
+				/* 4-byte: should be 11110... 10...... 10...... 10...... */
+				/*                            ^t                         */
+
+				if (t + 3 > tlimit)
+					return len + 1; /* invalid, stop here */
+
+				if (((byte1 = *t++) & 0xc0) == 0x80) {
+					if (((byte2 = *t++) & 0xc0) == 0x80) {
+						if (((byte3 = *t++) & 0xc0) == 0x80) {
+							/* valid 4-byte UTF-8? */
+							value = ((byte  & 0x07) << 18)
+								  | ((byte1 & 0x3f) << 12)
+								  | ((byte2 & 0x3f) <<  6)
+								  | ((byte3 & 0x3f)      );
+
+							if (value > 0x10FFFF)
+								; /* invalid */
+							else if (value > 0xFFFF)
+								len += 1; /* we need surrogates */
+							else
+								; /* 16bit suffice */
+						}
+						else
+							t--; /* invalid */
+					}
+					else
+						t--; /* invalid */
+				}
+				else
+					t--; /* invalid */
+			}
+			else if ((byte & 0xfc) == 0xf8) {
+				/* invalid 5-byte */
+				if (t + 4 > tlimit)
+					return len + 1; /* invalid, stop here */
+
+				skip = 4;
+				for (; skip && ((*t & 0xc0) == 0x80); --skip)
+					t++;
+			}
+			else if ((byte & 0xfe) == 0xfc) {
+				/* invalid 6-byte */
+				if (t + 5 > tlimit)
+					return len + 1; /* invalid, stop here */
+
+				skip = 5;
+				for (; skip && ((*t & 0xc0) == 0x80); --skip)
+					t++;
+			}
+			else
+				; /* invalid */
+		}
+		else {
+			/* NUL */
+
+			if (byte == 0)
+				break;
+
+			/* ASCII character, common case */
+		}
+
+		len++;
+	}
 
 	return len;
 }
 
 
+/* utf8_safe_convert_to_u2s ****************************************************
+
+   Convert the given UTF-8 string to UTF-16 into a pre-allocated buffer.
+   (Invalid UTF-8 will be replaced with the U+fffd replacement character.)
+   Use utf8_safe_number_of_u2s to determine the number of u2s to allocate.
+
+   This function is safe even for invalid UTF-8 strings.
+
+   IN:
+      text..........zero-terminated(!) UTF-8 string (may be invalid)
+	                must NOT be NULL
+	  nbytes........strlen(text). (This is needed to completely emulate
+	  				the RI).
+	  buffer........a preallocated array of u2s to receive the decoded
+	                string. Use utf8_safe_number_of_u2s to get the
+					required number of u2s for allocating this.
+
+*******************************************************************************/
+
+#define UNICODE_REPLACEMENT  0xfffd
+
+void utf8_safe_convert_to_u2s(const char *text, s4 nbytes, u2 *buffer) {
+	register const unsigned char *t;
+	register s4 byte;
+	register const unsigned char *tlimit;
+	s4 byte1;
+	s4 byte2;
+	s4 byte3;
+	s4 value;
+	s4 skip;
+
+	assert(text);
+	assert(nbytes >= 0);
+
+	t = (const unsigned char *) text;
+	tlimit = t + nbytes;
+
+	/* CAUTION: Keep this code in sync with utf8_safe_number_of_u2s! */
+
+	while (1) {
+		byte = *t++;
+
+		if (byte & 0x80) {
+			/* highest bit set, non-ASCII character */
+
+			if ((byte & 0xe0) == 0xc0) {
+				/* 2-byte: should be 110..... 10...... */
+
+				if (((byte1 = *t++) & 0xc0) == 0x80) {
+					/* valid 2-byte UTF-8 */
+					*buffer++ = ((byte  & 0x1f) << 6)
+							  | ((byte1 & 0x3f)     );
+				}
+				else {
+					*buffer++ = UNICODE_REPLACEMENT;
+					t--;
+				}
+			}
+			else if ((byte & 0xf0) == 0xe0) {
+				/* 3-byte: should be 1110.... 10...... 10...... */
+
+				if (t + 2 > tlimit) {
+					*buffer++ = UNICODE_REPLACEMENT;
+					return;
+				}
+
+				if (((byte1 = *t++) & 0xc0) == 0x80) {
+					if (((byte2 = *t++) & 0xc0) == 0x80) {
+						/* valid 3-byte UTF-8 */
+						*buffer++ = ((byte  & 0x0f) << 12)
+								  | ((byte1 & 0x3f) <<  6)
+								  | ((byte2 & 0x3f)      );
+					}
+					else {
+						*buffer++ = UNICODE_REPLACEMENT;
+						t--;
+					}
+				}
+				else {
+					*buffer++ = UNICODE_REPLACEMENT;
+					t--;
+				}
+			}
+			else if ((byte & 0xf8) == 0xf0) {
+				/* 4-byte: should be 11110... 10...... 10...... 10...... */
+
+				if (t + 3 > tlimit) {
+					*buffer++ = UNICODE_REPLACEMENT;
+					return;
+				}
+
+				if (((byte1 = *t++) & 0xc0) == 0x80) {
+					if (((byte2 = *t++) & 0xc0) == 0x80) {
+						if (((byte3 = *t++) & 0xc0) == 0x80) {
+							/* valid 4-byte UTF-8? */
+							value = ((byte  & 0x07) << 18)
+								  | ((byte1 & 0x3f) << 12)
+								  | ((byte2 & 0x3f) <<  6)
+								  | ((byte3 & 0x3f)      );
+
+							if (value > 0x10FFFF) {
+								*buffer++ = UNICODE_REPLACEMENT;
+							}
+							else if (value > 0xFFFF) {
+								/* we need surrogates */
+								*buffer++ = 0xd800 | ((value >> 10) - 0x40);
+								*buffer++ = 0xdc00 | (value & 0x03ff);
+							}
+							else
+								*buffer++ = value; /* 16bit suffice */
+						}
+						else {
+							*buffer++ = UNICODE_REPLACEMENT;
+							t--;
+						}
+					}
+					else {
+						*buffer++ = UNICODE_REPLACEMENT;
+						t--;
+					}
+				}
+				else {
+					*buffer++ = UNICODE_REPLACEMENT;
+					t--;
+				}
+			}
+			else if ((byte & 0xfc) == 0xf8) {
+				if (t + 4 > tlimit) {
+					*buffer++ = UNICODE_REPLACEMENT;
+					return;
+				}
+
+				skip = 4;
+				for (; skip && ((*t & 0xc0) == 0x80); --skip)
+					t++;
+				*buffer++ = UNICODE_REPLACEMENT;
+			}
+			else if ((byte & 0xfe) == 0xfc) {
+				if (t + 5 > tlimit) {
+					*buffer++ = UNICODE_REPLACEMENT;
+					return;
+				}
+
+				skip = 5;
+				for (; skip && ((*t & 0xc0) == 0x80); --skip)
+					t++;
+				*buffer++ = UNICODE_REPLACEMENT;
+			}
+			else
+				*buffer++ = UNICODE_REPLACEMENT;
+		}
+		else {
+			/* NUL */
+
+			if (byte == 0)
+				break;
+
+			/* ASCII character, common case */
+
+			*buffer++ = byte;
+		}
+	}
+}
+
+
 /* u2_utflength ****************************************************************
 
    Returns the utf length in bytes of a u2 array.
@@ -678,77 +1427,172 @@ u4 u2_utflength(u2 *text, u4 u2_length)
 }
 
 
-/* utf_display *****************************************************************
+/* utf_copy ********************************************************************
+
+   Copy the given utf string byte-for-byte to a buffer.
+
+   IN:
+      buffer.......the buffer
+	  u............the utf string
+
+*******************************************************************************/
+
+void utf_copy(char *buffer, utf *u)
+{
+	/* our utf strings are zero-terminated (done by utf_new) */
+	MCOPY(buffer, u->text, char, u->blength + 1);
+}
+
+
+/* utf_cat *********************************************************************
+
+   Append the given utf string byte-for-byte to a buffer.
+
+   IN:
+      buffer.......the buffer
+	  u............the utf string
+
+*******************************************************************************/
+
+void utf_cat(char *buffer, utf *u)
+{
+	/* our utf strings are zero-terminated (done by utf_new) */
+	MCOPY(buffer + strlen(buffer), u->text, char, u->blength + 1);
+}
+
+
+/* utf_copy_classname **********************************************************
+
+   Copy the given utf classname byte-for-byte to a buffer.
+   '/' is replaced by '.'
+
+   IN:
+      buffer.......the buffer
+	  u............the utf string
+
+*******************************************************************************/
+
+void utf_copy_classname(char *buffer, utf *u)
+{
+	char *bufptr;
+	char *srcptr;
+	char *endptr;
+	char ch;
+
+	bufptr = buffer;
+	srcptr = u->text;
+	endptr = UTF_END(u) + 1; /* utfs are zero-terminated by utf_new */
+
+	while (srcptr != endptr) {
+		ch = *srcptr++;
+		if (ch == '/')
+			ch = '.';
+		*bufptr++ = ch;
+	}
+}
+
+
+/* utf_cat_classname ***********************************************************
+
+   Append the given utf classname byte-for-byte to a buffer.
+   '/' is replaced by '.'
+
+   IN:
+      buffer.......the buffer
+	  u............the utf string
+
+*******************************************************************************/
+
+void utf_cat_classname(char *buffer, utf *u)
+{
+	utf_copy_classname(buffer + strlen(buffer), u);
+}
+
+/* utf_display_printable_ascii *************************************************
 
    Write utf symbol to stdout (for debugging purposes).
+   Non-printable and non-ASCII characters are printed as '?'.
 
 *******************************************************************************/
 
-void utf_display(utf *u)
+void utf_display_printable_ascii(utf *u)
 {
 	char *endpos;                       /* points behind utf string           */
 	char *utf_ptr;                      /* current position in utf text       */
 
-	if (!u) {
+	if (u == NULL) {
 		printf("NULL");
 		fflush(stdout);
 		return;
 	}
 
-	endpos = utf_end(u);
+	endpos = UTF_END(u);
 	utf_ptr = u->text;
 
 	while (utf_ptr < endpos) {
-		/* read next unicode character */                
+		/* read next unicode character */
+
 		u2 c = utf_nextu2(&utf_ptr);
-		if (c >= 32 && c <= 127) printf("%c", c);
-		else printf("?");
+
+		if ((c >= 32) && (c <= 127))
+			printf("%c", c);
+		else
+			printf("?");
 	}
 
 	fflush(stdout);
 }
 
 
-/* utf_display_classname *******************************************************
+/* utf_display_printable_ascii_classname ***************************************
 
    Write utf symbol to stdout with `/' converted to `.' (for debugging
    purposes).
+   Non-printable and non-ASCII characters are printed as '?'.
 
 *******************************************************************************/
 
-void utf_display_classname(utf *u)
+void utf_display_printable_ascii_classname(utf *u)
 {
 	char *endpos;                       /* points behind utf string           */
 	char *utf_ptr;                      /* current position in utf text       */
 
-	if (!u) {
+	if (u == NULL) {
 		printf("NULL");
 		fflush(stdout);
 		return;
 	}
 
-	endpos = utf_end(u);
+	endpos = UTF_END(u);
 	utf_ptr = u->text;
 
 	while (utf_ptr < endpos) {
-		/* read next unicode character */                
+		/* read next unicode character */
+
 		u2 c = utf_nextu2(&utf_ptr);
-		if (c == '/') c = '.';
-		if (c >= 32 && c <= 127) printf("%c", c);
-		else printf("?");
+
+		if (c == '/')
+			c = '.';
+
+		if ((c >= 32) && (c <= 127))
+			printf("%c", c);
+		else
+			printf("?");
 	}
 
 	fflush(stdout);
 }
 
 
-/* utf_sprint ******************************************************************
+/* utf_sprint_convert_to_latin1 ************************************************
 	
    Write utf symbol into c-string (for debugging purposes).
+   Characters are converted to 8-bit Latin-1, non-Latin-1 characters yield
+   invalid results.
 
 *******************************************************************************/
 
-void utf_sprint(char *buffer, utf *u)
+void utf_sprint_convert_to_latin1(char *buffer, utf *u)
 {
 	char *endpos;                       /* points behind utf string           */
 	char *utf_ptr;                      /* current position in utf text       */
@@ -759,7 +1603,7 @@ void utf_sprint(char *buffer, utf *u)
 		return;
 	}
 
-	endpos = utf_end(u);
+	endpos = UTF_END(u);
 	utf_ptr = u->text;
 
 	while (utf_ptr < endpos) 
@@ -771,14 +1615,16 @@ void utf_sprint(char *buffer, utf *u)
 }
 
 
-/* utf_sprint_classname ********************************************************
+/* utf_sprint_convert_to_latin1_classname **************************************
 	
    Write utf symbol into c-string with `/' converted to `.' (for debugging
    purposes).
+   Characters are converted to 8-bit Latin-1, non-Latin-1 characters yield
+   invalid results.
 
 *******************************************************************************/
 
-void utf_sprint_classname(char *buffer, utf *u)
+void utf_sprint_convert_to_latin1_classname(char *buffer, utf *u)
 {
 	char *endpos;                       /* points behind utf string           */
 	char *utf_ptr;                      /* current position in utf text       */
@@ -789,7 +1635,7 @@ void utf_sprint_classname(char *buffer, utf *u)
 		return;
 	}
 
-	endpos = utf_end(u);
+	endpos = UTF_END(u);
 	utf_ptr = u->text;
 
 	while (utf_ptr < endpos) {
@@ -804,37 +1650,42 @@ void utf_sprint_classname(char *buffer, utf *u)
 }
 
 
-/* utf_strcat ******************************************************************
+/* utf_strcat_convert_to_latin1 ************************************************
 	
    Like libc strcat, but uses an utf8 string.
+   Characters are converted to 8-bit Latin-1, non-Latin-1 characters yield
+   invalid results.
 
 *******************************************************************************/
 
-void utf_strcat(char *buffer, utf *u)
+void utf_strcat_convert_to_latin1(char *buffer, utf *u)
 {
-	utf_sprint(buffer + strlen(buffer), u);
+	utf_sprint_convert_to_latin1(buffer + strlen(buffer), u);
 }
 
 
-/* utf_strcat_classname ********************************************************
+/* utf_strcat_convert_to_latin1_classname **************************************
 	
    Like libc strcat, but uses an utf8 string.
+   Characters are converted to 8-bit Latin-1, non-Latin-1 characters yield
+   invalid results.
 
 *******************************************************************************/
 
-void utf_strcat_classname(char *buffer, utf *u)
+void utf_strcat_convert_to_latin1_classname(char *buffer, utf *u)
 {
-	utf_sprint_classname(buffer + strlen(buffer), u);
+	utf_sprint_convert_to_latin1_classname(buffer + strlen(buffer), u);
 }
 
 
-/* utf_fprint ******************************************************************
+/* utf_fprint_printable_ascii **************************************************
 	
    Write utf symbol into file.
+   Non-printable and non-ASCII characters are printed as '?'.
 
 *******************************************************************************/
 
-void utf_fprint(FILE *file, utf *u)
+void utf_fprint_printable_ascii(FILE *file, utf *u)
 {
 	char *endpos;                       /* points behind utf string           */
 	char *utf_ptr;                      /* current position in utf text       */
@@ -842,7 +1693,7 @@ void utf_fprint(FILE *file, utf *u)
 	if (!u)
 		return;
 
-	endpos = utf_end(u);
+	endpos = UTF_END(u);
 	utf_ptr = u->text;
 
 	while (utf_ptr < endpos) { 
@@ -855,13 +1706,14 @@ void utf_fprint(FILE *file, utf *u)
 }
 
 
-/* utf_fprint_classname ********************************************************
+/* utf_fprint_printable_ascii_classname ****************************************
 	
    Write utf symbol into file with `/' converted to `.'.
+   Non-printable and non-ASCII characters are printed as '?'.
 
 *******************************************************************************/
 
-void utf_fprint_classname(FILE *file, utf *u)
+void utf_fprint_printable_ascii_classname(FILE *file, utf *u)
 {
 	char *endpos;                       /* points behind utf string           */
 	char *utf_ptr;                      /* current position in utf text       */
@@ -869,7 +1721,7 @@ void utf_fprint_classname(FILE *file, utf *u)
     if (!u)
 		return;
 
-	endpos = utf_end(u);
+	endpos = UTF_END(u);
 	utf_ptr = u->text;
 
 	while (utf_ptr < endpos) { 
@@ -892,7 +1744,7 @@ void utf_fprint_classname(FILE *file, utf *u)
 
 *******************************************************************************/
 
-static unsigned long min_codepoint[6] = {0,1L<<7,1L<<11,1L<<16,1L<<21,1L<<26};
+/*  static unsigned long min_codepoint[6] = {0,1L<<7,1L<<11,1L<<16,1L<<21,1L<<26}; */
 
 bool is_valid_utf(char *utf_ptr, char *end_pos)
 {
@@ -935,11 +1787,8 @@ bool is_valid_utf(char *utf_ptr, char *end_pos)
 		} else {
 			/* Sun Java seems to allow overlong UTF-8 encodings */
 			
-			if (v < min_codepoint[len]) { /* overlong UTF-8 */
-				if (!opt_liberalutf)
-					fprintf(stderr,"WARNING: Overlong UTF-8 sequence found.\n");
-				/* XXX change this to panic? */
-			}
+			/* if (v < min_codepoint[len]) */
+				/* XXX throw exception? */
 		}
 
 		/* surrogates in UTF-8 seem to be allowed in Java classfiles */
@@ -983,7 +1832,7 @@ bool is_valid_name(char *utf_ptr, char *end_pos)
 
 bool is_valid_name_utf(utf *u)
 {
-	return is_valid_name(u->text,utf_end(u));
+	return is_valid_name(u->text, UTF_END(u));
 }
 
 
@@ -995,6 +1844,7 @@ bool is_valid_name_utf(utf *u)
 
 *******************************************************************************/
 
+#if !defined(NDEBUG)
 void utf_show(void)
 {
 
@@ -1006,29 +1856,30 @@ void utf_show(void)
 	u4 beyond_limit = 0;         /* number of utf-symbols in chains with length>=CHAIN_LIMIT-1 */
 	u4 i;
 
-	printf ("UTF-HASH:\n");
+	printf("UTF-HASH:\n");
 
 	/* show element of utf-hashtable */
-	for (i=0; i<utf_hash.size; i++) {
-		utf *u = utf_hash.ptr[i];
+
+	for (i = 0; i < hashtable_utf->size; i++) {
+		utf *u = hashtable_utf->ptr[i];
+
 		if (u) {
-			printf ("SLOT %d: ", (int) i);
+			printf("SLOT %d: ", (int) i);
+
 			while (u) {
-				printf ("'");
-				utf_display (u);
-				printf ("' ");
+				printf("'");
+				utf_display_printable_ascii(u);
+				printf("' ");
 				u = u->hashlink;
 			}	
-			printf ("\n");
+			printf("\n");
 		}
-		
 	}
 
-	printf ("UTF-HASH: %d slots for %d entries\n", 
-			(int) utf_hash.size, (int) utf_hash.entries );
-
+	printf("UTF-HASH: %d slots for %d entries\n", 
+		   (int) hashtable_utf->size, (int) hashtable_utf->entries );
 
-	if (utf_hash.entries == 0)
+	if (hashtable_utf->entries == 0)
 		return;
 
 	printf("chains:\n  chainlength    number of chains    %% of utfstrings\n");
@@ -1037,9 +1888,9 @@ void utf_show(void)
 		chain_count[i]=0;
 
 	/* count numbers of hashchains according to their length */
-	for (i=0; i<utf_hash.size; i++) {
+	for (i=0; i<hashtable_utf->size; i++) {
 		  
-		utf *u = (utf*) utf_hash.ptr[i];
+		utf *u = (utf*) hashtable_utf->ptr[i];
 		u4 chain_length = 0;
 
 		/* determine chainlength */
@@ -1067,16 +1918,17 @@ void utf_show(void)
 
 	/* display results */  
 	for (i=1;i<CHAIN_LIMIT-1;i++) 
-		printf("       %2d %17d %18.2f%%\n",i,chain_count[i],(((float) chain_count[i]*i*100)/utf_hash.entries));
+		printf("       %2d %17d %18.2f%%\n",i,chain_count[i],(((float) chain_count[i]*i*100)/hashtable_utf->entries));
 	  
-	printf("     >=%2d %17d %18.2f%%\n",CHAIN_LIMIT-1,chain_count[CHAIN_LIMIT-1],((float) beyond_limit*100)/utf_hash.entries);
+	printf("     >=%2d %17d %18.2f%%\n",CHAIN_LIMIT-1,chain_count[CHAIN_LIMIT-1],((float) beyond_limit*100)/hashtable_utf->entries);
 
 
 	printf("max. chainlength:%5d\n",max_chainlength);
 
 	/* avg. chainlength = sum of chainlengths / number of chains */
-	printf("avg. chainlength:%5.2f\n",(float) sum_chainlength / (utf_hash.size-chain_count[0]));
+	printf("avg. chainlength:%5.2f\n",(float) sum_chainlength / (hashtable_utf->size-chain_count[0]));
 }
+#endif /* !defined(NDEBUG) */
 
 
 /*
@@ -1090,4 +1942,5 @@ void utf_show(void)
  * c-basic-offset: 4
  * tab-width: 4
  * End:
+ * vim:noexpandtab:sw=4:ts=4:
  */