-/* src/vm/utf.c - utf functions
+/* src/vm/utf8.c - utf8 string functions
- Copyright (C) 1996-2005 R. Grafl, A. Krall, C. Kruegel, C. Oates,
- R. Obermaisser, M. Platter, M. Probst, S. Ring, E. Steiner,
- C. Thalinger, D. Thuernbeck, P. Tomsich, C. Ullrich, J. Wenninger,
- Institut f. Computersprachen - TU Wien
+ Copyright (C) 1996-2005, 2006, 2007, 2008
+ CACAOVM - Verein zur Foerderung der freien virtuellen Maschine CACAO
This file is part of CACAO.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- 02111-1307, USA.
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
- Contact: cacao@complang.tuwien.ac.at
+*/
- Authors: Reinhard Grafl
- Changes: Mark Probst
- Andreas Krall
- Christian Thalinger
+#include "config.h"
- $Id: utf8.c 2061 2005-03-23 11:10:33Z twisti $
+#include <string.h>
+#include <assert.h>
-*/
+#include "vm/types.h"
+#include "mm/memory.hpp"
-#include <string.h>
+#include "threads/mutex.hpp"
+
+#include "toolbox/hashtable.h"
-#include "mm/memory.h"
-#include "vm/exceptions.h"
+#include "vm/exceptions.hpp"
#include "vm/options.h"
-#include "vm/statistics.h"
-#include "vm/tables.h"
+
+#if defined(ENABLE_STATISTICS)
+# include "vm/statistics.h"
+#endif
+
#include "vm/utf8.h"
-hashtable utf_hash; /* hashtable for utf8-symbols */
+/* global variables ***********************************************************/
+
+/* hashsize must be power of 2 */
+
+#define HASHTABLE_UTF_SIZE 16384 /* initial size of utf-hash */
+
+hashtable *hashtable_utf; /* hashtable for utf8-symbols */
/* utf-symbols for pointer comparison of frequently used strings **************/
-utf *utf_java_lang_Object; /* java/lang/Object */
+utf *utf_java_lang_Object;
utf *utf_java_lang_Class;
utf *utf_java_lang_ClassLoader;
utf *utf_java_lang_Cloneable;
utf *utf_java_lang_SecurityManager;
utf *utf_java_lang_String;
-utf *utf_java_lang_System;
+utf *utf_java_lang_ThreadGroup;
+utf *utf_java_lang_ref_SoftReference;
+utf *utf_java_lang_ref_WeakReference;
+utf *utf_java_lang_ref_PhantomReference;
utf *utf_java_io_Serializable;
utf *utf_java_lang_Throwable;
-utf *utf_java_lang_VMThrowable;
-utf *utf_java_lang_Exception;
utf *utf_java_lang_Error;
+
+utf *utf_java_lang_AbstractMethodError;
+utf *utf_java_lang_ClassCircularityError;
+utf *utf_java_lang_ClassFormatError;
+utf *utf_java_lang_ExceptionInInitializerError;
+utf *utf_java_lang_IncompatibleClassChangeError;
+utf *utf_java_lang_InstantiationError;
+utf *utf_java_lang_InternalError;
+utf *utf_java_lang_LinkageError;
+utf *utf_java_lang_NoClassDefFoundError;
+utf *utf_java_lang_NoSuchFieldError;
+utf *utf_java_lang_NoSuchMethodError;
utf *utf_java_lang_OutOfMemoryError;
+utf *utf_java_lang_UnsatisfiedLinkError;
+utf *utf_java_lang_UnsupportedClassVersionError;
+utf *utf_java_lang_VerifyError;
+utf *utf_java_lang_VirtualMachineError;
+
+utf *utf_java_lang_Exception;
+utf *utf_java_lang_ArithmeticException;
+utf *utf_java_lang_ArrayIndexOutOfBoundsException;
+utf *utf_java_lang_ArrayStoreException;
+utf *utf_java_lang_ClassCastException;
+utf *utf_java_lang_ClassNotFoundException;
+utf *utf_java_lang_CloneNotSupportedException;
+utf *utf_java_lang_IllegalAccessException;
+utf *utf_java_lang_IllegalArgumentException;
+utf *utf_java_lang_IllegalMonitorStateException;
+utf *utf_java_lang_InstantiationException;
+utf *utf_java_lang_InterruptedException;
+utf *utf_java_lang_NegativeArraySizeException;
+utf *utf_java_lang_NullPointerException;
+utf *utf_java_lang_RuntimeException;
+utf *utf_java_lang_StringIndexOutOfBoundsException;
+
+utf *utf_java_lang_reflect_InvocationTargetException;
+
+utf *utf_java_security_PrivilegedActionException;
+
+#if defined(ENABLE_JAVASE)
utf* utf_java_lang_Void;
+#endif
+
utf* utf_java_lang_Boolean;
utf* utf_java_lang_Byte;
utf* utf_java_lang_Character;
utf* utf_java_lang_Float;
utf* utf_java_lang_Double;
+#if defined(ENABLE_JAVASE)
+utf *utf_java_lang_StackTraceElement;
+utf *utf_java_lang_reflect_Constructor;
+utf *utf_java_lang_reflect_Field;
+utf *utf_java_lang_reflect_Method;
+
+# if defined(WITH_JAVA_RUNTIME_LIBRARY_GNU_CLASSPATH)
+utf *utf_java_lang_reflect_VMConstructor;
+utf *utf_java_lang_reflect_VMField;
+utf *utf_java_lang_reflect_VMMethod;
+# endif
+
utf *utf_java_util_Vector;
+#endif
utf *utf_InnerClasses; /* InnerClasses */
utf *utf_ConstantValue; /* ConstantValue */
utf *utf_LineNumberTable; /* LineNumberTable */
utf *utf_SourceFile; /* SourceFile */
+#if defined(ENABLE_JAVASE)
+utf *utf_EnclosingMethod;
+utf *utf_Signature;
+utf *utf_StackMapTable;
+
+# if defined(ENABLE_JVMTI)
+utf *utf_LocalVariableTable;
+# endif
+
+# if defined(ENABLE_ANNOTATIONS)
+utf *utf_RuntimeVisibleAnnotations; /* RuntimeVisibleAnnotations */
+utf *utf_RuntimeInvisibleAnnotations; /* RuntimeInvisibleAnnotations */
+utf *utf_RuntimeVisibleParameterAnnotations; /* RuntimeVisibleParameterAnnotations */
+utf *utf_RuntimeInvisibleParameterAnnotations; /* RuntimeInvisibleParameterAnnotations */
+utf *utf_AnnotationDefault; /* AnnotationDefault */
+# endif
+#endif
+
utf *utf_init; /* <init> */
utf *utf_clinit; /* <clinit> */
+utf *utf_clone; /* clone */
utf *utf_finalize; /* finalize */
+utf *utf_invoke;
+utf *utf_main;
+utf *utf_run; /* run */
+
+utf *utf_add;
+utf *utf_dispatch;
+utf *utf_remove;
+utf *utf_addThread;
+utf *utf_removeThread;
+utf *utf_put;
+utf *utf_get;
+utf *utf_uncaughtException;
+utf *utf_value;
-utf *utf_printStackTrace;
utf *utf_fillInStackTrace;
+utf *utf_findNative;
+utf *utf_getSystemClassLoader;
+utf *utf_initCause;
utf *utf_loadClass;
+utf *utf_loadClassInternal;
+utf *utf_printStackTrace;
+
+utf *utf_division_by_zero;
+
+utf *utf_Z; /* Z */
+utf *utf_B; /* B */
+utf *utf_C; /* C */
+utf *utf_S; /* S */
+utf *utf_I; /* I */
+utf *utf_J; /* J */
+utf *utf_F; /* F */
+utf *utf_D; /* D */
utf *utf_void__void; /* ()V */
utf *utf_boolean__void; /* (Z)V */
utf *utf_long__void; /* (J)V */
utf *utf_float__void; /* (F)V */
utf *utf_double__void; /* (D)V */
+
+utf *utf_void__java_lang_ClassLoader; /* ()Ljava/lang/ClassLoader; */
+utf *utf_void__java_lang_Object; /* ()Ljava/lang/Object; */
utf *utf_void__java_lang_Throwable; /* ()Ljava/lang/Throwable; */
+utf *utf_java_lang_ClassLoader_java_lang_String__J; /* (Ljava/lang/ClassLoader;Ljava/lang/String;)J */
+utf *utf_java_lang_Exception__V; /* (Ljava/lang/Exception;)V */
+utf *utf_java_lang_Object__java_lang_Object; /* (Ljava/lang/Object;)Ljava/lang/Object; */
utf *utf_java_lang_String__void; /* (Ljava/lang/String;)V */
utf *utf_java_lang_String__java_lang_Class;
+utf *utf_java_lang_Thread__V; /* (Ljava/lang/Thread;)V */
+utf *utf_java_lang_Thread_java_lang_Throwable__V; /* (Ljava/lang/Thread;Ljava/lang/Throwable;)V */
+utf *utf_Ljava_lang_ThreadGroup_Ljava_lang_String__V; /* (Ljava/lang/ThreadGroup;Ljava/lang/String;)V */
utf *utf_java_lang_Throwable__void; /* (Ljava/lang/Throwable;)V */
+utf *utf_java_lang_Throwable__java_lang_Throwable; /* (Ljava/lang/Throwable;)Ljava/lang/Throwable; */
+utf *utf_not_named_yet; /* special name for unnamed classes */
+utf *utf_null; /* null */
utf *array_packagename;
void utf8_init(void)
{
+ TRACESUBSYSTEMINITIALIZATION("utf8_init");
+
+ /* create utf8 hashtable */
+
+ hashtable_utf = NEW(hashtable);
+
+ hashtable_create(hashtable_utf, HASHTABLE_UTF_SIZE);
+
+#if defined(ENABLE_STATISTICS)
+ if (opt_stat)
+ count_utf_len += sizeof(utf*) * hashtable_utf->size;
+#endif
+
/* create utf-symbols for pointer comparison of frequently used strings */
utf_java_lang_Object = utf_new_char("java/lang/Object");
utf_java_lang_Cloneable = utf_new_char("java/lang/Cloneable");
utf_java_lang_SecurityManager = utf_new_char("java/lang/SecurityManager");
utf_java_lang_String = utf_new_char("java/lang/String");
- utf_java_lang_System = utf_new_char("java/lang/System");
+ utf_java_lang_ThreadGroup = utf_new_char("java/lang/ThreadGroup");
+
+ utf_java_lang_ref_SoftReference =
+ utf_new_char("java/lang/ref/SoftReference");
+
+ utf_java_lang_ref_WeakReference =
+ utf_new_char("java/lang/ref/WeakReference");
+
+ utf_java_lang_ref_PhantomReference =
+ utf_new_char("java/lang/ref/PhantomReference");
+
utf_java_io_Serializable = utf_new_char("java/io/Serializable");
utf_java_lang_Throwable = utf_new_char("java/lang/Throwable");
- utf_java_lang_VMThrowable = utf_new_char("java/lang/VMThrowable");
- utf_java_lang_Exception = utf_new_char("java/lang/Exception");
utf_java_lang_Error = utf_new_char("java/lang/Error");
+
+ utf_java_lang_ClassCircularityError =
+ utf_new_char("java/lang/ClassCircularityError");
+
+ utf_java_lang_ClassFormatError = utf_new_char("java/lang/ClassFormatError");
+
+ utf_java_lang_ExceptionInInitializerError =
+ utf_new_char("java/lang/ExceptionInInitializerError");
+
+ utf_java_lang_IncompatibleClassChangeError =
+ utf_new_char("java/lang/IncompatibleClassChangeError");
+
+ utf_java_lang_InstantiationError =
+ utf_new_char("java/lang/InstantiationError");
+
+ utf_java_lang_InternalError = utf_new_char("java/lang/InternalError");
+ utf_java_lang_LinkageError = utf_new_char("java/lang/LinkageError");
+
+ utf_java_lang_NoClassDefFoundError =
+ utf_new_char("java/lang/NoClassDefFoundError");
+
utf_java_lang_OutOfMemoryError = utf_new_char("java/lang/OutOfMemoryError");
+ utf_java_lang_UnsatisfiedLinkError =
+ utf_new_char("java/lang/UnsatisfiedLinkError");
+
+ utf_java_lang_UnsupportedClassVersionError =
+ utf_new_char("java/lang/UnsupportedClassVersionError");
+
+ utf_java_lang_VerifyError = utf_new_char("java/lang/VerifyError");
+
+ utf_java_lang_VirtualMachineError =
+ utf_new_char("java/lang/VirtualMachineError");
+
+#if defined(ENABLE_JAVASE)
+ utf_java_lang_AbstractMethodError =
+ utf_new_char("java/lang/AbstractMethodError");
+
+ utf_java_lang_NoSuchFieldError =
+ utf_new_char("java/lang/NoSuchFieldError");
+
+ utf_java_lang_NoSuchMethodError =
+ utf_new_char("java/lang/NoSuchMethodError");
+#endif
+
+ utf_java_lang_Exception = utf_new_char("java/lang/Exception");
+
+ utf_java_lang_ArithmeticException =
+ utf_new_char("java/lang/ArithmeticException");
+
+ utf_java_lang_ArrayIndexOutOfBoundsException =
+ utf_new_char("java/lang/ArrayIndexOutOfBoundsException");
+
+ utf_java_lang_ArrayStoreException =
+ utf_new_char("java/lang/ArrayStoreException");
+
+ utf_java_lang_ClassCastException =
+ utf_new_char("java/lang/ClassCastException");
+
+ utf_java_lang_ClassNotFoundException =
+ utf_new_char("java/lang/ClassNotFoundException");
+
+ utf_java_lang_CloneNotSupportedException =
+ utf_new_char("java/lang/CloneNotSupportedException");
+
+ utf_java_lang_IllegalAccessException =
+ utf_new_char("java/lang/IllegalAccessException");
+
+ utf_java_lang_IllegalArgumentException =
+ utf_new_char("java/lang/IllegalArgumentException");
+
+ utf_java_lang_IllegalMonitorStateException =
+ utf_new_char("java/lang/IllegalMonitorStateException");
+
+ utf_java_lang_InstantiationException =
+ utf_new_char("java/lang/InstantiationException");
+
+ utf_java_lang_InterruptedException =
+ utf_new_char("java/lang/InterruptedException");
+
+ utf_java_lang_NegativeArraySizeException =
+ utf_new_char("java/lang/NegativeArraySizeException");
+
+ utf_java_lang_NullPointerException =
+ utf_new_char("java/lang/NullPointerException");
+
+ utf_java_lang_RuntimeException =
+ utf_new_char("java/lang/RuntimeException");
+
+ utf_java_lang_StringIndexOutOfBoundsException =
+ utf_new_char("java/lang/StringIndexOutOfBoundsException");
+
+ utf_java_lang_reflect_InvocationTargetException =
+ utf_new_char("java/lang/reflect/InvocationTargetException");
+
+ utf_java_security_PrivilegedActionException =
+ utf_new_char("java/security/PrivilegedActionException");
+
+#if defined(ENABLE_JAVASE)
utf_java_lang_Void = utf_new_char("java/lang/Void");
+#endif
+
utf_java_lang_Boolean = utf_new_char("java/lang/Boolean");
utf_java_lang_Byte = utf_new_char("java/lang/Byte");
utf_java_lang_Character = utf_new_char("java/lang/Character");
utf_java_lang_Float = utf_new_char("java/lang/Float");
utf_java_lang_Double = utf_new_char("java/lang/Double");
+#if defined(ENABLE_JAVASE)
+ utf_java_lang_StackTraceElement =
+ utf_new_char("java/lang/StackTraceElement");
+
+ utf_java_lang_reflect_Constructor =
+ utf_new_char("java/lang/reflect/Constructor");
+
+ utf_java_lang_reflect_Field = utf_new_char("java/lang/reflect/Field");
+ utf_java_lang_reflect_Method = utf_new_char("java/lang/reflect/Method");
+
+# if defined(WITH_JAVA_RUNTIME_LIBRARY_GNU_CLASSPATH)
+ utf_java_lang_reflect_VMConstructor = utf_new_char("java/lang/reflect/VMConstructor");
+ utf_java_lang_reflect_VMField = utf_new_char("java/lang/reflect/VMField");
+ utf_java_lang_reflect_VMMethod = utf_new_char("java/lang/reflect/VMMethod");
+# endif
+
utf_java_util_Vector = utf_new_char("java/util/Vector");
+#endif
utf_InnerClasses = utf_new_char("InnerClasses");
utf_ConstantValue = utf_new_char("ConstantValue");
utf_LineNumberTable = utf_new_char("LineNumberTable");
utf_SourceFile = utf_new_char("SourceFile");
+#if defined(ENABLE_JAVASE)
+ utf_EnclosingMethod = utf_new_char("EnclosingMethod");
+ utf_Signature = utf_new_char("Signature");
+ utf_StackMapTable = utf_new_char("StackMapTable");
+
+# if defined(ENABLE_JVMTI)
+ utf_LocalVariableTable = utf_new_char("LocalVariableTable");
+# endif
+
+# if defined(ENABLE_ANNOTATIONS)
+ utf_RuntimeVisibleAnnotations = utf_new_char("RuntimeVisibleAnnotations");
+ utf_RuntimeInvisibleAnnotations = utf_new_char("RuntimeInvisibleAnnotations");
+ utf_RuntimeVisibleParameterAnnotations = utf_new_char("RuntimeVisibleParameterAnnotations");
+ utf_RuntimeInvisibleParameterAnnotations = utf_new_char("RuntimeInvisibleParameterAnnotations");
+ utf_AnnotationDefault = utf_new_char("AnnotationDefault");
+# endif
+#endif
+
utf_init = utf_new_char("<init>");
utf_clinit = utf_new_char("<clinit>");
+ utf_clone = utf_new_char("clone");
utf_finalize = utf_new_char("finalize");
+ utf_invoke = utf_new_char("invoke");
+ utf_main = utf_new_char("main");
+ utf_run = utf_new_char("run");
+
+ utf_add = utf_new_char("add");
+ utf_dispatch = utf_new_char("dispatch");
+ utf_remove = utf_new_char("remove");
+ utf_addThread = utf_new_char("addThread");
+ utf_removeThread = utf_new_char("removeThread");
+ utf_put = utf_new_char("put");
+ utf_get = utf_new_char("get");
+ utf_uncaughtException = utf_new_char("uncaughtException");
+ utf_value = utf_new_char("value");
- utf_printStackTrace = utf_new_char("printStackTrace");
utf_fillInStackTrace = utf_new_char("fillInStackTrace");
+ utf_findNative = utf_new_char("findNative");
+ utf_getSystemClassLoader = utf_new_char("getSystemClassLoader");
+ utf_initCause = utf_new_char("initCause");
utf_loadClass = utf_new_char("loadClass");
+ utf_loadClassInternal = utf_new_char("loadClassInternal");
+ utf_printStackTrace = utf_new_char("printStackTrace");
+
+ utf_division_by_zero = utf_new_char("/ by zero");
+
+ utf_Z = utf_new_char("Z");
+ utf_B = utf_new_char("B");
+ utf_C = utf_new_char("C");
+ utf_S = utf_new_char("S");
+ utf_I = utf_new_char("I");
+ utf_J = utf_new_char("J");
+ utf_F = utf_new_char("F");
+ utf_D = utf_new_char("D");
utf_void__void = utf_new_char("()V");
utf_boolean__void = utf_new_char("(Z)V");
utf_long__void = utf_new_char("(J)V");
utf_float__void = utf_new_char("(F)V");
utf_double__void = utf_new_char("(D)V");
+ utf_void__java_lang_Object = utf_new_char("()Ljava/lang/Object;");
utf_void__java_lang_Throwable = utf_new_char("()Ljava/lang/Throwable;");
+
+ utf_void__java_lang_ClassLoader =
+ utf_new_char("()Ljava/lang/ClassLoader;");
+
+ utf_java_lang_ClassLoader_java_lang_String__J =
+ utf_new_char("(Ljava/lang/ClassLoader;Ljava/lang/String;)J");
+
+ utf_java_lang_Exception__V = utf_new_char("(Ljava/lang/Exception;)V");
+
+ utf_java_lang_Object__java_lang_Object =
+ utf_new_char("(Ljava/lang/Object;)Ljava/lang/Object;");
+
utf_java_lang_String__void = utf_new_char("(Ljava/lang/String;)V");
utf_java_lang_String__java_lang_Class =
utf_new_char("(Ljava/lang/String;)Ljava/lang/Class;");
+ utf_java_lang_Thread__V = utf_new_char("(Ljava/lang/Thread;)V");
+
+ utf_java_lang_Thread_java_lang_Throwable__V =
+ utf_new_char("(Ljava/lang/Thread;Ljava/lang/Throwable;)V");
+
+ utf_Ljava_lang_ThreadGroup_Ljava_lang_String__V =
+ utf_new_char("(Ljava/lang/ThreadGroup;Ljava/lang/String;)V");
+
utf_java_lang_Throwable__void = utf_new_char("(Ljava/lang/Throwable;)V");
- array_packagename = utf_new_char("<the array package>");
+
+ utf_java_lang_Throwable__java_lang_Throwable =
+ utf_new_char("(Ljava/lang/Throwable;)Ljava/lang/Throwable;");
+
+ utf_null = utf_new_char("null");
+ utf_not_named_yet = utf_new_char("\t<not_named_yet>");
+ array_packagename = utf_new_char("\t<the array package>");
}
}
}
+/* utf_full_hashkey ************************************************************
-/* utf_hashkey *****************************************************************
+ This function computes a hash value using all bytes in the string.
+
+ The algorithm is the "One-at-a-time" algorithm as published
+ by Bob Jenkins on http://burtleburtle.net/bob/hash/doobs.html.
+
+ IN:
+ text.........first byte to hash; the bytes are read as unsigned char
+ length.......number of bytes to hash (no terminator is looked for)
+
+ OUT:
+ the u4 hash value. For length == 0 the loop is skipped and the
+ final mixing steps leave the result at 0.
+
+*******************************************************************************/
+
+u4 utf_full_hashkey(const char *text, u4 length)
+{
+ register const unsigned char *p = (const unsigned char *) text;
+ register u4 hash;
+ register u4 i;
+
+ hash = 0;
+ for (i=length; i--;)
+ {
+ hash += *p++;
+ hash += (hash << 10);
+ hash ^= (hash >> 6);
+ }
+ hash += (hash << 3);
+ hash ^= (hash >> 11);
+ hash += (hash << 15);
+
+ return hash;
+}
+
+/* unicode_hashkey *************************************************************
Compute the hashkey of a unicode string.
*******************************************************************************/
-utf *utf_new_intern(const char *text, u2 length);
-
utf *utf_new(const char *text, u2 length)
-{
- utf *r;
-
-#if defined(USE_THREADS) && defined(NATIVE_THREADS)
- tables_lock();
-#endif
-
- r = utf_new_intern(text, length);
-
-#if defined(USE_THREADS) && defined(NATIVE_THREADS)
- tables_unlock();
-#endif
-
- return r;
-}
-
-
-utf *utf_new_intern(const char *text, u2 length)
{
u4 key; /* hashkey computed from utf-text */
u4 slot; /* slot in hashtable */
utf *u; /* hashtable element */
u2 i;
-#ifdef STATISTICS
+ Mutex_lock(hashtable_utf->mutex);
+
+#if defined(ENABLE_STATISTICS)
if (opt_stat)
count_utf_new++;
#endif
key = utf_hashkey(text, length);
- slot = key & (utf_hash.size - 1);
- u = utf_hash.ptr[slot];
+ slot = key & (hashtable_utf->size - 1);
+ u = hashtable_utf->ptr[slot];
/* search external hash chain for utf-symbol */
+
while (u) {
if (u->blength == length) {
-
/* compare text of hashtable elements */
+
for (i = 0; i < length; i++)
- if (text[i] != u->text[i]) goto nomatch;
+ if (text[i] != u->text[i])
+ goto nomatch;
-#ifdef STATISTICS
+#if defined(ENABLE_STATISTICS)
if (opt_stat)
count_utf_new_found++;
#endif
/* symbol found in hashtable */
+
+ Mutex_unlock(hashtable_utf->mutex);
+
return u;
}
+
nomatch:
u = u->hashlink; /* next element in external chain */
}
-#ifdef STATISTICS
- if (opt_stat)
- count_utf_len += sizeof(utf) + length;
-#endif
-
/* location in hashtable found, create new utf element */
+
u = NEW(utf);
+
u->blength = length; /* length in bytes of utfstring */
- u->hashlink = utf_hash.ptr[slot]; /* link in external hashchain */
+ u->hashlink = hashtable_utf->ptr[slot]; /* link in external hashchain */
u->text = mem_alloc(length + 1);/* allocate memory for utf-text */
+
memcpy(u->text, text, length); /* copy utf-text */
u->text[length] = '\0';
- utf_hash.ptr[slot] = u; /* insert symbol into table */
- utf_hash.entries++; /* update number of entries */
+#if defined(ENABLE_STATISTICS)
+ if (opt_stat)
+ count_utf_len += sizeof(utf) + length + 1;
+#endif
+
+ hashtable_utf->ptr[slot] = u; /* insert symbol at the head of its chain */
+ hashtable_utf->entries++; /* update number of entries */
- if (utf_hash.entries > (utf_hash.size * 2)) {
+ if (hashtable_utf->entries > (hashtable_utf->size * 2)) {
- /* reorganization of hashtable, average length of
- the external chains is approx. 2 */
+ /* reorganization of hashtable, average length of the external
+ chains is approx. 2
+
+ The locals declared in this block (u, slot, i) shadow the outer
+ ones, so the outer u that is returned below still refers to the
+ element that was just inserted.
+
+ NOTE(review): hashtable_utf is re-pointed to newhash at the end of
+ this block and the old table is handed to hashtable_free, so the
+ final Mutex_unlock operates on newhash->mutex rather than on the
+ pointer that was locked on entry -- confirm that hashtable_resize
+ carries the locked mutex over to the new table. */
- u4 i;
- utf *u;
- hashtable newhash; /* the new hashtable */
+ hashtable *newhash; /* the new hashtable */
+ u4 i;
+ utf *u;
+ utf *nextu;
+ u4 slot;
/* create new hashtable, double the size */
- init_hashtable(&newhash, utf_hash.size * 2);
- newhash.entries = utf_hash.entries;
-#ifdef STATISTICS
+ newhash = hashtable_resize(hashtable_utf, hashtable_utf->size * 2);
+
+#if defined(ENABLE_STATISTICS)
if (opt_stat)
- count_utf_len += sizeof(utf*) * utf_hash.size;
+ count_utf_len += sizeof(utf*) * hashtable_utf->size;
#endif
/* transfer elements to new hashtable */
- for (i = 0; i < utf_hash.size; i++) {
- u = (utf *) utf_hash.ptr[i];
+
+ for (i = 0; i < hashtable_utf->size; i++) {
+ u = hashtable_utf->ptr[i];
+
while (u) {
- utf *nextu = u->hashlink;
- u4 slot = utf_hashkey(u->text, u->blength) & (newhash.size - 1);
+ nextu = u->hashlink;
+ slot = utf_hashkey(u->text, u->blength) & (newhash->size - 1);
- u->hashlink = (utf *) newhash.ptr[slot];
- newhash.ptr[slot] = u;
+ u->hashlink = (utf *) newhash->ptr[slot];
+ newhash->ptr[slot] = u;
/* follow link in external hash chain */
+
u = nextu;
}
}
/* dispose old table */
- MFREE(utf_hash.ptr, void*, utf_hash.size);
- utf_hash = newhash;
+
+ hashtable_free(hashtable_utf);
+
+ hashtable_utf = newhash;
}
+ Mutex_unlock(hashtable_utf->mutex);
+
return u;
}
u4 left; /* unicode characters left */
u4 buflength; /* utf length in bytes of the u2 array */
utf *result; /* resulting utf-string */
- int i;
+ int i;
/* determine utf length in bytes and allocate memory */
Read the next unicode character from the utf string and increment
the utf-string pointer accordingly.
+ CAUTION: This function is unsafe for input that was not checked
+ by is_valid_utf!
+
*******************************************************************************/
u2 utf_nextu2(char **utf_ptr)
}
-/* utf_strlen ******************************************************************
+/* utf_bytes *******************************************************************
- Determine number of unicode characters in the utf string.
+ Determine number of bytes (aka. octets) in the utf string.
+
+ IN:
+ u............utf string; must not be NULL, it is dereferenced
+ without a check
+
+ OUT:
+ The number of octets of this utf string.
+ There is _no_ terminating zero included in this count.
+ (This is simply the stored blength field.)
*******************************************************************************/
-u4 utf_strlen(utf *u)
+u4 utf_bytes(utf *u)
+{
+ return u->blength;
+}
+
+
+/* utf_get_number_of_u2s_for_buffer ********************************************
+
+ Determine number of UTF-16 u2s in the given UTF-8 buffer
+
+ CAUTION: This function is unsafe for input that was not checked
+ by is_valid_utf!
+
+ CAUTION: Use this function *only* when you want to convert a UTF-8 buffer
+ to an array of u2s (UTF-16) and want to know how many of them you will get.
+ All other uses of this function are probably wrong.
+
+ IN:
+ buffer........points to first char in buffer
+ blength.......number of _bytes_ in the buffer
+
+ OUT:
+ the number of u2s needed to hold this string in UTF-16 encoding.
+ There is _no_ terminating zero included in this count.
+
+ NOTE: Unlike utf_get_number_of_u2s, this function never throws an
+ exception. Instead an assert checks that decoding stopped exactly
+ at buffer + blength.
+
+*******************************************************************************/
+
+u4 utf_get_number_of_u2s_for_buffer(const char *buffer, u4 blength)
+{
+ const char *endpos; /* points behind utf string */
+ const char *utf_ptr; /* current position in utf text */
+ u4 len = 0; /* number of unicode characters */
+
+ utf_ptr = buffer;
+ endpos = utf_ptr + blength;
+
+ while (utf_ptr < endpos) {
+ len++;
+ /* next unicode character; the cast only drops const so that the
+ pointer matches the char ** parameter of utf_nextu2 */
+ utf_nextu2((char **)&utf_ptr);
+ }
+
+ assert(utf_ptr == endpos);
+
+ return len;
+}
+
+
+/* utf_get_number_of_u2s *******************************************************
+
+ Determine number of UTF-16 u2s in the utf string.
+
+ CAUTION: This function is unsafe for input that was not checked
+ by is_valid_utf!
+
+ CAUTION: Use this function *only* when you want to convert a utf string
+ to an array of u2s and want to know how many of them you will get.
+ All other uses of this function are probably wrong.
+
+ IN:
+ u............utf string
+
+ OUT:
+ the number of u2s needed to hold this string in UTF-16 encoding.
+ There is _no_ terminating zero included in this count.
+ XXX 0 if a NullPointerException has been thrown (see below)
+ 0 is also returned after an InternalError has been thrown because
+ decoding did not stop exactly at the end of the string.
+
+ NOTE(review): in the lines visible here the while loop calls
+ utf_nextu2 without incrementing len; presumably the increment sits
+ on a line that is not shown -- confirm against the full file.
+
+*******************************************************************************/
+
+u4 utf_get_number_of_u2s(utf *u)
{
char *endpos; /* points behind utf string */
char *utf_ptr; /* current position in utf text */
u4 len = 0; /* number of unicode characters */
- if (!u) {
- *exceptionptr = new_nullpointerexception();
+ /* XXX this is probably not checked by most callers! Review this after */
+ /* the invalid uses of this function have been eliminated */
+ if (u == NULL) {
+ exceptions_throw_nullpointerexception();
return 0;
}
- endpos = utf_end(u);
+ endpos = UTF_END(u);
utf_ptr = u->text;
while (utf_ptr < endpos) {
utf_nextu2(&utf_ptr);
}
- if (utf_ptr != endpos)
+ if (utf_ptr != endpos) {
/* string ended abruptly */
- throw_cacao_exception_exit(string_java_lang_InternalError,
- "Illegal utf8 string");
+ exceptions_throw_internalerror("Illegal utf8 string");
+ return 0;
+ }
+
+ return len;
+}
+
+
+/* utf8_safe_number_of_u2s *****************************************************
+
+ Determine number of UTF-16 u2s needed for decoding the given UTF-8 string.
+ (For invalid UTF-8 the U+fffd replacement character will be counted.)
+
+ This function is safe even for invalid UTF-8 strings.
+
+ IN:
+ text..........zero-terminated(!) UTF-8 string (may be invalid)
+ must NOT be NULL
+ nbytes........strlen(text). (This is needed to completely emulate
+ the RI).
+
+ OUT:
+ the number of u2s needed to hold this string in UTF-16 encoding.
+ There is _no_ terminating zero included in this count.
+
+ NOTE: Counting stops at the first zero byte. The 3- to 6-byte
+ branches return early (counting one more u2) when fewer than the
+ expected continuation bytes remain before text + nbytes. The 2-byte
+ branch has no such check; it reads the following byte unconditionally
+ and therefore relies on the terminating zero.
+
+ A valid 4-byte sequence above 0xFFFF counts as two u2s (surrogates);
+ every other sequence, valid or not, counts as one.
+
+*******************************************************************************/
+
+s4 utf8_safe_number_of_u2s(const char *text, s4 nbytes) {
+ register const unsigned char *t;
+ register s4 byte;
+ register s4 len;
+ register const unsigned char *tlimit;
+ s4 byte1;
+ s4 byte2;
+ s4 byte3;
+ s4 value;
+ s4 skip;
+
+ assert(text);
+ assert(nbytes >= 0);
+
+ len = 0;
+ t = (const unsigned char *) text;
+ tlimit = t + nbytes;
+
+ /* CAUTION: Keep this code in sync with utf8_safe_convert_to_u2s! */
+
+ while (1) {
+ byte = *t++;
+
+ if (byte & 0x80) {
+ /* highest bit set, non-ASCII character */
+
+ if ((byte & 0xe0) == 0xc0) {
+ /* 2-byte: should be 110..... 10...... ? */
+
+ if ((*t++ & 0xc0) == 0x80)
+ ; /* valid 2-byte */
+ else
+ t--; /* invalid */
+ }
+ else if ((byte & 0xf0) == 0xe0) {
+ /* 3-byte: should be 1110.... 10...... 10...... */
+ /* ^t */
+
+ if (t + 2 > tlimit)
+ return len + 1; /* invalid, stop here */
+
+ if ((*t++ & 0xc0) == 0x80) {
+ if ((*t++ & 0xc0) == 0x80)
+ ; /* valid 3-byte */
+ else
+ t--; /* invalid */
+ }
+ else
+ t--; /* invalid */
+ }
+ else if ((byte & 0xf8) == 0xf0) {
+ /* 4-byte: should be 11110... 10...... 10...... 10...... */
+ /* ^t */
+
+ if (t + 3 > tlimit)
+ return len + 1; /* invalid, stop here */
+
+ if (((byte1 = *t++) & 0xc0) == 0x80) {
+ if (((byte2 = *t++) & 0xc0) == 0x80) {
+ if (((byte3 = *t++) & 0xc0) == 0x80) {
+ /* valid 4-byte UTF-8? */
+ value = ((byte & 0x07) << 18)
+ | ((byte1 & 0x3f) << 12)
+ | ((byte2 & 0x3f) << 6)
+ | ((byte3 & 0x3f) );
+
+ if (value > 0x10FFFF)
+ ; /* invalid */
+ else if (value > 0xFFFF)
+ len += 1; /* we need surrogates */
+ else
+ ; /* 16bit suffice */
+ }
+ else
+ t--; /* invalid */
+ }
+ else
+ t--; /* invalid */
+ }
+ else
+ t--; /* invalid */
+ }
+ else if ((byte & 0xfc) == 0xf8) {
+ /* invalid 5-byte */
+ if (t + 4 > tlimit)
+ return len + 1; /* invalid, stop here */
+
+ skip = 4;
+ for (; skip && ((*t & 0xc0) == 0x80); --skip)
+ t++;
+ }
+ else if ((byte & 0xfe) == 0xfc) {
+ /* invalid 6-byte */
+ if (t + 5 > tlimit)
+ return len + 1; /* invalid, stop here */
+
+ skip = 5;
+ for (; skip && ((*t & 0xc0) == 0x80); --skip)
+ t++;
+ }
+ else
+ ; /* invalid */
+ }
+ else {
+ /* NUL */
+
+ if (byte == 0)
+ break;
+
+ /* ASCII character, common case */
+ }
+
+ len++;
+ }
return len;
}
+/* utf8_safe_convert_to_u2s ****************************************************
+
+ Convert the given UTF-8 string to UTF-16 into a pre-allocated buffer.
+ (Invalid UTF-8 will be replaced with the U+fffd replacement character.)
+ Use utf8_safe_number_of_u2s to determine the number of u2s to allocate.
+
+ This function is safe even for invalid UTF-8 strings.
+
+ IN:
+ text..........zero-terminated(!) UTF-8 string (may be invalid)
+ must NOT be NULL
+ nbytes........strlen(text). (This is needed to completely emulate
+ the RI).
+ buffer........a preallocated array of u2s to receive the decoded
+ string. Use utf8_safe_number_of_u2s to get the
+ required number of u2s for allocating this.
+
+ NOTE: No terminating zero is written to buffer; conversion simply
+ stops at the first zero byte of text, or earlier when a 3- to 6-byte
+ sequence would run past text + nbytes (one replacement character is
+ stored in that case). A valid 4-byte sequence above 0xFFFF is stored
+ as a surrogate pair, i.e. two u2s.
+
+*******************************************************************************/
+
+#define UNICODE_REPLACEMENT 0xfffd
+
+void utf8_safe_convert_to_u2s(const char *text, s4 nbytes, u2 *buffer) {
+ register const unsigned char *t;
+ register s4 byte;
+ register const unsigned char *tlimit;
+ s4 byte1;
+ s4 byte2;
+ s4 byte3;
+ s4 value;
+ s4 skip;
+
+ assert(text);
+ assert(nbytes >= 0);
+
+ t = (const unsigned char *) text;
+ tlimit = t + nbytes;
+
+ /* CAUTION: Keep this code in sync with utf8_safe_number_of_u2s! */
+
+ while (1) {
+ byte = *t++;
+
+ if (byte & 0x80) {
+ /* highest bit set, non-ASCII character */
+
+ if ((byte & 0xe0) == 0xc0) {
+ /* 2-byte: should be 110..... 10...... */
+
+ if (((byte1 = *t++) & 0xc0) == 0x80) {
+ /* valid 2-byte UTF-8 */
+ *buffer++ = ((byte & 0x1f) << 6)
+ | ((byte1 & 0x3f) );
+ }
+ else {
+ *buffer++ = UNICODE_REPLACEMENT;
+ t--;
+ }
+ }
+ else if ((byte & 0xf0) == 0xe0) {
+ /* 3-byte: should be 1110.... 10...... 10...... */
+
+ if (t + 2 > tlimit) {
+ *buffer++ = UNICODE_REPLACEMENT;
+ return;
+ }
+
+ if (((byte1 = *t++) & 0xc0) == 0x80) {
+ if (((byte2 = *t++) & 0xc0) == 0x80) {
+ /* valid 3-byte UTF-8 */
+ *buffer++ = ((byte & 0x0f) << 12)
+ | ((byte1 & 0x3f) << 6)
+ | ((byte2 & 0x3f) );
+ }
+ else {
+ *buffer++ = UNICODE_REPLACEMENT;
+ t--;
+ }
+ }
+ else {
+ *buffer++ = UNICODE_REPLACEMENT;
+ t--;
+ }
+ }
+ else if ((byte & 0xf8) == 0xf0) {
+ /* 4-byte: should be 11110... 10...... 10...... 10...... */
+
+ if (t + 3 > tlimit) {
+ *buffer++ = UNICODE_REPLACEMENT;
+ return;
+ }
+
+ if (((byte1 = *t++) & 0xc0) == 0x80) {
+ if (((byte2 = *t++) & 0xc0) == 0x80) {
+ if (((byte3 = *t++) & 0xc0) == 0x80) {
+ /* valid 4-byte UTF-8? */
+ value = ((byte & 0x07) << 18)
+ | ((byte1 & 0x3f) << 12)
+ | ((byte2 & 0x3f) << 6)
+ | ((byte3 & 0x3f) );
+
+ if (value > 0x10FFFF) {
+ *buffer++ = UNICODE_REPLACEMENT;
+ }
+ else if (value > 0xFFFF) {
+ /* we need surrogates */
+ *buffer++ = 0xd800 | ((value >> 10) - 0x40);
+ *buffer++ = 0xdc00 | (value & 0x03ff);
+ }
+ else
+ *buffer++ = value; /* 16bit suffice */
+ }
+ else {
+ *buffer++ = UNICODE_REPLACEMENT;
+ t--;
+ }
+ }
+ else {
+ *buffer++ = UNICODE_REPLACEMENT;
+ t--;
+ }
+ }
+ else {
+ *buffer++ = UNICODE_REPLACEMENT;
+ t--;
+ }
+ }
+ else if ((byte & 0xfc) == 0xf8) {
+ if (t + 4 > tlimit) {
+ *buffer++ = UNICODE_REPLACEMENT;
+ return;
+ }
+
+ skip = 4;
+ for (; skip && ((*t & 0xc0) == 0x80); --skip)
+ t++;
+ *buffer++ = UNICODE_REPLACEMENT;
+ }
+ else if ((byte & 0xfe) == 0xfc) {
+ if (t + 5 > tlimit) {
+ *buffer++ = UNICODE_REPLACEMENT;
+ return;
+ }
+
+ skip = 5;
+ for (; skip && ((*t & 0xc0) == 0x80); --skip)
+ t++;
+ *buffer++ = UNICODE_REPLACEMENT;
+ }
+ else
+ *buffer++ = UNICODE_REPLACEMENT;
+ }
+ else {
+ /* NUL */
+
+ if (byte == 0)
+ break;
+
+ /* ASCII character, common case */
+
+ *buffer++ = byte;
+ }
+ }
+}
+
+
/* u2_utflength ****************************************************************
Returns the utf length in bytes of a u2 array.
}
-/* utf_display *****************************************************************
+/* utf_copy ********************************************************************
+
+ Copy the given utf string byte-for-byte to a buffer.
+
+ IN:
+ buffer.......the buffer; the copy is requested for u->blength + 1
+ chars, so it needs at least that much room
+ u............the utf string
+
+*******************************************************************************/
+
+void utf_copy(char *buffer, utf *u)
+{
+ /* our utf strings are zero-terminated (done by utf_new), so asking
+ for blength + 1 chars takes the terminating zero along */
+ MCOPY(buffer, u->text, char, u->blength + 1);
+}
+
+
+/* utf_cat *********************************************************************
+
+ Append the given utf string byte-for-byte to a buffer.
+
+ IN:
+ buffer.......the buffer; must already hold a zero-terminated
+ string, because its end is located with strlen
+ u............the utf string
+
+*******************************************************************************/
+
+void utf_cat(char *buffer, utf *u)
+{
+ /* our utf strings are zero-terminated (done by utf_new), so asking
+ for blength + 1 chars takes the terminating zero along */
+ MCOPY(buffer + strlen(buffer), u->text, char, u->blength + 1);
+}
+
+
+/* utf_copy_classname **********************************************************
+
+ Copy the given utf classname byte-for-byte to a buffer.
+ '/' is replaced by '.'
+
+ IN:
+ buffer.......the buffer
+ u............the utf string
+
+ The loop runs up to and including the byte at UTF_END(u), so that
+ byte (the terminating zero, see below) is written to buffer as well.
+
+*******************************************************************************/
+
+void utf_copy_classname(char *buffer, utf *u)
+{
+ char *bufptr;
+ char *srcptr;
+ char *endptr;
+ char ch;
+
+ bufptr = buffer;
+ srcptr = u->text;
+ endptr = UTF_END(u) + 1; /* utfs are zero-terminated by utf_new */
+
+ while (srcptr != endptr) {
+ ch = *srcptr++;
+ if (ch == '/')
+ ch = '.';
+ *bufptr++ = ch;
+ }
+}
+
+
+/* utf_cat_classname ***********************************************************
+
+ Append the given utf classname byte-for-byte to a buffer.
+ '/' is replaced by '.'
+
+ IN:
+ buffer.......the buffer; must already hold a zero-terminated
+ string, because its end is located with strlen
+ u............the utf string
+
+*******************************************************************************/
+
+void utf_cat_classname(char *buffer, utf *u)
+{
+ utf_copy_classname(buffer + strlen(buffer), u);
+}
+
+/* utf_display_printable_ascii *************************************************
Write utf symbol to stdout (for debugging purposes).
+ Non-printable and non-ASCII characters are printed as '?'.
+
+ IN:
+ u............utf string to print; for NULL the text NULL is printed
+
+ Only character codes 32..126 are echoed. stdout is flushed before
+ returning.
*******************************************************************************/
-void utf_display(utf *u)
+void utf_display_printable_ascii(utf *u)
{
char *endpos; /* points behind utf string */
char *utf_ptr; /* current position in utf text */
- if (!u) {
+ if (u == NULL) {
printf("NULL");
fflush(stdout);
return;
}
- endpos = utf_end(u);
+ endpos = UTF_END(u);
utf_ptr = u->text;
while (utf_ptr < endpos) {
- /* read next unicode character */
+ /* read next unicode character */
+
u2 c = utf_nextu2(&utf_ptr);
- if (c >= 32 && c <= 127) printf("%c", c);
- else printf("?");
+
+ /* 127 is DEL, a control character, so the printable range ends at 126 */
+ if ((c >= 32) && (c < 127))
+ printf("%c", c);
+ else
+ printf("?");
}
fflush(stdout);
}
-/* utf_display_classname *******************************************************
+/* utf_display_printable_ascii_classname ***************************************
Write utf symbol to stdout with `/' converted to `.' (for debugging
purposes).
+ Non-printable and non-ASCII characters are printed as '?'.
*******************************************************************************/
-void utf_display_classname(utf *u)
+void utf_display_printable_ascii_classname(utf *u)
{
char *endpos; /* points behind utf string */
char *utf_ptr; /* current position in utf text */
- if (!u) {
+ if (u == NULL) {
+ /* a NULL symbol is shown as the literal text "NULL" */
printf("NULL");
fflush(stdout);
return;
}
- endpos = utf_end(u);
+ endpos = UTF_END(u);
utf_ptr = u->text;
while (utf_ptr < endpos) {
- /* read next unicode character */
+ /* read next unicode character; utf_nextu2 is handed &utf_ptr and is */
+ /* expected to advance it, otherwise this loop would not terminate */
+
u2 c = utf_nextu2(&utf_ptr);
- if (c == '/') c = '.';
- if (c >= 32 && c <= 127) printf("%c", c);
- else printf("?");
+
+ /* the substitution happens before the range test below, so the '.' */
+ /* itself is printed as a normal character */
+ if (c == '/')
+ c = '.';
+
+ /* values in 32..127 are printed as they are, anything else as '?' */
+ /* NOTE(review): the upper bound 127 also passes DEL (0x7f), which is */
+ /* a control character, not a printable one. */
+ if ((c >= 32) && (c <= 127))
+ printf("%c", c);
+ else
+ printf("?");
}
fflush(stdout);
}
-/* utf_sprint ******************************************************************
+/* utf_sprint_convert_to_latin1 ************************************************
Write utf symbol into c-string (for debugging purposes).
+ Characters are converted to 8-bit Latin-1, non-Latin-1 characters yield
+ invalid results.
*******************************************************************************/
-void utf_sprint(char *buffer, utf *u)
+void utf_sprint_convert_to_latin1(char *buffer, utf *u)
{
char *endpos; /* points behind utf string */
char *utf_ptr; /* current position in utf text */
u2 pos = 0; /* position in c-string */
if (!u) {
- memcpy(buffer, "NULL", 5); /* 4 chars + terminating \0 */
+ strcpy(buffer, "NULL");
return;
}
- endpos = utf_end(u);
+ endpos = UTF_END(u);
utf_ptr = u->text;
while (utf_ptr < endpos)
}
-/* utf_sprint_classname ********************************************************
+/* utf_sprint_convert_to_latin1_classname **************************************
Write utf symbol into c-string with `/' converted to `.' (for debugging
purposes).
+ Characters are converted to 8-bit Latin-1, non-Latin-1 characters yield
+ invalid results.
*******************************************************************************/
-void utf_sprint_classname(char *buffer, utf *u)
+void utf_sprint_convert_to_latin1_classname(char *buffer, utf *u)
{
char *endpos; /* points behind utf string */
char *utf_ptr; /* current position in utf text */
u2 pos = 0; /* position in c-string */
if (!u) {
- memcpy(buffer, "NULL", 5); /* 4 chars + terminating \0 */
+ strcpy(buffer, "NULL");
return;
}
- endpos = utf_end(u);
+ endpos = UTF_END(u);
utf_ptr = u->text;
while (utf_ptr < endpos) {
}
-/* utf_fprint ******************************************************************
+/* utf_strcat_convert_to_latin1 ************************************************
+
+ Like libc strcat, but uses an utf8 string.
+ Characters are converted to 8-bit Latin-1, non-Latin-1 characters yield
+ invalid results.
+
+ IN:
+ buffer.......zero-terminated C string to append to (strlen() is
+              called on it); no size is passed, so the caller must
+              provide enough room
+ u............the utf string to append; for NULL the callee writes
+              the text "NULL"
+
+*******************************************************************************/
+
+void utf_strcat_convert_to_latin1(char *buffer, utf *u)
+{
+ /* Append by letting utf_sprint_convert_to_latin1 write at the position */
+ /* of the terminator of the text already in buffer. */
+ utf_sprint_convert_to_latin1(buffer + strlen(buffer), u);
+}
+
+
+/* utf_strcat_convert_to_latin1_classname **************************************
+
+ Like libc strcat, but uses an utf8 string.
+ `/' is converted to `.' (done by utf_sprint_convert_to_latin1_classname).
+ Characters are converted to 8-bit Latin-1, non-Latin-1 characters yield
+ invalid results.
+
+ IN:
+ buffer.......zero-terminated C string to append to (strlen() is
+              called on it); no size is passed, so the caller must
+              provide enough room
+ u............the utf classname to append; for NULL the callee writes
+              the text "NULL"
+
+*******************************************************************************/
+
+void utf_strcat_convert_to_latin1_classname(char *buffer, utf *u)
+{
+ /* Append by letting utf_sprint_convert_to_latin1_classname write at the */
+ /* position of the terminator of the text already in buffer. */
+ utf_sprint_convert_to_latin1_classname(buffer + strlen(buffer), u);
+}
+
+
+/* utf_fprint_printable_ascii **************************************************
Write utf symbol into file.
+ Non-printable and non-ASCII characters are printed as '?'.
*******************************************************************************/
-void utf_fprint(FILE *file, utf *u)
+void utf_fprint_printable_ascii(FILE *file, utf *u)
{
char *endpos; /* points behind utf string */
char *utf_ptr; /* current position in utf text */
if (!u)
return;
- endpos = utf_end(u);
+ endpos = UTF_END(u);
utf_ptr = u->text;
while (utf_ptr < endpos) {
}
-/* utf_fprint_classname ********************************************************
+/* utf_fprint_printable_ascii_classname ****************************************
Write utf symbol into file with `/' converted to `.'.
+ Non-printable and non-ASCII characters are printed as '?'.
*******************************************************************************/
-void utf_fprint_classname(FILE *file, utf *u)
+void utf_fprint_printable_ascii_classname(FILE *file, utf *u)
{
char *endpos; /* points behind utf string */
char *utf_ptr; /* current position in utf text */
if (!u)
return;
- endpos = utf_end(u);
+ endpos = UTF_END(u);
utf_ptr = u->text;
while (utf_ptr < endpos) {
*******************************************************************************/
-static unsigned long min_codepoint[6] = {0,1L<<7,1L<<11,1L<<16,1L<<21,1L<<26};
+/* static unsigned long min_codepoint[6] = {0,1L<<7,1L<<11,1L<<16,1L<<21,1L<<26}; */
bool is_valid_utf(char *utf_ptr, char *end_pos)
{
} else {
/* Sun Java seems to allow overlong UTF-8 encodings */
- if (v < min_codepoint[len]) { /* overlong UTF-8 */
- if (!opt_liberalutf)
- fprintf(stderr,"WARNING: Overlong UTF-8 sequence found.\n");
- /* XXX change this to panic? */
- }
+ /* if (v < min_codepoint[len]) */
+ /* XXX throw exception? */
}
/* surrogates in UTF-8 seem to be allowed in Java classfiles */
bool is_valid_name_utf(utf *u)
{
- return is_valid_name(u->text,utf_end(u));
+ /* Convenience wrapper: hands the start of the text and UTF_END(u) to */
+ /* is_valid_name and returns its result.  u is dereferenced without a */
+ /* NULL check, so callers must pass a valid utf symbol. */
+ return is_valid_name(u->text, UTF_END(u));
}
*******************************************************************************/
+#if !defined(NDEBUG)
void utf_show(void)
{
u4 beyond_limit = 0; /* number of utf-symbols in chains with length>=CHAIN_LIMIT-1 */
u4 i;
- printf ("UTF-HASH:\n");
+ printf("UTF-HASH:\n");
/* show element of utf-hashtable */
- for (i=0; i<utf_hash.size; i++) {
- utf *u = utf_hash.ptr[i];
+
+ for (i = 0; i < hashtable_utf->size; i++) {
+ utf *u = hashtable_utf->ptr[i];
+
if (u) {
- printf ("SLOT %d: ", (int) i);
+ printf("SLOT %d: ", (int) i);
+
while (u) {
- printf ("'");
- utf_display (u);
- printf ("' ");
+ printf("'");
+ utf_display_printable_ascii(u);
+ printf("' ");
u = u->hashlink;
}
- printf ("\n");
+ printf("\n");
}
-
}
- printf ("UTF-HASH: %d slots for %d entries\n",
- (int) utf_hash.size, (int) utf_hash.entries );
-
+ printf("UTF-HASH: %d slots for %d entries\n",
+ (int) hashtable_utf->size, (int) hashtable_utf->entries );
- if (utf_hash.entries == 0)
+ if (hashtable_utf->entries == 0)
return;
printf("chains:\n chainlength number of chains %% of utfstrings\n");
chain_count[i]=0;
/* count numbers of hashchains according to their length */
- for (i=0; i<utf_hash.size; i++) {
+ for (i=0; i<hashtable_utf->size; i++) {
- utf *u = (utf*) utf_hash.ptr[i];
+ utf *u = (utf*) hashtable_utf->ptr[i];
u4 chain_length = 0;
/* determine chainlength */
/* display results */
for (i=1;i<CHAIN_LIMIT-1;i++)
- printf(" %2d %17d %18.2f%%\n",i,chain_count[i],(((float) chain_count[i]*i*100)/utf_hash.entries));
+ printf(" %2d %17d %18.2f%%\n",i,chain_count[i],(((float) chain_count[i]*i*100)/hashtable_utf->entries));
- printf(" >=%2d %17d %18.2f%%\n",CHAIN_LIMIT-1,chain_count[CHAIN_LIMIT-1],((float) beyond_limit*100)/utf_hash.entries);
+ printf(" >=%2d %17d %18.2f%%\n",CHAIN_LIMIT-1,chain_count[CHAIN_LIMIT-1],((float) beyond_limit*100)/hashtable_utf->entries);
printf("max. chainlength:%5d\n",max_chainlength);
/* avg. chainlength = sum of chainlengths / number of chains */
- printf("avg. chainlength:%5.2f\n",(float) sum_chainlength / (utf_hash.size-chain_count[0]));
+ printf("avg. chainlength:%5.2f\n",(float) sum_chainlength / (hashtable_utf->size-chain_count[0]));
}
+#endif /* !defined(NDEBUG) */
/*
* c-basic-offset: 4
* tab-width: 4
* End:
+ * vim:noexpandtab:sw=4:ts=4:
*/