1 /* src/vmcore/utf8.c - utf8 string functions
3 Copyright (C) 1996-2005, 2006, 2007, 2008
4 CACAOVM - Verein zur Foerderung der freien virtuellen Maschine CACAO
6 This file is part of CACAO.
8 This program is free software; you can redistribute it and/or
9 modify it under the terms of the GNU General Public License as
10 published by the Free Software Foundation; either version 2, or (at
11 your option) any later version.
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
33 #include "mm/memory.h"
35 #include "threads/lock-common.h"
37 #include "toolbox/hashtable.h"
39 #include "vm/exceptions.h"
41 #include "vmcore/options.h"
43 #if defined(ENABLE_STATISTICS)
44 # include "vmcore/statistics.h"
47 #include "vmcore/utf8.h"
/* File-scope state of the utf8 subsystem: the symbol hashtable plus one   */
/* global pointer per frequently used string.  The pointers are assigned   */
/* by the initialisation code further down via utf_new_char().             */
/* NOTE(review): several `#if defined(...)` lines below have no matching   */
/* `#endif` in the visible text; the extent of each conditional region     */
/* must be confirmed against the project headers before relying on it.     */
50 /* global variables ***********************************************************/
52 /* hashsize must be power of 2 */
/* utf_new() masks the hash key with (size - 1), hence the power-of-2 rule. */
54 #define HASHTABLE_UTF_SIZE 16384 /* initial size of utf-hash */
56 hashtable *hashtable_utf; /* hashtable for utf8-symbols */
59 /* utf-symbols for pointer comparison of frequently used strings **************/
61 utf *utf_java_lang_Object;
63 utf *utf_java_lang_Class;
64 utf *utf_java_lang_ClassLoader;
65 utf *utf_java_lang_Cloneable;
66 utf *utf_java_lang_SecurityManager;
67 utf *utf_java_lang_String;
68 utf *utf_java_lang_ThreadGroup;
69 utf *utf_java_lang_ref_SoftReference;
70 utf *utf_java_lang_ref_WeakReference;
71 utf *utf_java_lang_ref_PhantomReference;
72 utf *utf_java_io_Serializable;
/* Throwable and the java.lang error classes. */
74 utf *utf_java_lang_Throwable;
75 utf *utf_java_lang_Error;
77 utf *utf_java_lang_AbstractMethodError;
78 utf *utf_java_lang_ClassCircularityError;
79 utf *utf_java_lang_ClassFormatError;
80 utf *utf_java_lang_ExceptionInInitializerError;
81 utf *utf_java_lang_IncompatibleClassChangeError;
82 utf *utf_java_lang_InstantiationError;
83 utf *utf_java_lang_InternalError;
84 utf *utf_java_lang_LinkageError;
85 utf *utf_java_lang_NoClassDefFoundError;
86 utf *utf_java_lang_NoSuchFieldError;
87 utf *utf_java_lang_NoSuchMethodError;
88 utf *utf_java_lang_OutOfMemoryError;
89 utf *utf_java_lang_UnsatisfiedLinkError;
90 utf *utf_java_lang_UnsupportedClassVersionError;
91 utf *utf_java_lang_VerifyError;
92 utf *utf_java_lang_VirtualMachineError;
/* Exception classes. */
94 utf *utf_java_lang_Exception;
96 utf *utf_java_lang_ArithmeticException;
97 utf *utf_java_lang_ArrayIndexOutOfBoundsException;
98 utf *utf_java_lang_ArrayStoreException;
99 utf *utf_java_lang_ClassCastException;
100 utf *utf_java_lang_ClassNotFoundException;
101 utf *utf_java_lang_CloneNotSupportedException;
102 utf *utf_java_lang_IllegalAccessException;
103 utf *utf_java_lang_IllegalArgumentException;
104 utf *utf_java_lang_IllegalMonitorStateException;
105 utf *utf_java_lang_InstantiationException;
106 utf *utf_java_lang_InterruptedException;
107 utf *utf_java_lang_NegativeArraySizeException;
108 utf *utf_java_lang_NullPointerException;
109 utf *utf_java_lang_StringIndexOutOfBoundsException;
111 utf *utf_java_lang_reflect_InvocationTargetException;
113 utf *utf_java_security_PrivilegedActionException;
/* NOTE(review): no closing #endif is visible for this conditional. */
115 #if defined(ENABLE_JAVASE)
116 utf* utf_java_lang_Void;
/* Wrapper classes of the primitive types. */
119 utf* utf_java_lang_Boolean;
120 utf* utf_java_lang_Byte;
121 utf* utf_java_lang_Character;
122 utf* utf_java_lang_Short;
123 utf* utf_java_lang_Integer;
124 utf* utf_java_lang_Long;
125 utf* utf_java_lang_Float;
126 utf* utf_java_lang_Double;
/* NOTE(review): no closing #endif is visible for this conditional. */
128 #if defined(ENABLE_JAVASE)
129 utf *utf_java_lang_StackTraceElement;
130 utf *utf_java_lang_reflect_Constructor;
131 utf *utf_java_lang_reflect_Field;
132 utf *utf_java_lang_reflect_Method;
133 utf *utf_java_util_Vector;
/* Attribute names; each comment repeats the exact string that is interned. */
136 utf *utf_InnerClasses; /* InnerClasses */
137 utf *utf_ConstantValue; /* ConstantValue */
138 utf *utf_Code; /* Code */
139 utf *utf_Exceptions; /* Exceptions */
140 utf *utf_LineNumberTable; /* LineNumberTable */
141 utf *utf_SourceFile; /* SourceFile */
/* NOTE(review): no closing #endif is visible for this conditional. */
143 #if defined(ENABLE_JAVASE)
144 utf *utf_EnclosingMethod;
146 utf *utf_StackMapTable;
/* NOTE(review): no closing #endif is visible for this conditional. */
148 #if defined(ENABLE_ANNOTATIONS)
149 utf *utf_RuntimeVisibleAnnotations; /* RuntimeVisibleAnnotations */
150 utf *utf_RuntimeInvisibleAnnotations; /* RuntimeInvisibleAnnotations */
151 utf *utf_RuntimeVisibleParameterAnnotations; /* RuntimeVisibleParameterAnnotations */
152 utf *utf_RuntimeInvisibleParameterAnnotations; /* RuntimeInvisibleParameterAnnotations */
153 utf *utf_AnnotationDefault; /* AnnotationDefault */
/* Symbols for method names and one message text.  The comment on each     */
/* line is the string literal the symbol is bound to in the initialiser.   */
157 utf *utf_init; /* <init> */
158 utf *utf_clinit; /* <clinit> */
159 utf *utf_clone; /* clone */
160 utf *utf_finalize; /* finalize */
162 utf *utf_run; /* run */
167 utf *utf_removeThread; /* removeThread */
170 utf *utf_uncaughtException; /* uncaughtException */
173 utf *utf_fillInStackTrace; /* fillInStackTrace */
175 utf *utf_getSystemClassLoader; /* getSystemClassLoader */
178 utf *utf_loadClassInternal; /* loadClassInternal */
179 utf *utf_printStackTrace; /* printStackTrace */
181 utf *utf_division_by_zero; /* "/ by zero" (message text, not a name) */
/* Symbols for method descriptors.  Naming scheme visible below:           */
/* <parameter types>__<return type>, with `void`/`V` for no value.         */
192 utf *utf_void__void; /* ()V */
193 utf *utf_boolean__void; /* (Z)V */
194 utf *utf_byte__void; /* (B)V */
195 utf *utf_char__void; /* (C)V */
196 utf *utf_short__void; /* (S)V */
197 utf *utf_int__void; /* (I)V */
198 utf *utf_long__void; /* (J)V */
199 utf *utf_float__void; /* (F)V */
200 utf *utf_double__void; /* (D)V */
202 utf *utf_void__java_lang_ClassLoader; /* ()Ljava/lang/ClassLoader; */
203 utf *utf_void__java_lang_Object; /* ()Ljava/lang/Object; */
204 utf *utf_void__java_lang_Throwable; /* ()Ljava/lang/Throwable; */
/* (Ljava/lang/ClassLoader;Ljava/lang/String;)J */
205 utf *utf_java_lang_ClassLoader_java_lang_String__J;
206 utf *utf_java_lang_Exception__V; /* (Ljava/lang/Exception;)V */
/* (Ljava/lang/Object;)Ljava/lang/Object; */
207 utf *utf_java_lang_Object__java_lang_Object;
208 utf *utf_java_lang_String__void; /* (Ljava/lang/String;)V */
/* (Ljava/lang/String;)Ljava/lang/Class; */
209 utf *utf_java_lang_String__java_lang_Class;
210 utf *utf_java_lang_Thread__V; /* (Ljava/lang/Thread;)V */
/* (Ljava/lang/Thread;Ljava/lang/Throwable;)V */
211 utf *utf_java_lang_Thread_java_lang_Throwable__V;
212 utf *utf_java_lang_Throwable__void; /* (Ljava/lang/Throwable;)V */
/* (Ljava/lang/Throwable;)Ljava/lang/Throwable; */
213 utf *utf_java_lang_Throwable__java_lang_Throwable;
215 utf *utf_not_named_yet; /* special name for unnamed classes */
/* Bound to the literal "\t<the array package>" in the initialiser. */
217 utf *array_packagename;
220 /* utf_init ********************************************************************
222 Initializes the utf8 subsystem.
224 *******************************************************************************/
/* Body of the utf8 initialisation routine: allocates the global symbol    */
/* hashtable with HASHTABLE_UTF_SIZE slots and then interns every          */
/* well-known string through utf_new_char(), storing the returned symbol   */
/* in the matching global so later code can compare by pointer.            */
/* NOTE(review): the function signature, its braces and the #endif lines   */
/* of the conditional sections are not present in the visible text.        */
/* NOTE(review): utf_Signature, utf_main, utf_add, utf_remove,             */
/* utf_addThread, utf_put, utf_get, utf_value, utf_findNative,             */
/* utf_initCause, utf_loadClass, utf_Z/B/C/S/I/J/F/D and utf_null are      */
/* assigned below but have no definition in the visible part of this file; */
/* presumably they are defined nearby - verify.                            */
228 TRACESUBSYSTEMINITIALIZATION("utf8_init");
230 /* create utf8 hashtable */
232 hashtable_utf = NEW(hashtable);
234 hashtable_create(hashtable_utf, HASHTABLE_UTF_SIZE);
236 #if defined(ENABLE_STATISTICS)
/* account for the slot array: one utf pointer per hashtable slot */
238 count_utf_len += sizeof(utf*) * hashtable_utf->size;
241 /* create utf-symbols for pointer comparison of frequently used strings */
243 utf_java_lang_Object = utf_new_char("java/lang/Object");
245 utf_java_lang_Class = utf_new_char("java/lang/Class");
246 utf_java_lang_ClassLoader = utf_new_char("java/lang/ClassLoader");
247 utf_java_lang_Cloneable = utf_new_char("java/lang/Cloneable");
248 utf_java_lang_SecurityManager = utf_new_char("java/lang/SecurityManager");
249 utf_java_lang_String = utf_new_char("java/lang/String");
250 utf_java_lang_ThreadGroup = utf_new_char("java/lang/ThreadGroup");
252 utf_java_lang_ref_SoftReference =
253 utf_new_char("java/lang/ref/SoftReference");
255 utf_java_lang_ref_WeakReference =
256 utf_new_char("java/lang/ref/WeakReference");
258 utf_java_lang_ref_PhantomReference =
259 utf_new_char("java/lang/ref/PhantomReference");
261 utf_java_io_Serializable = utf_new_char("java/io/Serializable");
/* Throwable and error class names */
263 utf_java_lang_Throwable = utf_new_char("java/lang/Throwable");
264 utf_java_lang_Error = utf_new_char("java/lang/Error");
266 utf_java_lang_ClassCircularityError =
267 utf_new_char("java/lang/ClassCircularityError");
269 utf_java_lang_ClassFormatError = utf_new_char("java/lang/ClassFormatError");
271 utf_java_lang_ExceptionInInitializerError =
272 utf_new_char("java/lang/ExceptionInInitializerError");
274 utf_java_lang_IncompatibleClassChangeError =
275 utf_new_char("java/lang/IncompatibleClassChangeError");
277 utf_java_lang_InstantiationError =
278 utf_new_char("java/lang/InstantiationError");
280 utf_java_lang_InternalError = utf_new_char("java/lang/InternalError");
281 utf_java_lang_LinkageError = utf_new_char("java/lang/LinkageError");
283 utf_java_lang_NoClassDefFoundError =
284 utf_new_char("java/lang/NoClassDefFoundError");
286 utf_java_lang_OutOfMemoryError = utf_new_char("java/lang/OutOfMemoryError");
288 utf_java_lang_UnsatisfiedLinkError =
289 utf_new_char("java/lang/UnsatisfiedLinkError");
291 utf_java_lang_UnsupportedClassVersionError =
292 utf_new_char("java/lang/UnsupportedClassVersionError");
294 utf_java_lang_VerifyError = utf_new_char("java/lang/VerifyError");
296 utf_java_lang_VirtualMachineError =
297 utf_new_char("java/lang/VirtualMachineError");
/* NOTE(review): these three are initialised only under ENABLE_JAVASE,   */
/* whereas their definitions appear outside any visible conditional.     */
299 #if defined(ENABLE_JAVASE)
300 utf_java_lang_AbstractMethodError =
301 utf_new_char("java/lang/AbstractMethodError");
303 utf_java_lang_NoSuchFieldError =
304 utf_new_char("java/lang/NoSuchFieldError");
306 utf_java_lang_NoSuchMethodError =
307 utf_new_char("java/lang/NoSuchMethodError");
/* exception class names */
310 utf_java_lang_Exception = utf_new_char("java/lang/Exception");
312 utf_java_lang_ArithmeticException =
313 utf_new_char("java/lang/ArithmeticException");
315 utf_java_lang_ArrayIndexOutOfBoundsException =
316 utf_new_char("java/lang/ArrayIndexOutOfBoundsException");
318 utf_java_lang_ArrayStoreException =
319 utf_new_char("java/lang/ArrayStoreException");
321 utf_java_lang_ClassCastException =
322 utf_new_char("java/lang/ClassCastException");
324 utf_java_lang_ClassNotFoundException =
325 utf_new_char("java/lang/ClassNotFoundException");
327 utf_java_lang_CloneNotSupportedException =
328 utf_new_char("java/lang/CloneNotSupportedException");
330 utf_java_lang_IllegalAccessException =
331 utf_new_char("java/lang/IllegalAccessException");
333 utf_java_lang_IllegalArgumentException =
334 utf_new_char("java/lang/IllegalArgumentException");
336 utf_java_lang_IllegalMonitorStateException =
337 utf_new_char("java/lang/IllegalMonitorStateException");
339 utf_java_lang_InstantiationException =
340 utf_new_char("java/lang/InstantiationException");
342 utf_java_lang_InterruptedException =
343 utf_new_char("java/lang/InterruptedException");
345 utf_java_lang_NegativeArraySizeException =
346 utf_new_char("java/lang/NegativeArraySizeException");
348 utf_java_lang_NullPointerException =
349 utf_new_char("java/lang/NullPointerException");
351 utf_java_lang_StringIndexOutOfBoundsException =
352 utf_new_char("java/lang/StringIndexOutOfBoundsException");
354 utf_java_lang_reflect_InvocationTargetException =
355 utf_new_char("java/lang/reflect/InvocationTargetException");
357 utf_java_security_PrivilegedActionException =
358 utf_new_char("java/security/PrivilegedActionException");
360 #if defined(ENABLE_JAVASE)
361 utf_java_lang_Void = utf_new_char("java/lang/Void");
/* wrapper classes of the primitive types */
364 utf_java_lang_Boolean = utf_new_char("java/lang/Boolean");
365 utf_java_lang_Byte = utf_new_char("java/lang/Byte");
366 utf_java_lang_Character = utf_new_char("java/lang/Character");
367 utf_java_lang_Short = utf_new_char("java/lang/Short");
368 utf_java_lang_Integer = utf_new_char("java/lang/Integer");
369 utf_java_lang_Long = utf_new_char("java/lang/Long");
370 utf_java_lang_Float = utf_new_char("java/lang/Float");
371 utf_java_lang_Double = utf_new_char("java/lang/Double");
373 #if defined(ENABLE_JAVASE)
374 utf_java_lang_StackTraceElement =
375 utf_new_char("java/lang/StackTraceElement");
377 utf_java_lang_reflect_Constructor =
378 utf_new_char("java/lang/reflect/Constructor");
380 utf_java_lang_reflect_Field = utf_new_char("java/lang/reflect/Field");
381 utf_java_lang_reflect_Method = utf_new_char("java/lang/reflect/Method");
382 utf_java_util_Vector = utf_new_char("java/util/Vector");
/* attribute names */
385 utf_InnerClasses = utf_new_char("InnerClasses");
386 utf_ConstantValue = utf_new_char("ConstantValue");
387 utf_Code = utf_new_char("Code");
388 utf_Exceptions = utf_new_char("Exceptions");
389 utf_LineNumberTable = utf_new_char("LineNumberTable");
390 utf_SourceFile = utf_new_char("SourceFile");
392 #if defined(ENABLE_JAVASE)
393 utf_EnclosingMethod = utf_new_char("EnclosingMethod");
394 utf_Signature = utf_new_char("Signature");
395 utf_StackMapTable = utf_new_char("StackMapTable");
397 #if defined(ENABLE_ANNOTATIONS)
398 utf_RuntimeVisibleAnnotations = utf_new_char("RuntimeVisibleAnnotations");
399 utf_RuntimeInvisibleAnnotations = utf_new_char("RuntimeInvisibleAnnotations");
400 utf_RuntimeVisibleParameterAnnotations = utf_new_char("RuntimeVisibleParameterAnnotations");
401 utf_RuntimeInvisibleParameterAnnotations = utf_new_char("RuntimeInvisibleParameterAnnotations");
402 utf_AnnotationDefault = utf_new_char("AnnotationDefault");
/* method names */
406 utf_init = utf_new_char("<init>");
407 utf_clinit = utf_new_char("<clinit>");
408 utf_clone = utf_new_char("clone");
409 utf_finalize = utf_new_char("finalize");
410 utf_main = utf_new_char("main");
411 utf_run = utf_new_char("run");
413 utf_add = utf_new_char("add");
414 utf_remove = utf_new_char("remove");
415 utf_addThread = utf_new_char("addThread");
416 utf_removeThread = utf_new_char("removeThread");
417 utf_put = utf_new_char("put");
418 utf_get = utf_new_char("get");
419 utf_uncaughtException = utf_new_char("uncaughtException");
420 utf_value = utf_new_char("value");
422 utf_fillInStackTrace = utf_new_char("fillInStackTrace");
423 utf_findNative = utf_new_char("findNative");
424 utf_getSystemClassLoader = utf_new_char("getSystemClassLoader");
425 utf_initCause = utf_new_char("initCause");
426 utf_loadClass = utf_new_char("loadClass");
427 utf_loadClassInternal = utf_new_char("loadClassInternal");
428 utf_printStackTrace = utf_new_char("printStackTrace");
/* message text rather than an identifier */
430 utf_division_by_zero = utf_new_char("/ by zero");
/* single-letter primitive type descriptors */
432 utf_Z = utf_new_char("Z");
433 utf_B = utf_new_char("B");
434 utf_C = utf_new_char("C");
435 utf_S = utf_new_char("S");
436 utf_I = utf_new_char("I");
437 utf_J = utf_new_char("J");
438 utf_F = utf_new_char("F");
439 utf_D = utf_new_char("D");
/* method descriptors */
441 utf_void__void = utf_new_char("()V");
442 utf_boolean__void = utf_new_char("(Z)V");
443 utf_byte__void = utf_new_char("(B)V");
444 utf_char__void = utf_new_char("(C)V");
445 utf_short__void = utf_new_char("(S)V");
446 utf_int__void = utf_new_char("(I)V");
447 utf_long__void = utf_new_char("(J)V");
448 utf_float__void = utf_new_char("(F)V");
449 utf_double__void = utf_new_char("(D)V");
450 utf_void__java_lang_Object = utf_new_char("()Ljava/lang/Object;");
451 utf_void__java_lang_Throwable = utf_new_char("()Ljava/lang/Throwable;");
453 utf_void__java_lang_ClassLoader =
454 utf_new_char("()Ljava/lang/ClassLoader;");
456 utf_java_lang_ClassLoader_java_lang_String__J =
457 utf_new_char("(Ljava/lang/ClassLoader;Ljava/lang/String;)J");
459 utf_java_lang_Exception__V = utf_new_char("(Ljava/lang/Exception;)V");
461 utf_java_lang_Object__java_lang_Object =
462 utf_new_char("(Ljava/lang/Object;)Ljava/lang/Object;");
464 utf_java_lang_String__void = utf_new_char("(Ljava/lang/String;)V");
466 utf_java_lang_String__java_lang_Class =
467 utf_new_char("(Ljava/lang/String;)Ljava/lang/Class;");
469 utf_java_lang_Thread__V = utf_new_char("(Ljava/lang/Thread;)V");
471 utf_java_lang_Thread_java_lang_Throwable__V =
472 utf_new_char("(Ljava/lang/Thread;Ljava/lang/Throwable;)V");
474 utf_java_lang_Throwable__void = utf_new_char("(Ljava/lang/Throwable;)V");
476 utf_java_lang_Throwable__java_lang_Throwable =
477 utf_new_char("(Ljava/lang/Throwable;)Ljava/lang/Throwable;");
/* The last two literals start with a TAB character. */
479 utf_null = utf_new_char("null");
480 utf_not_named_yet = utf_new_char("\t<not_named_yet>");
481 array_packagename = utf_new_char("\t<the array package>");
/* utf_hashkey *****************************************************************

   The hashkey is computed from the utf-text by using up to 8
   characters.  For utf-symbols longer than 15 characters 3 characters
   are taken from the beginning and the end, 2 characters are taken
   from the middle.

*******************************************************************************/
/* Helper macros for utf_hashkey.  Both expand to an expression that uses  */
/* the enclosing function's local pointer `text`:                          */
/*   nbs(val) pre-increments `text`, then reads that byte and shifts it;   */
/*   fbs(val) reads the byte `text` currently points at and shifts it.     */
/* The byte is read through a `const char *`, so where plain char is       */
/* signed a byte >= 0x80 is sign-extended by the (u4) cast.                */
494 #define nbs(val) ((u4) *(++text) << val) /* get next byte, left shift by val */
495 #define fbs(val) ((u4) *( text) << val) /* get first byte, left shift by val */
/* Computes the slot hash of `length` bytes at `text` by XOR-ing selected, */
/* shifted bytes; the selection depends on `length` (switch cases below).  */
/* NOTE(review): each `return` combines fbs() and several nbs() in one     */
/* expression, i.e. `text` is read and modified more than once without an  */
/* intervening sequence point.  The result therefore depends on the        */
/* compiler's evaluation order; splitting the expression into separate     */
/* statements would make it well-defined.                                  */
/* NOTE(review): the switch header, the local `a` and most statements of   */
/* the cases for lengths 9..15 are not present in the visible text.        */
497 u4 utf_hashkey(const char *text, u4 length)
499 const char *start_pos = text; /* pointer to utf text */
503 case 0: /* empty string */
506 case 1: return fbs(0);
507 case 2: return fbs(0) ^ nbs(3);
508 case 3: return fbs(0) ^ nbs(3) ^ nbs(5);
509 case 4: return fbs(0) ^ nbs(2) ^ nbs(4) ^ nbs(6);
510 case 5: return fbs(0) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(6);
511 case 6: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(5) ^ nbs(6);
512 case 7: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(5) ^ nbs(6);
513 case 8: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(5) ^ nbs(6) ^ nbs(7);
/* Tails of the per-length cases above 8; `a` holds the partial hash. */
520 return a ^ nbs(4) ^ nbs(5) ^ nbs(6) ^ nbs(7) ^ nbs(8);
529 return a ^ nbs(6) ^ nbs(7) ^ nbs(8) ^ nbs(9);
538 return a ^ nbs(6) ^ nbs(7) ^ nbs(8) ^ nbs(9) ^ nbs(10);
550 return a ^ nbs(9) ^ nbs(10);
562 return a ^ nbs(9) ^ nbs(10);
573 return a ^ nbs(9) ^ nbs(10) ^ nbs(11);
584 return a ^ nbs(9) ^ nbs(10) ^ nbs(11);
586 default: /* 3 characters from beginning */
592 /* 2 characters from middle */
/* reposition to the middle of the string before sampling */
593 text = start_pos + (length / 2);
598 /* 3 characters from end */
/* reposition 4 bytes before the end; nbs() pre-increments before reading */
599 text = start_pos + length - 4;
604 return a ^ nbs(10) ^ nbs(11);
608 /* utf_full_hashkey ************************************************************
610 This function computes a hash value using all bytes in the string.
612 The algorithm is the "One-at-a-time" algorithm as published
613 by Bob Jenkins on http://burtleburtle.net/bob/hash/doobs.html.
615 *******************************************************************************/
617 u4 utf_full_hashkey(const char *text, u4 length)
619 register const unsigned char *p = (const unsigned char *) text;
627 hash += (hash << 10);
631 hash ^= (hash >> 11);
632 hash += (hash << 15);
637 /* unicode_hashkey *************************************************************
639 Compute the hashkey of a unicode string.
641 *******************************************************************************/
643 u4 unicode_hashkey(u2 *text, u2 len)
645 return utf_hashkey((char *) text, len);
/* utf_new *********************************************************************

   Creates a new utf-symbol, the text of the symbol is passed as a
   u1-array. The function searches the utf-hashtable for a utf-symbol
   with this text. On success the element is returned, otherwise a new
   hashtable element is created.

   If the number of entries in the hashtable exceeds twice the size of
   the hashtable slots a reorganization of the hashtable is done and
   the utf symbols are copied to a new hashtable with doubled size.

*******************************************************************************/
/* Looks up the `length` bytes at `text` in hashtable_utf and returns the  */
/* existing symbol, or creates, links and returns a new one.  The text is  */
/* copied into freshly allocated memory and zero-terminated, so the caller */
/* keeps ownership of `text`.  The whole operation runs between            */
/* LOCK_MONITOR_ENTER and LOCK_MONITOR_EXIT on the table's header.         */
/* NOTE(review): the loop headers, the allocation of `u`, the declaration  */
/* of `i` and the return statements are not present in the visible text.   */
662 utf *utf_new(const char *text, u2 length)
664 u4 key; /* hashkey computed from utf-text */
665 u4 slot; /* slot in hashtable */
666 utf *u; /* hashtable element */
669 LOCK_MONITOR_ENTER(hashtable_utf->header);
671 #if defined(ENABLE_STATISTICS)
676 key = utf_hashkey(text, length);
/* size is a power of 2, so masking with (size - 1) selects the slot */
677 slot = key & (hashtable_utf->size - 1);
678 u = hashtable_utf->ptr[slot];
680 /* search external hash chain for utf-symbol */
/* cheap length check first, then a byte-by-byte comparison */
683 if (u->blength == length) {
684 /* compare text of hashtable elements */
686 for (i = 0; i < length; i++)
687 if (text[i] != u->text[i])
690 #if defined(ENABLE_STATISTICS)
692 count_utf_new_found++;
695 /* symbol found in hashtable */
697 LOCK_MONITOR_EXIT(hashtable_utf->header);
703 u = u->hashlink; /* next element in external chain */
706 /* location in hashtable found, create new utf element */
710 u->blength = length; /* length in bytes of utfstring */
/* the new element becomes the head of the slot's chain */
711 u->hashlink = hashtable_utf->ptr[slot]; /* link in external hashchain */
712 u->text = mem_alloc(length + 1);/* allocate memory for utf-text */
714 memcpy(u->text, text, length); /* copy utf-text */
/* the extra byte allocated above holds the terminating zero */
715 u->text[length] = '\0';
717 #if defined(ENABLE_STATISTICS)
719 count_utf_len += sizeof(utf) + length + 1;
722 hashtable_utf->ptr[slot] = u; /* insert symbol into table */
723 hashtable_utf->entries++; /* update number of entries */
/* grow once the average chain length would exceed 2 */
725 if (hashtable_utf->entries > (hashtable_utf->size * 2)) {
727 /* reorganization of hashtable, average length of the external
728 chains is approx. 2 */
730 hashtable *newhash; /* the new hashtable */
736 /* create new hashtable, double the size */
738 newhash = hashtable_resize(hashtable_utf, hashtable_utf->size * 2);
740 #if defined(ENABLE_STATISTICS)
742 count_utf_len += sizeof(utf*) * hashtable_utf->size;
745 /* transfer elements to new hashtable */
/* NOTE(review): `u` and `slot` are reused as scratch variables here, so   */
/* they no longer refer to the newly inserted symbol after this loop.      */
747 for (i = 0; i < hashtable_utf->size; i++) {
748 u = hashtable_utf->ptr[i];
/* every element is re-hashed because the mask changes with the size */
752 slot = utf_hashkey(u->text, u->blength) & (newhash->size - 1);
754 u->hashlink = (utf *) newhash->ptr[slot];
755 newhash->ptr[slot] = u;
757 /* follow link in external hash chain */
763 /* dispose old table */
765 hashtable_free(hashtable_utf);
767 hashtable_utf = newhash;
/* NOTE(review): after a resize this exits the monitor via the NEW table's */
/* header, while the matching enter used the old table's header; whether   */
/* hashtable_resize carries the header over is not visible here - verify.  */
770 LOCK_MONITOR_EXIT(hashtable_utf->header);
/* utf_new_u2 ******************************************************************

   Make utf symbol from u2 array, if isclassname is true '.' is
   replaced by '/'.

*******************************************************************************/
/* Encodes `unicode_length` u2 values into a temporary byte buffer (1, 2   */
/* or 3 bytes per value), interns the bytes with utf_new() and releases    */
/* the buffer again.  With `isclassname` set, '.' is converted while       */
/* copying 1-byte characters.                                              */
/* NOTE(review): the initialisation of `left`/`pos`, the load of `c`, the  */
/* updates of `left` and several stores are not present in the visible     */
/* text.                                                                   */
783 utf *utf_new_u2(u2 *unicode_pos, u4 unicode_length, bool isclassname)
785 char *buffer; /* memory buffer for unicode characters */
786 char *pos; /* pointer to current position in buffer */
787 u4 left; /* unicode characters left */
788 u4 buflength; /* utf length in bytes of the u2 array */
789 utf *result; /* resulting utf-string */
792 /* determine utf length in bytes and allocate memory */
/* the buffer holds exactly buflength bytes; no terminating zero is added */
794 buflength = u2_utflength(unicode_pos, unicode_length);
795 buffer = MNEW(char, buflength);
/* `i` counts processed characters while unicode_pos walks the input */
800 for (i = 0; i++ < unicode_length; unicode_pos++) {
801 /* next unicode character */
/* zero is excluded from the 1-byte form and falls into the 2-byte branch */
804 if ((c != 0) && (c < 0x80)) {
/* `left` is unsigned; the cast to int detects that it wrapped below zero */
807 if ((int) left < 0) break;
808 /* convert classname */
809 if (isclassname && c == '.')
814 } else if (c < 0x800) {
/* 2-byte form: upper bits go into the lead byte, low 6 into the trail */
816 unsigned char high = c >> 6;
817 unsigned char low = c & 0x3F;
819 if ((int) left < 0) break;
820 *pos++ = high | 0xC0;
/* 3-byte form for all remaining values */
826 char mid = (c >> 6) & 0x3F;
829 if ((int) left < 0) break;
830 *pos++ = high | 0xE0;
836 /* insert utf-string into symbol-table */
837 result = utf_new(buffer,buflength);
/* utf_new copies the text, so the scratch buffer can be released */
839 MFREE(buffer, char, buflength);
845 /* utf_new_char ****************************************************************
847 Creates a new utf symbol, the text for this symbol is passed as a
848 c-string ( = char* ).
850 *******************************************************************************/
852 utf *utf_new_char(const char *text)
854 return utf_new(text, strlen(text));
858 /* utf_new_char_classname ******************************************************
860 Creates a new utf symbol, the text for this symbol is passed as a
861 c-string ( = char* ) "." characters are going to be replaced by
862 "/". Since the above function is used often, this is a separte
863 function, instead of an if.
865 *******************************************************************************/
867 utf *utf_new_char_classname(const char *text)
869 if (strchr(text, '.')) {
870 char *txt = strdup(text);
871 char *end = txt + strlen(txt);
875 for (c = txt; c < end; c++)
876 if (*c == '.') *c = '/';
878 tmpRes = utf_new(txt, strlen(txt));
884 return utf_new(text, strlen(text));
/* utf_nextu2 ******************************************************************

   Read the next unicode character from the utf string and increment
   the utf-string pointer accordingly.

   CAUTION: This function is unsafe for input that was not checked
            for validity beforehand!

*******************************************************************************/
/* Decodes one character at *utf_ptr and advances *utf_ptr past it.  The   */
/* upper four bits of the first byte select the 1-, 2- or 3-byte form.     */
/* No end-of-buffer is passed in: the function reads up to utf[2] based    */
/* only on the byte values it sees.                                        */
/* NOTE(review): the declarations of `unicode_char` and `len`, some case   */
/* labels, the `len` assignments and the return statements are not present */
/* in the visible text.                                                    */
898 u2 utf_nextu2(char **utf_ptr)
900 /* uncompressed unicode character */
902 /* current position in utf text */
/* work on unsigned bytes so the shifts and masks below are not affected  */
/* by the signedness of plain char                                        */
903 unsigned char *utf = (unsigned char *) (*utf_ptr);
904 /* bytes representing the unicode character */
905 unsigned char ch1, ch2, ch3;
906 /* number of bytes used to represent the unicode character */
909 switch ((ch1 = utf[0]) >> 4) {
910 default: /* 1 byte */
914 case 0xD: /* 2 bytes */
/* a continuation byte must have the bit pattern 10xxxxxx */
915 if (((ch2 = utf[1]) & 0xC0) == 0x80) {
916 unsigned char high = ch1 & 0x1F;
917 unsigned char low = ch2 & 0x3F;
918 unicode_char = (high << 6) + low;
923 case 0xE: /* 2 or 3 bytes */
924 if (((ch2 = utf[1]) & 0xC0) == 0x80) {
925 if (((ch3 = utf[2]) & 0xC0) == 0x80) {
926 unsigned char low = ch3 & 0x3f;
927 unsigned char mid = ch2 & 0x3f;
928 unsigned char high = ch1 & 0x0f;
/* 4 + 6 + 6 payload bits form the 16-bit character */
929 unicode_char = (((high << 6) + mid) << 6) + low;
937 /* update position in utf-text */
938 *utf_ptr = (char *) (utf + len);
944 /* utf_bytes *******************************************************************
946 Determine number of bytes (aka. octets) in the utf string.
949 u............utf string
952 The number of octets of this utf string.
953 There is _no_ terminating zero included in this count.
955 *******************************************************************************/
963 /* utf_get_number_of_u2s_for_buffer ********************************************
965 Determine number of UTF-16 u2s in the given UTF-8 buffer
967 CAUTION: This function is unsafe for input that was not checked
970 CAUTION: Use this function *only* when you want to convert an UTF-8 buffer
971 to an array of u2s (UTF-16) and want to know how many of them you will get.
972 All other uses of this function are probably wrong.
975 buffer........points to first char in buffer
976 blength.......number of _bytes_ in the buffer
979 the number of u2s needed to hold this string in UTF-16 encoding.
980 There is _no_ terminating zero included in this count.
982 NOTE: Unlike utf_get_number_of_u2s, this function never throws an
985 *******************************************************************************/
987 u4 utf_get_number_of_u2s_for_buffer(const char *buffer, u4 blength)
989 const char *endpos; /* points behind utf string */
990 const char *utf_ptr; /* current position in utf text */
991 u4 len = 0; /* number of unicode characters */
994 endpos = utf_ptr + blength;
996 while (utf_ptr < endpos) {
998 /* next unicode character */
999 utf_nextu2((char **)&utf_ptr);
1002 assert(utf_ptr == endpos);
1008 /* utf_get_number_of_u2s *******************************************************
1010 Determine number of UTF-16 u2s in the utf string.
1012 CAUTION: This function is unsafe for input that was not checked
1015 CAUTION: Use this function *only* when you want to convert a utf string
1016 to an array of u2s and want to know how many of them you will get.
1017 All other uses of this function are probably wrong.
1020 u............utf string
1023 the number of u2s needed to hold this string in UTF-16 encoding.
1024 There is _no_ terminating zero included in this count.
1025 XXX 0 if a NullPointerException has been thrown (see below)
1027 *******************************************************************************/
1029 u4 utf_get_number_of_u2s(utf *u)
1031 char *endpos; /* points behind utf string */
1032 char *utf_ptr; /* current position in utf text */
1033 u4 len = 0; /* number of unicode characters */
1035 /* XXX this is probably not checked by most callers! Review this after */
1036 /* the invalid uses of this function have been eliminated */
1038 exceptions_throw_nullpointerexception();
1042 endpos = UTF_END(u);
1045 while (utf_ptr < endpos) {
1047 /* next unicode character */
1048 utf_nextu2(&utf_ptr);
1051 if (utf_ptr != endpos) {
1052 /* string ended abruptly */
1053 exceptions_throw_internalerror("Illegal utf8 string");
1061 /* utf8_safe_number_of_u2s *****************************************************
1063 Determine number of UTF-16 u2s needed for decoding the given UTF-8 string.
1064 (For invalid UTF-8 the U+fffd replacement character will be counted.)
1066 This function is safe even for invalid UTF-8 strings.
1069 text..........zero-terminated(!) UTF-8 string (may be invalid)
1071 nbytes........strlen(text). (This is needed to completely emulate
1075 the number of u2s needed to hold this string in UTF-16 encoding.
1076 There is _no_ terminating zero included in this count.
1078 *******************************************************************************/
/* Counts how many u2 values utf8_safe_convert_to_u2s() writes for the     */
/* same input.  The lead byte is classified by its high bits; each visible */
/* truncation branch returns len + 1.  A valid 4-byte sequence above       */
/* 0xFFFF counts one extra unit for the surrogate pair.                    */
/* NOTE(review): the main loop header, the declarations of byte/len/skip/  */
/* value and most of the limit checks and else branches are not present   */
/* in the visible text.                                                    */
1080 s4 utf8_safe_number_of_u2s(const char *text, s4 nbytes) {
1081 register const unsigned char *t;
1084 register const unsigned char *tlimit;
1092 assert(nbytes >= 0);
1095 t = (const unsigned char *) text;
/* tlimit points one past the last byte of the input */
1096 tlimit = t + nbytes;
1098 /* CAUTION: Keep this code in sync with utf8_safe_convert_to_u2s! */
1104 /* highest bit set, non-ASCII character */
1106 if ((byte & 0xe0) == 0xc0) {
1107 /* 2-byte: should be 110..... 10...... ? */
/* no limit check is visible before this read; presumably it relies on   */
/* the terminating zero that the header comment requires - verify        */
1109 if ((*t++ & 0xc0) == 0x80)
1110 ; /* valid 2-byte */
1114 else if ((byte & 0xf0) == 0xe0) {
1115 /* 3-byte: should be 1110.... 10...... 10...... */
1119 return len + 1; /* invalid, stop here */
1121 if ((*t++ & 0xc0) == 0x80) {
1122 if ((*t++ & 0xc0) == 0x80)
1123 ; /* valid 3-byte */
1130 else if ((byte & 0xf8) == 0xf0) {
1131 /* 4-byte: should be 11110... 10...... 10...... 10...... */
1135 return len + 1; /* invalid, stop here */
1137 if (((byte1 = *t++) & 0xc0) == 0x80) {
1138 if (((byte2 = *t++) & 0xc0) == 0x80) {
1139 if (((byte3 = *t++) & 0xc0) == 0x80) {
1140 /* valid 4-byte UTF-8? */
/* assemble the 3 + 6 + 6 + 6 payload bits into one code point */
1141 value = ((byte & 0x07) << 18)
1142 | ((byte1 & 0x3f) << 12)
1143 | ((byte2 & 0x3f) << 6)
1144 | ((byte3 & 0x3f) );
/* values above 0x10FFFF are outside the Unicode range */
1146 if (value > 0x10FFFF)
1148 else if (value > 0xFFFF)
1149 len += 1; /* we need surrogates */
1151 ; /* 16bit suffice */
1162 else if ((byte & 0xfc) == 0xf8) {
1163 /* invalid 5-byte */
1165 return len + 1; /* invalid, stop here */
/* step over at most `skip` continuation bytes of the invalid sequence */
1168 for (; skip && ((*t & 0xc0) == 0x80); --skip)
1171 else if ((byte & 0xfe) == 0xfc) {
1172 /* invalid 6-byte */
1174 return len + 1; /* invalid, stop here */
1177 for (; skip && ((*t & 0xc0) == 0x80); --skip)
1189 /* ASCII character, common case */
1199 /* utf8_safe_convert_to_u2s ****************************************************
1201 Convert the given UTF-8 string to UTF-16 into a pre-allocated buffer.
1202 (Invalid UTF-8 will be replaced with the U+fffd replacement character.)
1203 Use utf8_safe_number_of_u2s to determine the number of u2s to allocate.
1205 This function is safe even for invalid UTF-8 strings.
1208 text..........zero-terminated(!) UTF-8 string (may be invalid)
1210 nbytes........strlen(text). (This is needed to completely emulate
1212 buffer........a preallocated array of u2s to receive the decoded
1213 string. Use utf8_safe_number_of_u2s to get the
1214 required number of u2s for allocating this.
1216 *******************************************************************************/
/* U+FFFD, written to the output for every invalid or truncated sequence. */
1218 #define UNICODE_REPLACEMENT 0xfffd
/* Decodes `nbytes` bytes of UTF-8 at `text` into `buffer` as UTF-16.      */
/* The output is written without any size check, so the caller must size   */
/* `buffer` as described in the header comment.  Invalid, truncated,       */
/* 5-byte and 6-byte sequences produce UNICODE_REPLACEMENT; code points    */
/* above 0xFFFF are written as a surrogate pair.                           */
/* NOTE(review): the main loop header, the declarations of byte/byte1..3/  */
/* value/skip, and the statements following each truncation branch are     */
/* not present in the visible text.                                        */
1220 void utf8_safe_convert_to_u2s(const char *text, s4 nbytes, u2 *buffer) {
1221 register const unsigned char *t;
1223 register const unsigned char *tlimit;
1231 assert(nbytes >= 0);
1233 t = (const unsigned char *) text;
/* tlimit points one past the last byte of the input */
1234 tlimit = t + nbytes;
1236 /* CAUTION: Keep this code in sync with utf8_safe_number_of_u2s! */
1242 /* highest bit set, non-ASCII character */
1244 if ((byte & 0xe0) == 0xc0) {
1245 /* 2-byte: should be 110..... 10...... */
/* no limit check is visible before this read; presumably it relies on   */
/* the terminating zero that the header comment requires - verify        */
1247 if (((byte1 = *t++) & 0xc0) == 0x80) {
1248 /* valid 2-byte UTF-8 */
1249 *buffer++ = ((byte & 0x1f) << 6)
1250 | ((byte1 & 0x3f) );
1253 *buffer++ = UNICODE_REPLACEMENT;
1257 else if ((byte & 0xf0) == 0xe0) {
1258 /* 3-byte: should be 1110.... 10...... 10...... */
/* both continuation bytes must lie inside the input */
1260 if (t + 2 > tlimit) {
1261 *buffer++ = UNICODE_REPLACEMENT;
1265 if (((byte1 = *t++) & 0xc0) == 0x80) {
1266 if (((byte2 = *t++) & 0xc0) == 0x80) {
1267 /* valid 3-byte UTF-8 */
1268 *buffer++ = ((byte & 0x0f) << 12)
1269 | ((byte1 & 0x3f) << 6)
1270 | ((byte2 & 0x3f) );
1273 *buffer++ = UNICODE_REPLACEMENT;
1278 *buffer++ = UNICODE_REPLACEMENT;
1282 else if ((byte & 0xf8) == 0xf0) {
1283 /* 4-byte: should be 11110... 10...... 10...... 10...... */
/* all three continuation bytes must lie inside the input */
1285 if (t + 3 > tlimit) {
1286 *buffer++ = UNICODE_REPLACEMENT;
1290 if (((byte1 = *t++) & 0xc0) == 0x80) {
1291 if (((byte2 = *t++) & 0xc0) == 0x80) {
1292 if (((byte3 = *t++) & 0xc0) == 0x80) {
1293 /* valid 4-byte UTF-8? */
1294 value = ((byte & 0x07) << 18)
1295 | ((byte1 & 0x3f) << 12)
1296 | ((byte2 & 0x3f) << 6)
1297 | ((byte3 & 0x3f) );
/* values above 0x10FFFF are outside the Unicode range */
1299 if (value > 0x10FFFF) {
1300 *buffer++ = UNICODE_REPLACEMENT;
1302 else if (value > 0xFFFF) {
1303 /* we need surrogates */
/* (value >> 10) - 0x40 equals (value - 0x10000) >> 10, i.e. the upper   */
/* ten bits of the offset; the low ten bits go into the second unit      */
1304 *buffer++ = 0xd800 | ((value >> 10) - 0x40);
1305 *buffer++ = 0xdc00 | (value & 0x03ff);
1308 *buffer++ = value; /* 16bit suffice */
1311 *buffer++ = UNICODE_REPLACEMENT;
1316 *buffer++ = UNICODE_REPLACEMENT;
1321 *buffer++ = UNICODE_REPLACEMENT;
/* 5-byte lead: always invalid, one replacement for the whole sequence */
1325 else if ((byte & 0xfc) == 0xf8) {
1326 if (t + 4 > tlimit) {
1327 *buffer++ = UNICODE_REPLACEMENT;
/* step over at most `skip` continuation bytes of the invalid sequence */
1332 for (; skip && ((*t & 0xc0) == 0x80); --skip)
1334 *buffer++ = UNICODE_REPLACEMENT;
/* 6-byte lead: always invalid, handled like the 5-byte case */
1336 else if ((byte & 0xfe) == 0xfc) {
1337 if (t + 5 > tlimit) {
1338 *buffer++ = UNICODE_REPLACEMENT;
1343 for (; skip && ((*t & 0xc0) == 0x80); --skip)
1345 *buffer++ = UNICODE_REPLACEMENT;
/* any other byte with the high bit set is not a valid lead byte */
1348 *buffer++ = UNICODE_REPLACEMENT;
1356 /* ASCII character, common case */
1364 /* u2_utflength ****************************************************************
1366 Returns the utf length in bytes of a u2 array.

   text.........the u2 array to measure
   u2_length....number of u2 elements to examine (the loop bound)
1368 *******************************************************************************/
1370 u4 u2_utflength(u2 *text, u4 u2_length)
1372 u4 result_len = 0; /* utf length in bytes */
1373 u2 ch; /* current unicode character */
1376 for (len = 0; len < u2_length; len++) {
1377 /* next unicode character */
1380 /* determine bytes required to store unicode character as utf */
/* `ch &&` keeps U+0000 out of the first case, so a zero character is
   classified by the `ch < 0x800` test below instead */
1381 if (ch && (ch < 0x80))
1383 else if (ch < 0x800)
1393 /* utf_copy ********************************************************************
1395 Copy the given utf string byte-for-byte to a buffer.
1398 buffer.......the buffer
1399 u............the utf string

   The copy covers u->blength + 1 chars, i.e. the text plus its zero
   terminator, so the buffer must provide at least that much room.
1401 *******************************************************************************/
1403 void utf_copy(char *buffer, utf *u)
1405 /* our utf strings are zero-terminated (done by utf_new) */
/* the "+ 1" carries the terminating zero along with the text */
1406 MCOPY(buffer, u->text, char, u->blength + 1);
1410 /* utf_cat *********************************************************************
1412 Append the given utf string byte-for-byte to a buffer.
1415 buffer.......the buffer
1416 u............the utf string

   The buffer must already hold a zero-terminated string (strlen is used
   to find the append position) and needs room for u->blength + 1 more
   chars behind it.
1418 *******************************************************************************/
1420 void utf_cat(char *buffer, utf *u)
1422 /* our utf strings are zero-terminated (done by utf_new) */
/* writing starts at the buffer's current terminator and includes the
   terminating zero of u */
1423 MCOPY(buffer + strlen(buffer), u->text, char, u->blength + 1);
1427 /* utf_copy_classname **********************************************************
1429 Copy the given utf classname byte-for-byte to a buffer.
1430 '/' is replaced by '.'
1433 buffer.......the buffer
1434 u............the utf string
1436 *******************************************************************************/
1438 void utf_copy_classname(char *buffer, utf *u)
/* end pointer is one past the zero terminator, so the loop below also
   processes the terminator itself */
1447 endptr = UTF_END(u) + 1; /* utfs are zero-terminated by utf_new */
1449 while (srcptr != endptr) {
1458 /* utf_cat_classname ***********************************************************
1460 Append the given utf classname byte-for-byte to a buffer.
1461 '/' is replaced by '.'
1464 buffer.......the buffer
1465 u............the utf string

   The buffer must already hold a zero-terminated string; strlen is used
   to find the append position.
1467 *******************************************************************************/
1469 void utf_cat_classname(char *buffer, utf *u)
/* delegate to utf_copy_classname, starting at the current terminator */
1471 utf_copy_classname(buffer + strlen(buffer), u);
1474 /* utf_display_printable_ascii *************************************************
1476 Write utf symbol to stdout (for debugging purposes).
1477 Non-printable and non-ASCII characters are printed as '?'.
1479 *******************************************************************************/
1481 void utf_display_printable_ascii(utf *u)
1483 char *endpos; /* points behind utf string */
1484 char *utf_ptr; /* current position in utf text */
1492 endpos = UTF_END(u);
1495 while (utf_ptr < endpos) {
1496 /* read next unicode character */
/* utf_nextu2 is handed the address of utf_ptr; presumably it advances
   the pointer past the decoded character -- confirm */
1498 u2 c = utf_nextu2(&utf_ptr);
/* NOTE(review): the upper bound 127 lets DEL (0x7f) pass this test even
   though it is not a printable character */
1500 if ((c >= 32) && (c <= 127))
1510 /* utf_display_printable_ascii_classname ***************************************
1512 Write utf symbol to stdout with `/' converted to `.' (for debugging
1514 Non-printable and non-ASCII characters are printed as '?'.
1516 *******************************************************************************/
1518 void utf_display_printable_ascii_classname(utf *u)
1520 char *endpos; /* points behind utf string */
1521 char *utf_ptr; /* current position in utf text */
1529 endpos = UTF_END(u);
1532 while (utf_ptr < endpos) {
1533 /* read next unicode character */
/* utf_nextu2 is handed the address of utf_ptr; presumably it advances
   the pointer past the decoded character -- confirm */
1535 u2 c = utf_nextu2(&utf_ptr);
/* NOTE(review): the upper bound 127 lets DEL (0x7f) pass this test even
   though it is not a printable character */
1540 if ((c >= 32) && (c <= 127))
1550 /* utf_sprint_convert_to_latin1 ************************************************
1552 Write utf symbol into c-string (for debugging purposes).
1553 Characters are converted to 8-bit Latin-1, non-Latin-1 characters yield

   buffer.......destination c-string; no size is passed in, so the caller
                has to provide enough room
   u............the utf string
1556 *******************************************************************************/
1558 void utf_sprint_convert_to_latin1(char *buffer, utf *u)
1560 char *endpos; /* points behind utf string */
1561 char *utf_ptr; /* current position in utf text */
/* NOTE(review): a u2 index wraps around after 65535 characters */
1562 u2 pos = 0; /* position in c-string */
/* presumably the u == NULL case: the buffer receives the text "NULL" --
   confirm against the guarding condition */
1565 strcpy(buffer, "NULL");
1569 endpos = UTF_END(u);
1572 while (utf_ptr < endpos)
1573 /* copy next unicode character */
/* the u2 returned by utf_nextu2 is narrowed to char by this assignment */
1574 buffer[pos++] = utf_nextu2(&utf_ptr);
1576 /* terminate string */
1581 /* utf_sprint_convert_to_latin1_classname **************************************
1583 Write utf symbol into c-string with `/' converted to `.' (for debugging
1585 Characters are converted to 8-bit Latin-1, non-Latin-1 characters yield

   buffer.......destination c-string; no size is passed in, so the caller
                has to provide enough room
   u............the utf string
1588 *******************************************************************************/
1590 void utf_sprint_convert_to_latin1_classname(char *buffer, utf *u)
1592 char *endpos; /* points behind utf string */
1593 char *utf_ptr; /* current position in utf text */
/* NOTE(review): a u2 index wraps around after 65535 characters */
1594 u2 pos = 0; /* position in c-string */
/* presumably the u == NULL case: the buffer receives the text "NULL" --
   confirm against the guarding condition */
1597 strcpy(buffer, "NULL");
1601 endpos = UTF_END(u);
1604 while (utf_ptr < endpos) {
1605 /* copy next unicode character */
1606 u2 c = utf_nextu2(&utf_ptr);
/* package separator of the internal class name becomes a dot */
1607 if (c == '/') c = '.';
1611 /* terminate string */
1616 /* utf_strcat_convert_to_latin1 ************************************************
1618 Like libc strcat, but uses an utf8 string.
1619 Characters are converted to 8-bit Latin-1, non-Latin-1 characters yield

   buffer.......zero-terminated c-string to append to
   u............the utf string to append
1622 *******************************************************************************/
1624 void utf_strcat_convert_to_latin1(char *buffer, utf *u)
/* delegate to utf_sprint_convert_to_latin1, writing at the current
   terminator of buffer */
1626 utf_sprint_convert_to_latin1(buffer + strlen(buffer), u);
1630 /* utf_strcat_convert_to_latin1_classname **************************************
1632 Like libc strcat, but uses an utf8 string.
1633 Characters are converted to 8-bit Latin-1, non-Latin-1 characters yield

   buffer.......zero-terminated c-string to append to
   u............the utf classname to append
1636 *******************************************************************************/
1638 void utf_strcat_convert_to_latin1_classname(char *buffer, utf *u)
/* delegate to utf_sprint_convert_to_latin1_classname, writing at the
   current terminator of buffer */
1640 utf_sprint_convert_to_latin1_classname(buffer + strlen(buffer), u);
1644 /* utf_fprint_printable_ascii **************************************************
1646 Write utf symbol into file.
1647 Non-printable and non-ASCII characters are printed as '?'.

   file.........stream to write to
   u............the utf string
1649 *******************************************************************************/
1651 void utf_fprint_printable_ascii(FILE *file, utf *u)
1653 char *endpos; /* points behind utf string */
1654 char *utf_ptr; /* current position in utf text */
1659 endpos = UTF_END(u);
1662 while (utf_ptr < endpos) {
1663 /* read next unicode character */
1664 u2 c = utf_nextu2(&utf_ptr);
/* NOTE(review): the upper bound 127 means DEL (0x7f) is written out
   verbatim although it is not a printable character */
1666 if (c >= 32 && c <= 127) fprintf(file, "%c", c);
1667 else fprintf(file, "?");
1672 /* utf_fprint_printable_ascii_classname ****************************************
1674 Write utf symbol into file with `/' converted to `.'.
1675 Non-printable and non-ASCII characters are printed as '?'.

   file.........stream to write to
   u............the utf classname
1677 *******************************************************************************/
1679 void utf_fprint_printable_ascii_classname(FILE *file, utf *u)
1681 char *endpos; /* points behind utf string */
1682 char *utf_ptr; /* current position in utf text */
1687 endpos = UTF_END(u);
1690 while (utf_ptr < endpos) {
1691 /* read next unicode character */
1692 u2 c = utf_nextu2(&utf_ptr);
/* package separator of the internal class name becomes a dot */
1693 if (c == '/') c = '.';
/* NOTE(review): the upper bound 127 means DEL (0x7f) is written out
   verbatim although it is not a printable character */
1695 if (c >= 32 && c <= 127) fprintf(file, "%c", c);
1696 else fprintf(file, "?");
1701 /* is_valid_utf ****************************************************************
1703 Return true if the given string is a valid UTF-8 string.
1705 utf_ptr...points to first character
1706 end_pos...points after last character

   As the checks below show, this is the classfile flavour of UTF-8: a
   0x00 byte is rejected, and so is every lead byte that announces more
   than two continuation bytes.
1708 *******************************************************************************/
1710 /* static unsigned long min_codepoint[6] = {0,1L<<7,1L<<11,1L<<16,1L<<21,1L<<26}; */
1712 bool is_valid_utf(char *utf_ptr, char *end_pos)
/* an end pointer in front of the start pointer is treated as invalid */
1719 if (end_pos < utf_ptr) return false;
1720 bytes = end_pos - utf_ptr;
1724 if (!c) return false; /* 0x00 is not allowed */
1725 if ((c & 0x80) == 0) continue; /* ASCII */
/* len = number of continuation bytes announced by the lead byte */
1727 if ((c & 0xe0) == 0xc0) len = 1; /* 110x xxxx */
1728 else if ((c & 0xf0) == 0xe0) len = 2; /* 1110 xxxx */
1729 else if ((c & 0xf8) == 0xf0) len = 3; /* 1111 0xxx */
1730 else if ((c & 0xfc) == 0xf8) len = 4; /* 1111 10xx */
1731 else if ((c & 0xfe) == 0xfc) len = 5; /* 1111 110x */
1732 else return false; /* invalid leading byte */
1734 if (len > 2) return false; /* Java limitation */
/* 0x3f >> len keeps exactly the payload bits of the lead byte
   (0x1f for len 1, 0x0f for len 2) */
1736 v = (unsigned long)c & (0x3f >> len);
1738 if ((bytes -= len) < 0) return false; /* missing bytes */
/* the loop body runs len times, once per continuation byte */
1740 for (i = len; i--; ) {
1742 if ((c & 0xc0) != 0x80) /* 10xx xxxx */
/* append the six payload bits of the continuation byte */
1744 v = (v << 6) | (c & 0x3f);
1748 if (len != 1) return false; /* Java special */
1751 /* Sun Java seems to allow overlong UTF-8 encodings */
1753 /* if (v < min_codepoint[len]) */
1754 /* XXX throw exception? */
1757 /* surrogates in UTF-8 seem to be allowed in Java classfiles */
1758 /* if (v >= 0xd800 && v <= 0xdfff) return false; */ /* surrogates */
1760 /* even these seem to be allowed */
1761 /* if (v == 0xfffe || v == 0xffff) return false; */ /* invalid codepoints */
1768 /* is_valid_name ***************************************************************
1770 Return true if the given string may be used as a class/field/method
1771 name. (Currently this only disallows empty strings and control
1774 NOTE: The string is assumed to have passed is_valid_utf!
1776 utf_ptr...points to first character
1777 end_pos...points after last character
1779 *******************************************************************************/
1781 bool is_valid_name(char *utf_ptr, char *end_pos)
1783 if (end_pos <= utf_ptr) return false; /* disallow empty names */
1785 while (utf_ptr < end_pos) {
1786 unsigned char c = *utf_ptr++;
1788 if (c < 0x20) return false; /* disallow control characters */
/* 0xc0 0x80 is the two-byte encoding of U+0000. utf_ptr was already
   advanced, so *utf_ptr is the byte after c; this look-ahead is not
   bounded by end_pos and relies on the string having passed
   is_valid_utf, where a 0xc0 lead byte needs a continuation byte. */
1789 if (c == 0xc0 && (unsigned char) *utf_ptr == 0x80) /* disallow zero */
/* Convenience wrapper: applies is_valid_name to the text of the utf
   symbol u, from u->text up to UTF_END(u). */
1796 bool is_valid_name_utf(utf *u)
1798 return is_valid_name(u->text, UTF_END(u));
1802 /* utf_show ********************************************************************
1804 Writes the utf symbols in the utfhash to stdout and displays the
1805 number of external hash chains grouped according to the chainlength
1806 (for debugging purposes).

   Only compiled when NDEBUG is not defined.
1808 *******************************************************************************/
1810 #if !defined(NDEBUG)
1814 #define CHAIN_LIMIT 20 /* limit for separated enumeration */
1816 u4 chain_count[CHAIN_LIMIT]; /* numbers of chains */
1817 u4 max_chainlength = 0; /* maximum length of the chains */
1818 u4 sum_chainlength = 0; /* sum of the chainlengths */
1819 u4 beyond_limit = 0; /* number of utf-symbols in chains with length>=CHAIN_LIMIT-1 */
1822 printf("UTF-HASH:\n");
1824 /* show element of utf-hashtable */
1826 for (i = 0; i < hashtable_utf->size; i++) {
1827 utf *u = hashtable_utf->ptr[i];
1830 printf("SLOT %d: ", (int) i);
1834 utf_display_printable_ascii(u);
1842 printf("UTF-HASH: %d slots for %d entries\n",
1843 (int) hashtable_utf->size, (int) hashtable_utf->entries );
/* entries is the divisor of the percentages printed further down */
1845 if (hashtable_utf->entries == 0)
1848 printf("chains:\n chainlength number of chains %% of utfstrings\n");
1850 for (i=0;i<CHAIN_LIMIT;i++)
1853 /* count numbers of hashchains according to their length */
1854 for (i=0; i<hashtable_utf->size; i++) {
1856 utf *u = (utf*) hashtable_utf->ptr[i];
1857 u4 chain_length = 0;
1859 /* determine chainlength */
1865 /* update sum of all chainlengths */
1866 sum_chainlength+=chain_length;
1868 /* determine the maximum length of the chains */
1869 if (chain_length>max_chainlength)
1870 max_chainlength = chain_length;
1872 /* update number of utf-symbols in chains with length>=CHAIN_LIMIT-1 */
/* NOTE(review): the test is >= CHAIN_LIMIT, not >= CHAIN_LIMIT-1 as the
   comments say. A chain of exactly CHAIN_LIMIT-1 entries lands in the
   last bucket without being added to beyond_limit, so the percentage
   printed for the ">=" row leaves those entries out. */
1873 if (chain_length>=CHAIN_LIMIT) {
1874 beyond_limit+=chain_length;
/* clamp so that all long chains share the last bucket */
1875 chain_length=CHAIN_LIMIT-1;
1878 /* update number of hashchains of current length */
1879 chain_count[chain_length]++;
1882 /* display results */
/* rows for lengths 1 .. CHAIN_LIMIT-2; the share of utf strings in
   chains of length i is chain_count[i] * i out of all entries */
1883 for (i=1;i<CHAIN_LIMIT-1;i++)
1884 printf(" %2d %17d %18.2f%%\n",i,chain_count[i],(((float) chain_count[i]*i*100)/hashtable_utf->entries));
1886 printf(" >=%2d %17d %18.2f%%\n",CHAIN_LIMIT-1,chain_count[CHAIN_LIMIT-1],((float) beyond_limit*100)/hashtable_utf->entries);
1889 printf("max. chainlength:%5d\n",max_chainlength);
1891 /* avg. chainlength = sum of chainlengths / number of chains */
/* chain_count[0] is the bucket for chain length 0, so the divisor is the
   number of slots holding at least one entry */
1892 printf("avg. chainlength:%5.2f\n",(float) sum_chainlength / (hashtable_utf->size-chain_count[0]));
1894 #endif /* !defined(NDEBUG) */
1898 * These are local overrides for various environment variables in Emacs.
1899 * Please do not remove this and leave it at the end of the file, where
1900 * Emacs will automagically detect them.
1901 * ---------------------------------------------------------------------
1904 * indent-tabs-mode: t
1908 * vim:noexpandtab:sw=4:ts=4: