1 /* src/vmcore/utf8.c - utf8 string functions
3 Copyright (C) 1996-2005, 2006, 2007, 2008
4 CACAOVM - Verein zur Foerderung der freien virtuellen Maschine CACAO
6 This file is part of CACAO.
8 This program is free software; you can redistribute it and/or
9 modify it under the terms of the GNU General Public License as
10 published by the Free Software Foundation; either version 2, or (at
11 your option) any later version.
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
33 #include "mm/memory.h"
35 #include "threads/lock-common.h"
37 #include "toolbox/hashtable.h"
39 #include "vm/exceptions.h"
41 #include "vmcore/options.h"
43 #if defined(ENABLE_STATISTICS)
44 # include "vmcore/statistics.h"
47 #include "vmcore/utf8.h"
50 /* global variables ***********************************************************/
52 /* hashsize must be a power of 2 */
/* Initial number of slots of the utf8 symbol table. utf_new picks a slot
   with `key & (hashtable_utf->size - 1)`, which only reaches every slot
   when the size is a power of 2. */
54 #define HASHTABLE_UTF_SIZE 16384 /* initial size of utf-hash */
/* The symbol table itself. Entries that share a slot are chained through
   their `hashlink` field (see utf_new). */
56 hashtable *hashtable_utf; /* hashtable for utf8-symbols */
59 /* utf-symbols for pointer comparison of frequently used strings **************/
/* Naming convention: each pointer is assigned in the utf8 init code from
   utf_new_char() with the text spelled by its identifier, where '_' after
   the "utf_" prefix stands for '/' in class names, e.g.
   utf_java_lang_Object = "java/lang/Object". */
61 utf *utf_java_lang_Object;
63 utf *utf_java_lang_Class;
64 utf *utf_java_lang_ClassLoader;
65 utf *utf_java_lang_Cloneable;
66 utf *utf_java_lang_SecurityManager;
67 utf *utf_java_lang_String;
68 utf *utf_java_lang_ThreadGroup;
69 utf *utf_java_lang_ref_SoftReference;
70 utf *utf_java_lang_ref_WeakReference;
71 utf *utf_java_lang_ref_PhantomReference;
72 utf *utf_java_io_Serializable;
74 utf *utf_java_lang_Throwable;
75 utf *utf_java_lang_Error;
/* names of java/lang/...Error classes */
77 utf *utf_java_lang_AbstractMethodError;
78 utf *utf_java_lang_ClassCircularityError;
79 utf *utf_java_lang_ClassFormatError;
80 utf *utf_java_lang_ExceptionInInitializerError;
81 utf *utf_java_lang_IncompatibleClassChangeError;
82 utf *utf_java_lang_InstantiationError;
83 utf *utf_java_lang_InternalError;
84 utf *utf_java_lang_LinkageError;
85 utf *utf_java_lang_NoClassDefFoundError;
86 utf *utf_java_lang_NoSuchFieldError;
87 utf *utf_java_lang_NoSuchMethodError;
88 utf *utf_java_lang_OutOfMemoryError;
89 utf *utf_java_lang_UnsatisfiedLinkError;
90 utf *utf_java_lang_UnsupportedClassVersionError;
91 utf *utf_java_lang_VerifyError;
92 utf *utf_java_lang_VirtualMachineError;
94 utf *utf_java_lang_Exception;
/* names of ...Exception classes */
96 utf *utf_java_lang_ArithmeticException;
97 utf *utf_java_lang_ArrayIndexOutOfBoundsException;
98 utf *utf_java_lang_ArrayStoreException;
99 utf *utf_java_lang_ClassCastException;
100 utf *utf_java_lang_ClassNotFoundException;
101 utf *utf_java_lang_CloneNotSupportedException;
102 utf *utf_java_lang_IllegalAccessException;
103 utf *utf_java_lang_IllegalArgumentException;
104 utf *utf_java_lang_IllegalMonitorStateException;
105 utf *utf_java_lang_InstantiationException;
106 utf *utf_java_lang_InterruptedException;
107 utf *utf_java_lang_NegativeArraySizeException;
108 utf *utf_java_lang_NullPointerException;
109 utf *utf_java_lang_StringIndexOutOfBoundsException;
111 utf *utf_java_lang_reflect_InvocationTargetException;
113 utf *utf_java_security_PrivilegedActionException;
115 #if defined(ENABLE_JAVASE)
116 utf* utf_java_lang_Void;
/* java/lang/Boolean ... java/lang/Double */
119 utf* utf_java_lang_Boolean;
120 utf* utf_java_lang_Byte;
121 utf* utf_java_lang_Character;
122 utf* utf_java_lang_Short;
123 utf* utf_java_lang_Integer;
124 utf* utf_java_lang_Long;
125 utf* utf_java_lang_Float;
126 utf* utf_java_lang_Double;
128 #if defined(ENABLE_JAVASE)
129 utf *utf_java_lang_StackTraceElement;
130 utf *utf_java_lang_reflect_Constructor;
131 utf *utf_java_lang_reflect_Field;
132 utf *utf_java_lang_reflect_Method;
133 utf *utf_java_util_Vector;
/* Symbols below hold exactly the text given in the trailing comment. */
136 utf *utf_InnerClasses; /* InnerClasses */
137 utf *utf_ConstantValue; /* ConstantValue */
138 utf *utf_Code; /* Code */
139 utf *utf_Exceptions; /* Exceptions */
140 utf *utf_LineNumberTable; /* LineNumberTable */
141 utf *utf_SourceFile; /* SourceFile */
143 #if defined(ENABLE_JAVASE)
144 utf *utf_EnclosingMethod; /* EnclosingMethod */
146 utf *utf_StackMapTable; /* StackMapTable */
148 #if defined(ENABLE_ANNOTATIONS)
149 utf *utf_RuntimeVisibleAnnotations; /* RuntimeVisibleAnnotations */
150 utf *utf_RuntimeInvisibleAnnotations; /* RuntimeInvisibleAnnotations */
151 utf *utf_RuntimeVisibleParameterAnnotations; /* RuntimeVisibleParameterAnnotations */
152 utf *utf_RuntimeInvisibleParameterAnnotations; /* RuntimeInvisibleParameterAnnotations */
153 utf *utf_AnnotationDefault; /* AnnotationDefault */
/* Method names. The trailing comment gives the exact text each symbol is
   initialised with. */
157 utf *utf_init; /* <init> */
158 utf *utf_clinit; /* <clinit> */
159 utf *utf_clone; /* clone */
160 utf *utf_finalize; /* finalize */
161 utf *utf_run; /* run */
166 utf *utf_removeThread; /* removeThread */
169 utf *utf_uncaughtException; /* uncaughtException */
172 utf *utf_fillInStackTrace; /* fillInStackTrace */
174 utf *utf_getSystemClassLoader; /* getSystemClassLoader */
177 utf *utf_loadClassInternal; /* loadClassInternal */
178 utf *utf_printStackTrace; /* printStackTrace */
180 utf *utf_division_by_zero; /* "/ by zero" (a message text, not a name) */
/* Descriptors. Identifier scheme is <parameters>__<return type>. */
191 utf *utf_void__void; /* ()V */
192 utf *utf_boolean__void; /* (Z)V */
193 utf *utf_byte__void; /* (B)V */
194 utf *utf_char__void; /* (C)V */
195 utf *utf_short__void; /* (S)V */
196 utf *utf_int__void; /* (I)V */
197 utf *utf_long__void; /* (J)V */
198 utf *utf_float__void; /* (F)V */
199 utf *utf_double__void; /* (D)V */
201 utf *utf_void__java_lang_ClassLoader; /* ()Ljava/lang/ClassLoader; */
202 utf *utf_void__java_lang_Object; /* ()Ljava/lang/Object; */
203 utf *utf_void__java_lang_Throwable; /* ()Ljava/lang/Throwable; */
204 utf *utf_java_lang_ClassLoader_java_lang_String__J; /* (Ljava/lang/ClassLoader;Ljava/lang/String;)J */
205 utf *utf_java_lang_Exception__V; /* (Ljava/lang/Exception;)V */
206 utf *utf_java_lang_Object__java_lang_Object; /* (Ljava/lang/Object;)Ljava/lang/Object; */
207 utf *utf_java_lang_String__void; /* (Ljava/lang/String;)V */
208 utf *utf_java_lang_String__java_lang_Class; /* (Ljava/lang/String;)Ljava/lang/Class; */
209 utf *utf_java_lang_Thread__V; /* (Ljava/lang/Thread;)V */
210 utf *utf_java_lang_Thread_java_lang_Throwable__V; /* (Ljava/lang/Thread;Ljava/lang/Throwable;)V */
211 utf *utf_java_lang_Throwable__void; /* (Ljava/lang/Throwable;)V */
212 utf *utf_java_lang_Throwable__java_lang_Throwable; /* (Ljava/lang/Throwable;)Ljava/lang/Throwable; */
/* Both placeholder texts below start with a TAB character:
   "\t<not_named_yet>" and "\t<the array package>". */
214 utf *utf_not_named_yet; /* special name for unnamed classes */
216 utf *array_packagename; /* "\t<the array package>" */
219 /* utf8_init *******************************************************************
221 Initializes the utf8 subsystem.
223 *******************************************************************************/
227 TRACESUBSYSTEMINITIALIZATION("utf8_init");
229 /* create utf8 hashtable */
231 hashtable_utf = NEW(hashtable);
233 hashtable_create(hashtable_utf, HASHTABLE_UTF_SIZE);
235 #if defined(ENABLE_STATISTICS)
237 count_utf_len += sizeof(utf*) * hashtable_utf->size;
240 /* create utf-symbols for pointer comparison of frequently used strings */
242 utf_java_lang_Object = utf_new_char("java/lang/Object");
244 utf_java_lang_Class = utf_new_char("java/lang/Class");
245 utf_java_lang_ClassLoader = utf_new_char("java/lang/ClassLoader");
246 utf_java_lang_Cloneable = utf_new_char("java/lang/Cloneable");
247 utf_java_lang_SecurityManager = utf_new_char("java/lang/SecurityManager");
248 utf_java_lang_String = utf_new_char("java/lang/String");
249 utf_java_lang_ThreadGroup = utf_new_char("java/lang/ThreadGroup");
251 utf_java_lang_ref_SoftReference =
252 utf_new_char("java/lang/ref/SoftReference");
254 utf_java_lang_ref_WeakReference =
255 utf_new_char("java/lang/ref/WeakReference");
257 utf_java_lang_ref_PhantomReference =
258 utf_new_char("java/lang/ref/PhantomReference");
260 utf_java_io_Serializable = utf_new_char("java/io/Serializable");
262 utf_java_lang_Throwable = utf_new_char("java/lang/Throwable");
263 utf_java_lang_Error = utf_new_char("java/lang/Error");
265 utf_java_lang_ClassCircularityError =
266 utf_new_char("java/lang/ClassCircularityError");
268 utf_java_lang_ClassFormatError = utf_new_char("java/lang/ClassFormatError");
270 utf_java_lang_ExceptionInInitializerError =
271 utf_new_char("java/lang/ExceptionInInitializerError");
273 utf_java_lang_IncompatibleClassChangeError =
274 utf_new_char("java/lang/IncompatibleClassChangeError");
276 utf_java_lang_InstantiationError =
277 utf_new_char("java/lang/InstantiationError");
279 utf_java_lang_InternalError = utf_new_char("java/lang/InternalError");
280 utf_java_lang_LinkageError = utf_new_char("java/lang/LinkageError");
282 utf_java_lang_NoClassDefFoundError =
283 utf_new_char("java/lang/NoClassDefFoundError");
285 utf_java_lang_OutOfMemoryError = utf_new_char("java/lang/OutOfMemoryError");
287 utf_java_lang_UnsatisfiedLinkError =
288 utf_new_char("java/lang/UnsatisfiedLinkError");
290 utf_java_lang_UnsupportedClassVersionError =
291 utf_new_char("java/lang/UnsupportedClassVersionError");
293 utf_java_lang_VerifyError = utf_new_char("java/lang/VerifyError");
295 utf_java_lang_VirtualMachineError =
296 utf_new_char("java/lang/VirtualMachineError");
298 #if defined(ENABLE_JAVASE)
299 utf_java_lang_AbstractMethodError =
300 utf_new_char("java/lang/AbstractMethodError");
302 utf_java_lang_NoSuchFieldError =
303 utf_new_char("java/lang/NoSuchFieldError");
305 utf_java_lang_NoSuchMethodError =
306 utf_new_char("java/lang/NoSuchMethodError");
309 utf_java_lang_Exception = utf_new_char("java/lang/Exception");
311 utf_java_lang_ArithmeticException =
312 utf_new_char("java/lang/ArithmeticException");
314 utf_java_lang_ArrayIndexOutOfBoundsException =
315 utf_new_char("java/lang/ArrayIndexOutOfBoundsException");
317 utf_java_lang_ArrayStoreException =
318 utf_new_char("java/lang/ArrayStoreException");
320 utf_java_lang_ClassCastException =
321 utf_new_char("java/lang/ClassCastException");
323 utf_java_lang_ClassNotFoundException =
324 utf_new_char("java/lang/ClassNotFoundException");
326 utf_java_lang_CloneNotSupportedException =
327 utf_new_char("java/lang/CloneNotSupportedException");
329 utf_java_lang_IllegalAccessException =
330 utf_new_char("java/lang/IllegalAccessException");
332 utf_java_lang_IllegalArgumentException =
333 utf_new_char("java/lang/IllegalArgumentException");
335 utf_java_lang_IllegalMonitorStateException =
336 utf_new_char("java/lang/IllegalMonitorStateException");
338 utf_java_lang_InstantiationException =
339 utf_new_char("java/lang/InstantiationException");
341 utf_java_lang_InterruptedException =
342 utf_new_char("java/lang/InterruptedException");
344 utf_java_lang_NegativeArraySizeException =
345 utf_new_char("java/lang/NegativeArraySizeException");
347 utf_java_lang_NullPointerException =
348 utf_new_char("java/lang/NullPointerException");
350 utf_java_lang_StringIndexOutOfBoundsException =
351 utf_new_char("java/lang/StringIndexOutOfBoundsException");
353 utf_java_lang_reflect_InvocationTargetException =
354 utf_new_char("java/lang/reflect/InvocationTargetException");
356 utf_java_security_PrivilegedActionException =
357 utf_new_char("java/security/PrivilegedActionException");
359 #if defined(ENABLE_JAVASE)
360 utf_java_lang_Void = utf_new_char("java/lang/Void");
363 utf_java_lang_Boolean = utf_new_char("java/lang/Boolean");
364 utf_java_lang_Byte = utf_new_char("java/lang/Byte");
365 utf_java_lang_Character = utf_new_char("java/lang/Character");
366 utf_java_lang_Short = utf_new_char("java/lang/Short");
367 utf_java_lang_Integer = utf_new_char("java/lang/Integer");
368 utf_java_lang_Long = utf_new_char("java/lang/Long");
369 utf_java_lang_Float = utf_new_char("java/lang/Float");
370 utf_java_lang_Double = utf_new_char("java/lang/Double");
372 #if defined(ENABLE_JAVASE)
373 utf_java_lang_StackTraceElement =
374 utf_new_char("java/lang/StackTraceElement");
376 utf_java_lang_reflect_Constructor =
377 utf_new_char("java/lang/reflect/Constructor");
379 utf_java_lang_reflect_Field = utf_new_char("java/lang/reflect/Field");
380 utf_java_lang_reflect_Method = utf_new_char("java/lang/reflect/Method");
381 utf_java_util_Vector = utf_new_char("java/util/Vector");
384 utf_InnerClasses = utf_new_char("InnerClasses");
385 utf_ConstantValue = utf_new_char("ConstantValue");
386 utf_Code = utf_new_char("Code");
387 utf_Exceptions = utf_new_char("Exceptions");
388 utf_LineNumberTable = utf_new_char("LineNumberTable");
389 utf_SourceFile = utf_new_char("SourceFile");
391 #if defined(ENABLE_JAVASE)
392 utf_EnclosingMethod = utf_new_char("EnclosingMethod");
393 utf_Signature = utf_new_char("Signature");
394 utf_StackMapTable = utf_new_char("StackMapTable");
396 #if defined(ENABLE_ANNOTATIONS)
397 utf_RuntimeVisibleAnnotations = utf_new_char("RuntimeVisibleAnnotations");
398 utf_RuntimeInvisibleAnnotations = utf_new_char("RuntimeInvisibleAnnotations");
399 utf_RuntimeVisibleParameterAnnotations = utf_new_char("RuntimeVisibleParameterAnnotations");
400 utf_RuntimeInvisibleParameterAnnotations = utf_new_char("RuntimeInvisibleParameterAnnotations");
401 utf_AnnotationDefault = utf_new_char("AnnotationDefault");
405 utf_init = utf_new_char("<init>");
406 utf_clinit = utf_new_char("<clinit>");
407 utf_clone = utf_new_char("clone");
408 utf_finalize = utf_new_char("finalize");
409 utf_run = utf_new_char("run");
411 utf_add = utf_new_char("add");
412 utf_remove = utf_new_char("remove");
413 utf_addThread = utf_new_char("addThread");
414 utf_removeThread = utf_new_char("removeThread");
415 utf_put = utf_new_char("put");
416 utf_get = utf_new_char("get");
417 utf_uncaughtException = utf_new_char("uncaughtException");
418 utf_value = utf_new_char("value");
420 utf_fillInStackTrace = utf_new_char("fillInStackTrace");
421 utf_findNative = utf_new_char("findNative");
422 utf_getSystemClassLoader = utf_new_char("getSystemClassLoader");
423 utf_initCause = utf_new_char("initCause");
424 utf_loadClass = utf_new_char("loadClass");
425 utf_loadClassInternal = utf_new_char("loadClassInternal");
426 utf_printStackTrace = utf_new_char("printStackTrace");
428 utf_division_by_zero = utf_new_char("/ by zero");
430 utf_Z = utf_new_char("Z");
431 utf_B = utf_new_char("B");
432 utf_C = utf_new_char("C");
433 utf_S = utf_new_char("S");
434 utf_I = utf_new_char("I");
435 utf_J = utf_new_char("J");
436 utf_F = utf_new_char("F");
437 utf_D = utf_new_char("D");
439 utf_void__void = utf_new_char("()V");
440 utf_boolean__void = utf_new_char("(Z)V");
441 utf_byte__void = utf_new_char("(B)V");
442 utf_char__void = utf_new_char("(C)V");
443 utf_short__void = utf_new_char("(S)V");
444 utf_int__void = utf_new_char("(I)V");
445 utf_long__void = utf_new_char("(J)V");
446 utf_float__void = utf_new_char("(F)V");
447 utf_double__void = utf_new_char("(D)V");
448 utf_void__java_lang_Object = utf_new_char("()Ljava/lang/Object;");
449 utf_void__java_lang_Throwable = utf_new_char("()Ljava/lang/Throwable;");
451 utf_void__java_lang_ClassLoader =
452 utf_new_char("()Ljava/lang/ClassLoader;");
454 utf_java_lang_ClassLoader_java_lang_String__J =
455 utf_new_char("(Ljava/lang/ClassLoader;Ljava/lang/String;)J");
457 utf_java_lang_Exception__V = utf_new_char("(Ljava/lang/Exception;)V");
459 utf_java_lang_Object__java_lang_Object =
460 utf_new_char("(Ljava/lang/Object;)Ljava/lang/Object;");
462 utf_java_lang_String__void = utf_new_char("(Ljava/lang/String;)V");
464 utf_java_lang_String__java_lang_Class =
465 utf_new_char("(Ljava/lang/String;)Ljava/lang/Class;");
467 utf_java_lang_Thread__V = utf_new_char("(Ljava/lang/Thread;)V");
469 utf_java_lang_Thread_java_lang_Throwable__V =
470 utf_new_char("(Ljava/lang/Thread;Ljava/lang/Throwable;)V");
472 utf_java_lang_Throwable__void = utf_new_char("(Ljava/lang/Throwable;)V");
474 utf_java_lang_Throwable__java_lang_Throwable =
475 utf_new_char("(Ljava/lang/Throwable;)Ljava/lang/Throwable;");
477 utf_null = utf_new_char("null");
478 utf_not_named_yet = utf_new_char("\t<not_named_yet>");
479 array_packagename = utf_new_char("\t<the array package>");
481 /* everything's ok */
487 /* utf_hashkey *****************************************************************
489 The hashkey is computed from the utf-text by using up to 8
490 characters. For utf-symbols longer than 15 characters 3 characters
491 are taken from the beginning and the end, 2 characters are taken
494 *******************************************************************************/
/* Helper macros for utf_hashkey. Both read through the function's local
   `text` pointer; nbs() pre-increments it first, so every nbs() use consumes
   one more byte, while fbs() reads the current byte without advancing.

   NOTE(review): `*text` has type (plain) char. Where char is signed, bytes
   >= 0x80 are sign-extended by the (u4) conversion before the shift, so the
   key for non-ASCII text differs between signed-char and unsigned-char
   platforms.
   NOTE(review): `val` is not parenthesised in the expansion; all uses shown
   pass plain integer literals. */
496 #define nbs(val) ((u4) *(++text) << val) /* get next byte, left shift by val */
497 #define fbs(val) ((u4) *( text) << val) /* get first byte, left shift by val */
/* Computes a cheap hash key from a subset of the bytes of `text`.
   text.....bytes to hash (read only)
   length...number of bytes; selects one of the hand-written cases below

   NOTE(review): expressions such as `fbs(0) ^ nbs(3) ^ nbs(5)` contain
   several `++text` side effects that are unsequenced relative to each other
   (the operands of `^` have no defined evaluation order). C treats multiple
   unsequenced modifications of one object as undefined behaviour; the code
   assumes left-to-right evaluation. */
499 u4 utf_hashkey(const char *text, u4 length)
501 const char *start_pos = text; /* pointer to utf text */
505 case 0: /* empty string */
/* lengths 1..8: every byte takes part, each with its own shift */
508 case 1: return fbs(0);
509 case 2: return fbs(0) ^ nbs(3);
510 case 3: return fbs(0) ^ nbs(3) ^ nbs(5);
511 case 4: return fbs(0) ^ nbs(2) ^ nbs(4) ^ nbs(6);
512 case 5: return fbs(0) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(6);
513 case 6: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(5) ^ nbs(6);
514 case 7: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(5) ^ nbs(6);
515 case 8: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(5) ^ nbs(6) ^ nbs(7);
/* longer fixed-length cases: a partial key `a` is combined with further
   bytes */
522 return a ^ nbs(4) ^ nbs(5) ^ nbs(6) ^ nbs(7) ^ nbs(8);
531 return a ^ nbs(6) ^ nbs(7) ^ nbs(8) ^ nbs(9);
540 return a ^ nbs(6) ^ nbs(7) ^ nbs(8) ^ nbs(9) ^ nbs(10);
552 return a ^ nbs(9) ^ nbs(10);
564 return a ^ nbs(9) ^ nbs(10);
575 return a ^ nbs(9) ^ nbs(10) ^ nbs(11);
586 return a ^ nbs(9) ^ nbs(10) ^ nbs(11);
588 default: /* 3 characters from beginning */
594 /* 2 characters from middle */
/* reposition the read pointer to the middle of the string */
595 text = start_pos + (length / 2);
600 /* 3 characters from end */
/* 4 bytes before the end: nbs() pre-increments, so the first byte it reads
   is at offset length - 3 */
601 text = start_pos + length - 4;
606 return a ^ nbs(10) ^ nbs(11);
610 /* utf_full_hashkey ************************************************************
612 This function computes a hash value using all bytes in the string.
614 The algorithm is the "One-at-a-time" algorithm as published
615 by Bob Jenkins on http://burtleburtle.net/bob/hash/doobs.html.
617 *******************************************************************************/
/* Hashes all `length` bytes of `text` (see the algorithm reference in the
   banner comment). */
619 u4 utf_full_hashkey(const char *text, u4 length)
/* bytes are read as unsigned char, so the result does not depend on the
   signedness of plain char */
621 register const unsigned char *p = (const unsigned char *) text;
/* shift-add mixing step */
629 hash += (hash << 10);
/* further shift-xor / shift-add steps applied to the accumulated value */
633 hash ^= (hash >> 11);
634 hash += (hash << 15);
639 /* unicode_hashkey *************************************************************
641 Compute the hashkey of a unicode string.
643 *******************************************************************************/
645 u4 unicode_hashkey(u2 *text, u2 len)
/* Reinterprets the u2 array as a byte sequence and delegates to utf_hashkey.
   NOTE(review): `len` is forwarded unchanged as utf_hashkey's byte length.
   If `len` counts u2 elements, only the first `len` bytes (half of the
   array) are visible to the hash, and the key depends on the in-memory byte
   order of u2 -- confirm this is intended. */
647 return utf_hashkey((char *) text, len);
651 /* utf_new *********************************************************************
653 Creates a new utf-symbol, the text of the symbol is passed as a
654 u1-array. The function searches the utf-hashtable for a utf-symbol
655 with this text. On success the element is returned, otherwise a new
656 hashtable element is created.
658 If the number of entries in the hashtable exceeds twice the size of
659 the hashtable slots a reorganization of the hashtable is done and
660 the utf symbols are copied to a new hashtable with doubled size.
662 *******************************************************************************/
/* Looks up the symbol with the given bytes in hashtable_utf and, when it is
   not present, creates and inserts it.
   text.....bytes of the symbol (copied, need not be zero-terminated)
   length...number of bytes; being u2, a symbol holds at most 65535 bytes */
664 utf *utf_new(const char *text, u2 length)
666 u4 key; /* hashkey computed from utf-text */
667 u4 slot; /* slot in hashtable */
668 utf *u; /* hashtable element */
/* the whole lookup/insert/resize sequence runs under the table's monitor */
671 LOCK_MONITOR_ENTER(hashtable_utf->header);
673 #if defined(ENABLE_STATISTICS)
678 key = utf_hashkey(text, length);
/* size is a power of 2, so masking with size - 1 yields the slot index */
679 slot = key & (hashtable_utf->size - 1);
680 u = hashtable_utf->ptr[slot];
682 /* search external hash chain for utf-symbol */
/* cheap length comparison first, byte-wise comparison only on a match */
685 if (u->blength == length) {
686 /* compare text of hashtable elements */
688 for (i = 0; i < length; i++)
689 if (text[i] != u->text[i])
692 #if defined(ENABLE_STATISTICS)
694 count_utf_new_found++;
697 /* symbol found in hashtable */
699 LOCK_MONITOR_EXIT(hashtable_utf->header);
705 u = u->hashlink; /* next element in external chain */
708 /* location in hashtable found, create new utf element */
712 u->blength = length; /* length in bytes of utfstring */
/* the new element becomes the head of the slot's chain */
713 u->hashlink = hashtable_utf->ptr[slot]; /* link in external hashchain */
/* one extra byte for the terminating zero written below; blength does not
   count it */
714 u->text = mem_alloc(length + 1);/* allocate memory for utf-text */
716 memcpy(u->text, text, length); /* copy utf-text */
717 u->text[length] = '\0';
719 #if defined(ENABLE_STATISTICS)
721 count_utf_len += sizeof(utf) + length + 1;
724 hashtable_utf->ptr[slot] = u; /* insert symbol into table */
725 hashtable_utf->entries++; /* update number of entries */
/* grow once there are more than two entries per slot on average */
727 if (hashtable_utf->entries > (hashtable_utf->size * 2)) {
729 /* reorganization of hashtable, average length of the external
730 chains is approx. 2 */
732 hashtable *newhash; /* the new hashtable */
738 /* create new hashtable, double the size */
740 newhash = hashtable_resize(hashtable_utf, hashtable_utf->size * 2);
742 #if defined(ENABLE_STATISTICS)
744 count_utf_len += sizeof(utf*) * hashtable_utf->size;
747 /* transfer elements to new hashtable */
/* walk every slot of the old table; elements are relinked, not copied */
749 for (i = 0; i < hashtable_utf->size; i++) {
750 u = hashtable_utf->ptr[i];
/* the slot is recomputed because the mask of the larger table differs */
754 slot = utf_hashkey(u->text, u->blength) & (newhash->size - 1);
756 u->hashlink = (utf *) newhash->ptr[slot];
757 newhash->ptr[slot] = u;
759 /* follow link in external hash chain */
765 /* dispose old table */
767 hashtable_free(hashtable_utf);
769 hashtable_utf = newhash;
/* NOTE(review): after a resize, hashtable_utf already points to newhash
   here, while the monitor was entered through the old table's header. This
   is only balanced if hashtable_resize hands the old header over to the new
   table -- verify against hashtable_resize/hashtable_free. */
772 LOCK_MONITOR_EXIT(hashtable_utf->header);
778 /* utf_new_u2 ******************************************************************
780 Make utf symbol from u2 array, if isclassname is true '.' is
783 *******************************************************************************/
/* Encodes `unicode_length` u2 characters starting at `unicode_pos` into a
   temporary byte buffer and interns the result through utf_new.
   isclassname...when true, '.' characters take the "convert classname" path
                 below */
785 utf *utf_new_u2(u2 *unicode_pos, u4 unicode_length, bool isclassname)
787 char *buffer; /* memory buffer for unicode characters */
788 char *pos; /* pointer to current position in buffer */
789 u4 left; /* unicode characters left */
790 u4 buflength; /* utf length in bytes of the u2 array */
791 utf *result; /* resulting utf-string */
794 /* determine utf length in bytes and allocate memory */
796 buflength = u2_utflength(unicode_pos, unicode_length);
797 buffer = MNEW(char, buflength);
/* i is post-incremented inside the condition, so the body runs exactly
   unicode_length times */
802 for (i = 0; i++ < unicode_length; unicode_pos++) {
803 /* next unicode character */
/* 1-byte form only for 0x01..0x7f; c == 0 fails this test and is encoded by
   the 2-byte branch below */
806 if ((c != 0) && (c < 0x80)) {
/* `left` is unsigned; the cast to int detects that it has wrapped below 0 */
809 if ((int) left < 0) break;
810 /* convert classname */
811 if (isclassname && c == '.')
816 } else if (c < 0x800) {
/* 2-byte form: lead byte 110xxxxx carries the bits above the low six */
818 unsigned char high = c >> 6;
819 unsigned char low = c & 0x3F;
821 if ((int) left < 0) break;
822 *pos++ = high | 0xC0;
/* 3-byte form: lead byte 1110xxxx, then two bytes of six bits each */
828 char mid = (c >> 6) & 0x3F;
831 if ((int) left < 0) break;
832 *pos++ = high | 0xE0;
838 /* insert utf-string into symbol-table */
/* NOTE(review): buflength is u4 but utf_new takes a u2 length, so a result
   longer than 65535 bytes would be narrowed silently. */
839 result = utf_new(buffer,buflength);
/* utf_new copies the bytes, so the temporary buffer can be released */
841 MFREE(buffer, char, buflength);
847 /* utf_new_char ****************************************************************
849 Creates a new utf symbol, the text for this symbol is passed as a
850 c-string ( = char* ).
852 *******************************************************************************/
854 utf *utf_new_char(const char *text)
/* `text` must be a zero-terminated string. The strlen() result is narrowed
   to utf_new's u2 length parameter.
   NOTE(review): a string of 65536 bytes or more would be interned with a
   truncated length -- confirm callers never pass such input. */
856 return utf_new(text, strlen(text));
860 /* utf_new_char_classname ******************************************************
862 Creates a new utf symbol, the text for this symbol is passed as a
863 c-string ( = char* ) "." characters are going to be replaced by
864 "/". Since the above function is used often, this is a separate
865 function, instead of an if.
867 *******************************************************************************/
869 utf *utf_new_char_classname(const char *text)
/* a private copy is made only when there is at least one '.' to replace */
871 if (strchr(text, '.')) {
/* `text` is const, so the replacement is done on a duplicate.
   NOTE(review): the strdup() result is used by strlen() on the next line
   without a NULL check. */
872 char *txt = strdup(text);
873 char *end = txt + strlen(txt);
877 for (c = txt; c < end; c++)
878 if (*c == '.') *c = '/';
/* NOTE(review): strlen(txt) is recomputed here although `end - txt` already
   holds the same value. */
880 tmpRes = utf_new(txt, strlen(txt));
/* no '.' present: intern the caller's string unchanged */
886 return utf_new(text, strlen(text));
890 /* utf_nextu2 ******************************************************************
892 Read the next unicode character from the utf string and increment
893 the utf-string pointer accordingly.
895 CAUTION: This function is unsafe for input that was not checked
898 *******************************************************************************/
/* Decodes the character at *utf_ptr and advances *utf_ptr past it.
   NOTE: the function has no end-of-buffer argument. utf[1] and utf[2] are
   read whenever the lead byte asks for them, so the caller must guarantee
   that those bytes are readable. */
900 u2 utf_nextu2(char **utf_ptr)
902 /* uncompressed unicode character */
904 /* current position in utf text */
905 unsigned char *utf = (unsigned char *) (*utf_ptr);
906 /* bytes representing the unicode character */
907 unsigned char ch1, ch2, ch3;
908 /* number of bytes used to represent the unicode character */
/* dispatch on the high nibble of the lead byte */
911 switch ((ch1 = utf[0]) >> 4) {
912 default: /* 1 byte */
916 case 0xD: /* 2 bytes */
/* a continuation byte must have the form 10xxxxxx */
917 if (((ch2 = utf[1]) & 0xC0) == 0x80) {
/* 5 payload bits from the lead byte, 6 from the continuation byte */
918 unsigned char high = ch1 & 0x1F;
919 unsigned char low = ch2 & 0x3F;
920 unicode_char = (high << 6) + low;
925 case 0xE: /* 2 or 3 bytes */
926 if (((ch2 = utf[1]) & 0xC0) == 0x80) {
927 if (((ch3 = utf[2]) & 0xC0) == 0x80) {
/* 4 + 6 + 6 payload bits make up the 16-bit character */
928 unsigned char low = ch3 & 0x3f;
929 unsigned char mid = ch2 & 0x3f;
930 unsigned char high = ch1 & 0x0f;
931 unicode_char = (((high << 6) + mid) << 6) + low;
939 /* update position in utf-text */
940 *utf_ptr = (char *) (utf + len);
946 /* utf_bytes *******************************************************************
948 Determine number of bytes (aka. octets) in the utf string.
951 u............utf string
954 The number of octets of this utf string.
955 There is _no_ terminating zero included in this count.
957 *******************************************************************************/
965 /* utf_get_number_of_u2s_for_buffer ********************************************
967 Determine number of UTF-16 u2s in the given UTF-8 buffer
969 CAUTION: This function is unsafe for input that was not checked
972 CAUTION: Use this function *only* when you want to convert a UTF-8 buffer
973 to an array of u2s (UTF-16) and want to know how many of them you will get.
974 All other uses of this function are probably wrong.
977 buffer........points to first char in buffer
978 blength.......number of _bytes_ in the buffer
981 the number of u2s needed to hold this string in UTF-16 encoding.
982 There is _no_ terminating zero included in this count.
984 NOTE: Unlike utf_get_number_of_u2s, this function never throws an
987 *******************************************************************************/
/* Walks the buffer with utf_nextu2 from its first byte to
   buffer + blength. */
989 u4 utf_get_number_of_u2s_for_buffer(const char *buffer, u4 blength)
991 const char *endpos; /* points behind utf string */
992 const char *utf_ptr; /* current position in utf text */
993 u4 len = 0; /* number of unicode characters */
996 endpos = utf_ptr + blength;
998 while (utf_ptr < endpos) {
1000 /* next unicode character */
/* the decoded value is discarded; only the pointer advance is needed. The
   cast removes const solely to match utf_nextu2's `char **` parameter. */
1001 utf_nextu2((char **)&utf_ptr);
/* the last character must end exactly at endpos; this check is compiled out
   when NDEBUG is defined */
1004 assert(utf_ptr == endpos);
1010 /* utf_get_number_of_u2s *******************************************************
1012 Determine number of UTF-16 u2s in the utf string.
1014 CAUTION: This function is unsafe for input that was not checked
1017 CAUTION: Use this function *only* when you want to convert a utf string
1018 to an array of u2s and want to know how many of them you will get.
1019 All other uses of this function are probably wrong.
1022 u............utf string
1025 the number of u2s needed to hold this string in UTF-16 encoding.
1026 There is _no_ terminating zero included in this count.
1027 XXX 0 if a NullPointerException has been thrown (see below)
1029 *******************************************************************************/
/* Variant of the buffer-based counter that takes a utf symbol and reports
   problems through the VM's exception mechanism instead of an assert. */
1031 u4 utf_get_number_of_u2s(utf *u)
1033 char *endpos; /* points behind utf string */
1034 char *utf_ptr; /* current position in utf text */
1035 u4 len = 0; /* number of unicode characters */
1037 /* XXX this is probably not checked by most callers! Review this after */
1038 /* the invalid uses of this function have been eliminated */
/* callers have to check for a pending exception to notice this path (see
   the XXX remark above) */
1040 exceptions_throw_nullpointerexception();
1044 endpos = UTF_END(u);
1047 while (utf_ptr < endpos) {
1049 /* next unicode character */
1050 utf_nextu2(&utf_ptr);
/* utf_nextu2 does no bounds checking, so a truncated final sequence shows up
   as the pointer having moved past endpos */
1053 if (utf_ptr != endpos) {
1054 /* string ended abruptly */
1055 exceptions_throw_internalerror("Illegal utf8 string");
1063 /* utf8_safe_number_of_u2s *****************************************************
1065 Determine number of UTF-16 u2s needed for decoding the given UTF-8 string.
1066 (For invalid UTF-8 the U+fffd replacement character will be counted.)
1068 This function is safe even for invalid UTF-8 strings.
1071 text..........zero-terminated(!) UTF-8 string (may be invalid)
1073 nbytes........strlen(text). (This is needed to completely emulate
1077 the number of u2s needed to hold this string in UTF-16 encoding.
1078 There is _no_ terminating zero included in this count.
1080 *******************************************************************************/
/* Counting twin of utf8_safe_convert_to_u2s: walks the same branches but
   only counts the u2s that function would write. */
1082 s4 utf8_safe_number_of_u2s(const char *text, s4 nbytes) {
1083 register const unsigned char *t;
1086 register const unsigned char *tlimit;
1094 assert(nbytes >= 0);
/* bytes are examined as unsigned char; tlimit points one past the last byte */
1097 t = (const unsigned char *) text;
1098 tlimit = t + nbytes;
1100 /* CAUTION: Keep this code in sync with utf8_safe_convert_to_u2s! */
1106 /* highest bit set, non-ASCII character */
/* NOTE: the 2-byte branch reads the following byte without comparing
   against tlimit first; the documented zero terminator of `text` is what
   keeps that read inside the string. */
1108 if ((byte & 0xe0) == 0xc0) {
1109 /* 2-byte: should be 110..... 10...... ? */
1111 if ((*t++ & 0xc0) == 0x80)
1112 ; /* valid 2-byte */
1116 else if ((byte & 0xf0) == 0xe0) {
1117 /* 3-byte: should be 1110.... 10...... 10...... */
/* a sequence cut off by the end of the input counts as one u2 (len + 1) */
1121 return len + 1; /* invalid, stop here */
1123 if ((*t++ & 0xc0) == 0x80) {
1124 if ((*t++ & 0xc0) == 0x80)
1125 ; /* valid 3-byte */
1132 else if ((byte & 0xf8) == 0xf0) {
1133 /* 4-byte: should be 11110... 10...... 10...... 10...... */
1137 return len + 1; /* invalid, stop here */
1139 if (((byte1 = *t++) & 0xc0) == 0x80) {
1140 if (((byte2 = *t++) & 0xc0) == 0x80) {
1141 if (((byte3 = *t++) & 0xc0) == 0x80) {
1142 /* valid 4-byte UTF-8? */
/* assemble the 21-bit code point from 3 + 6 + 6 + 6 payload bits */
1143 value = ((byte & 0x07) << 18)
1144 | ((byte1 & 0x3f) << 12)
1145 | ((byte2 & 0x3f) << 6)
1146 | ((byte3 & 0x3f) );
/* above 0x10FFFF: out of the Unicode range; above 0xFFFF: needs two u2s */
1148 if (value > 0x10FFFF)
1150 else if (value > 0xFFFF)
1151 len += 1; /* we need surrogates */
1153 ; /* 16bit suffice */
1164 else if ((byte & 0xfc) == 0xf8) {
1165 /* invalid 5-byte */
1167 return len + 1; /* invalid, stop here */
/* skip at most `skip` continuation bytes (10xxxxxx) of the invalid sequence */
1170 for (; skip && ((*t & 0xc0) == 0x80); --skip)
1173 else if ((byte & 0xfe) == 0xfc) {
1174 /* invalid 6-byte */
1176 return len + 1; /* invalid, stop here */
1179 for (; skip && ((*t & 0xc0) == 0x80); --skip)
1191 /* ASCII character, common case */
1201 /* utf8_safe_convert_to_u2s ****************************************************
1203 Convert the given UTF-8 string to UTF-16 into a pre-allocated buffer.
1204 (Invalid UTF-8 will be replaced with the U+fffd replacement character.)
1205 Use utf8_safe_number_of_u2s to determine the number of u2s to allocate.
1207 This function is safe even for invalid UTF-8 strings.
1210 text..........zero-terminated(!) UTF-8 string (may be invalid)
1212 nbytes........strlen(text). (This is needed to completely emulate
1214 buffer........a preallocated array of u2s to receive the decoded
1215 string. Use utf8_safe_number_of_u2s to get the
1216 required number of u2s for allocating this.
1218 *******************************************************************************/
/* U+FFFD, written for every malformed or out-of-range sequence */
1220 #define UNICODE_REPLACEMENT 0xfffd
/* Decodes `nbytes` bytes of `text` into `buffer`.
   NOTE: there is no output length argument and writes to `buffer` are not
   bounds-checked; the caller must size it with utf8_safe_number_of_u2s. */
1222 void utf8_safe_convert_to_u2s(const char *text, s4 nbytes, u2 *buffer) {
1223 register const unsigned char *t;
1225 register const unsigned char *tlimit;
1233 assert(nbytes >= 0);
/* bytes are examined as unsigned char; tlimit points one past the last byte */
1235 t = (const unsigned char *) text;
1236 tlimit = t + nbytes;
1238 /* CAUTION: Keep this code in sync with utf8_safe_number_of_u2s! */
1244 /* highest bit set, non-ASCII character */
1246 if ((byte & 0xe0) == 0xc0) {
1247 /* 2-byte: should be 110..... 10...... */
/* read without a tlimit check; the documented zero terminator of `text`
   bounds this access */
1249 if (((byte1 = *t++) & 0xc0) == 0x80) {
1250 /* valid 2-byte UTF-8 */
1251 *buffer++ = ((byte & 0x1f) << 6)
1252 | ((byte1 & 0x3f) );
1255 *buffer++ = UNICODE_REPLACEMENT;
1259 else if ((byte & 0xf0) == 0xe0) {
1260 /* 3-byte: should be 1110.... 10...... 10...... */
/* two continuation bytes are required; fewer left means a truncated
   sequence */
1262 if (t + 2 > tlimit) {
1263 *buffer++ = UNICODE_REPLACEMENT;
1267 if (((byte1 = *t++) & 0xc0) == 0x80) {
1268 if (((byte2 = *t++) & 0xc0) == 0x80) {
1269 /* valid 3-byte UTF-8 */
1270 *buffer++ = ((byte & 0x0f) << 12)
1271 | ((byte1 & 0x3f) << 6)
1272 | ((byte2 & 0x3f) );
1275 *buffer++ = UNICODE_REPLACEMENT;
1280 *buffer++ = UNICODE_REPLACEMENT;
1284 else if ((byte & 0xf8) == 0xf0) {
1285 /* 4-byte: should be 11110... 10...... 10...... 10...... */
1287 if (t + 3 > tlimit) {
1288 *buffer++ = UNICODE_REPLACEMENT;
1292 if (((byte1 = *t++) & 0xc0) == 0x80) {
1293 if (((byte2 = *t++) & 0xc0) == 0x80) {
1294 if (((byte3 = *t++) & 0xc0) == 0x80) {
1295 /* valid 4-byte UTF-8? */
/* assemble the 21-bit code point from 3 + 6 + 6 + 6 payload bits */
1296 value = ((byte & 0x07) << 18)
1297 | ((byte1 & 0x3f) << 12)
1298 | ((byte2 & 0x3f) << 6)
1299 | ((byte3 & 0x3f) );
1301 if (value > 0x10FFFF) {
1302 *buffer++ = UNICODE_REPLACEMENT;
1304 else if (value > 0xFFFF) {
1305 /* we need surrogates */
/* (value >> 10) - 0x40 equals (value - 0x10000) >> 10, i.e. the upper ten
   bits of the offset into the supplementary range; the lower ten bits of
   `value` are unaffected by that subtraction */
1306 *buffer++ = 0xd800 | ((value >> 10) - 0x40);
1307 *buffer++ = 0xdc00 | (value & 0x03ff);
1310 *buffer++ = value; /* 16bit suffice */
1313 *buffer++ = UNICODE_REPLACEMENT;
1318 *buffer++ = UNICODE_REPLACEMENT;
1323 *buffer++ = UNICODE_REPLACEMENT;
/* lead bytes 111110xx (5-byte form) and 1111110x (6-byte form) are never
   valid: continuation bytes are skipped and one replacement is written */
1327 else if ((byte & 0xfc) == 0xf8) {
1328 if (t + 4 > tlimit) {
1329 *buffer++ = UNICODE_REPLACEMENT;
1334 for (; skip && ((*t & 0xc0) == 0x80); --skip)
1336 *buffer++ = UNICODE_REPLACEMENT;
1338 else if ((byte & 0xfe) == 0xfc) {
1339 if (t + 5 > tlimit) {
1340 *buffer++ = UNICODE_REPLACEMENT;
1345 for (; skip && ((*t & 0xc0) == 0x80); --skip)
1347 *buffer++ = UNICODE_REPLACEMENT;
1350 *buffer++ = UNICODE_REPLACEMENT;
1358 /* ASCII character, common case */
1366 /* u2_utflength ****************************************************************
1368 Returns the utf length in bytes of a u2 array.
     text........array of u2 characters to measure
     u2_length...number of u2s in text
     NOTE(review): this listing skips source lines; the per-branch length
     increments and the return statement are not visible here.
1370 *******************************************************************************/
1372 u4 u2_utflength(u2 *text, u4 u2_length)
1374 u4 result_len = 0; /* utf length in bytes */
1375 u2 ch; /* current unicode character */
1378 for (len = 0; len < u2_length; len++) {
1379 /* next unicode character */
1382 /* determine bytes required to store unicode character as utf */
1383 if (ch && (ch < 0x80)) /* non-zero 7-bit value; a zero fails this test */
1385 else if (ch < 0x800) /* also reached for ch == 0 */
1395 /* utf_copy ********************************************************************
1397 Copy the given utf string byte-for-byte to a buffer.
1400 buffer.......the buffer; no size is passed, so the caller must provide
     room for u->blength + 1 chars
1401 u............the utf string
1403 *******************************************************************************/
1405 void utf_copy(char *buffer, utf *u)
1407 /* our utf strings are zero-terminated (done by utf_new), so the count
     blength + 1 covers the text plus its terminating zero */
1408 MCOPY(buffer, u->text, char, u->blength + 1);
1412 /* utf_cat *********************************************************************
1414 Append the given utf string byte-for-byte to a buffer.
1417 buffer.......the buffer; must already hold a zero-terminated string
     (its end is located with strlen) and have room for
     u->blength + 1 further chars
1418 u............the utf string
1420 *******************************************************************************/
1422 void utf_cat(char *buffer, utf *u)
1424 /* our utf strings are zero-terminated (done by utf_new), so the count
     blength + 1 covers the text plus its terminating zero */
1425 MCOPY(buffer + strlen(buffer), u->text, char, u->blength + 1);
1429 /* utf_copy_classname **********************************************************
1431 Copy the given utf classname byte-for-byte to a buffer.
1432 '/' is replaced by '.'
1435 buffer.......the buffer
1436 u............the utf string
     NOTE(review): this listing skips source lines; the declarations and
     the body of the copy loop are not visible here.
1438 *******************************************************************************/
1440 void utf_copy_classname(char *buffer, utf *u)
1449 endptr = UTF_END(u) + 1; /* utfs are zero-terminated by utf_new; the +1 puts endptr behind the terminator so the loop visits it too */
1451 while (srcptr != endptr) {
1460 /* utf_cat_classname ***********************************************************
1462 Append the given utf classname byte-for-byte to a buffer.
1463 '/' is replaced by '.'
1466 buffer.......the buffer; must already hold a zero-terminated string
     (its end is located with strlen)
1467 u............the utf string
1469 *******************************************************************************/
1471 void utf_cat_classname(char *buffer, utf *u)
/* delegate to utf_copy_classname, writing at the current end of buffer */
1473 utf_copy_classname(buffer + strlen(buffer), u);
1476 /* utf_display_printable_ascii *************************************************
1478 Write utf symbol to stdout (for debugging purposes).
1479 Non-printable and non-ASCII characters are printed as '?'.
     NOTE(review): this listing skips source lines; the initialisation of
     utf_ptr and the output statements are not visible here.
1481 *******************************************************************************/
1483 void utf_display_printable_ascii(utf *u)
1485 char *endpos; /* points behind utf string */
1486 char *utf_ptr; /* current position in utf text */
1494 endpos = UTF_END(u);
1497 while (utf_ptr < endpos) {
1498 /* read next unicode character */
1500 u2 c = utf_nextu2(&utf_ptr);
1502 if ((c >= 32) && (c <= 127)) /* NOTE(review): 127 (DEL) passes this test although it is not printable */
1512 /* utf_display_printable_ascii_classname ***************************************
1514 Write utf symbol to stdout with `/' converted to `.' (for debugging
1516 Non-printable and non-ASCII characters are printed as '?'.
     NOTE(review): this listing skips source lines; the initialisation of
     utf_ptr, the `/' conversion and the output statements are not visible
     here.
1518 *******************************************************************************/
1520 void utf_display_printable_ascii_classname(utf *u)
1522 char *endpos; /* points behind utf string */
1523 char *utf_ptr; /* current position in utf text */
1531 endpos = UTF_END(u);
1534 while (utf_ptr < endpos) {
1535 /* read next unicode character */
1537 u2 c = utf_nextu2(&utf_ptr);
1542 if ((c >= 32) && (c <= 127)) /* NOTE(review): 127 (DEL) passes this test although it is not printable */
1552 /* utf_sprint_convert_to_latin1 ************************************************
1554 Write utf symbol into c-string (for debugging purposes).
1555 Characters are converted to 8-bit Latin-1, non-Latin-1 characters yield
     (the rest of this sentence is on a line missing from this listing).
     No buffer size is passed in, so the caller must supply a buffer that
     is large enough.
1558 *******************************************************************************/
1560 void utf_sprint_convert_to_latin1(char *buffer, utf *u)
1562 char *endpos; /* points behind utf string */
1563 char *utf_ptr; /* current position in utf text */
1564 u2 pos = 0; /* position in c-string (a 16-bit index) */
1567 strcpy(buffer, "NULL"); /* presumably the u == NULL case; the guarding test is elided in this listing */
1571 endpos = UTF_END(u);
1574 while (utf_ptr < endpos)
1575 /* copy next unicode character; the u2 result is narrowed to char by the assignment */
1576 buffer[pos++] = utf_nextu2(&utf_ptr);
1578 /* terminate string */
1583 /* utf_sprint_convert_to_latin1_classname **************************************
1585 Write utf symbol into c-string with `/' converted to `.' (for debugging
1587 Characters are converted to 8-bit Latin-1, non-Latin-1 characters yield
     (the rest of this sentence is on a line missing from this listing).
     No buffer size is passed in, so the caller must supply a buffer that
     is large enough.
1590 *******************************************************************************/
1592 void utf_sprint_convert_to_latin1_classname(char *buffer, utf *u)
1594 char *endpos; /* points behind utf string */
1595 char *utf_ptr; /* current position in utf text */
1596 u2 pos = 0; /* position in c-string (a 16-bit index) */
1599 strcpy(buffer, "NULL"); /* presumably the u == NULL case; the guarding test is elided in this listing */
1603 endpos = UTF_END(u);
1606 while (utf_ptr < endpos) {
1607 /* copy next unicode character */
1608 u2 c = utf_nextu2(&utf_ptr);
1609 if (c == '/') c = '.'; /* the package separator becomes a dot */
1613 /* terminate string */
1618 /* utf_strcat_convert_to_latin1 ************************************************
1620 Like libc strcat, but uses an utf8 string.
1621 Characters are converted to 8-bit Latin-1, non-Latin-1 characters yield
     (the rest of this sentence is on a line missing from this listing).
     buffer must already hold a zero-terminated string; its end is located
     with strlen.
1624 *******************************************************************************/
1626 void utf_strcat_convert_to_latin1(char *buffer, utf *u)
/* delegate to utf_sprint_convert_to_latin1, writing at the current end of buffer */
1628 utf_sprint_convert_to_latin1(buffer + strlen(buffer), u);
1632 /* utf_strcat_convert_to_latin1_classname **************************************
1634 Like libc strcat, but uses an utf8 string.
1635 Characters are converted to 8-bit Latin-1, non-Latin-1 characters yield
     (the rest of this sentence is on a line missing from this listing).
     buffer must already hold a zero-terminated string; its end is located
     with strlen.
1638 *******************************************************************************/
1640 void utf_strcat_convert_to_latin1_classname(char *buffer, utf *u)
/* delegate to utf_sprint_convert_to_latin1_classname, writing at the current end of buffer */
1642 utf_sprint_convert_to_latin1_classname(buffer + strlen(buffer), u);
1646 /* utf_fprint_printable_ascii **************************************************
1648 Write utf symbol into file.
1649 Non-printable and non-ASCII characters are printed as '?'.
     file...stream that receives the output
     u......the utf string
     NOTE(review): this listing skips source lines; the initialisation of
     utf_ptr is not visible here.
1651 *******************************************************************************/
1653 void utf_fprint_printable_ascii(FILE *file, utf *u)
1655 char *endpos; /* points behind utf string */
1656 char *utf_ptr; /* current position in utf text */
1661 endpos = UTF_END(u);
1664 while (utf_ptr < endpos) {
1665 /* read next unicode character */
1666 u2 c = utf_nextu2(&utf_ptr);
1668 if (c >= 32 && c <= 127) fprintf(file, "%c", c); /* NOTE(review): 127 (DEL) is written as-is although it is not printable */
1669 else fprintf(file, "?");
1674 /* utf_fprint_printable_ascii_classname ****************************************
1676 Write utf symbol into file with `/' converted to `.'.
1677 Non-printable and non-ASCII characters are printed as '?'.
     file...stream that receives the output
     u......the utf string
     NOTE(review): this listing skips source lines; the initialisation of
     utf_ptr is not visible here.
1679 *******************************************************************************/
1681 void utf_fprint_printable_ascii_classname(FILE *file, utf *u)
1683 char *endpos; /* points behind utf string */
1684 char *utf_ptr; /* current position in utf text */
1689 endpos = UTF_END(u);
1692 while (utf_ptr < endpos) {
1693 /* read next unicode character */
1694 u2 c = utf_nextu2(&utf_ptr);
1695 if (c == '/') c = '.'; /* the package separator becomes a dot */
1697 if (c >= 32 && c <= 127) fprintf(file, "%c", c); /* NOTE(review): 127 (DEL) is written as-is although it is not printable */
1698 else fprintf(file, "?");
1703 /* is_valid_utf ****************************************************************
1705 Return true if the given string is a valid UTF-8 string.
     As far as visible here, the following are rejected: end_pos before
     utf_ptr, a 0x00 byte, a byte matching no lead pattern, lead bytes
     announcing more than two continuation bytes, and sequences with fewer
     remaining bytes than announced. The general overlong check, the
     surrogate check and the 0xfffe/0xffff check are commented out.
1707 utf_ptr...points to first character
1708 end_pos...points after last character
     NOTE(review): this listing skips source lines; the declarations, the
     outer loop header, the reads of c and several return statements are
     not visible here.
1710 *******************************************************************************/
1712 /* static unsigned long min_codepoint[6] = {0,1L<<7,1L<<11,1L<<16,1L<<21,1L<<26}; */
1714 bool is_valid_utf(char *utf_ptr, char *end_pos)
1721 if (end_pos < utf_ptr) return false; /* reversed range */
1722 bytes = end_pos - utf_ptr; /* number of bytes still to be checked */
1726 if (!c) return false; /* 0x00 is not allowed */
1727 if ((c & 0x80) == 0) continue; /* ASCII */
/* len = number of continuation bytes announced by the lead byte */
1729 if ((c & 0xe0) == 0xc0) len = 1; /* 110x xxxx */
1730 else if ((c & 0xf0) == 0xe0) len = 2; /* 1110 xxxx */
1731 else if ((c & 0xf8) == 0xf0) len = 3; /* 1111 0xxx */
1732 else if ((c & 0xfc) == 0xf8) len = 4; /* 1111 10xx */
1733 else if ((c & 0xfe) == 0xfc) len = 5; /* 1111 110x */
1734 else return false; /* invalid leading byte */
1736 if (len > 2) return false; /* Java limitation: only 2- and 3-byte forms pass */
/* keep the payload bits of the lead byte: mask is 0x1f for len 1, 0x0f for len 2 */
1738 v = (unsigned long)c & (0x3f >> len);
1740 if ((bytes -= len) < 0) return false; /* missing bytes */
1742 for (i = len; i--; ) { /* runs len times, once per continuation byte */
1744 if ((c & 0xc0) != 0x80) /* 10xx xxxx */
1746 v = (v << 6) | (c & 0x3f); /* append six payload bits */
1750 if (len != 1) return false; /* Java special (the condition guarding this line is elided in this listing) */
1753 /* Sun Java seems to allow overlong UTF-8 encodings */
1755 /* if (v < min_codepoint[len]) */
1756 /* XXX throw exception? */
1759 /* surrogates in UTF-8 seem to be allowed in Java classfiles */
1760 /* if (v >= 0xd800 && v <= 0xdfff) return false; */ /* surrogates */
1762 /* even these seem to be allowed */
1763 /* if (v == 0xfffe || v == 0xffff) return false; */ /* invalid codepoints */
1770 /* is_valid_name ***************************************************************
1772 Return true if the given string may be used as a class/field/method
1773 name. (Currently this only disallows empty strings and control
     characters; see the checks in the body.)
1776 NOTE: The string is assumed to have passed is_valid_utf!
1778 utf_ptr...points to first character
1779 end_pos...points after last character
     NOTE(review): this listing skips source lines; the return statements
     after the last test are not visible here.
1781 *******************************************************************************/
1783 bool is_valid_name(char *utf_ptr, char *end_pos)
1785 if (end_pos <= utf_ptr) return false; /* disallow empty names */
1787 while (utf_ptr < end_pos) {
1788 unsigned char c = *utf_ptr++;
1790 if (c < 0x20) return false; /* disallow control characters */
/* 0xc0 0x80 is the two-byte encoding of zero. utf_ptr was already advanced,
   so *utf_ptr is the byte after c; reading it relies on the is_valid_utf
   precondition that a lead byte is followed by its continuation byte. */
1791 if (c == 0xc0 && (unsigned char) *utf_ptr == 0x80) /* disallow zero */
/* is_valid_name_utf: convenience form of is_valid_name for a utf string.
   Passes u->text as the first character and UTF_END(u) as the end position
   and returns the result of is_valid_name unchanged. */
1798 bool is_valid_name_utf(utf *u)
1800 return is_valid_name(u->text, UTF_END(u));
1804 /* utf_show ********************************************************************
1806 Writes the utf symbols in the utfhash to stdout and displays the
1807 number of external hash chains grouped according to the chainlength
1808 (for debugging purposes).
     Only compiled when NDEBUG is not defined.
     NOTE(review): this listing skips source lines; the function header,
     the chain-walking loops and some statements are not visible here.
1810 *******************************************************************************/
1812 #if !defined(NDEBUG)
1816 #define CHAIN_LIMIT 20 /* limit for separated enumeration */
1818 u4 chain_count[CHAIN_LIMIT]; /* numbers of chains; index = chain length, index 0 = empty slots, last index = all longer chains */
1819 u4 max_chainlength = 0; /* maximum length of the chains */
1820 u4 sum_chainlength = 0; /* sum of the chainlengths */
1821 u4 beyond_limit = 0; /* number of utf-symbols in chains with length>=CHAIN_LIMIT (see the test in the counting loop) */
1824 printf("UTF-HASH:\n");
1826 /* show element of utf-hashtable */
1828 for (i = 0; i < hashtable_utf->size; i++) {
1829 utf *u = hashtable_utf->ptr[i];
1832 printf("SLOT %d: ", (int) i);
1836 utf_display_printable_ascii(u);
1844 printf("UTF-HASH: %d slots for %d entries\n",
1845 (int) hashtable_utf->size, (int) hashtable_utf->entries );
/* presumably an early exit: the divisions below use entries as divisor
   (the statement under this test is elided in this listing) */
1847 if (hashtable_utf->entries == 0)
1850 printf("chains:\n chainlength number of chains %% of utfstrings\n");
/* presumably clears chain_count (loop body elided in this listing) */
1852 for (i=0;i<CHAIN_LIMIT;i++)
1855 /* count numbers of hashchains according to their length */
1856 for (i=0; i<hashtable_utf->size; i++) {
1858 utf *u = (utf*) hashtable_utf->ptr[i];
1859 u4 chain_length = 0;
1861 /* determine chainlength */
1867 /* update sum of all chainlengths */
1868 sum_chainlength+=chain_length;
1870 /* determine the maximum length of the chains */
1871 if (chain_length>max_chainlength)
1872 max_chainlength = chain_length;
1874 /* update number of utf-symbols in chains with length>=CHAIN_LIMIT and
     clamp the length to the last bucket.
     NOTE(review): chains of exactly CHAIN_LIMIT-1 also land in the last
     bucket but are not added to beyond_limit, so the ">=" percentage
     printed below omits their symbols -- confirm whether the test should
     be >= CHAIN_LIMIT-1. */
1875 if (chain_length>=CHAIN_LIMIT) {
1876 beyond_limit+=chain_length;
1877 chain_length=CHAIN_LIMIT-1;
1880 /* update number of hashchains of current length */
1881 chain_count[chain_length]++;
1884 /* display results */
/* one row per exact chain length 1 .. CHAIN_LIMIT-2; chain_count[i]*i is
   the number of symbols held in chains of length i */
1885 for (i=1;i<CHAIN_LIMIT-1;i++)
1886 printf(" %2d %17d %18.2f%%\n",i,chain_count[i],(((float) chain_count[i]*i*100)/hashtable_utf->entries));
/* final row: the last bucket */
1888 printf(" >=%2d %17d %18.2f%%\n",CHAIN_LIMIT-1,chain_count[CHAIN_LIMIT-1],((float) beyond_limit*100)/hashtable_utf->entries);
1891 printf("max. chainlength:%5d\n",max_chainlength);
1893 /* avg. chainlength = sum of chainlengths / number of chains;
     size - chain_count[0] is the number of non-empty slots */
1894 printf("avg. chainlength:%5.2f\n",(float) sum_chainlength / (hashtable_utf->size-chain_count[0]));
1896 #endif /* !defined(NDEBUG) */
1900 * These are local overrides for various environment variables in Emacs.
1901 * Please do not remove this and leave it at the end of the file, where
1902 * Emacs will automagically detect them.
1903 * ---------------------------------------------------------------------
1906 * indent-tabs-mode: t
1910 * vim:noexpandtab:sw=4:ts=4: