1 /* src/vmcore/utf8.c - utf8 string functions
3 Copyright (C) 1996-2005, 2006, 2007, 2008
4 CACAOVM - Verein zur Foerderung der freien virtuellen Maschine CACAO
6 This file is part of CACAO.
8 This program is free software; you can redistribute it and/or
9 modify it under the terms of the GNU General Public License as
10 published by the Free Software Foundation; either version 2, or (at
11 your option) any later version.
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
33 #include "mm/memory.h"
35 #include "threads/lock-common.h"
37 #include "toolbox/hashtable.h"
39 #include "vm/exceptions.h"
41 #include "vmcore/options.h"
43 #if defined(ENABLE_STATISTICS)
44 # include "vmcore/statistics.h"
47 #include "vmcore/utf8.h"
50 /* global variables ***********************************************************/
52 /* hashsize must be a power of 2: utf_new picks a slot with key & (size - 1) */
54 #define HASHTABLE_UTF_SIZE 16384 /* initial size (2^14) passed to hashtable_create for the utf-hash */
56 hashtable *hashtable_utf; /* hashtable for utf8-symbols; utf_new replaces it with a resized table when it grows */
59 /* utf-symbols for pointer comparison of frequently used strings **************/
61 utf *utf_java_lang_Object;
63 utf *utf_java_lang_Class;
64 utf *utf_java_lang_ClassLoader;
65 utf *utf_java_lang_Cloneable;
66 utf *utf_java_lang_SecurityManager;
67 utf *utf_java_lang_String;
68 utf *utf_java_lang_ThreadGroup;
69 utf *utf_java_lang_ref_SoftReference;
70 utf *utf_java_lang_ref_WeakReference;
71 utf *utf_java_lang_ref_PhantomReference;
72 utf *utf_java_io_Serializable;
74 utf *utf_java_lang_Throwable;
75 utf *utf_java_lang_Error;
77 utf *utf_java_lang_AbstractMethodError;
78 utf *utf_java_lang_ClassCircularityError;
79 utf *utf_java_lang_ClassFormatError;
80 utf *utf_java_lang_ExceptionInInitializerError;
81 utf *utf_java_lang_IncompatibleClassChangeError;
82 utf *utf_java_lang_InstantiationError;
83 utf *utf_java_lang_InternalError;
84 utf *utf_java_lang_LinkageError;
85 utf *utf_java_lang_NoClassDefFoundError;
86 utf *utf_java_lang_NoSuchFieldError;
87 utf *utf_java_lang_NoSuchMethodError;
88 utf *utf_java_lang_OutOfMemoryError;
89 utf *utf_java_lang_UnsatisfiedLinkError;
90 utf *utf_java_lang_UnsupportedClassVersionError;
91 utf *utf_java_lang_VerifyError;
92 utf *utf_java_lang_VirtualMachineError;
94 utf *utf_java_lang_Exception;
96 utf *utf_java_lang_ArithmeticException;
97 utf *utf_java_lang_ArrayIndexOutOfBoundsException;
98 utf *utf_java_lang_ArrayStoreException;
99 utf *utf_java_lang_ClassCastException;
100 utf *utf_java_lang_ClassNotFoundException;
101 utf *utf_java_lang_CloneNotSupportedException;
102 utf *utf_java_lang_IllegalAccessException;
103 utf *utf_java_lang_IllegalArgumentException;
104 utf *utf_java_lang_IllegalMonitorStateException;
105 utf *utf_java_lang_InstantiationException;
106 utf *utf_java_lang_InterruptedException;
107 utf *utf_java_lang_NegativeArraySizeException;
108 utf *utf_java_lang_NullPointerException;
109 utf *utf_java_lang_StringIndexOutOfBoundsException;
111 utf *utf_java_lang_reflect_InvocationTargetException;
113 utf *utf_java_security_PrivilegedActionException;
115 #if defined(ENABLE_JAVASE)
116 utf* utf_java_lang_Void;
119 utf* utf_java_lang_Boolean;
120 utf* utf_java_lang_Byte;
121 utf* utf_java_lang_Character;
122 utf* utf_java_lang_Short;
123 utf* utf_java_lang_Integer;
124 utf* utf_java_lang_Long;
125 utf* utf_java_lang_Float;
126 utf* utf_java_lang_Double;
128 #if defined(ENABLE_JAVASE)
129 utf *utf_java_lang_StackTraceElement;
130 utf *utf_java_lang_reflect_Constructor;
131 utf *utf_java_lang_reflect_Field;
132 utf *utf_java_lang_reflect_Method;
133 utf *utf_java_util_Vector;
136 utf *utf_InnerClasses; /* InnerClasses */
137 utf *utf_ConstantValue; /* ConstantValue */
138 utf *utf_Code; /* Code */
139 utf *utf_Exceptions; /* Exceptions */
140 utf *utf_LineNumberTable; /* LineNumberTable */
141 utf *utf_SourceFile; /* SourceFile */
143 #if defined(ENABLE_JAVASE)
144 utf *utf_EnclosingMethod;
146 utf *utf_StackMapTable;
148 #if defined(ENABLE_ANNOTATIONS)
149 utf *utf_RuntimeVisibleAnnotations; /* RuntimeVisibleAnnotations */
150 utf *utf_RuntimeInvisibleAnnotations; /* RuntimeInvisibleAnnotations */
151 utf *utf_RuntimeVisibleParameterAnnotations; /* RuntimeVisibleParameterAnnotations */
152 utf *utf_RuntimeInvisibleParameterAnnotations; /* RuntimeInvisibleParameterAnnotations */
153 utf *utf_AnnotationDefault; /* AnnotationDefault */
157 utf *utf_init; /* <init> */
158 utf *utf_clinit; /* <clinit> */
159 utf *utf_clone; /* clone */
160 utf *utf_finalize; /* finalize */
163 utf *utf_run; /* run */
168 utf *utf_removeThread;
171 utf *utf_uncaughtException;
174 utf *utf_fillInStackTrace;
176 utf *utf_getSystemClassLoader;
179 utf *utf_loadClassInternal;
180 utf *utf_printStackTrace;
182 utf *utf_division_by_zero;
193 utf *utf_void__void; /* ()V */
194 utf *utf_boolean__void; /* (Z)V */
195 utf *utf_byte__void; /* (B)V */
196 utf *utf_char__void; /* (C)V */
197 utf *utf_short__void; /* (S)V */
198 utf *utf_int__void; /* (I)V */
199 utf *utf_long__void; /* (J)V */
200 utf *utf_float__void; /* (F)V */
201 utf *utf_double__void; /* (D)V */
203 utf *utf_void__java_lang_ClassLoader; /* ()Ljava/lang/ClassLoader; */
204 utf *utf_void__java_lang_Object; /* ()Ljava/lang/Object; */
205 utf *utf_void__java_lang_Throwable; /* ()Ljava/lang/Throwable; */
206 utf *utf_java_lang_ClassLoader_java_lang_String__J;
207 utf *utf_java_lang_Exception__V; /* (Ljava/lang/Exception;)V */
208 utf *utf_java_lang_Object__java_lang_Object;
209 utf *utf_java_lang_String__void; /* (Ljava/lang/String;)V */
210 utf *utf_java_lang_String__java_lang_Class;
211 utf *utf_java_lang_Thread__V; /* (Ljava/lang/Thread;)V */
212 utf *utf_java_lang_Thread_java_lang_Throwable__V;
213 utf *utf_Ljava_lang_ThreadGroup_Ljava_lang_String__V;
214 utf *utf_java_lang_Throwable__void; /* (Ljava/lang/Throwable;)V */
215 utf *utf_java_lang_Throwable__java_lang_Throwable;
217 utf *utf_not_named_yet; /* special name for unnamed classes */
219 utf *array_packagename;
222 /* utf8_init *******************************************************************
224 Initializes the utf8 subsystem.
226 *******************************************************************************/
230 TRACESUBSYSTEMINITIALIZATION("utf8_init");
232 /* create utf8 hashtable */
234 hashtable_utf = NEW(hashtable);
236 hashtable_create(hashtable_utf, HASHTABLE_UTF_SIZE);
238 #if defined(ENABLE_STATISTICS)
240 count_utf_len += sizeof(utf*) * hashtable_utf->size;
243 /* create utf-symbols for pointer comparison of frequently used strings */
245 utf_java_lang_Object = utf_new_char("java/lang/Object");
247 utf_java_lang_Class = utf_new_char("java/lang/Class");
248 utf_java_lang_ClassLoader = utf_new_char("java/lang/ClassLoader");
249 utf_java_lang_Cloneable = utf_new_char("java/lang/Cloneable");
250 utf_java_lang_SecurityManager = utf_new_char("java/lang/SecurityManager");
251 utf_java_lang_String = utf_new_char("java/lang/String");
252 utf_java_lang_ThreadGroup = utf_new_char("java/lang/ThreadGroup");
254 utf_java_lang_ref_SoftReference =
255 utf_new_char("java/lang/ref/SoftReference");
257 utf_java_lang_ref_WeakReference =
258 utf_new_char("java/lang/ref/WeakReference");
260 utf_java_lang_ref_PhantomReference =
261 utf_new_char("java/lang/ref/PhantomReference");
263 utf_java_io_Serializable = utf_new_char("java/io/Serializable");
265 utf_java_lang_Throwable = utf_new_char("java/lang/Throwable");
266 utf_java_lang_Error = utf_new_char("java/lang/Error");
268 utf_java_lang_ClassCircularityError =
269 utf_new_char("java/lang/ClassCircularityError");
271 utf_java_lang_ClassFormatError = utf_new_char("java/lang/ClassFormatError");
273 utf_java_lang_ExceptionInInitializerError =
274 utf_new_char("java/lang/ExceptionInInitializerError");
276 utf_java_lang_IncompatibleClassChangeError =
277 utf_new_char("java/lang/IncompatibleClassChangeError");
279 utf_java_lang_InstantiationError =
280 utf_new_char("java/lang/InstantiationError");
282 utf_java_lang_InternalError = utf_new_char("java/lang/InternalError");
283 utf_java_lang_LinkageError = utf_new_char("java/lang/LinkageError");
285 utf_java_lang_NoClassDefFoundError =
286 utf_new_char("java/lang/NoClassDefFoundError");
288 utf_java_lang_OutOfMemoryError = utf_new_char("java/lang/OutOfMemoryError");
290 utf_java_lang_UnsatisfiedLinkError =
291 utf_new_char("java/lang/UnsatisfiedLinkError");
293 utf_java_lang_UnsupportedClassVersionError =
294 utf_new_char("java/lang/UnsupportedClassVersionError");
296 utf_java_lang_VerifyError = utf_new_char("java/lang/VerifyError");
298 utf_java_lang_VirtualMachineError =
299 utf_new_char("java/lang/VirtualMachineError");
301 #if defined(ENABLE_JAVASE)
302 utf_java_lang_AbstractMethodError =
303 utf_new_char("java/lang/AbstractMethodError");
305 utf_java_lang_NoSuchFieldError =
306 utf_new_char("java/lang/NoSuchFieldError");
308 utf_java_lang_NoSuchMethodError =
309 utf_new_char("java/lang/NoSuchMethodError");
312 utf_java_lang_Exception = utf_new_char("java/lang/Exception");
314 utf_java_lang_ArithmeticException =
315 utf_new_char("java/lang/ArithmeticException");
317 utf_java_lang_ArrayIndexOutOfBoundsException =
318 utf_new_char("java/lang/ArrayIndexOutOfBoundsException");
320 utf_java_lang_ArrayStoreException =
321 utf_new_char("java/lang/ArrayStoreException");
323 utf_java_lang_ClassCastException =
324 utf_new_char("java/lang/ClassCastException");
326 utf_java_lang_ClassNotFoundException =
327 utf_new_char("java/lang/ClassNotFoundException");
329 utf_java_lang_CloneNotSupportedException =
330 utf_new_char("java/lang/CloneNotSupportedException");
332 utf_java_lang_IllegalAccessException =
333 utf_new_char("java/lang/IllegalAccessException");
335 utf_java_lang_IllegalArgumentException =
336 utf_new_char("java/lang/IllegalArgumentException");
338 utf_java_lang_IllegalMonitorStateException =
339 utf_new_char("java/lang/IllegalMonitorStateException");
341 utf_java_lang_InstantiationException =
342 utf_new_char("java/lang/InstantiationException");
344 utf_java_lang_InterruptedException =
345 utf_new_char("java/lang/InterruptedException");
347 utf_java_lang_NegativeArraySizeException =
348 utf_new_char("java/lang/NegativeArraySizeException");
350 utf_java_lang_NullPointerException =
351 utf_new_char("java/lang/NullPointerException");
353 utf_java_lang_StringIndexOutOfBoundsException =
354 utf_new_char("java/lang/StringIndexOutOfBoundsException");
356 utf_java_lang_reflect_InvocationTargetException =
357 utf_new_char("java/lang/reflect/InvocationTargetException");
359 utf_java_security_PrivilegedActionException =
360 utf_new_char("java/security/PrivilegedActionException");
362 #if defined(ENABLE_JAVASE)
363 utf_java_lang_Void = utf_new_char("java/lang/Void");
366 utf_java_lang_Boolean = utf_new_char("java/lang/Boolean");
367 utf_java_lang_Byte = utf_new_char("java/lang/Byte");
368 utf_java_lang_Character = utf_new_char("java/lang/Character");
369 utf_java_lang_Short = utf_new_char("java/lang/Short");
370 utf_java_lang_Integer = utf_new_char("java/lang/Integer");
371 utf_java_lang_Long = utf_new_char("java/lang/Long");
372 utf_java_lang_Float = utf_new_char("java/lang/Float");
373 utf_java_lang_Double = utf_new_char("java/lang/Double");
375 #if defined(ENABLE_JAVASE)
376 utf_java_lang_StackTraceElement =
377 utf_new_char("java/lang/StackTraceElement");
379 utf_java_lang_reflect_Constructor =
380 utf_new_char("java/lang/reflect/Constructor");
382 utf_java_lang_reflect_Field = utf_new_char("java/lang/reflect/Field");
383 utf_java_lang_reflect_Method = utf_new_char("java/lang/reflect/Method");
384 utf_java_util_Vector = utf_new_char("java/util/Vector");
387 utf_InnerClasses = utf_new_char("InnerClasses");
388 utf_ConstantValue = utf_new_char("ConstantValue");
389 utf_Code = utf_new_char("Code");
390 utf_Exceptions = utf_new_char("Exceptions");
391 utf_LineNumberTable = utf_new_char("LineNumberTable");
392 utf_SourceFile = utf_new_char("SourceFile");
394 #if defined(ENABLE_JAVASE)
395 utf_EnclosingMethod = utf_new_char("EnclosingMethod");
396 utf_Signature = utf_new_char("Signature");
397 utf_StackMapTable = utf_new_char("StackMapTable");
399 #if defined(ENABLE_ANNOTATIONS)
400 utf_RuntimeVisibleAnnotations = utf_new_char("RuntimeVisibleAnnotations");
401 utf_RuntimeInvisibleAnnotations = utf_new_char("RuntimeInvisibleAnnotations");
402 utf_RuntimeVisibleParameterAnnotations = utf_new_char("RuntimeVisibleParameterAnnotations");
403 utf_RuntimeInvisibleParameterAnnotations = utf_new_char("RuntimeInvisibleParameterAnnotations");
404 utf_AnnotationDefault = utf_new_char("AnnotationDefault");
408 utf_init = utf_new_char("<init>");
409 utf_clinit = utf_new_char("<clinit>");
410 utf_clone = utf_new_char("clone");
411 utf_finalize = utf_new_char("finalize");
412 utf_invoke = utf_new_char("invoke");
413 utf_main = utf_new_char("main");
414 utf_run = utf_new_char("run");
416 utf_add = utf_new_char("add");
417 utf_remove = utf_new_char("remove");
418 utf_addThread = utf_new_char("addThread");
419 utf_removeThread = utf_new_char("removeThread");
420 utf_put = utf_new_char("put");
421 utf_get = utf_new_char("get");
422 utf_uncaughtException = utf_new_char("uncaughtException");
423 utf_value = utf_new_char("value");
425 utf_fillInStackTrace = utf_new_char("fillInStackTrace");
426 utf_findNative = utf_new_char("findNative");
427 utf_getSystemClassLoader = utf_new_char("getSystemClassLoader");
428 utf_initCause = utf_new_char("initCause");
429 utf_loadClass = utf_new_char("loadClass");
430 utf_loadClassInternal = utf_new_char("loadClassInternal");
431 utf_printStackTrace = utf_new_char("printStackTrace");
433 utf_division_by_zero = utf_new_char("/ by zero");
435 utf_Z = utf_new_char("Z");
436 utf_B = utf_new_char("B");
437 utf_C = utf_new_char("C");
438 utf_S = utf_new_char("S");
439 utf_I = utf_new_char("I");
440 utf_J = utf_new_char("J");
441 utf_F = utf_new_char("F");
442 utf_D = utf_new_char("D");
444 utf_void__void = utf_new_char("()V");
445 utf_boolean__void = utf_new_char("(Z)V");
446 utf_byte__void = utf_new_char("(B)V");
447 utf_char__void = utf_new_char("(C)V");
448 utf_short__void = utf_new_char("(S)V");
449 utf_int__void = utf_new_char("(I)V");
450 utf_long__void = utf_new_char("(J)V");
451 utf_float__void = utf_new_char("(F)V");
452 utf_double__void = utf_new_char("(D)V");
453 utf_void__java_lang_Object = utf_new_char("()Ljava/lang/Object;");
454 utf_void__java_lang_Throwable = utf_new_char("()Ljava/lang/Throwable;");
456 utf_void__java_lang_ClassLoader =
457 utf_new_char("()Ljava/lang/ClassLoader;");
459 utf_java_lang_ClassLoader_java_lang_String__J =
460 utf_new_char("(Ljava/lang/ClassLoader;Ljava/lang/String;)J");
462 utf_java_lang_Exception__V = utf_new_char("(Ljava/lang/Exception;)V");
464 utf_java_lang_Object__java_lang_Object =
465 utf_new_char("(Ljava/lang/Object;)Ljava/lang/Object;");
467 utf_java_lang_String__void = utf_new_char("(Ljava/lang/String;)V");
469 utf_java_lang_String__java_lang_Class =
470 utf_new_char("(Ljava/lang/String;)Ljava/lang/Class;");
472 utf_java_lang_Thread__V = utf_new_char("(Ljava/lang/Thread;)V");
474 utf_java_lang_Thread_java_lang_Throwable__V =
475 utf_new_char("(Ljava/lang/Thread;Ljava/lang/Throwable;)V");
477 utf_Ljava_lang_ThreadGroup_Ljava_lang_String__V =
478 utf_new_char("(Ljava/lang/ThreadGroup;Ljava/lang/String;)V");
480 utf_java_lang_Throwable__void = utf_new_char("(Ljava/lang/Throwable;)V");
482 utf_java_lang_Throwable__java_lang_Throwable =
483 utf_new_char("(Ljava/lang/Throwable;)Ljava/lang/Throwable;");
485 utf_null = utf_new_char("null");
486 utf_not_named_yet = utf_new_char("\t<not_named_yet>");
487 array_packagename = utf_new_char("\t<the array package>");
491 /* utf_hashkey *****************************************************************
493 The hashkey is computed from the utf-text by using up to 8
494 characters. For utf-symbols longer than 15 characters 3 characters
495 are taken from the beginning and the end, 2 characters are taken from the middle.
498 *******************************************************************************/
500 #define nbs(val) ((u4) *(++text) << val) /* advance `text` by one byte, then read that byte and left shift it by val; modifies the enclosing scope's `text`; val is not parenthesized, so pass a plain constant */
501 #define fbs(val) ((u4) *( text) << val) /* read the byte `text` currently points at (no advance) and left shift it by val; val is not parenthesized, so pass a plain constant */
503 u4 utf_hashkey(const char *text, u4 length)
505 const char *start_pos = text; /* pointer to utf text */
509 case 0: /* empty string */
512 case 1: return fbs(0);
513 case 2: return fbs(0) ^ nbs(3);
514 case 3: return fbs(0) ^ nbs(3) ^ nbs(5);
515 case 4: return fbs(0) ^ nbs(2) ^ nbs(4) ^ nbs(6);
516 case 5: return fbs(0) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(6);
517 case 6: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(5) ^ nbs(6);
518 case 7: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(5) ^ nbs(6);
519 case 8: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(5) ^ nbs(6) ^ nbs(7);
526 return a ^ nbs(4) ^ nbs(5) ^ nbs(6) ^ nbs(7) ^ nbs(8);
535 return a ^ nbs(6) ^ nbs(7) ^ nbs(8) ^ nbs(9);
544 return a ^ nbs(6) ^ nbs(7) ^ nbs(8) ^ nbs(9) ^ nbs(10);
556 return a ^ nbs(9) ^ nbs(10);
568 return a ^ nbs(9) ^ nbs(10);
579 return a ^ nbs(9) ^ nbs(10) ^ nbs(11);
590 return a ^ nbs(9) ^ nbs(10) ^ nbs(11);
592 default: /* 3 characters from beginning */
598 /* 2 characters from middle */
599 text = start_pos + (length / 2);
604 /* 3 characters from end */
605 text = start_pos + length - 4;
610 return a ^ nbs(10) ^ nbs(11);
614 /* utf_full_hashkey ************************************************************
616 This function computes a hash value using all bytes in the string.
618 The algorithm is the "One-at-a-time" algorithm as published
619 by Bob Jenkins on http://burtleburtle.net/bob/hash/doobs.html.
621 *******************************************************************************/
623 u4 utf_full_hashkey(const char *text, u4 length)
625 register const unsigned char *p = (const unsigned char *) text;
633 hash += (hash << 10);
637 hash ^= (hash >> 11);
638 hash += (hash << 15);
643 /* unicode_hashkey *************************************************************
645 Compute the hashkey of a unicode string.
647 *******************************************************************************/
649 u4 unicode_hashkey(u2 *text, u2 len)
651 return utf_hashkey((char *) text, len);
655 /* utf_new *********************************************************************
657 Creates a new utf-symbol, the text of the symbol is passed as a
658 u1-array. The function searches the utf-hashtable for a utf-symbol
659 with this text. On success the element is returned, otherwise a new
660 hashtable element is created.
662 If the number of entries in the hashtable exceeds twice the size of
663 the hashtable slots a reorganization of the hashtable is done and
664 the utf symbols are copied to a new hashtable with doubled size.
666 *******************************************************************************/
668 utf *utf_new(const char *text, u2 length)
670 u4 key; /* hashkey computed from utf-text */
671 u4 slot; /* slot in hashtable */
672 utf *u; /* hashtable element */
675 LOCK_MONITOR_ENTER(hashtable_utf->header);
677 #if defined(ENABLE_STATISTICS)
682 key = utf_hashkey(text, length);
683 slot = key & (hashtable_utf->size - 1);
684 u = hashtable_utf->ptr[slot];
686 /* search external hash chain for utf-symbol */
689 if (u->blength == length) {
690 /* compare text of hashtable elements */
692 for (i = 0; i < length; i++)
693 if (text[i] != u->text[i])
696 #if defined(ENABLE_STATISTICS)
698 count_utf_new_found++;
701 /* symbol found in hashtable */
703 LOCK_MONITOR_EXIT(hashtable_utf->header);
709 u = u->hashlink; /* next element in external chain */
712 /* location in hashtable found, create new utf element */
716 u->blength = length; /* length in bytes of utfstring */
717 u->hashlink = hashtable_utf->ptr[slot]; /* link in external hashchain */
718 u->text = mem_alloc(length + 1);/* allocate memory for utf-text */
720 memcpy(u->text, text, length); /* copy utf-text */
721 u->text[length] = '\0';
723 #if defined(ENABLE_STATISTICS)
725 count_utf_len += sizeof(utf) + length + 1;
728 hashtable_utf->ptr[slot] = u; /* insert symbol into table */
729 hashtable_utf->entries++; /* update number of entries */
731 if (hashtable_utf->entries > (hashtable_utf->size * 2)) {
733 /* reorganization of hashtable, average length of the external
734 chains is approx. 2 */
736 hashtable *newhash; /* the new hashtable */
742 /* create new hashtable, double the size */
744 newhash = hashtable_resize(hashtable_utf, hashtable_utf->size * 2);
746 #if defined(ENABLE_STATISTICS)
748 count_utf_len += sizeof(utf*) * hashtable_utf->size;
751 /* transfer elements to new hashtable */
753 for (i = 0; i < hashtable_utf->size; i++) {
754 u = hashtable_utf->ptr[i];
758 slot = utf_hashkey(u->text, u->blength) & (newhash->size - 1);
760 u->hashlink = (utf *) newhash->ptr[slot];
761 newhash->ptr[slot] = u;
763 /* follow link in external hash chain */
769 /* dispose old table */
771 hashtable_free(hashtable_utf);
773 hashtable_utf = newhash;
776 LOCK_MONITOR_EXIT(hashtable_utf->header);
782 /* utf_new_u2 ******************************************************************
784 Make utf symbol from u2 array, if isclassname is true '.' is
787 *******************************************************************************/
789 utf *utf_new_u2(u2 *unicode_pos, u4 unicode_length, bool isclassname)
791 char *buffer; /* memory buffer for unicode characters */
792 char *pos; /* pointer to current position in buffer */
793 u4 left; /* unicode characters left */
794 u4 buflength; /* utf length in bytes of the u2 array */
795 utf *result; /* resulting utf-string */
798 /* determine utf length in bytes and allocate memory */
800 buflength = u2_utflength(unicode_pos, unicode_length);
801 buffer = MNEW(char, buflength);
806 for (i = 0; i++ < unicode_length; unicode_pos++) {
807 /* next unicode character */
810 if ((c != 0) && (c < 0x80)) {
813 if ((int) left < 0) break;
814 /* convert classname */
815 if (isclassname && c == '.')
820 } else if (c < 0x800) {
822 unsigned char high = c >> 6;
823 unsigned char low = c & 0x3F;
825 if ((int) left < 0) break;
826 *pos++ = high | 0xC0;
832 char mid = (c >> 6) & 0x3F;
835 if ((int) left < 0) break;
836 *pos++ = high | 0xE0;
842 /* insert utf-string into symbol-table */
843 result = utf_new(buffer,buflength);
845 MFREE(buffer, char, buflength);
851 /* utf_new_char ****************************************************************
853 Creates a new utf symbol, the text for this symbol is passed as a
854 c-string ( = char* ).
856 *******************************************************************************/
858 utf *utf_new_char(const char *text)
860 return utf_new(text, strlen(text));
864 /* utf_new_char_classname ******************************************************
866 Creates a new utf symbol, the text for this symbol is passed as a
867 c-string ( = char* ) "." characters are going to be replaced by
868 "/". Since the above function is used often, this is a separate
869 function, instead of an if.
871 *******************************************************************************/
873 utf *utf_new_char_classname(const char *text)
875 if (strchr(text, '.')) {
876 char *txt = strdup(text);
877 char *end = txt + strlen(txt);
881 for (c = txt; c < end; c++)
882 if (*c == '.') *c = '/';
884 tmpRes = utf_new(txt, strlen(txt));
890 return utf_new(text, strlen(text));
894 /* utf_nextu2 ******************************************************************
896 Read the next unicode character from the utf string and increment
897 the utf-string pointer accordingly.
899 CAUTION: This function is unsafe for input that was not checked
902 *******************************************************************************/
904 u2 utf_nextu2(char **utf_ptr)
906 /* uncompressed unicode character */
908 /* current position in utf text */
909 unsigned char *utf = (unsigned char *) (*utf_ptr);
910 /* bytes representing the unicode character */
911 unsigned char ch1, ch2, ch3;
912 /* number of bytes used to represent the unicode character */
915 switch ((ch1 = utf[0]) >> 4) {
916 default: /* 1 byte */
920 case 0xD: /* 2 bytes */
921 if (((ch2 = utf[1]) & 0xC0) == 0x80) {
922 unsigned char high = ch1 & 0x1F;
923 unsigned char low = ch2 & 0x3F;
924 unicode_char = (high << 6) + low;
929 case 0xE: /* 2 or 3 bytes */
930 if (((ch2 = utf[1]) & 0xC0) == 0x80) {
931 if (((ch3 = utf[2]) & 0xC0) == 0x80) {
932 unsigned char low = ch3 & 0x3f;
933 unsigned char mid = ch2 & 0x3f;
934 unsigned char high = ch1 & 0x0f;
935 unicode_char = (((high << 6) + mid) << 6) + low;
943 /* update position in utf-text */
944 *utf_ptr = (char *) (utf + len);
950 /* utf_bytes *******************************************************************
952 Determine number of bytes (aka. octets) in the utf string.
955 u............utf string
958 The number of octets of this utf string.
959 There is _no_ terminating zero included in this count.
961 *******************************************************************************/
969 /* utf_get_number_of_u2s_for_buffer ********************************************
971 Determine number of UTF-16 u2s in the given UTF-8 buffer
973 CAUTION: This function is unsafe for input that was not checked
976 CAUTION: Use this function *only* when you want to convert an UTF-8 buffer
977 to an array of u2s (UTF-16) and want to know how many of them you will get.
978 All other uses of this function are probably wrong.
981 buffer........points to first char in buffer
982 blength.......number of _bytes_ in the buffer
985 the number of u2s needed to hold this string in UTF-16 encoding.
986 There is _no_ terminating zero included in this count.
988 NOTE: Unlike utf_get_number_of_u2s, this function never throws an exception.
991 *******************************************************************************/
993 u4 utf_get_number_of_u2s_for_buffer(const char *buffer, u4 blength)
995 const char *endpos; /* points behind utf string */
996 const char *utf_ptr; /* current position in utf text */
997 u4 len = 0; /* number of unicode characters */
1000 endpos = utf_ptr + blength;
1002 while (utf_ptr < endpos) {
1004 /* next unicode character */
1005 utf_nextu2((char **)&utf_ptr);
1008 assert(utf_ptr == endpos);
1014 /* utf_get_number_of_u2s *******************************************************
1016 Determine number of UTF-16 u2s in the utf string.
1018 CAUTION: This function is unsafe for input that was not checked
1021 CAUTION: Use this function *only* when you want to convert a utf string
1022 to an array of u2s and want to know how many of them you will get.
1023 All other uses of this function are probably wrong.
1026 u............utf string
1029 the number of u2s needed to hold this string in UTF-16 encoding.
1030 There is _no_ terminating zero included in this count.
1031 XXX 0 if a NullPointerException has been thrown (see below)
1033 *******************************************************************************/
1035 u4 utf_get_number_of_u2s(utf *u)
1037 char *endpos; /* points behind utf string */
1038 char *utf_ptr; /* current position in utf text */
1039 u4 len = 0; /* number of unicode characters */
1041 /* XXX this is probably not checked by most callers! Review this after */
1042 /* the invalid uses of this function have been eliminated */
1044 exceptions_throw_nullpointerexception();
1048 endpos = UTF_END(u);
1051 while (utf_ptr < endpos) {
1053 /* next unicode character */
1054 utf_nextu2(&utf_ptr);
1057 if (utf_ptr != endpos) {
1058 /* string ended abruptly */
1059 exceptions_throw_internalerror("Illegal utf8 string");
1067 /* utf8_safe_number_of_u2s *****************************************************
1069 Determine number of UTF-16 u2s needed for decoding the given UTF-8 string.
1070 (For invalid UTF-8 the U+fffd replacement character will be counted.)
1072 This function is safe even for invalid UTF-8 strings.
1075 text..........zero-terminated(!) UTF-8 string (may be invalid)
1077 nbytes........strlen(text). (This is needed to completely emulate
1081 the number of u2s needed to hold this string in UTF-16 encoding.
1082 There is _no_ terminating zero included in this count.
1084 *******************************************************************************/
1086 s4 utf8_safe_number_of_u2s(const char *text, s4 nbytes) {
1087 register const unsigned char *t;
1090 register const unsigned char *tlimit;
1098 assert(nbytes >= 0);
1101 t = (const unsigned char *) text;
1102 tlimit = t + nbytes;
1104 /* CAUTION: Keep this code in sync with utf8_safe_convert_to_u2s! */
1110 /* highest bit set, non-ASCII character */
1112 if ((byte & 0xe0) == 0xc0) {
1113 /* 2-byte: should be 110..... 10...... ? */
1115 if ((*t++ & 0xc0) == 0x80)
1116 ; /* valid 2-byte */
1120 else if ((byte & 0xf0) == 0xe0) {
1121 /* 3-byte: should be 1110.... 10...... 10...... */
1125 return len + 1; /* invalid, stop here */
1127 if ((*t++ & 0xc0) == 0x80) {
1128 if ((*t++ & 0xc0) == 0x80)
1129 ; /* valid 3-byte */
1136 else if ((byte & 0xf8) == 0xf0) {
1137 /* 4-byte: should be 11110... 10...... 10...... 10...... */
1141 return len + 1; /* invalid, stop here */
1143 if (((byte1 = *t++) & 0xc0) == 0x80) {
1144 if (((byte2 = *t++) & 0xc0) == 0x80) {
1145 if (((byte3 = *t++) & 0xc0) == 0x80) {
1146 /* valid 4-byte UTF-8? */
1147 value = ((byte & 0x07) << 18)
1148 | ((byte1 & 0x3f) << 12)
1149 | ((byte2 & 0x3f) << 6)
1150 | ((byte3 & 0x3f) );
1152 if (value > 0x10FFFF)
1154 else if (value > 0xFFFF)
1155 len += 1; /* we need surrogates */
1157 ; /* 16bit suffice */
1168 else if ((byte & 0xfc) == 0xf8) {
1169 /* invalid 5-byte */
1171 return len + 1; /* invalid, stop here */
1174 for (; skip && ((*t & 0xc0) == 0x80); --skip)
1177 else if ((byte & 0xfe) == 0xfc) {
1178 /* invalid 6-byte */
1180 return len + 1; /* invalid, stop here */
1183 for (; skip && ((*t & 0xc0) == 0x80); --skip)
1195 /* ASCII character, common case */
1205 /* utf8_safe_convert_to_u2s ****************************************************
1207 Convert the given UTF-8 string to UTF-16 into a pre-allocated buffer.
1208 (Invalid UTF-8 will be replaced with the U+fffd replacement character.)
1209 Use utf8_safe_number_of_u2s to determine the number of u2s to allocate.
1211 This function is safe even for invalid UTF-8 strings.
1214 text..........zero-terminated(!) UTF-8 string (may be invalid)
1216 nbytes........strlen(text). (This is needed to completely emulate
1218 buffer........a preallocated array of u2s to receive the decoded
1219 string. Use utf8_safe_number_of_u2s to get the
1220 required number of u2s for allocating this.

Output per input sequence, as far as the lines of this listing show:
  valid 2-byte or 3-byte sequence.....one u2
  valid 4-byte sequence...............one u2 if the value fits in 16 bits,
                                      a surrogate pair up to 0x10FFFF,
                                      U+fffd above 0x10FFFF
  5-byte or 6-byte form...............U+fffd
  truncated or malformed sequence.....U+fffd
1222 *******************************************************************************/
1224 #define UNICODE_REPLACEMENT 0xfffd
1226 void utf8_safe_convert_to_u2s(const char *text, s4 nbytes, u2 *buffer) {
1227 register const unsigned char *t;
1229 register const unsigned char *tlimit;
1237 assert(nbytes >= 0);
/* t walks the input as unsigned bytes; tlimit points one past the last of
   the nbytes input bytes and bounds the multi-byte look-ahead below */
1239 t = (const unsigned char *) text;
1240 tlimit = t + nbytes;
1242 /* CAUTION: Keep this code in sync with utf8_safe_number_of_u2s! */
1248 /* highest bit set, non-ASCII character */
1250 if ((byte & 0xe0) == 0xc0) {
1251 /* 2-byte: should be 110..... 10...... */
/* No tlimit check in this case: the next byte is read unconditionally.
   NOTE(review): presumably fine because text is zero-terminated, so at
   worst the terminator is read - confirm. */
1253 if (((byte1 = *t++) & 0xc0) == 0x80) {
1254 /* valid 2-byte UTF-8 */
/* 5 payload bits from the lead byte, 6 from the continuation byte */
1255 *buffer++ = ((byte & 0x1f) << 6)
1256 | ((byte1 & 0x3f) );
1259 *buffer++ = UNICODE_REPLACEMENT;
1263 else if ((byte & 0xf0) == 0xe0) {
1264 /* 3-byte: should be 1110.... 10...... 10...... */
/* both continuation bytes must lie inside the input, otherwise the
   sequence is truncated and a replacement character is emitted */
1266 if (t + 2 > tlimit) {
1267 *buffer++ = UNICODE_REPLACEMENT;
1271 if (((byte1 = *t++) & 0xc0) == 0x80) {
1272 if (((byte2 = *t++) & 0xc0) == 0x80) {
1273 /* valid 3-byte UTF-8 */
/* 4 + 6 + 6 payload bits give a 16-bit value */
1274 *buffer++ = ((byte & 0x0f) << 12)
1275 | ((byte1 & 0x3f) << 6)
1276 | ((byte2 & 0x3f) );
1279 *buffer++ = UNICODE_REPLACEMENT;
1284 *buffer++ = UNICODE_REPLACEMENT;
1288 else if ((byte & 0xf8) == 0xf0) {
1289 /* 4-byte: should be 11110... 10...... 10...... 10...... */
/* all three continuation bytes must lie inside the input */
1291 if (t + 3 > tlimit) {
1292 *buffer++ = UNICODE_REPLACEMENT;
1296 if (((byte1 = *t++) & 0xc0) == 0x80) {
1297 if (((byte2 = *t++) & 0xc0) == 0x80) {
1298 if (((byte3 = *t++) & 0xc0) == 0x80) {
1299 /* valid 4-byte UTF-8? */
/* 3 + 6 + 6 + 6 payload bits; the result may exceed the Unicode range,
   hence the question mark above and the range test below */
1300 value = ((byte & 0x07) << 18)
1301 | ((byte1 & 0x3f) << 12)
1302 | ((byte2 & 0x3f) << 6)
1303 | ((byte3 & 0x3f) );
1305 if (value > 0x10FFFF) {
1306 *buffer++ = UNICODE_REPLACEMENT;
1308 else if (value > 0xFFFF) {
1309 /* we need surrogates */
/* (value >> 10) - 0x40 equals (value - 0x10000) >> 10, i.e. the upper ten
   bits of the offset go into the high surrogate; the lower ten bits of
   value go into the low surrogate */
1310 *buffer++ = 0xd800 | ((value >> 10) - 0x40);
1311 *buffer++ = 0xdc00 | (value & 0x03ff);
1314 *buffer++ = value; /* 16bit suffice */
1317 *buffer++ = UNICODE_REPLACEMENT;
1322 *buffer++ = UNICODE_REPLACEMENT;
1327 *buffer++ = UNICODE_REPLACEMENT;
/* 5-byte form (111110..): never decoded, only U+fffd is emitted for it */
1331 else if ((byte & 0xfc) == 0xf8) {
1332 if (t + 4 > tlimit) {
1333 *buffer++ = UNICODE_REPLACEMENT;
/* step over the continuation bytes that follow, at most `skip` of them
   (the initialisation of skip and the loop body are not part of this
   listing) */
1338 for (; skip && ((*t & 0xc0) == 0x80); --skip)
1340 *buffer++ = UNICODE_REPLACEMENT;
/* 6-byte form (1111110.): handled like the 5-byte form, with one more
   byte of look-ahead */
1342 else if ((byte & 0xfe) == 0xfc) {
1343 if (t + 5 > tlimit) {
1344 *buffer++ = UNICODE_REPLACEMENT;
1349 for (; skip && ((*t & 0xc0) == 0x80); --skip)
1351 *buffer++ = UNICODE_REPLACEMENT;
/* NOTE(review): presumably the catch-all for any other byte with the high
   bit set; the enclosing branch is not part of this listing */
1354 *buffer++ = UNICODE_REPLACEMENT;
1362 /* ASCII character, common case */
1370 /* u2_utflength ****************************************************************
1372 Returns the utf length in bytes of a u2 array.

text.........the u2 array to measure
u2_length....number of u2 elements of text that the loop visits
1374 *******************************************************************************/
1376 u4 u2_utflength(u2 *text, u4 u2_length)
1378 u4 result_len = 0; /* utf length in bytes */
1379 u2 ch; /* current unicode character */
1382 for (len = 0; len < u2_length; len++) {
1383 /* next unicode character */
1386 /* determine bytes required to store unicode character as utf */
/* `ch &&` keeps the value 0 out of the first case, so a zero character
   falls through to the (ch < 0x800) case; the amounts added to result_len
   in each case are not part of this listing */
1387 if (ch && (ch < 0x80))
1389 else if (ch < 0x800)
1399 /* utf_copy ********************************************************************
1401 Copy the given utf string byte-for-byte to a buffer.
1404 buffer.......the destination buffer; u->blength + 1 chars are requested
                  from MCOPY, so it needs room for the text plus the
                  terminating zero
1405 u............the utf string
1407 *******************************************************************************/
1409 void utf_copy(char *buffer, utf *u)
1411 /* our utf strings are zero-terminated (done by utf_new) */
/* the "+ 1" extends the copy to the terminating zero mentioned above */
1412 MCOPY(buffer, u->text, char, u->blength + 1);
1416 /* utf_cat *********************************************************************
1418 Append the given utf string byte-for-byte to a buffer.
1421 buffer.......the buffer; it is measured with strlen, so it must already
                  hold a zero-terminated string, and it needs room for
                  u->blength + 1 further chars from that position on
1422 u............the utf string
1424 *******************************************************************************/
1426 void utf_cat(char *buffer, utf *u)
1428 /* our utf strings are zero-terminated (done by utf_new) */
/* the copy starts at the current terminator of buffer and, through the
   "+ 1", includes the terminating zero of u */
1429 MCOPY(buffer + strlen(buffer), u->text, char, u->blength + 1);
1433 /* utf_copy_classname **********************************************************
1435 Copy the given utf classname byte-for-byte to a buffer.
1436 '/' is replaced by '.'
1439 buffer.......the buffer
1440 u............the utf string
1442 *******************************************************************************/
1444 void utf_copy_classname(char *buffer, utf *u)
/* endptr lies one byte past UTF_END(u), so the loop range also covers the
   terminating zero; the loop body itself is not part of this listing */
1453 endptr = UTF_END(u) + 1; /* utfs are zero-terminated by utf_new */
1455 while (srcptr != endptr) {
1464 /* utf_cat_classname ***********************************************************
1466 Append the given utf classname byte-for-byte to a buffer.
1467 '/' is replaced by '.'
1470 buffer.......the buffer; it is measured with strlen, so it must already
                  hold a zero-terminated string
1471 u............the utf string
1473 *******************************************************************************/
1475 void utf_cat_classname(char *buffer, utf *u)
/* delegate to utf_copy_classname, writing at the current end of buffer */
1477 utf_copy_classname(buffer + strlen(buffer), u);
1480 /* utf_display_printable_ascii *************************************************
1482 Write utf symbol to stdout (for debugging purposes).
1483 Non-printable and non-ASCII characters are printed as '?'.

u............the utf symbol to display
1485 *******************************************************************************/
1487 void utf_display_printable_ascii(utf *u)
1489 char *endpos; /* points behind utf string */
1490 char *utf_ptr; /* current position in utf text */
1498 endpos = UTF_END(u);
1501 while (utf_ptr < endpos) {
1502 /* read next unicode character */
/* utf_nextu2 is handed the address of utf_ptr; the loop relies on it to
   move the position towards endpos */
1504 u2 c = utf_nextu2(&utf_ptr);
/* NOTE(review): the upper bound 127 lets DEL (0x7f) count as printable,
   although it is a control character - confirm this is intended */
1506 if ((c >= 32) && (c <= 127))
1516 /* utf_display_printable_ascii_classname ***************************************
1518 Write utf symbol to stdout with `/' converted to `.' (for debugging purposes).
1520 Non-printable and non-ASCII characters are printed as '?'.

u............the utf symbol to display
1522 *******************************************************************************/
1524 void utf_display_printable_ascii_classname(utf *u)
1526 char *endpos; /* points behind utf string */
1527 char *utf_ptr; /* current position in utf text */
1535 endpos = UTF_END(u);
1538 while (utf_ptr < endpos) {
1539 /* read next unicode character */
1541 u2 c = utf_nextu2(&utf_ptr);
/* NOTE(review): the upper bound 127 lets DEL (0x7f) count as printable,
   although it is a control character - confirm this is intended */
1546 if ((c >= 32) && (c <= 127))
1556 /* utf_sprint_convert_to_latin1 ************************************************
1558 Write utf symbol into c-string (for debugging purposes).
1559 Characters are converted to 8-bit Latin-1, non-Latin-1 characters yield
     [the rest of this sentence is missing from this listing; in the code
     each decoded u2 is assigned directly to one char of the buffer]

buffer.......receives one char per decoded character; no size is passed, so
             the caller has to provide a sufficiently large buffer
u............the utf symbol to convert
1562 *******************************************************************************/
1564 void utf_sprint_convert_to_latin1(char *buffer, utf *u)
1566 char *endpos; /* points behind utf string */
1567 char *utf_ptr; /* current position in utf text */
/* NOTE(review): the write index is a u2; if u2 is a 16-bit type it wraps
   around for very long strings - confirm inputs stay below that */
1568 u2 pos = 0; /* position in c-string */
/* NOTE(review): presumably the result for a NULL symbol; the test guarding
   this line is not part of this listing */
1571 strcpy(buffer, "NULL");
1575 endpos = UTF_END(u);
1578 while (utf_ptr < endpos)
1579 /* copy next unicode character */
1580 buffer[pos++] = utf_nextu2(&utf_ptr);
1582 /* terminate string */
1587 /* utf_sprint_convert_to_latin1_classname **************************************
1589 Write utf symbol into c-string with `/' converted to `.' (for debugging purposes).
1591 Characters are converted to 8-bit Latin-1, non-Latin-1 characters yield
     [the rest of this sentence is missing from this listing]

buffer.......receives the converted string; no size is passed, so the
             caller has to provide a sufficiently large buffer
u............the utf symbol to convert
1594 *******************************************************************************/
1596 void utf_sprint_convert_to_latin1_classname(char *buffer, utf *u)
1598 char *endpos; /* points behind utf string */
1599 char *utf_ptr; /* current position in utf text */
/* NOTE(review): the write index is a u2; if u2 is a 16-bit type it wraps
   around for very long strings - confirm inputs stay below that */
1600 u2 pos = 0; /* position in c-string */
/* NOTE(review): presumably the result for a NULL symbol; the test guarding
   this line is not part of this listing */
1603 strcpy(buffer, "NULL");
1607 endpos = UTF_END(u);
1610 while (utf_ptr < endpos) {
1611 /* copy next unicode character */
1612 u2 c = utf_nextu2(&utf_ptr);
/* classname convention: package separators are shown as dots */
1613 if (c == '/') c = '.';
1617 /* terminate string */
1622 /* utf_strcat_convert_to_latin1 ************************************************
1624 Like libc strcat, but uses an utf8 string.
1625 Characters are converted to 8-bit Latin-1, non-Latin-1 characters yield
     [the rest of this sentence is missing from this listing]

buffer.......zero-terminated c-string to append to (it is measured with
             strlen)
u............the utf symbol to append
1628 *******************************************************************************/
1630 void utf_strcat_convert_to_latin1(char *buffer, utf *u)
/* delegate to utf_sprint_convert_to_latin1, writing at the current end of
   buffer */
1632 utf_sprint_convert_to_latin1(buffer + strlen(buffer), u);
1636 /* utf_strcat_convert_to_latin1_classname **************************************
1638 Like libc strcat, but uses an utf8 string.
1639 Characters are converted to 8-bit Latin-1, non-Latin-1 characters yield
     [the rest of this sentence is missing from this listing]

buffer.......zero-terminated c-string to append to (it is measured with
             strlen)
u............the utf symbol to append
1642 *******************************************************************************/
1644 void utf_strcat_convert_to_latin1_classname(char *buffer, utf *u)
/* delegate to utf_sprint_convert_to_latin1_classname, writing at the
   current end of buffer */
1646 utf_sprint_convert_to_latin1_classname(buffer + strlen(buffer), u);
1650 /* utf_fprint_printable_ascii **************************************************
1652 Write utf symbol into file.
1653 Non-printable and non-ASCII characters are printed as '?'.

file.........the stream to print to
u............the utf symbol to print
1655 *******************************************************************************/
1657 void utf_fprint_printable_ascii(FILE *file, utf *u)
1659 char *endpos; /* points behind utf string */
1660 char *utf_ptr; /* current position in utf text */
1665 endpos = UTF_END(u);
1668 while (utf_ptr < endpos) {
1669 /* read next unicode character */
1670 u2 c = utf_nextu2(&utf_ptr);
/* values 32..127 are written unchanged, everything else as '?'.
   NOTE(review): the upper bound 127 lets DEL (0x7f) through, although it
   is a control character - confirm this is intended */
1672 if (c >= 32 && c <= 127) fprintf(file, "%c", c);
1673 else fprintf(file, "?");
1678 /* utf_fprint_printable_ascii_classname ****************************************
1680 Write utf symbol into file with `/' converted to `.'.
1681 Non-printable and non-ASCII characters are printed as '?'.

file.........the stream to print to
u............the utf symbol to print
1683 *******************************************************************************/
1685 void utf_fprint_printable_ascii_classname(FILE *file, utf *u)
1687 char *endpos; /* points behind utf string */
1688 char *utf_ptr; /* current position in utf text */
1693 endpos = UTF_END(u);
1696 while (utf_ptr < endpos) {
1697 /* read next unicode character */
1698 u2 c = utf_nextu2(&utf_ptr);
/* classname convention: package separators are shown as dots */
1699 if (c == '/') c = '.';
/* values 32..127 are written unchanged, everything else as '?'.
   NOTE(review): the upper bound 127 lets DEL (0x7f) through, although it
   is a control character - confirm this is intended */
1701 if (c >= 32 && c <= 127) fprintf(file, "%c", c);
1702 else fprintf(file, "?");
1707 /* is_valid_utf ****************************************************************
1709 Return true if the given string is a valid UTF-8 string.

The check follows what Java accepts rather than strict UTF-8: a zero byte
is rejected, and so is every sequence with more than two continuation
bytes. Overlong encodings, surrogates and the code points 0xfffe/0xffff
are accepted; the corresponding tests are present but commented out.

1711 utf_ptr...points to first character
1712 end_pos...points after last character
1714 *******************************************************************************/
1716 /* static unsigned long min_codepoint[6] = {0,1L<<7,1L<<11,1L<<16,1L<<21,1L<<26}; */
1718 bool is_valid_utf(char *utf_ptr, char *end_pos)
/* an inverted range is invalid; otherwise bytes is the input length */
1725 if (end_pos < utf_ptr) return false;
1726 bytes = end_pos - utf_ptr;
1730 if (!c) return false; /* 0x00 is not allowed */
1731 if ((c & 0x80) == 0) continue; /* ASCII */
/* len is the number of continuation bytes announced by the lead byte */
1733 if ((c & 0xe0) == 0xc0) len = 1; /* 110x xxxx */
1734 else if ((c & 0xf0) == 0xe0) len = 2; /* 1110 xxxx */
1735 else if ((c & 0xf8) == 0xf0) len = 3; /* 1111 0xxx */
1736 else if ((c & 0xfc) == 0xf8) len = 4; /* 1111 10xx */
1737 else if ((c & 0xfe) == 0xfc) len = 5; /* 1111 110x */
1738 else return false; /* invalid leading byte */
1740 if (len > 2) return false; /* Java limitation */
/* keep only the payload bits of the lead byte: the mask is 0x1f for
   len 1 and 0x0f for len 2 */
1742 v = (unsigned long)c & (0x3f >> len);
/* all announced continuation bytes must still be inside the input */
1744 if ((bytes -= len) < 0) return false; /* missing bytes */
1746 for (i = len; i--; ) {
1748 if ((c & 0xc0) != 0x80) /* 10xx xxxx */
/* append the six payload bits of the continuation byte */
1750 v = (v << 6) | (c & 0x3f);
/* NOTE(review): the condition guarding the next line is not part of this
   listing; presumably it applies when the decoded value is 0, which Java
   writes as the two-byte sequence c0 80 */
1754 if (len != 1) return false; /* Java special */
1757 /* Sun Java seems to allow overlong UTF-8 encodings */
1759 /* if (v < min_codepoint[len]) */
1760 /* XXX throw exception? */
1763 /* surrogates in UTF-8 seem to be allowed in Java classfiles */
1764 /* if (v >= 0xd800 && v <= 0xdfff) return false; */ /* surrogates */
1766 /* even these seem to be allowed */
1767 /* if (v == 0xfffe || v == 0xffff) return false; */ /* invalid codepoints */
1774 /* is_valid_name ***************************************************************
1776 Return true if the given string may be used as a class/field/method
1777 name. (Currently this only disallows empty strings, control characters
     below 0x20 and the two-byte encoding of zero, c0 80.)
1780 NOTE: The string is assumed to have passed is_valid_utf!
1782 utf_ptr...points to first character
1783 end_pos...points after last character
1785 *******************************************************************************/
1787 bool is_valid_name(char *utf_ptr, char *end_pos)
1789 if (end_pos <= utf_ptr) return false; /* disallow empty names */
1791 while (utf_ptr < end_pos) {
/* bytes are inspected as unsigned values so that the comparisons with
   0x20 and 0xc0 below behave the same for every byte value */
1792 unsigned char c = *utf_ptr++;
1794 if (c < 0x20) return false; /* disallow control characters */
/* looks at the byte after a c0 lead byte; this relies on the input having
   passed is_valid_utf, which demands a continuation byte after it */
1795 if (c == 0xc0 && (unsigned char) *utf_ptr == 0x80) /* disallow zero */
/* is_valid_name_utf ***********************************************************

   Apply is_valid_name to the text of the given utf symbol, i.e. to the
   range from u->text to UTF_END(u), and return its result.

   u............the utf symbol whose text is checked

*******************************************************************************/
1802 bool is_valid_name_utf(utf *u)
1804 return is_valid_name(u->text, UTF_END(u));
1808 /* utf_show ********************************************************************
1810 Writes the utf symbols in the utfhash to stdout and displays the
1811 number of external hash chains grouped according to the chainlength
1812 (for debugging purposes).

Only compiled when NDEBUG is not defined.
1814 *******************************************************************************/
1816 #if !defined(NDEBUG)
1820 #define CHAIN_LIMIT 20 /* limit for separated enumeration */
/* chain_count[n] counts the slots whose chain has length n; the last
   element also collects all longer chains */
1822 u4 chain_count[CHAIN_LIMIT]; /* numbers of chains */
1823 u4 max_chainlength = 0; /* maximum length of the chains */
1824 u4 sum_chainlength = 0; /* sum of the chainlengths */
1825 u4 beyond_limit = 0; /* number of utf-symbols in chains with length>=CHAIN_LIMIT-1 */
1828 printf("UTF-HASH:\n");
1830 /* show element of utf-hashtable */
1832 for (i = 0; i < hashtable_utf->size; i++) {
1833 utf *u = hashtable_utf->ptr[i];
1836 printf("SLOT %d: ", (int) i);
1840 utf_display_printable_ascii(u);
1848 printf("UTF-HASH: %d slots for %d entries\n",
1849 (int) hashtable_utf->size, (int) hashtable_utf->entries );
/* the percentages printed below divide by hashtable_utf->entries.
   NOTE(review): the statement guarded by this test is not part of this
   listing, presumably an early return */
1851 if (hashtable_utf->entries == 0)
1854 printf("chains:\n chainlength number of chains %% of utfstrings\n");
/* NOTE(review): presumably clears chain_count[]; the loop body is not part
   of this listing */
1856 for (i=0;i<CHAIN_LIMIT;i++)
1859 /* count numbers of hashchains according to their length */
1860 for (i=0; i<hashtable_utf->size; i++) {
1862 utf *u = (utf*) hashtable_utf->ptr[i];
1863 u4 chain_length = 0;
1865 /* determine chainlength */
1871 /* update sum of all chainlengths */
1872 sum_chainlength+=chain_length;
1874 /* determine the maximum length of the chains */
1875 if (chain_length>max_chainlength)
1876 max_chainlength = chain_length;
1878 /* update number of utf-symbols in chains with length>=CHAIN_LIMIT-1 */
/* NOTE(review): the comments say >=CHAIN_LIMIT-1, but the test below is
   >=CHAIN_LIMIT. A chain of exactly CHAIN_LIMIT-1 symbols is counted in
   the last bucket without being added to beyond_limit, so the percentage
   of the last output row leaves it out - confirm which is intended */
1879 if (chain_length>=CHAIN_LIMIT) {
1880 beyond_limit+=chain_length;
1881 chain_length=CHAIN_LIMIT-1;
1884 /* update number of hashchains of current length */
1885 chain_count[chain_length]++;
1888 /* display results */
/* one row per chain length 1..CHAIN_LIMIT-2; chain_count[i]*i is the number
   of symbols in chains of that length.
   NOTE(review): the u4 counters are printed with %d - confirm this matches
   the definition of u4 */
1889 for (i=1;i<CHAIN_LIMIT-1;i++)
1890 printf(" %2d %17d %18.2f%%\n",i,chain_count[i],(((float) chain_count[i]*i*100)/hashtable_utf->entries));
/* final row: the last bucket together with the clamped longer chains */
1892 printf(" >=%2d %17d %18.2f%%\n",CHAIN_LIMIT-1,chain_count[CHAIN_LIMIT-1],((float) beyond_limit*100)/hashtable_utf->entries);
1895 printf("max. chainlength:%5d\n",max_chainlength);
1897 /* avg. chainlength = sum of chainlengths / number of chains */
/* size - chain_count[0] is the number of slots with a non-empty chain */
1898 printf("avg. chainlength:%5.2f\n",(float) sum_chainlength / (hashtable_utf->size-chain_count[0]));
1900 #endif /* !defined(NDEBUG) */
1904 * These are local overrides for various environment variables in Emacs.
1905 * Please do not remove this and leave it at the end of the file, where
1906 * Emacs will automagically detect them.
1907 * ---------------------------------------------------------------------
1910 * indent-tabs-mode: t
1914 * vim:noexpandtab:sw=4:ts=4: