1 /* src/vmcore/utf8.c - utf8 string functions
3 Copyright (C) 1996-2005, 2006, 2007 R. Grafl, A. Krall, C. Kruegel,
4 C. Oates, R. Obermaisser, M. Platter, M. Probst, S. Ring,
5 E. Steiner, C. Thalinger, D. Thuernbeck, P. Tomsich, C. Ullrich,
6 J. Wenninger, Institut f. Computersprachen - TU Wien
8 This file is part of CACAO.
10 This program is free software; you can redistribute it and/or
11 modify it under the terms of the GNU General Public License as
12 published by the Free Software Foundation; either version 2, or (at
13 your option) any later version.
15 This program is distributed in the hope that it will be useful, but
16 WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
25 $Id: utf8.c 8132 2007-06-22 11:15:47Z twisti $
37 #include "mm/memory.h"
39 #include "threads/lock-common.h"
41 #include "toolbox/hashtable.h"
43 #include "vm/exceptions.h"
45 #include "vmcore/options.h"
47 #if defined(ENABLE_STATISTICS)
48 # include "vmcore/statistics.h"
51 #include "vmcore/utf8.h"
54 /* global variables ***********************************************************/
56 /* hashsize must be power of 2 */
58 #define HASHTABLE_UTF_SIZE 16384 /* initial size of utf-hash */
60 hashtable *hashtable_utf; /* hashtable for utf8-symbols */
63 /* utf-symbols for pointer comparison of frequently used strings **************/
65 utf *utf_java_lang_Object;
67 utf *utf_java_lang_Class;
68 utf *utf_java_lang_ClassLoader;
69 utf *utf_java_lang_Cloneable;
70 utf *utf_java_lang_SecurityManager;
71 utf *utf_java_lang_String;
72 utf *utf_java_lang_System;
73 utf *utf_java_lang_ThreadGroup;
74 utf *utf_java_lang_ref_SoftReference;
75 utf *utf_java_lang_ref_WeakReference;
76 utf *utf_java_lang_ref_PhantomReference;
77 utf *utf_java_io_Serializable;
79 utf *utf_java_lang_Throwable;
80 utf *utf_java_lang_Error;
82 utf *utf_java_lang_AbstractMethodError;
83 utf *utf_java_lang_ClassCircularityError;
84 utf *utf_java_lang_ClassFormatError;
85 utf *utf_java_lang_ExceptionInInitializerError;
86 utf *utf_java_lang_IncompatibleClassChangeError;
87 utf *utf_java_lang_InstantiationError;
88 utf *utf_java_lang_InternalError;
89 utf *utf_java_lang_LinkageError;
90 utf *utf_java_lang_NoClassDefFoundError;
91 utf *utf_java_lang_NoSuchFieldError;
92 utf *utf_java_lang_NoSuchMethodError;
93 utf *utf_java_lang_OutOfMemoryError;
94 utf *utf_java_lang_UnsatisfiedLinkError;
95 utf *utf_java_lang_UnsupportedClassVersionError;
96 utf *utf_java_lang_VerifyError;
97 utf *utf_java_lang_VirtualMachineError;
99 #if defined(WITH_CLASSPATH_GNU)
100 utf *utf_java_lang_VMThrowable;
103 utf *utf_java_lang_Exception;
105 utf *utf_java_lang_ArithmeticException;
106 utf *utf_java_lang_ArrayIndexOutOfBoundsException;
107 utf *utf_java_lang_ArrayStoreException;
108 utf *utf_java_lang_ClassCastException;
109 utf *utf_java_lang_ClassNotFoundException;
110 utf *utf_java_lang_CloneNotSupportedException;
111 utf *utf_java_lang_IllegalAccessException;
112 utf *utf_java_lang_IllegalArgumentException;
113 utf *utf_java_lang_IllegalMonitorStateException;
114 utf *utf_java_lang_InstantiationException;
115 utf *utf_java_lang_InterruptedException;
116 utf *utf_java_lang_NegativeArraySizeException;
117 utf *utf_java_lang_NullPointerException;
118 utf *utf_java_lang_StringIndexOutOfBoundsException;
120 utf *utf_java_lang_reflect_InvocationTargetException;
122 utf *utf_java_security_PrivilegedActionException;
124 #if defined(ENABLE_JAVASE)
125 utf* utf_java_lang_Void;
128 utf* utf_java_lang_Boolean;
129 utf* utf_java_lang_Byte;
130 utf* utf_java_lang_Character;
131 utf* utf_java_lang_Short;
132 utf* utf_java_lang_Integer;
133 utf* utf_java_lang_Long;
134 utf* utf_java_lang_Float;
135 utf* utf_java_lang_Double;
137 #if defined(ENABLE_JAVASE)
138 utf *utf_java_lang_StackTraceElement;
139 utf *utf_java_lang_reflect_Constructor;
140 utf *utf_java_lang_reflect_Field;
141 utf *utf_java_lang_reflect_Method;
142 utf *utf_java_util_Vector;
145 utf *utf_InnerClasses; /* InnerClasses */
146 utf *utf_ConstantValue; /* ConstantValue */
147 utf *utf_Code; /* Code */
148 utf *utf_Exceptions; /* Exceptions */
149 utf *utf_LineNumberTable; /* LineNumberTable */
150 utf *utf_SourceFile; /* SourceFile */
152 #if defined(ENABLE_JAVASE)
153 utf *utf_EnclosingMethod;
155 utf *utf_RuntimeVisibleAnnotations;
156 utf *utf_StackMapTable;
159 utf *utf_init; /* <init> */
160 utf *utf_clinit; /* <clinit> */
161 utf *utf_clone; /* clone */
162 utf *utf_finalize; /* finalize */
163 utf *utf_run; /* run */
168 utf *utf_removeThread;
173 utf *utf_fillInStackTrace;
175 utf *utf_getSystemClassLoader;
178 utf *utf_printStackTrace;
180 utf *utf_division_by_zero;
191 utf *utf_void__void; /* ()V */
192 utf *utf_boolean__void; /* (Z)V */
193 utf *utf_byte__void; /* (B)V */
194 utf *utf_char__void; /* (C)V */
195 utf *utf_short__void; /* (S)V */
196 utf *utf_int__void; /* (I)V */
197 utf *utf_long__void; /* (J)V */
198 utf *utf_float__void; /* (F)V */
199 utf *utf_double__void; /* (D)V */
201 utf *utf_void__java_lang_ClassLoader; /* ()Ljava/lang/ClassLoader; */
202 utf *utf_void__java_lang_Object; /* ()Ljava/lang/Object; */
203 utf *utf_void__java_lang_Throwable; /* ()Ljava/lang/Throwable; */
204 utf *utf_java_lang_ClassLoader_java_lang_String__J;
205 utf *utf_java_lang_Exception__V; /* (Ljava/lang/Exception;)V */
206 utf *utf_java_lang_Object__java_lang_Object;
207 utf *utf_java_lang_String__void; /* (Ljava/lang/String;)V */
208 utf *utf_java_lang_String__java_lang_Class;
209 utf *utf_java_lang_Thread__V; /* (Ljava/lang/Thread;)V */
210 utf *utf_java_lang_Throwable__void; /* (Ljava/lang/Throwable;)V */
211 utf *utf_java_lang_Throwable__java_lang_Throwable;
213 utf *utf_not_named_yet; /* special name for unnamed classes */
215 utf *array_packagename;
218 /* utf_init ********************************************************************
220 Initializes the utf8 subsystem.
222 *******************************************************************************/
226 /* create utf8 hashtable */
228 hashtable_utf = NEW(hashtable);
230 hashtable_create(hashtable_utf, HASHTABLE_UTF_SIZE);
232 #if defined(ENABLE_STATISTICS)
234 count_utf_len += sizeof(utf*) * hashtable_utf->size;
237 /* create utf-symbols for pointer comparison of frequently used strings */
239 utf_java_lang_Object = utf_new_char("java/lang/Object");
241 utf_java_lang_Class = utf_new_char("java/lang/Class");
242 utf_java_lang_ClassLoader = utf_new_char("java/lang/ClassLoader");
243 utf_java_lang_Cloneable = utf_new_char("java/lang/Cloneable");
244 utf_java_lang_SecurityManager = utf_new_char("java/lang/SecurityManager");
245 utf_java_lang_String = utf_new_char("java/lang/String");
246 utf_java_lang_System = utf_new_char("java/lang/System");
247 utf_java_lang_ThreadGroup = utf_new_char("java/lang/ThreadGroup");
249 utf_java_lang_ref_SoftReference =
250 utf_new_char("java/lang/ref/SoftReference");
252 utf_java_lang_ref_WeakReference =
253 utf_new_char("java/lang/ref/WeakReference");
255 utf_java_lang_ref_PhantomReference =
256 utf_new_char("java/lang/ref/PhantomReference");
258 utf_java_io_Serializable = utf_new_char("java/io/Serializable");
260 utf_java_lang_Throwable = utf_new_char("java/lang/Throwable");
261 utf_java_lang_Error = utf_new_char("java/lang/Error");
263 utf_java_lang_ClassCircularityError =
264 utf_new_char("java/lang/ClassCircularityError");
266 utf_java_lang_ClassFormatError = utf_new_char("java/lang/ClassFormatError");
268 utf_java_lang_ExceptionInInitializerError =
269 utf_new_char("java/lang/ExceptionInInitializerError");
271 utf_java_lang_IncompatibleClassChangeError =
272 utf_new_char("java/lang/IncompatibleClassChangeError");
274 utf_java_lang_InstantiationError =
275 utf_new_char("java/lang/InstantiationError");
277 utf_java_lang_InternalError = utf_new_char("java/lang/InternalError");
278 utf_java_lang_LinkageError = utf_new_char("java/lang/LinkageError");
280 utf_java_lang_NoClassDefFoundError =
281 utf_new_char("java/lang/NoClassDefFoundError");
283 utf_java_lang_OutOfMemoryError = utf_new_char("java/lang/OutOfMemoryError");
285 utf_java_lang_UnsatisfiedLinkError =
286 utf_new_char("java/lang/UnsatisfiedLinkError");
288 utf_java_lang_UnsupportedClassVersionError =
289 utf_new_char("java/lang/UnsupportedClassVersionError");
291 utf_java_lang_VerifyError = utf_new_char("java/lang/VerifyError");
293 utf_java_lang_VirtualMachineError =
294 utf_new_char("java/lang/VirtualMachineError");
296 #if defined(ENABLE_JAVASE)
297 utf_java_lang_AbstractMethodError =
298 utf_new_char("java/lang/AbstractMethodError");
300 utf_java_lang_NoSuchFieldError =
301 utf_new_char("java/lang/NoSuchFieldError");
303 utf_java_lang_NoSuchMethodError =
304 utf_new_char("java/lang/NoSuchMethodError");
307 #if defined(WITH_CLASSPATH_GNU)
308 utf_java_lang_VMThrowable = utf_new_char("java/lang/VMThrowable");
311 utf_java_lang_Exception = utf_new_char("java/lang/Exception");
313 utf_java_lang_ArithmeticException =
314 utf_new_char("java/lang/ArithmeticException");
316 utf_java_lang_ArrayIndexOutOfBoundsException =
317 utf_new_char("java/lang/ArrayIndexOutOfBoundsException");
319 utf_java_lang_ArrayStoreException =
320 utf_new_char("java/lang/ArrayStoreException");
322 utf_java_lang_ClassCastException =
323 utf_new_char("java/lang/ClassCastException");
325 utf_java_lang_ClassNotFoundException =
326 utf_new_char("java/lang/ClassNotFoundException");
328 utf_java_lang_CloneNotSupportedException =
329 utf_new_char("java/lang/CloneNotSupportedException");
331 utf_java_lang_IllegalAccessException =
332 utf_new_char("java/lang/IllegalAccessException");
334 utf_java_lang_IllegalArgumentException =
335 utf_new_char("java/lang/IllegalArgumentException");
337 utf_java_lang_IllegalMonitorStateException =
338 utf_new_char("java/lang/IllegalMonitorStateException");
340 utf_java_lang_InstantiationException =
341 utf_new_char("java/lang/InstantiationException");
343 utf_java_lang_InterruptedException =
344 utf_new_char("java/lang/InterruptedException");
346 utf_java_lang_NegativeArraySizeException =
347 utf_new_char("java/lang/NegativeArraySizeException");
349 utf_java_lang_NullPointerException =
350 utf_new_char("java/lang/NullPointerException");
352 utf_java_lang_StringIndexOutOfBoundsException =
353 utf_new_char("java/lang/StringIndexOutOfBoundsException");
355 utf_java_lang_reflect_InvocationTargetException =
356 utf_new_char("java/lang/reflect/InvocationTargetException");
358 utf_java_security_PrivilegedActionException =
359 utf_new_char("java/security/PrivilegedActionException");
361 #if defined(ENABLE_JAVASE)
362 utf_java_lang_Void = utf_new_char("java/lang/Void");
365 utf_java_lang_Boolean = utf_new_char("java/lang/Boolean");
366 utf_java_lang_Byte = utf_new_char("java/lang/Byte");
367 utf_java_lang_Character = utf_new_char("java/lang/Character");
368 utf_java_lang_Short = utf_new_char("java/lang/Short");
369 utf_java_lang_Integer = utf_new_char("java/lang/Integer");
370 utf_java_lang_Long = utf_new_char("java/lang/Long");
371 utf_java_lang_Float = utf_new_char("java/lang/Float");
372 utf_java_lang_Double = utf_new_char("java/lang/Double");
374 #if defined(ENABLE_JAVASE)
375 utf_java_lang_StackTraceElement =
376 utf_new_char("java/lang/StackTraceElement");
378 utf_java_lang_reflect_Constructor =
379 utf_new_char("java/lang/reflect/Constructor");
381 utf_java_lang_reflect_Field = utf_new_char("java/lang/reflect/Field");
382 utf_java_lang_reflect_Method = utf_new_char("java/lang/reflect/Method");
383 utf_java_util_Vector = utf_new_char("java/util/Vector");
386 utf_InnerClasses = utf_new_char("InnerClasses");
387 utf_ConstantValue = utf_new_char("ConstantValue");
388 utf_Code = utf_new_char("Code");
389 utf_Exceptions = utf_new_char("Exceptions");
390 utf_LineNumberTable = utf_new_char("LineNumberTable");
391 utf_SourceFile = utf_new_char("SourceFile");
393 #if defined(ENABLE_JAVASE)
394 utf_EnclosingMethod = utf_new_char("EnclosingMethod");
395 utf_Signature = utf_new_char("Signature");
396 utf_RuntimeVisibleAnnotations = utf_new_char("RuntimeVisibleAnnotations");
397 utf_StackMapTable = utf_new_char("StackMapTable");
400 utf_init = utf_new_char("<init>");
401 utf_clinit = utf_new_char("<clinit>");
402 utf_clone = utf_new_char("clone");
403 utf_finalize = utf_new_char("finalize");
404 utf_run = utf_new_char("run");
406 utf_add = utf_new_char("add");
407 utf_remove = utf_new_char("remove");
408 utf_addThread = utf_new_char("addThread");
409 utf_removeThread = utf_new_char("removeThread");
410 utf_put = utf_new_char("put");
411 utf_get = utf_new_char("get");
412 utf_value = utf_new_char("value");
414 utf_fillInStackTrace = utf_new_char("fillInStackTrace");
415 utf_findNative = utf_new_char("findNative");
416 utf_getSystemClassLoader = utf_new_char("getSystemClassLoader");
417 utf_initCause = utf_new_char("initCause");
418 utf_loadClass = utf_new_char("loadClass");
419 utf_printStackTrace = utf_new_char("printStackTrace");
421 utf_division_by_zero = utf_new_char("/ by zero");
423 utf_Z = utf_new_char("Z");
424 utf_B = utf_new_char("B");
425 utf_C = utf_new_char("C");
426 utf_S = utf_new_char("S");
427 utf_I = utf_new_char("I");
428 utf_J = utf_new_char("J");
429 utf_F = utf_new_char("F");
430 utf_D = utf_new_char("D");
432 utf_void__void = utf_new_char("()V");
433 utf_boolean__void = utf_new_char("(Z)V");
434 utf_byte__void = utf_new_char("(B)V");
435 utf_char__void = utf_new_char("(C)V");
436 utf_short__void = utf_new_char("(S)V");
437 utf_int__void = utf_new_char("(I)V");
438 utf_long__void = utf_new_char("(J)V");
439 utf_float__void = utf_new_char("(F)V");
440 utf_double__void = utf_new_char("(D)V");
441 utf_void__java_lang_Object = utf_new_char("()Ljava/lang/Object;");
442 utf_void__java_lang_Throwable = utf_new_char("()Ljava/lang/Throwable;");
444 utf_void__java_lang_ClassLoader =
445 utf_new_char("()Ljava/lang/ClassLoader;");
447 utf_java_lang_ClassLoader_java_lang_String__J =
448 utf_new_char("(Ljava/lang/ClassLoader;Ljava/lang/String;)J");
450 utf_java_lang_Exception__V = utf_new_char("(Ljava/lang/Exception;)V");
452 utf_java_lang_Object__java_lang_Object =
453 utf_new_char("(Ljava/lang/Object;)Ljava/lang/Object;");
455 utf_java_lang_String__void = utf_new_char("(Ljava/lang/String;)V");
457 utf_java_lang_String__java_lang_Class =
458 utf_new_char("(Ljava/lang/String;)Ljava/lang/Class;");
460 utf_java_lang_Thread__V = utf_new_char("(Ljava/lang/Thread;)V");
461 utf_java_lang_Throwable__void = utf_new_char("(Ljava/lang/Throwable;)V");
463 utf_java_lang_Throwable__java_lang_Throwable =
464 utf_new_char("(Ljava/lang/Throwable;)Ljava/lang/Throwable;");
466 utf_null = utf_new_char("null");
467 utf_not_named_yet = utf_new_char("\t<not_named_yet>");
468 array_packagename = utf_new_char("\t<the array package>");
470 /* everything's ok */
476 /* utf_hashkey *****************************************************************
478 The hashkey is computed from the utf-text by using up to 8
479 characters. For utf-symbols longer than 15 characters 3 characters
480 are taken from the beginning and the end, 2 characters are taken
from the middle (see the default case below).

IN:
text.........pointer to the first byte of the utf text
length.......number of bytes in the text; selects the case below

The value returned is the raw key. It is not reduced to a table
index here; utf_new masks it with (size - 1).

Cases 1 through 8 fold every byte of the text into the key.
483 *******************************************************************************/
/* Both macros expand to expressions over the variable `text` of the
   enclosing function. nbs() pre-increments `text` as a side effect, so each
   use reads the byte after the previous one. `val` is not parenthesized in
   the expansion, so only simple constants should be passed.

   NOTE(review): `text` is a plain char pointer; on platforms where char is
   signed, bytes >= 0x80 are sign-extended by the (u4) cast.

   NOTE(review): expressions such as fbs(0) ^ nbs(3) ^ nbs(5) modify `text`
   more than once without an intervening sequence point, so which byte each
   operand reads formally depends on the compiler's evaluation order. */
485 #define nbs(val) ((u4) *(++text) << val) /* get next byte, left shift by val */
486 #define fbs(val) ((u4) *( text) << val) /* get first byte, left shift by val */
488 u4 utf_hashkey(const char *text, u4 length)
/* remembered so the default case can reposition `text` relative to the start */
490 const char *start_pos = text; /* pointer to utf text */
494 case 0: /* empty string */
/* lengths 1..8: every byte contributes, each with its own shift amount */
497 case 1: return fbs(0);
498 case 2: return fbs(0) ^ nbs(3);
499 case 3: return fbs(0) ^ nbs(3) ^ nbs(5);
500 case 4: return fbs(0) ^ nbs(2) ^ nbs(4) ^ nbs(6);
501 case 5: return fbs(0) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(6);
502 case 6: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(5) ^ nbs(6);
503 case 7: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(5) ^ nbs(6);
504 case 8: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(5) ^ nbs(6) ^ nbs(7);
/* The following returns finish the cases for the intermediate lengths; the
   statements that build the partial key `a` are not visible in this view. */
511 return a ^ nbs(4) ^ nbs(5) ^ nbs(6) ^ nbs(7) ^ nbs(8);
520 return a ^ nbs(6) ^ nbs(7) ^ nbs(8) ^ nbs(9);
529 return a ^ nbs(6) ^ nbs(7) ^ nbs(8) ^ nbs(9) ^ nbs(10);
541 return a ^ nbs(9) ^ nbs(10);
553 return a ^ nbs(9) ^ nbs(10);
564 return a ^ nbs(9) ^ nbs(10) ^ nbs(11);
575 return a ^ nbs(9) ^ nbs(10) ^ nbs(11);
577 default: /* 3 characters from beginning */
583 /* 2 characters from middle */
584 text = start_pos + (length / 2);
589 /* 3 characters from end */
/* nbs() pre-increments, so the first byte read after this assignment is the
   one at offset length - 3 */
590 text = start_pos + length - 4;
595 return a ^ nbs(10) ^ nbs(11);
599 /* utf_full_hashkey ************************************************************
601 This function computes a hash value using all bytes in the string.
603 The algorithm is the "One-at-a-time" algorithm as published
604 by Bob Jenkins on http://burtleburtle.net/bob/hash/doobs.html.

IN:
text.........pointer to the first byte to hash
length.......number of bytes to hash
606 *******************************************************************************/
608 u4 utf_full_hashkey(const char *text, u4 length)
/* bytes are read through an unsigned char pointer, so values >= 0x80 are
   not sign-extended (unlike the macros used by utf_hashkey) */
610 register const unsigned char *p = (const unsigned char *) text;
/* mixing step; the surrounding loop statements are not visible in this view */
618 hash += (hash << 10);
/* presumably part of the final mixing applied once after all bytes have
   been consumed -- confirm against the complete source */
622 hash ^= (hash >> 11);
623 hash += (hash << 15);
628 /* unicode_hashkey *************************************************************
630 Compute the hashkey of a unicode string.

IN:
text.........pointer to the u2 array
len..........length value handed on to utf_hashkey

The u2 array is reinterpreted as a byte sequence and hashed with
utf_hashkey.

NOTE(review): len is forwarded unchanged as the byte length. If callers
pass the number of u2 units, only the bytes selected for that smaller
length take part in the key -- confirm against callers.
632 *******************************************************************************/
634 u4 unicode_hashkey(u2 *text, u2 len)
/* the result therefore depends on the in-memory byte order of the u2 values */
636 return utf_hashkey((char *) text, len);
640 /* utf_new *********************************************************************
642 Creates a new utf-symbol, the text of the symbol is passed as a
643 u1-array. The function searches the utf-hashtable for a utf-symbol
644 with this text. On success the element is returned, otherwise a new
645 hashtable element is created.
647 If the number of entries in the hashtable exceeds twice the size of
648 the hashtable slots a reorganization of the hashtable is done and
649 the utf symbols are copied to a new hashtable with doubled size.

IN:
text.........the bytes of the symbol; need not be zero-terminated,
             exactly `length` bytes are compared and copied
length.......number of bytes in text (u2, so at most 65535)

The whole lookup/insert runs between LOCK_MONITOR_ENTER and
LOCK_MONITOR_EXIT on hashtable_utf->header.
651 *******************************************************************************/
653 utf *utf_new(const char *text, u2 length)
655 u4 key; /* hashkey computed from utf-text */
656 u4 slot; /* slot in hashtable */
657 utf *u; /* hashtable element */
660 LOCK_MONITOR_ENTER(hashtable_utf->header);
662 #if defined(ENABLE_STATISTICS)
667 key = utf_hashkey(text, length);
/* the table size is required to be a power of two (see HASHTABLE_UTF_SIZE),
   so masking with size - 1 yields a valid slot index */
668 slot = key & (hashtable_utf->size - 1);
669 u = hashtable_utf->ptr[slot];
671 /* search external hash chain for utf-symbol */
/* cheap length comparison first, byte-wise comparison only on equal length */
674 if (u->blength == length) {
675 /* compare text of hashtable elements */
677 for (i = 0; i < length; i++)
678 if (text[i] != u->text[i])
681 #if defined(ENABLE_STATISTICS)
683 count_utf_new_found++;
686 /* symbol found in hashtable */
688 LOCK_MONITOR_EXIT(hashtable_utf->header);
694 u = u->hashlink; /* next element in external chain */
697 /* location in hashtable found, create new utf element */
701 u->blength = length; /* length in bytes of utfstring */
/* the new element is linked in front of the current chain of this slot */
702 u->hashlink = hashtable_utf->ptr[slot]; /* link in external hashchain */
/* one extra byte so the stored copy can be zero-terminated */
703 u->text = mem_alloc(length + 1);/* allocate memory for utf-text */
705 memcpy(u->text, text, length); /* copy utf-text */
706 u->text[length] = '\0';
708 #if defined(ENABLE_STATISTICS)
710 count_utf_len += sizeof(utf) + length + 1;
713 hashtable_utf->ptr[slot] = u; /* insert symbol into table */
714 hashtable_utf->entries++; /* update number of entries */
716 if (hashtable_utf->entries > (hashtable_utf->size * 2)) {
718 /* reorganization of hashtable, average length of the external
719 chains is approx. 2 */
721 hashtable *newhash; /* the new hashtable */
727 /* create new hashtable, double the size */
729 newhash = hashtable_resize(hashtable_utf, hashtable_utf->size * 2);
731 #if defined(ENABLE_STATISTICS)
733 count_utf_len += sizeof(utf*) * hashtable_utf->size;
736 /* transfer elements to new hashtable */
738 for (i = 0; i < hashtable_utf->size; i++) {
739 u = hashtable_utf->ptr[i];
/* every element is rehashed because the slot mask changes with the size */
743 slot = utf_hashkey(u->text, u->blength) & (newhash->size - 1);
745 u->hashlink = (utf *) newhash->ptr[slot];
746 newhash->ptr[slot] = u;
748 /* follow link in external hash chain */
754 /* dispose old table */
756 hashtable_free(hashtable_utf);
758 hashtable_utf = newhash;
/* NOTE(review): after a resize hashtable_utf already points to newhash here,
   so this exit is only balanced with the enter above if hashtable_resize
   carries the lock object over to the new table -- confirm. */
761 LOCK_MONITOR_EXIT(hashtable_utf->header);
767 /* utf_new_u2 ******************************************************************
769 Make utf symbol from u2 array, if isclassname is true '.' is
converted (presumably to '/', as in utf_new_char_classname; the
replacement statement is not visible in this view).

IN:
unicode_pos......pointer to the first u2 of the input
unicode_length...number of u2 values to convert
isclassname......enables the '.' conversion for ASCII characters

The u2 values are encoded into a temporary byte buffer, which is
interned with utf_new and then released.
772 *******************************************************************************/
774 utf *utf_new_u2(u2 *unicode_pos, u4 unicode_length, bool isclassname)
776 char *buffer; /* memory buffer for unicode characters */
777 char *pos; /* pointer to current position in buffer */
778 u4 left; /* unicode characters left */
779 u4 buflength; /* utf length in bytes of the u2 array */
780 utf *result; /* resulting utf-string */
783 /* determine utf length in bytes and allocate memory */
785 buflength = u2_utflength(unicode_pos, unicode_length);
786 buffer = MNEW(char, buflength);
/* the post-increment in the condition still gives unicode_length iterations */
791 for (i = 0; i++ < unicode_length; unicode_pos++) {
792 /* next unicode character */
/* U+0000 deliberately fails this test and takes the two-byte branch below,
   so the encoded text never contains a plain zero byte */
795 if ((c != 0) && (c < 0x80)) {
/* `left` is unsigned; the cast makes an underflow visible as a negative value */
798 if ((int) left < 0) break;
799 /* convert classname */
800 if (isclassname && c == '.')
805 } else if (c < 0x800) {
807 unsigned char high = c >> 6;
808 unsigned char low = c & 0x3F;
810 if ((int) left < 0) break;
/* lead byte of a two-byte sequence: 110xxxxx */
811 *pos++ = high | 0xC0;
817 char mid = (c >> 6) & 0x3F;
820 if ((int) left < 0) break;
/* lead byte of a three-byte sequence: 1110xxxx */
821 *pos++ = high | 0xE0;
827 /* insert utf-string into symbol-table */
/* NOTE(review): buflength is u4 but utf_new takes a u2 length, so an encoded
   length above 65535 would be narrowed in this call. */
828 result = utf_new(buffer,buflength);
830 MFREE(buffer, char, buflength);
836 /* utf_new_char ****************************************************************
838 Creates a new utf symbol, the text for this symbol is passed as a
839 c-string ( = char* ).

IN:
text.........zero-terminated string; its strlen() is used as the
             symbol length

NOTE(review): utf_new takes the length as u2, so the strlen() result
is narrowed; strings longer than 65535 bytes would not be interned
with their full length.
841 *******************************************************************************/
843 utf *utf_new_char(const char *text)
845 return utf_new(text, strlen(text));
849 /* utf_new_char_classname ******************************************************
851 Creates a new utf symbol, the text for this symbol is passed as a
852 c-string ( = char* ) "." characters are going to be replaced by
853 "/". Since the above function is used often, this is a separate
854 function, instead of an if.

IN:
text.........zero-terminated class name; it is never modified, the
             replacement is done on a private copy
856 *******************************************************************************/
858 utf *utf_new_char_classname(const char *text)
/* only pay for a copy when there is at least one '.' to replace */
860 if (strchr(text, '.')) {
/* NOTE(review): the strdup() result is used without a visible NULL check,
   and the statement releasing txt is not visible in this view. */
861 char *txt = strdup(text);
862 char *end = txt + strlen(txt);
866 for (c = txt; c < end; c++)
867 if (*c == '.') *c = '/';
869 tmpRes = utf_new(txt, strlen(txt));
/* no '.' present: the input can be interned as it is */
875 return utf_new(text, strlen(text));
879 /* utf_nextu2 ******************************************************************
881 Read the next unicode character from the utf string and increment
882 the utf-string pointer accordingly.
884 CAUTION: This function is unsafe for input that was not checked

IN/OUT:
utf_ptr......address of the read position; on return it has been
             advanced by the number of bytes consumed (len)

Up to three bytes (utf[0], utf[1], utf[2]) are read without any
bounds check, which is why the caution above applies.
887 *******************************************************************************/
889 u2 utf_nextu2(char **utf_ptr)
891 /* uncompressed unicode character */
893 /* current position in utf text */
894 unsigned char *utf = (unsigned char *) (*utf_ptr);
895 /* bytes representing the unicode character */
896 unsigned char ch1, ch2, ch3;
897 /* number of bytes used to represent the unicode character */
/* dispatch on the high nibble of the first byte */
900 switch ((ch1 = utf[0]) >> 4) {
901 default: /* 1 byte */
905 case 0xD: /* 2 bytes */
/* the second byte must be a continuation byte (10xxxxxx) */
906 if (((ch2 = utf[1]) & 0xC0) == 0x80) {
907 unsigned char high = ch1 & 0x1F;
908 unsigned char low = ch2 & 0x3F;
/* 5 payload bits from the lead byte, 6 from the continuation byte */
909 unicode_char = (high << 6) + low;
914 case 0xE: /* 2 or 3 bytes */
915 if (((ch2 = utf[1]) & 0xC0) == 0x80) {
916 if (((ch3 = utf[2]) & 0xC0) == 0x80) {
917 unsigned char low = ch3 & 0x3f;
918 unsigned char mid = ch2 & 0x3f;
919 unsigned char high = ch1 & 0x0f;
/* 4 + 6 + 6 payload bits form the 16-bit character */
920 unicode_char = (((high << 6) + mid) << 6) + low;
928 /* update position in utf-text */
929 *utf_ptr = (char *) (utf + len);
935 /* utf_bytes *******************************************************************
937 Determine number of bytes (aka. octets) in the utf string.
940 u............utf string
943 The number of octets of this utf string.
944 There is _no_ terminating zero included in this count.
946 *******************************************************************************/
954 /* utf_get_number_of_u2s_for_buffer ********************************************
956 Determine number of UTF-16 u2s in the given UTF-8 buffer
958 CAUTION: This function is unsafe for input that was not checked
961 CAUTION: Use this function *only* when you want to convert an UTF-8 buffer
962 to an array of u2s (UTF-16) and want to know how many of them you will get.
963 All other uses of this function are probably wrong.
966 buffer........points to first char in buffer
967 blength.......number of _bytes_ in the buffer
970 the number of u2s needed to hold this string in UTF-16 encoding.
971 There is _no_ terminating zero included in this count.
973 NOTE: Unlike utf_get_number_of_u2s, this function never throws an
exception. A decode that ends past the buffer is only caught by the
assert at the end, i.e. only in builds where assertions are enabled.
976 *******************************************************************************/
978 u4 utf_get_number_of_u2s_for_buffer(const char *buffer, u4 blength)
980 const char *endpos; /* points behind utf string */
981 const char *utf_ptr; /* current position in utf text */
982 u4 len = 0; /* number of unicode characters */
985 endpos = utf_ptr + blength;
987 while (utf_ptr < endpos) {
989 /* next unicode character */
/* the cast drops const only to match utf_nextu2's parameter type; the decoded
   character itself is ignored, only the pointer advance matters here */
990 utf_nextu2((char **)&utf_ptr);
/* a multi-byte sequence cut off at the end of the buffer overshoots endpos */
993 assert(utf_ptr == endpos);
999 /* utf_get_number_of_u2s *******************************************************
1001 Determine number of UTF-16 u2s in the utf string.
1003 CAUTION: This function is unsafe for input that was not checked
1006 CAUTION: Use this function *only* when you want to convert a utf string
1007 to an array of u2s and want to know how many of them you will get.
1008 All other uses of this function are probably wrong.
1011 u............utf string
1014 the number of u2s needed to hold this string in UTF-16 encoding.
1015 There is _no_ terminating zero included in this count.
1016 XXX 0 if a NullPointerException has been thrown (see below)

Two error paths raise exceptions instead of asserting: a
NullPointerException, and an InternalError when decoding does not end
exactly at the end of the string.
1018 *******************************************************************************/
1020 u4 utf_get_number_of_u2s(utf *u)
1022 char *endpos; /* points behind utf string */
1023 char *utf_ptr; /* current position in utf text */
1024 u4 len = 0; /* number of unicode characters */
1026 /* XXX this is probably not checked by most callers! Review this after */
1027 /* the invalid uses of this function have been eliminated */
/* the condition guarding this throw is not visible in this view; per the
   header it is the path on which 0 is returned */
1029 exceptions_throw_nullpointerexception();
1033 endpos = UTF_END(u);
1036 while (utf_ptr < endpos) {
1038 /* next unicode character */
/* only the pointer advance is used; the decoded character is ignored */
1039 utf_nextu2(&utf_ptr);
/* a multi-byte sequence cut off at the end of the text overshoots endpos */
1042 if (utf_ptr != endpos) {
1043 /* string ended abruptly */
1044 exceptions_throw_internalerror("Illegal utf8 string");
1052 /* utf8_safe_number_of_u2s *****************************************************
1054 Determine number of UTF-16 u2s needed for decoding the given UTF-8 string.
1055 (For invalid UTF-8 the U+fffd replacement character will be counted.)
1057 This function is safe even for invalid UTF-8 strings.
1060 text..........zero-terminated(!) UTF-8 string (may be invalid)
1062 nbytes........strlen(text). (This is needed to completely emulate
1066 the number of u2s needed to hold this string in UTF-16 encoding.
1067 There is _no_ terminating zero included in this count.

nbytes must not be negative (asserted). When a multi-byte sequence is
cut off by the end of the input, counting stops and one extra u2 is
counted for it (the `return len + 1` paths).
1069 *******************************************************************************/
1071 s4 utf8_safe_number_of_u2s(const char *text, s4 nbytes) {
1072 register const unsigned char *t;
1075 register const unsigned char *tlimit;
1083 assert(nbytes >= 0);
/* bytes are inspected as unsigned values so the bit masks below work for
   bytes >= 0x80 */
1086 t = (const unsigned char *) text;
1087 tlimit = t + nbytes;
1089 /* CAUTION: Keep this code in sync with utf8_safe_convert_to_u2s! */
1095 /* highest bit set, non-ASCII character */
1097 if ((byte & 0xe0) == 0xc0) {
1098 /* 2-byte: should be 110..... 10...... ? */
1100 if ((*t++ & 0xc0) == 0x80)
1101 ; /* valid 2-byte */
1105 else if ((byte & 0xf0) == 0xe0) {
1106 /* 3-byte: should be 1110.... 10...... 10...... */
/* the guard for this return is not visible in this view; the matching guard
   in utf8_safe_convert_to_u2s is `t + 2 > tlimit` */
1110 return len + 1; /* invalid, stop here */
1112 if ((*t++ & 0xc0) == 0x80) {
1113 if ((*t++ & 0xc0) == 0x80)
1114 ; /* valid 3-byte */
1121 else if ((byte & 0xf8) == 0xf0) {
1122 /* 4-byte: should be 11110... 10...... 10...... 10...... */
/* guard not visible here; utf8_safe_convert_to_u2s uses `t + 3 > tlimit` */
1126 return len + 1; /* invalid, stop here */
1128 if (((byte1 = *t++) & 0xc0) == 0x80) {
1129 if (((byte2 = *t++) & 0xc0) == 0x80) {
1130 if (((byte3 = *t++) & 0xc0) == 0x80) {
1131 /* valid 4-byte UTF-8? */
/* assemble the 21-bit value: 3 + 6 + 6 + 6 payload bits */
1132 value = ((byte & 0x07) << 18)
1133 | ((byte1 & 0x3f) << 12)
1134 | ((byte2 & 0x3f) << 6)
1135 | ((byte3 & 0x3f) );
/* values above 0x10FFFF are outside the Unicode range */
1137 if (value > 0x10FFFF)
1139 else if (value > 0xFFFF)
/* a value above 0xFFFF does not fit into one u2 */
1140 len += 1; /* we need surrogates */
1142 ; /* 16bit suffice */
1153 else if ((byte & 0xfc) == 0xf8) {
1154 /* invalid 5-byte */
1156 return len + 1; /* invalid, stop here */
/* skip at most `skip` continuation bytes; the loop also stops at the first
   byte that is not of the form 10xxxxxx, which includes a zero byte */
1159 for (; skip && ((*t & 0xc0) == 0x80); --skip)
1162 else if ((byte & 0xfe) == 0xfc) {
1163 /* invalid 6-byte */
1165 return len + 1; /* invalid, stop here */
1168 for (; skip && ((*t & 0xc0) == 0x80); --skip)
1180 /* ASCII character, common case */
1190 /* utf8_safe_convert_to_u2s ****************************************************
1192 Convert the given UTF-8 string to UTF-16 into a pre-allocated buffer.
1193 (Invalid UTF-8 will be replaced with the U+fffd replacement character.)
1194 Use utf8_safe_number_of_u2s to determine the number of u2s to allocate.
1196 This function is safe even for invalid UTF-8 strings.
1199 text..........zero-terminated(!) UTF-8 string (may be invalid)
1201 nbytes........strlen(text). (This is needed to completely emulate
1203 buffer........a preallocated array of u2s to receive the decoded
1204 string. Use utf8_safe_number_of_u2s to get the
1205 required number of u2s for allocating this.

nbytes must not be negative (asserted). The function writes through
`buffer` without knowing its capacity, so the number of u2s produced
here has to stay in step with utf8_safe_number_of_u2s.
1207 *******************************************************************************/
/* U+FFFD, written for every byte sequence that cannot be decoded */
1209 #define UNICODE_REPLACEMENT 0xfffd
1211 void utf8_safe_convert_to_u2s(const char *text, s4 nbytes, u2 *buffer) {
1212 register const unsigned char *t;
1214 register const unsigned char *tlimit;
1222 assert(nbytes >= 0);
1224 t = (const unsigned char *) text;
1225 tlimit = t + nbytes;
1227 /* CAUTION: Keep this code in sync with utf8_safe_number_of_u2s! */
1233 /* highest bit set, non-ASCII character */
1235 if ((byte & 0xe0) == 0xc0) {
1236 /* 2-byte: should be 110..... 10...... */
1238 if (((byte1 = *t++) & 0xc0) == 0x80) {
1239 /* valid 2-byte UTF-8 */
1240 *buffer++ = ((byte & 0x1f) << 6)
1241 | ((byte1 & 0x3f) );
1244 *buffer++ = UNICODE_REPLACEMENT;
1248 else if ((byte & 0xf0) == 0xe0) {
1249 /* 3-byte: should be 1110.... 10...... 10...... */
/* both continuation bytes must lie inside the nbytes given by the caller */
1251 if (t + 2 > tlimit) {
1252 *buffer++ = UNICODE_REPLACEMENT;
1256 if (((byte1 = *t++) & 0xc0) == 0x80) {
1257 if (((byte2 = *t++) & 0xc0) == 0x80) {
1258 /* valid 3-byte UTF-8 */
1259 *buffer++ = ((byte & 0x0f) << 12)
1260 | ((byte1 & 0x3f) << 6)
1261 | ((byte2 & 0x3f) );
1264 *buffer++ = UNICODE_REPLACEMENT;
1269 *buffer++ = UNICODE_REPLACEMENT;
1273 else if ((byte & 0xf8) == 0xf0) {
1274 /* 4-byte: should be 11110... 10...... 10...... 10...... */
/* all three continuation bytes must lie inside the input */
1276 if (t + 3 > tlimit) {
1277 *buffer++ = UNICODE_REPLACEMENT;
1281 if (((byte1 = *t++) & 0xc0) == 0x80) {
1282 if (((byte2 = *t++) & 0xc0) == 0x80) {
1283 if (((byte3 = *t++) & 0xc0) == 0x80) {
1284 /* valid 4-byte UTF-8? */
1285 value = ((byte & 0x07) << 18)
1286 | ((byte1 & 0x3f) << 12)
1287 | ((byte2 & 0x3f) << 6)
1288 | ((byte3 & 0x3f) );
/* values above 0x10FFFF are outside the Unicode range */
1290 if (value > 0x10FFFF) {
1291 *buffer++ = UNICODE_REPLACEMENT;
1293 else if (value > 0xFFFF) {
1294 /* we need surrogates */
/* (value >> 10) - 0x40 equals (value - 0x10000) >> 10: the upper ten bits of
   the offset go into the high surrogate, the low ten bits of value into the
   low surrogate */
1295 *buffer++ = 0xd800 | ((value >> 10) - 0x40);
1296 *buffer++ = 0xdc00 | (value & 0x03ff);
1299 *buffer++ = value; /* 16bit suffice */
1302 *buffer++ = UNICODE_REPLACEMENT;
1307 *buffer++ = UNICODE_REPLACEMENT;
1312 *buffer++ = UNICODE_REPLACEMENT;
/* 111110xx lead byte: 5-byte forms are always invalid */
1316 else if ((byte & 0xfc) == 0xf8) {
1317 if (t + 4 > tlimit) {
1318 *buffer++ = UNICODE_REPLACEMENT;
/* skip at most `skip` continuation bytes; the whole sequence yields a single
   replacement character */
1323 for (; skip && ((*t & 0xc0) == 0x80); --skip)
1325 *buffer++ = UNICODE_REPLACEMENT;
/* 1111110x lead byte: 6-byte forms are always invalid */
1327 else if ((byte & 0xfe) == 0xfc) {
1328 if (t + 5 > tlimit) {
1329 *buffer++ = UNICODE_REPLACEMENT;
1334 for (; skip && ((*t & 0xc0) == 0x80); --skip)
1336 *buffer++ = UNICODE_REPLACEMENT;
1339 *buffer++ = UNICODE_REPLACEMENT;
1347 /* ASCII character, common case */
1355 /* u2_utflength ****************************************************************
1357 Returns the utf length in bytes of a u2 array.

   text.........array of 16-bit characters
   u2_length....number of characters of text that are examined
1359 *******************************************************************************/
1361 u4 u2_utflength(u2 *text, u4 u2_length)
1363 u4 result_len = 0; /* utf length in bytes */
1364 u2 ch; /* current unicode character */
/* visit each of the u2_length characters once */
1367 for (len = 0; len < u2_length; len++) {
1368 /* next unicode character */
1371 /* determine bytes required to store unicode character as utf */
/* a zero character fails this test and is handled by the next branch;
   presumably because zero is stored in the two-byte form 0xc0 0x80
   (is_valid_name in this file checks for exactly that pair) -- confirm */
1372 if (ch && (ch < 0x80))
/* zero and every character below 0x800 */
1374 else if (ch < 0x800)
1384 /* utf_copy ********************************************************************
1386 Copy the given utf string byte-for-byte to a buffer.
1389 buffer.......the buffer; needs room for u->blength + 1 chars, no size
                is passed in or checked
1390 u............the utf string
1392 *******************************************************************************/
1394 void utf_copy(char *buffer, utf *u)
1396 /* our utf strings are zero-terminated (done by utf_new) */
/* the count u->blength + 1 takes the terminating zero byte along */
1397 MCOPY(buffer, u->text, char, u->blength + 1);
1401 /* utf_cat *********************************************************************
1403 Append the given utf string byte-for-byte to a buffer.
1406 buffer.......the buffer; must already hold a zero-terminated string,
                because strlen(buffer) is used to find the append position.
                No size is passed in or checked.
1407 u............the utf string
1409 *******************************************************************************/
1411 void utf_cat(char *buffer, utf *u)
1413 /* our utf strings are zero-terminated (done by utf_new) */
/* writing starts on the existing terminator; the count u->blength + 1
   takes the new terminating zero byte along */
1414 MCOPY(buffer + strlen(buffer), u->text, char, u->blength + 1);
1418 /* utf_copy_classname **********************************************************
1420 Copy the given utf classname byte-for-byte to a buffer.
1421 '/' is replaced by '.'
1424 buffer.......the buffer; no size is passed in or checked
1425 u............the utf string
1427 *******************************************************************************/
1429 void utf_copy_classname(char *buffer, utf *u)
/* the + 1 places endptr one past the terminating zero byte, so the loop
   below also walks over the terminator */
1438 endptr = UTF_END(u) + 1; /* utfs are zero-terminated by utf_new */
/* walk the source bytes until endptr is reached */
1440 while (srcptr != endptr) {
1449 /* utf_cat_classname ***********************************************************
1451 Append the given utf classname byte-for-byte to a buffer.
1452 '/' is replaced by '.'
1455 buffer.......the buffer; must already hold a zero-terminated string,
                because strlen(buffer) is used to find the append position
1456 u............the utf string
1458 *******************************************************************************/
1460 void utf_cat_classname(char *buffer, utf *u)
/* delegate to utf_copy_classname, writing from the current end of buffer */
1462 utf_copy_classname(buffer + strlen(buffer), u);
1465 /* utf_display_printable_ascii *************************************************
1467 Write utf symbol to stdout (for debugging purposes).
1468 Non-printable and non-ASCII characters are printed as '?'.

   u............the utf string to print
1470 *******************************************************************************/
1472 void utf_display_printable_ascii(utf *u)
1474 char *endpos; /* points behind utf string */
1475 char *utf_ptr; /* current position in utf text */
1483 endpos = UTF_END(u);
1486 while (utf_ptr < endpos) {
1487 /* read next unicode character */
/* utf_ptr is passed by address; utf_nextu2 presumably advances it past
   the character it returns -- confirm */
1489 u2 c = utf_nextu2(&utf_ptr);
/* NOTE(review): the upper bound 127 lets DEL (0x7f) through, although
   DEL is not a printable character */
1491 if ((c >= 32) && (c <= 127))
1501 /* utf_display_printable_ascii_classname ***************************************
1503 Write utf symbol to stdout with `/' converted to `.' (for debugging
   purposes).
1505 Non-printable and non-ASCII characters are printed as '?'.

   u............the utf string to print
1507 *******************************************************************************/
1509 void utf_display_printable_ascii_classname(utf *u)
1511 char *endpos; /* points behind utf string */
1512 char *utf_ptr; /* current position in utf text */
1520 endpos = UTF_END(u);
1523 while (utf_ptr < endpos) {
1524 /* read next unicode character */
/* utf_ptr is passed by address; utf_nextu2 presumably advances it past
   the character it returns -- confirm */
1526 u2 c = utf_nextu2(&utf_ptr);
/* NOTE(review): the upper bound 127 lets DEL (0x7f) through, although
   DEL is not a printable character */
1531 if ((c >= 32) && (c <= 127))
1541 /* utf_sprint_convert_to_latin1 ************************************************
1543 Write utf symbol into c-string (for debugging purposes).
1544 Characters are converted to 8-bit Latin-1, non-Latin-1 characters yield
   invalid results, because each 16-bit value returned by utf_nextu2 is
   stored into a single char of the buffer.

   buffer.......destination c-string; no size is passed in or checked
   u............the utf string
1547 *******************************************************************************/
1549 void utf_sprint_convert_to_latin1(char *buffer, utf *u)
1551 char *endpos; /* points behind utf string */
1552 char *utf_ptr; /* current position in utf text */
/* NOTE(review): pos is only 16 bits wide, so the index wraps around for
   strings of more than 65535 characters */
1553 u2 pos = 0; /* position in c-string */
/* fallback text; presumably used when u is NULL -- confirm */
1556 strcpy(buffer, "NULL");
1560 endpos = UTF_END(u);
1563 while (utf_ptr < endpos)
1564 /* copy next unicode character */
1565 buffer[pos++] = utf_nextu2(&utf_ptr);
1567 /* terminate string */
1572 /* utf_sprint_convert_to_latin1_classname **************************************
1574 Write utf symbol into c-string with `/' converted to `.' (for debugging
   purposes).
1576 Characters are converted to 8-bit Latin-1, non-Latin-1 characters yield
   invalid results.

   buffer.......destination c-string; no size is passed in or checked
   u............the utf string
1579 *******************************************************************************/
1581 void utf_sprint_convert_to_latin1_classname(char *buffer, utf *u)
1583 char *endpos; /* points behind utf string */
1584 char *utf_ptr; /* current position in utf text */
/* NOTE(review): pos is only 16 bits wide, so the index wraps around for
   strings of more than 65535 characters */
1585 u2 pos = 0; /* position in c-string */
/* fallback text; presumably used when u is NULL -- confirm */
1588 strcpy(buffer, "NULL");
1592 endpos = UTF_END(u);
1595 while (utf_ptr < endpos) {
1596 /* copy next unicode character */
1597 u2 c = utf_nextu2(&utf_ptr);
/* class names use '/' as separator internally; show the dotted form */
1598 if (c == '/') c = '.';
1602 /* terminate string */
1607 /* utf_strcat_convert_to_latin1 ************************************************
1609 Like libc strcat, but uses an utf8 string.
1610 Characters are converted to 8-bit Latin-1, non-Latin-1 characters yield
   invalid results.

   buffer.......destination; must already hold a zero-terminated string,
                because strlen(buffer) is used to find the append position
   u............the utf string to append
1613 *******************************************************************************/
1615 void utf_strcat_convert_to_latin1(char *buffer, utf *u)
/* delegate to utf_sprint_convert_to_latin1, writing from the current end
   of buffer */
1617 utf_sprint_convert_to_latin1(buffer + strlen(buffer), u);
1621 /* utf_strcat_convert_to_latin1_classname **************************************
1623 Like libc strcat, but uses an utf8 string.
1624 Characters are converted to 8-bit Latin-1, non-Latin-1 characters yield
   invalid results. The conversion is done by
   utf_sprint_convert_to_latin1_classname.

   buffer.......destination; must already hold a zero-terminated string,
                because strlen(buffer) is used to find the append position
   u............the utf string to append
1627 *******************************************************************************/
1629 void utf_strcat_convert_to_latin1_classname(char *buffer, utf *u)
/* delegate to utf_sprint_convert_to_latin1_classname, writing from the
   current end of buffer */
1631 utf_sprint_convert_to_latin1_classname(buffer + strlen(buffer), u);
1635 /* utf_fprint_printable_ascii **************************************************
1637 Write utf symbol into file.
1638 Non-printable and non-ASCII characters are printed as '?'.

   file.........stream the characters are written to with fprintf
   u............the utf string
1640 *******************************************************************************/
1642 void utf_fprint_printable_ascii(FILE *file, utf *u)
1644 char *endpos; /* points behind utf string */
1645 char *utf_ptr; /* current position in utf text */
1650 endpos = UTF_END(u);
1653 while (utf_ptr < endpos) {
1654 /* read next unicode character */
1655 u2 c = utf_nextu2(&utf_ptr);
/* NOTE(review): the upper bound 127 lets DEL (0x7f) through, although
   DEL is not a printable character */
1657 if (c >= 32 && c <= 127) fprintf(file, "%c", c);
1658 else fprintf(file, "?");
1663 /* utf_fprint_printable_ascii_classname ****************************************
1665 Write utf symbol into file with `/' converted to `.'.
1666 Non-printable and non-ASCII characters are printed as '?'.

   file.........stream the characters are written to with fprintf
   u............the utf string
1668 *******************************************************************************/
1670 void utf_fprint_printable_ascii_classname(FILE *file, utf *u)
1672 char *endpos; /* points behind utf string */
1673 char *utf_ptr; /* current position in utf text */
1678 endpos = UTF_END(u);
1681 while (utf_ptr < endpos) {
1682 /* read next unicode character */
1683 u2 c = utf_nextu2(&utf_ptr);
/* the separator is translated before the printable-range test below */
1684 if (c == '/') c = '.';
/* NOTE(review): the upper bound 127 lets DEL (0x7f) through, although
   DEL is not a printable character */
1686 if (c >= 32 && c <= 127) fprintf(file, "%c", c);
1687 else fprintf(file, "?");
1692 /* is_valid_utf ****************************************************************
1694 Return true if the given string is a valid UTF-8 string.

   "Valid" here means the restricted form checked below: no 0x00 byte,
   and only sequences of one, two or three bytes.

1696 utf_ptr...points to first character
1697 end_pos...points after last character
1699 *******************************************************************************/
/* smallest code point that needs each sequence length; only referenced by
   the disabled overlong-encoding check further down */
1701 /* static unsigned long min_codepoint[6] = {0,1L<<7,1L<<11,1L<<16,1L<<21,1L<<26}; */
1703 bool is_valid_utf(char *utf_ptr, char *end_pos)
/* an inverted range is rejected; an empty range passes this check */
1710 if (end_pos < utf_ptr) return false;
/* number of input bytes; reduced further down as sequences are checked */
1711 bytes = end_pos - utf_ptr;
1715 if (!c) return false; /* 0x00 is not allowed */
1716 if ((c & 0x80) == 0) continue; /* ASCII */
/* len = number of continuation bytes announced by the lead byte */
1718 if ((c & 0xe0) == 0xc0) len = 1; /* 110x xxxx */
1719 else if ((c & 0xf0) == 0xe0) len = 2; /* 1110 xxxx */
1720 else if ((c & 0xf8) == 0xf0) len = 3; /* 1111 0xxx */
1721 else if ((c & 0xfc) == 0xf8) len = 4; /* 1111 10xx */
1722 else if ((c & 0xfe) == 0xfc) len = 5; /* 1111 110x */
1723 else return false; /* invalid leading byte */
/* sequences of four or more bytes are recognised above but refused here */
1725 if (len > 2) return false; /* Java limitation */
/* payload bits of the lead byte: mask 0x1f for len 1, 0x0f for len 2 */
1727 v = (unsigned long)c & (0x3f >> len);
1729 if ((bytes -= len) < 0) return false; /* missing bytes */
/* the loop body runs exactly len times */
1731 for (i = len; i--; ) {
1733 if ((c & 0xc0) != 0x80) /* 10xx xxxx */
/* append the six payload bits of the continuation byte */
1735 v = (v << 6) | (c & 0x3f);
/* presumably guarded by a test for v == 0, so that zero is accepted only
   in its two-byte form -- confirm */
1739 if (len != 1) return false; /* Java special */
1742 /* Sun Java seems to allow overlong UTF-8 encodings */
1744 /* if (v < min_codepoint[len]) */
1745 /* XXX throw exception? */
1748 /* surrogates in UTF-8 seem to be allowed in Java classfiles */
1749 /* if (v >= 0xd800 && v <= 0xdfff) return false; */ /* surrogates */
1751 /* even these seem to be allowed */
1752 /* if (v == 0xfffe || v == 0xffff) return false; */ /* invalid codepoints */
1759 /* is_valid_name ***************************************************************
1761 Return true if the given string may be used as a class/field/method
1762 name. (Currently this only disallows empty strings, control
   characters, and the two-byte encoding of zero.)
1765 NOTE: The string is assumed to have passed is_valid_utf!
1767 utf_ptr...points to first character
1768 end_pos...points after last character
1770 *******************************************************************************/
1772 bool is_valid_name(char *utf_ptr, char *end_pos)
1774 if (end_pos <= utf_ptr) return false; /* disallow empty names */
1776 while (utf_ptr < end_pos) {
/* read as unsigned so that bytes >= 0x80 do not compare as negative */
1777 unsigned char c = *utf_ptr++;
1779 if (c < 0x20) return false; /* disallow control characters */
/* utf_ptr already points to the byte after c; that read stays inside the
   string only because of the is_valid_utf precondition stated above, which
   rejects a lead byte without its continuation byte */
1780 if (c == 0xc0 && (unsigned char) *utf_ptr == 0x80) /* disallow zero */
/* Convenience form of is_valid_name for a utf object: checks the bytes
   from u->text up to UTF_END(u) and returns that result. u is dereferenced
   without a NULL check. */
1787 bool is_valid_name_utf(utf *u)
1789 return is_valid_name(u->text, UTF_END(u));
1793 /* utf_show ********************************************************************
1795 Writes the utf symbols in the utfhash to stdout and displays the
1796 number of external hash chains grouped according to the chainlength
1797 (for debugging purposes).

   Only compiled when NDEBUG is not defined. Reads the global
   hashtable_utf.
1799 *******************************************************************************/
1801 #if !defined(NDEBUG)
1805 #define CHAIN_LIMIT 20 /* limit for separated enumeration */
/* chain_count[n] = number of slots whose chain has n entries; the last
   element also collects every longer chain (see the clamp below) */
1807 u4 chain_count[CHAIN_LIMIT]; /* numbers of chains */
1808 u4 max_chainlength = 0; /* maximum length of the chains */
1809 u4 sum_chainlength = 0; /* sum of the chainlengths */
1810 u4 beyond_limit = 0; /* number of utf-symbols in chains with length>=CHAIN_LIMIT-1 */
1813 printf("UTF-HASH:\n");
1815 /* show element of utf-hashtable */
1817 for (i = 0; i < hashtable_utf->size; i++) {
/* first entry of the slot, if any */
1818 utf *u = hashtable_utf->ptr[i];
1821 printf("SLOT %d: ", (int) i);
1825 utf_display_printable_ascii(u);
1833 printf("UTF-HASH: %d slots for %d entries\n",
1834 (int) hashtable_utf->size, (int) hashtable_utf->entries );
/* the percentages below divide by entries; presumably this test leaves
   the function early for an empty table -- confirm */
1836 if (hashtable_utf->entries == 0)
1839 printf("chains:\n chainlength number of chains %% of utfstrings\n");
/* chain_count is declared without an initializer; presumably this loop
   clears it -- confirm */
1841 for (i=0;i<CHAIN_LIMIT;i++)
1844 /* count numbers of hashchains according to their length */
1845 for (i=0; i<hashtable_utf->size; i++) {
1847 utf *u = (utf*) hashtable_utf->ptr[i];
1848 u4 chain_length = 0;
1850 /* determine chainlength */
1856 /* update sum of all chainlengths */
1857 sum_chainlength+=chain_length;
1859 /* determine the maximum length of the chains */
1860 if (chain_length>max_chainlength)
1861 max_chainlength = chain_length;
1863 /* update number of utf-symbols in chains with length>=CHAIN_LIMIT-1 */
/* NOTE(review): the test uses CHAIN_LIMIT, not CHAIN_LIMIT-1, so a chain
   of exactly CHAIN_LIMIT-1 entries is counted in the last chain_count
   element but its entries are not added to beyond_limit; the percentage
   printed for the ">=" row then understates such chains */
1864 if (chain_length>=CHAIN_LIMIT) {
1865 beyond_limit+=chain_length;
/* clamp so that the index below stays inside chain_count */
1866 chain_length=CHAIN_LIMIT-1;
1869 /* update number of hashchains of current length */
1870 chain_count[chain_length]++;
1873 /* display results */
/* one row per exact length 1 .. CHAIN_LIMIT-2; the percentage is the share
   of all entries that sit in chains of that length */
1874 for (i=1;i<CHAIN_LIMIT-1;i++)
1875 printf(" %2d %17d %18.2f%%\n",i,chain_count[i],(((float) chain_count[i]*i*100)/hashtable_utf->entries));
/* collective row for the last chain_count element */
1877 printf(" >=%2d %17d %18.2f%%\n",CHAIN_LIMIT-1,chain_count[CHAIN_LIMIT-1],((float) beyond_limit*100)/hashtable_utf->entries);
1880 printf("max. chainlength:%5d\n",max_chainlength);
1882 /* avg. chainlength = sum of chainlengths / number of chains */
/* chain_count[0] is the number of empty slots, so the divisor is the
   number of non-empty slots */
1883 printf("avg. chainlength:%5.2f\n",(float) sum_chainlength / (hashtable_utf->size-chain_count[0]));
1885 #endif /* !defined(NDEBUG) */
1889 * These are local overrides for various environment variables in Emacs.
1890 * Please do not remove this and leave it at the end of the file, where
1891 * Emacs will automagically detect them.
1892 * ---------------------------------------------------------------------
1895 * indent-tabs-mode: t
1899 * vim:noexpandtab:sw=4:ts=4: