1 /* src/vmcore/utf8.c - utf8 string functions
3 Copyright (C) 1996-2005, 2006, 2007 R. Grafl, A. Krall, C. Kruegel,
4 C. Oates, R. Obermaisser, M. Platter, M. Probst, S. Ring,
5 E. Steiner, C. Thalinger, D. Thuernbeck, P. Tomsich, C. Ullrich,
6 J. Wenninger, Institut f. Computersprachen - TU Wien
8 This file is part of CACAO.
10 This program is free software; you can redistribute it and/or
11 modify it under the terms of the GNU General Public License as
12 published by the Free Software Foundation; either version 2, or (at
13 your option) any later version.
15 This program is distributed in the hope that it will be useful, but
16 WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
25 $Id: utf8.c 8123 2007-06-20 23:50:55Z michi $
37 #include "mm/memory.h"
39 #include "threads/lock-common.h"
41 #include "toolbox/hashtable.h"
43 #include "vm/exceptions.h"
45 #include "vmcore/options.h"
47 #if defined(ENABLE_STATISTICS)
48 # include "vmcore/statistics.h"
51 #include "vmcore/utf8.h"
54 /* global variables ***********************************************************/
56 /* hashsize must be power of 2 */
58 #define HASHTABLE_UTF_SIZE 16384 /* initial size of utf-hash */
60 hashtable *hashtable_utf; /* hashtable for utf8-symbols */
63 /* utf-symbols for pointer comparison of frequently used strings **************/
65 utf *utf_java_lang_Object;
67 utf *utf_java_lang_Class;
68 utf *utf_java_lang_ClassLoader;
69 utf *utf_java_lang_Cloneable;
70 utf *utf_java_lang_SecurityManager;
71 utf *utf_java_lang_String;
72 utf *utf_java_lang_System;
73 utf *utf_java_lang_ThreadGroup;
74 utf *utf_java_lang_ref_SoftReference;
75 utf *utf_java_lang_ref_WeakReference;
76 utf *utf_java_lang_ref_PhantomReference;
77 utf *utf_java_io_Serializable;
79 utf *utf_java_lang_Throwable;
80 utf *utf_java_lang_Error;
82 utf *utf_java_lang_AbstractMethodError;
83 utf *utf_java_lang_ClassCircularityError;
84 utf *utf_java_lang_ClassFormatError;
85 utf *utf_java_lang_ExceptionInInitializerError;
86 utf *utf_java_lang_IncompatibleClassChangeError;
87 utf *utf_java_lang_InstantiationError;
88 utf *utf_java_lang_InternalError;
89 utf *utf_java_lang_LinkageError;
90 utf *utf_java_lang_NoClassDefFoundError;
91 utf *utf_java_lang_NoSuchFieldError;
92 utf *utf_java_lang_NoSuchMethodError;
93 utf *utf_java_lang_OutOfMemoryError;
94 utf *utf_java_lang_UnsatisfiedLinkError;
95 utf *utf_java_lang_UnsupportedClassVersionError;
96 utf *utf_java_lang_VerifyError;
97 utf *utf_java_lang_VirtualMachineError;
99 #if defined(WITH_CLASSPATH_GNU)
100 utf *utf_java_lang_VMThrowable;
103 utf *utf_java_lang_Exception;
105 utf *utf_java_lang_ArithmeticException;
106 utf *utf_java_lang_ArrayIndexOutOfBoundsException;
107 utf *utf_java_lang_ArrayStoreException;
108 utf *utf_java_lang_ClassCastException;
109 utf *utf_java_lang_ClassNotFoundException;
110 utf *utf_java_lang_CloneNotSupportedException;
111 utf *utf_java_lang_IllegalAccessException;
112 utf *utf_java_lang_IllegalArgumentException;
113 utf *utf_java_lang_IllegalMonitorStateException;
114 utf *utf_java_lang_InstantiationException;
115 utf *utf_java_lang_InterruptedException;
116 utf *utf_java_lang_NegativeArraySizeException;
117 utf *utf_java_lang_NullPointerException;
118 utf *utf_java_lang_StringIndexOutOfBoundsException;
120 utf *utf_java_lang_reflect_InvocationTargetException;
122 utf *utf_java_security_PrivilegedActionException;
124 #if defined(ENABLE_JAVASE)
125 utf* utf_java_lang_Void;
128 utf* utf_java_lang_Boolean;
129 utf* utf_java_lang_Byte;
130 utf* utf_java_lang_Character;
131 utf* utf_java_lang_Short;
132 utf* utf_java_lang_Integer;
133 utf* utf_java_lang_Long;
134 utf* utf_java_lang_Float;
135 utf* utf_java_lang_Double;
137 #if defined(ENABLE_JAVASE)
138 utf *utf_java_lang_StackTraceElement;
139 utf *utf_java_lang_reflect_Constructor;
140 utf *utf_java_lang_reflect_Field;
141 utf *utf_java_lang_reflect_Method;
142 utf *utf_java_util_Vector;
145 utf *utf_InnerClasses; /* InnerClasses */
146 utf *utf_ConstantValue; /* ConstantValue */
147 utf *utf_Code; /* Code */
148 utf *utf_Exceptions; /* Exceptions */
149 utf *utf_LineNumberTable; /* LineNumberTable */
150 utf *utf_SourceFile; /* SourceFile */
152 #if defined(ENABLE_JAVASE)
153 utf *utf_EnclosingMethod;
155 utf *utf_RuntimeVisibleAnnotations;
156 utf *utf_StackMapTable;
159 utf *utf_init; /* <init> */
160 utf *utf_clinit; /* <clinit> */
161 utf *utf_clone; /* clone */
162 utf *utf_finalize; /* finalize */
163 utf *utf_run; /* run */
168 utf *utf_removeThread;
173 utf *utf_fillInStackTrace;
174 utf *utf_getSystemClassLoader;
177 utf *utf_printStackTrace;
179 utf *utf_division_by_zero;
190 utf *utf_void__void; /* ()V */
191 utf *utf_boolean__void; /* (Z)V */
192 utf *utf_byte__void; /* (B)V */
193 utf *utf_char__void; /* (C)V */
194 utf *utf_short__void; /* (S)V */
195 utf *utf_int__void; /* (I)V */
196 utf *utf_long__void; /* (J)V */
197 utf *utf_float__void; /* (F)V */
198 utf *utf_double__void; /* (D)V */
200 utf *utf_void__java_lang_ClassLoader; /* ()Ljava/lang/ClassLoader; */
201 utf *utf_void__java_lang_Object; /* ()Ljava/lang/Object; */
202 utf *utf_void__java_lang_Throwable; /* ()Ljava/lang/Throwable; */
203 utf *utf_java_lang_Exception__V; /* (Ljava/lang/Exception;)V */
204 utf *utf_java_lang_Object__java_lang_Object;
205 utf *utf_java_lang_String__void; /* (Ljava/lang/String;)V */
206 utf *utf_java_lang_String__java_lang_Class;
207 utf *utf_java_lang_Thread__V; /* (Ljava/lang/Thread;)V */
208 utf *utf_java_lang_Throwable__void; /* (Ljava/lang/Throwable;)V */
209 utf *utf_java_lang_Throwable__java_lang_Throwable;
211 utf *utf_not_named_yet; /* special name for unnamed classes */
213 utf *array_packagename;
216 /* utf_init ********************************************************************
218 Initializes the utf8 subsystem.
220 *******************************************************************************/
224 /* create utf8 hashtable */
226 hashtable_utf = NEW(hashtable);
228 hashtable_create(hashtable_utf, HASHTABLE_UTF_SIZE);
230 #if defined(ENABLE_STATISTICS)
232 count_utf_len += sizeof(utf*) * hashtable_utf->size;
235 /* create utf-symbols for pointer comparison of frequently used strings */
237 utf_java_lang_Object = utf_new_char("java/lang/Object");
239 utf_java_lang_Class = utf_new_char("java/lang/Class");
240 utf_java_lang_ClassLoader = utf_new_char("java/lang/ClassLoader");
241 utf_java_lang_Cloneable = utf_new_char("java/lang/Cloneable");
242 utf_java_lang_SecurityManager = utf_new_char("java/lang/SecurityManager");
243 utf_java_lang_String = utf_new_char("java/lang/String");
244 utf_java_lang_System = utf_new_char("java/lang/System");
245 utf_java_lang_ThreadGroup = utf_new_char("java/lang/ThreadGroup");
247 utf_java_lang_ref_SoftReference =
248 utf_new_char("java/lang/ref/SoftReference");
250 utf_java_lang_ref_WeakReference =
251 utf_new_char("java/lang/ref/WeakReference");
253 utf_java_lang_ref_PhantomReference =
254 utf_new_char("java/lang/ref/PhantomReference");
256 utf_java_io_Serializable = utf_new_char("java/io/Serializable");
258 utf_java_lang_Throwable = utf_new_char("java/lang/Throwable");
259 utf_java_lang_Error = utf_new_char("java/lang/Error");
261 utf_java_lang_ClassCircularityError =
262 utf_new_char("java/lang/ClassCircularityError");
264 utf_java_lang_ClassFormatError = utf_new_char("java/lang/ClassFormatError");
266 utf_java_lang_ExceptionInInitializerError =
267 utf_new_char("java/lang/ExceptionInInitializerError");
269 utf_java_lang_IncompatibleClassChangeError =
270 utf_new_char("java/lang/IncompatibleClassChangeError");
272 utf_java_lang_InstantiationError =
273 utf_new_char("java/lang/InstantiationError");
275 utf_java_lang_InternalError = utf_new_char("java/lang/InternalError");
276 utf_java_lang_LinkageError = utf_new_char("java/lang/LinkageError");
278 utf_java_lang_NoClassDefFoundError =
279 utf_new_char("java/lang/NoClassDefFoundError");
281 utf_java_lang_OutOfMemoryError = utf_new_char("java/lang/OutOfMemoryError");
283 utf_java_lang_UnsatisfiedLinkError =
284 utf_new_char("java/lang/UnsatisfiedLinkError");
286 utf_java_lang_UnsupportedClassVersionError =
287 utf_new_char("java/lang/UnsupportedClassVersionError");
289 utf_java_lang_VerifyError = utf_new_char("java/lang/VerifyError");
291 utf_java_lang_VirtualMachineError =
292 utf_new_char("java/lang/VirtualMachineError");
294 #if defined(ENABLE_JAVASE)
295 utf_java_lang_AbstractMethodError =
296 utf_new_char("java/lang/AbstractMethodError");
298 utf_java_lang_NoSuchFieldError =
299 utf_new_char("java/lang/NoSuchFieldError");
301 utf_java_lang_NoSuchMethodError =
302 utf_new_char("java/lang/NoSuchMethodError");
305 #if defined(WITH_CLASSPATH_GNU)
306 utf_java_lang_VMThrowable = utf_new_char("java/lang/VMThrowable");
309 utf_java_lang_Exception = utf_new_char("java/lang/Exception");
311 utf_java_lang_ArithmeticException =
312 utf_new_char("java/lang/ArithmeticException");
314 utf_java_lang_ArrayIndexOutOfBoundsException =
315 utf_new_char("java/lang/ArrayIndexOutOfBoundsException");
317 utf_java_lang_ArrayStoreException =
318 utf_new_char("java/lang/ArrayStoreException");
320 utf_java_lang_ClassCastException =
321 utf_new_char("java/lang/ClassCastException");
323 utf_java_lang_ClassNotFoundException =
324 utf_new_char("java/lang/ClassNotFoundException");
326 utf_java_lang_CloneNotSupportedException =
327 utf_new_char("java/lang/CloneNotSupportedException");
329 utf_java_lang_IllegalAccessException =
330 utf_new_char("java/lang/IllegalAccessException");
332 utf_java_lang_IllegalArgumentException =
333 utf_new_char("java/lang/IllegalArgumentException");
335 utf_java_lang_IllegalMonitorStateException =
336 utf_new_char("java/lang/IllegalMonitorStateException");
338 utf_java_lang_InstantiationException =
339 utf_new_char("java/lang/InstantiationException");
341 utf_java_lang_InterruptedException =
342 utf_new_char("java/lang/InterruptedException");
344 utf_java_lang_NegativeArraySizeException =
345 utf_new_char("java/lang/NegativeArraySizeException");
347 utf_java_lang_NullPointerException =
348 utf_new_char("java/lang/NullPointerException");
350 utf_java_lang_StringIndexOutOfBoundsException =
351 utf_new_char("java/lang/StringIndexOutOfBoundsException");
353 utf_java_lang_reflect_InvocationTargetException =
354 utf_new_char("java/lang/reflect/InvocationTargetException");
356 utf_java_security_PrivilegedActionException =
357 utf_new_char("java/security/PrivilegedActionException");
359 #if defined(ENABLE_JAVASE)
360 utf_java_lang_Void = utf_new_char("java/lang/Void");
363 utf_java_lang_Boolean = utf_new_char("java/lang/Boolean");
364 utf_java_lang_Byte = utf_new_char("java/lang/Byte");
365 utf_java_lang_Character = utf_new_char("java/lang/Character");
366 utf_java_lang_Short = utf_new_char("java/lang/Short");
367 utf_java_lang_Integer = utf_new_char("java/lang/Integer");
368 utf_java_lang_Long = utf_new_char("java/lang/Long");
369 utf_java_lang_Float = utf_new_char("java/lang/Float");
370 utf_java_lang_Double = utf_new_char("java/lang/Double");
372 #if defined(ENABLE_JAVASE)
373 utf_java_lang_StackTraceElement =
374 utf_new_char("java/lang/StackTraceElement");
376 utf_java_lang_reflect_Constructor =
377 utf_new_char("java/lang/reflect/Constructor");
379 utf_java_lang_reflect_Field = utf_new_char("java/lang/reflect/Field");
380 utf_java_lang_reflect_Method = utf_new_char("java/lang/reflect/Method");
381 utf_java_util_Vector = utf_new_char("java/util/Vector");
384 utf_InnerClasses = utf_new_char("InnerClasses");
385 utf_ConstantValue = utf_new_char("ConstantValue");
386 utf_Code = utf_new_char("Code");
387 utf_Exceptions = utf_new_char("Exceptions");
388 utf_LineNumberTable = utf_new_char("LineNumberTable");
389 utf_SourceFile = utf_new_char("SourceFile");
391 #if defined(ENABLE_JAVASE)
392 utf_EnclosingMethod = utf_new_char("EnclosingMethod");
393 utf_Signature = utf_new_char("Signature");
394 utf_RuntimeVisibleAnnotations = utf_new_char("RuntimeVisibleAnnotations");
395 utf_StackMapTable = utf_new_char("StackMapTable");
398 utf_init = utf_new_char("<init>");
399 utf_clinit = utf_new_char("<clinit>");
400 utf_clone = utf_new_char("clone");
401 utf_finalize = utf_new_char("finalize");
402 utf_run = utf_new_char("run");
404 utf_add = utf_new_char("add");
405 utf_remove = utf_new_char("remove");
406 utf_addThread = utf_new_char("addThread");
407 utf_removeThread = utf_new_char("removeThread");
408 utf_put = utf_new_char("put");
409 utf_get = utf_new_char("get");
410 utf_value = utf_new_char("value");
412 utf_fillInStackTrace = utf_new_char("fillInStackTrace");
413 utf_getSystemClassLoader = utf_new_char("getSystemClassLoader");
414 utf_initCause = utf_new_char("initCause");
415 utf_loadClass = utf_new_char("loadClass");
416 utf_printStackTrace = utf_new_char("printStackTrace");
418 utf_division_by_zero = utf_new_char("/ by zero");
420 utf_Z = utf_new_char("Z");
421 utf_B = utf_new_char("B");
422 utf_C = utf_new_char("C");
423 utf_S = utf_new_char("S");
424 utf_I = utf_new_char("I");
425 utf_J = utf_new_char("J");
426 utf_F = utf_new_char("F");
427 utf_D = utf_new_char("D");
429 utf_void__void = utf_new_char("()V");
430 utf_boolean__void = utf_new_char("(Z)V");
431 utf_byte__void = utf_new_char("(B)V");
432 utf_char__void = utf_new_char("(C)V");
433 utf_short__void = utf_new_char("(S)V");
434 utf_int__void = utf_new_char("(I)V");
435 utf_long__void = utf_new_char("(J)V");
436 utf_float__void = utf_new_char("(F)V");
437 utf_double__void = utf_new_char("(D)V");
438 utf_void__java_lang_Object = utf_new_char("()Ljava/lang/Object;");
439 utf_void__java_lang_Throwable = utf_new_char("()Ljava/lang/Throwable;");
441 utf_void__java_lang_ClassLoader =
442 utf_new_char("()Ljava/lang/ClassLoader;");
444 utf_java_lang_Exception__V = utf_new_char("(Ljava/lang/Exception;)V");
446 utf_java_lang_Object__java_lang_Object =
447 utf_new_char("(Ljava/lang/Object;)Ljava/lang/Object;");
449 utf_java_lang_String__void = utf_new_char("(Ljava/lang/String;)V");
451 utf_java_lang_String__java_lang_Class =
452 utf_new_char("(Ljava/lang/String;)Ljava/lang/Class;");
454 utf_java_lang_Thread__V = utf_new_char("(Ljava/lang/Thread;)V");
455 utf_java_lang_Throwable__void = utf_new_char("(Ljava/lang/Throwable;)V");
457 utf_java_lang_Throwable__java_lang_Throwable =
458 utf_new_char("(Ljava/lang/Throwable;)Ljava/lang/Throwable;");
460 utf_null = utf_new_char("null");
461 utf_not_named_yet = utf_new_char("\t<not_named_yet>");
462 array_packagename = utf_new_char("\t<the array package>");
464 /* everything's ok */
470 /* utf_hashkey *****************************************************************
472 The hashkey is computed from the utf-text by using up to 8
473 characters. For utf-symbols longer than 15 characters 3 characters
474 are taken from the beginning and the end, 2 characters are taken from the middle.
477 *******************************************************************************/
/* Byte-fetch helpers for utf_hashkey.

   Both macros expand to an expression that reads through a pointer
   variable named `text` taken from the enclosing scope; nbs() additionally
   pre-increments that pointer.  An expression that combines fbs() with
   nbs(), or several nbs() with each other, therefore depends on its
   operands being evaluated left to right, which C does not guarantee for
   the operands of `^`.

   The shift amount is parenthesized so that a compound argument cannot
   rebind against the `<<` operator. */

#define nbs(val) ((u4) *(++text) << (val)) /* get next byte, left shift by val */
#define fbs(val) ((u4) *(  text) << (val)) /* get first byte, left shift by val */
482 u4 utf_hashkey(const char *text, u4 length)
484 const char *start_pos = text; /* pointer to utf text */
488 case 0: /* empty string */
491 case 1: return fbs(0);
492 case 2: return fbs(0) ^ nbs(3);
493 case 3: return fbs(0) ^ nbs(3) ^ nbs(5);
494 case 4: return fbs(0) ^ nbs(2) ^ nbs(4) ^ nbs(6);
495 case 5: return fbs(0) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(6);
496 case 6: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(5) ^ nbs(6);
497 case 7: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(5) ^ nbs(6);
498 case 8: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(5) ^ nbs(6) ^ nbs(7);
505 return a ^ nbs(4) ^ nbs(5) ^ nbs(6) ^ nbs(7) ^ nbs(8);
514 return a ^ nbs(6) ^ nbs(7) ^ nbs(8) ^ nbs(9);
523 return a ^ nbs(6) ^ nbs(7) ^ nbs(8) ^ nbs(9) ^ nbs(10);
535 return a ^ nbs(9) ^ nbs(10);
547 return a ^ nbs(9) ^ nbs(10);
558 return a ^ nbs(9) ^ nbs(10) ^ nbs(11);
569 return a ^ nbs(9) ^ nbs(10) ^ nbs(11);
571 default: /* 3 characters from beginning */
577 /* 2 characters from middle */
578 text = start_pos + (length / 2);
583 /* 3 characters from end */
584 text = start_pos + length - 4;
589 return a ^ nbs(10) ^ nbs(11);
593 /* utf_full_hashkey ************************************************************
595 This function computes a hash value using all bytes in the string.
597 The algorithm is the "One-at-a-time" algorithm as published
598 by Bob Jenkins on http://burtleburtle.net/bob/hash/doobs.html.
600 *******************************************************************************/
602 u4 utf_full_hashkey(const char *text, u4 length)
604 register const unsigned char *p = (const unsigned char *) text;
612 hash += (hash << 10);
616 hash ^= (hash >> 11);
617 hash += (hash << 15);
622 /* unicode_hashkey *************************************************************
624 Compute the hashkey of a unicode string.
626 *******************************************************************************/
628 u4 unicode_hashkey(u2 *text, u2 len)
630 return utf_hashkey((char *) text, len);
634 /* utf_new *********************************************************************
636 Creates a new utf-symbol, the text of the symbol is passed as a
637 u1-array. The function searches the utf-hashtable for a utf-symbol
638 with this text. On success the element is returned, otherwise a new
639 hashtable element is created.
641 If the number of entries in the hashtable exceeds twice the size of
642 the hashtable slots a reorganization of the hashtable is done and
643 the utf symbols are copied to a new hashtable with doubled size.
645 *******************************************************************************/
647 utf *utf_new(const char *text, u2 length)
649 u4 key; /* hashkey computed from utf-text */
650 u4 slot; /* slot in hashtable */
651 utf *u; /* hashtable element */
654 LOCK_MONITOR_ENTER(hashtable_utf->header);
656 #if defined(ENABLE_STATISTICS)
661 key = utf_hashkey(text, length);
662 slot = key & (hashtable_utf->size - 1);
663 u = hashtable_utf->ptr[slot];
665 /* search external hash chain for utf-symbol */
668 if (u->blength == length) {
669 /* compare text of hashtable elements */
671 for (i = 0; i < length; i++)
672 if (text[i] != u->text[i])
675 #if defined(ENABLE_STATISTICS)
677 count_utf_new_found++;
680 /* symbol found in hashtable */
682 LOCK_MONITOR_EXIT(hashtable_utf->header);
688 u = u->hashlink; /* next element in external chain */
691 /* location in hashtable found, create new utf element */
695 u->blength = length; /* length in bytes of utfstring */
696 u->hashlink = hashtable_utf->ptr[slot]; /* link in external hashchain */
697 u->text = mem_alloc(length + 1);/* allocate memory for utf-text */
699 memcpy(u->text, text, length); /* copy utf-text */
700 u->text[length] = '\0';
702 #if defined(ENABLE_STATISTICS)
704 count_utf_len += sizeof(utf) + length + 1;
707 hashtable_utf->ptr[slot] = u; /* insert symbol into table */
708 hashtable_utf->entries++; /* update number of entries */
710 if (hashtable_utf->entries > (hashtable_utf->size * 2)) {
712 /* reorganization of hashtable, average length of the external
713 chains is approx. 2 */
715 hashtable *newhash; /* the new hashtable */
721 /* create new hashtable, double the size */
723 newhash = hashtable_resize(hashtable_utf, hashtable_utf->size * 2);
725 #if defined(ENABLE_STATISTICS)
727 count_utf_len += sizeof(utf*) * hashtable_utf->size;
730 /* transfer elements to new hashtable */
732 for (i = 0; i < hashtable_utf->size; i++) {
733 u = hashtable_utf->ptr[i];
737 slot = utf_hashkey(u->text, u->blength) & (newhash->size - 1);
739 u->hashlink = (utf *) newhash->ptr[slot];
740 newhash->ptr[slot] = u;
742 /* follow link in external hash chain */
748 /* dispose old table */
750 hashtable_free(hashtable_utf);
752 hashtable_utf = newhash;
755 LOCK_MONITOR_EXIT(hashtable_utf->header);
761 /* utf_new_u2 ******************************************************************
763 Make utf symbol from u2 array; if isclassname is true, '.' is replaced by '/'.
766 *******************************************************************************/
768 utf *utf_new_u2(u2 *unicode_pos, u4 unicode_length, bool isclassname)
770 char *buffer; /* memory buffer for unicode characters */
771 char *pos; /* pointer to current position in buffer */
772 u4 left; /* unicode characters left */
773 u4 buflength; /* utf length in bytes of the u2 array */
774 utf *result; /* resulting utf-string */
777 /* determine utf length in bytes and allocate memory */
779 buflength = u2_utflength(unicode_pos, unicode_length);
780 buffer = MNEW(char, buflength);
785 for (i = 0; i++ < unicode_length; unicode_pos++) {
786 /* next unicode character */
789 if ((c != 0) && (c < 0x80)) {
792 if ((int) left < 0) break;
793 /* convert classname */
794 if (isclassname && c == '.')
799 } else if (c < 0x800) {
801 unsigned char high = c >> 6;
802 unsigned char low = c & 0x3F;
804 if ((int) left < 0) break;
805 *pos++ = high | 0xC0;
811 char mid = (c >> 6) & 0x3F;
814 if ((int) left < 0) break;
815 *pos++ = high | 0xE0;
821 /* insert utf-string into symbol-table */
822 result = utf_new(buffer,buflength);
824 MFREE(buffer, char, buflength);
830 /* utf_new_char ****************************************************************
832 Creates a new utf symbol, the text for this symbol is passed as a
833 c-string ( = char* ).
835 *******************************************************************************/
837 utf *utf_new_char(const char *text)
839 return utf_new(text, strlen(text));
843 /* utf_new_char_classname ******************************************************
845 Creates a new utf symbol, the text for this symbol is passed as a
846 c-string ( = char* ) "." characters are going to be replaced by
847 "/". Since the above function is used often, this is a separate
848 function, instead of an if.
850 *******************************************************************************/
852 utf *utf_new_char_classname(const char *text)
854 if (strchr(text, '.')) {
855 char *txt = strdup(text);
856 char *end = txt + strlen(txt);
860 for (c = txt; c < end; c++)
861 if (*c == '.') *c = '/';
863 tmpRes = utf_new(txt, strlen(txt));
869 return utf_new(text, strlen(text));
873 /* utf_nextu2 ******************************************************************
875 Read the next unicode character from the utf string and increment
876 the utf-string pointer accordingly.
878 CAUTION: This function is unsafe for input that was not checked
881 *******************************************************************************/
883 u2 utf_nextu2(char **utf_ptr)
885 /* uncompressed unicode character */
887 /* current position in utf text */
888 unsigned char *utf = (unsigned char *) (*utf_ptr);
889 /* bytes representing the unicode character */
890 unsigned char ch1, ch2, ch3;
891 /* number of bytes used to represent the unicode character */
894 switch ((ch1 = utf[0]) >> 4) {
895 default: /* 1 byte */
899 case 0xD: /* 2 bytes */
900 if (((ch2 = utf[1]) & 0xC0) == 0x80) {
901 unsigned char high = ch1 & 0x1F;
902 unsigned char low = ch2 & 0x3F;
903 unicode_char = (high << 6) + low;
908 case 0xE: /* 2 or 3 bytes */
909 if (((ch2 = utf[1]) & 0xC0) == 0x80) {
910 if (((ch3 = utf[2]) & 0xC0) == 0x80) {
911 unsigned char low = ch3 & 0x3f;
912 unsigned char mid = ch2 & 0x3f;
913 unsigned char high = ch1 & 0x0f;
914 unicode_char = (((high << 6) + mid) << 6) + low;
922 /* update position in utf-text */
923 *utf_ptr = (char *) (utf + len);
929 /* utf_bytes *******************************************************************
931 Determine number of bytes (aka. octets) in the utf string.
934 u............utf string
937 The number of octets of this utf string.
938 There is _no_ terminating zero included in this count.
940 *******************************************************************************/
948 /* utf_get_number_of_u2s_for_buffer ********************************************
950 Determine number of UTF-16 u2s in the given UTF-8 buffer
952 CAUTION: This function is unsafe for input that was not checked
955 CAUTION: Use this function *only* when you want to convert an UTF-8 buffer
956 to an array of u2s (UTF-16) and want to know how many of them you will get.
957 All other uses of this function are probably wrong.
960 buffer........points to first char in buffer
961 blength.......number of _bytes_ in the buffer
964 the number of u2s needed to hold this string in UTF-16 encoding.
965 There is _no_ terminating zero included in this count.
967 NOTE: Unlike utf_get_number_of_u2s, this function never throws an
970 *******************************************************************************/
972 u4 utf_get_number_of_u2s_for_buffer(const char *buffer, u4 blength)
974 const char *endpos; /* points behind utf string */
975 const char *utf_ptr; /* current position in utf text */
976 u4 len = 0; /* number of unicode characters */
979 endpos = utf_ptr + blength;
981 while (utf_ptr < endpos) {
983 /* next unicode character */
984 utf_nextu2((char **)&utf_ptr);
987 assert(utf_ptr == endpos);
993 /* utf_get_number_of_u2s *******************************************************
995 Determine number of UTF-16 u2s in the utf string.
997 CAUTION: This function is unsafe for input that was not checked
1000 CAUTION: Use this function *only* when you want to convert a utf string
1001 to an array of u2s and want to know how many of them you will get.
1002 All other uses of this function are probably wrong.
1005 u............utf string
1008 the number of u2s needed to hold this string in UTF-16 encoding.
1009 There is _no_ terminating zero included in this count.
1010 XXX 0 if a NullPointerException has been thrown (see below)
1012 *******************************************************************************/
1014 u4 utf_get_number_of_u2s(utf *u)
1016 char *endpos; /* points behind utf string */
1017 char *utf_ptr; /* current position in utf text */
1018 u4 len = 0; /* number of unicode characters */
1020 /* XXX this is probably not checked by most callers! Review this after */
1021 /* the invalid uses of this function have been eliminated */
1023 exceptions_throw_nullpointerexception();
1027 endpos = UTF_END(u);
1030 while (utf_ptr < endpos) {
1032 /* next unicode character */
1033 utf_nextu2(&utf_ptr);
1036 if (utf_ptr != endpos) {
1037 /* string ended abruptly */
1038 exceptions_throw_internalerror("Illegal utf8 string");
1046 /* utf8_safe_number_of_u2s *****************************************************
1048 Determine number of UTF-16 u2s needed for decoding the given UTF-8 string.
1049 (For invalid UTF-8 the U+fffd replacement character will be counted.)
1051 This function is safe even for invalid UTF-8 strings.
1054 text..........zero-terminated(!) UTF-8 string (may be invalid)
1056 nbytes........strlen(text). (This is needed to completely emulate
1060 the number of u2s needed to hold this string in UTF-16 encoding.
1061 There is _no_ terminating zero included in this count.
1063 *******************************************************************************/
1065 s4 utf8_safe_number_of_u2s(const char *text, s4 nbytes) {
1066 register const unsigned char *t;
1069 register const unsigned char *tlimit;
1077 assert(nbytes >= 0);
1080 t = (const unsigned char *) text;
1081 tlimit = t + nbytes;
1083 /* CAUTION: Keep this code in sync with utf8_safe_convert_to_u2s! */
1089 /* highest bit set, non-ASCII character */
1091 if ((byte & 0xe0) == 0xc0) {
1092 /* 2-byte: should be 110..... 10...... ? */
1094 if ((*t++ & 0xc0) == 0x80)
1095 ; /* valid 2-byte */
1099 else if ((byte & 0xf0) == 0xe0) {
1100 /* 3-byte: should be 1110.... 10...... 10...... */
1104 return len + 1; /* invalid, stop here */
1106 if ((*t++ & 0xc0) == 0x80) {
1107 if ((*t++ & 0xc0) == 0x80)
1108 ; /* valid 3-byte */
1115 else if ((byte & 0xf8) == 0xf0) {
1116 /* 4-byte: should be 11110... 10...... 10...... 10...... */
1120 return len + 1; /* invalid, stop here */
1122 if (((byte1 = *t++) & 0xc0) == 0x80) {
1123 if (((byte2 = *t++) & 0xc0) == 0x80) {
1124 if (((byte3 = *t++) & 0xc0) == 0x80) {
1125 /* valid 4-byte UTF-8? */
1126 value = ((byte & 0x07) << 18)
1127 | ((byte1 & 0x3f) << 12)
1128 | ((byte2 & 0x3f) << 6)
1129 | ((byte3 & 0x3f) );
1131 if (value > 0x10FFFF)
1133 else if (value > 0xFFFF)
1134 len += 1; /* we need surrogates */
1136 ; /* 16bit suffice */
1147 else if ((byte & 0xfc) == 0xf8) {
1148 /* invalid 5-byte */
1150 return len + 1; /* invalid, stop here */
1153 for (; skip && ((*t & 0xc0) == 0x80); --skip)
1156 else if ((byte & 0xfe) == 0xfc) {
1157 /* invalid 6-byte */
1159 return len + 1; /* invalid, stop here */
1162 for (; skip && ((*t & 0xc0) == 0x80); --skip)
1174 /* ASCII character, common case */
1184 /* utf8_safe_convert_to_u2s ****************************************************
1186 Convert the given UTF-8 string to UTF-16 into a pre-allocated buffer.
1187 (Invalid UTF-8 will be replaced with the U+fffd replacement character.)
1188 Use utf8_safe_number_of_u2s to determine the number of u2s to allocate.
1190 This function is safe even for invalid UTF-8 strings.
1193 text..........zero-terminated(!) UTF-8 string (may be invalid)
1195 nbytes........strlen(text). (This is needed to completely emulate
1197 buffer........a preallocated array of u2s to receive the decoded
1198 string. Use utf8_safe_number_of_u2s to get the
1199 required number of u2s for allocating this.
1201 *******************************************************************************/
1203 #define UNICODE_REPLACEMENT 0xfffd
1205 void utf8_safe_convert_to_u2s(const char *text, s4 nbytes, u2 *buffer) {
1206 register const unsigned char *t;
1208 register const unsigned char *tlimit;
1216 assert(nbytes >= 0);
1218 t = (const unsigned char *) text;
1219 tlimit = t + nbytes;
1221 /* CAUTION: Keep this code in sync with utf8_safe_number_of_u2s! */
1227 /* highest bit set, non-ASCII character */
1229 if ((byte & 0xe0) == 0xc0) {
1230 /* 2-byte: should be 110..... 10...... */
1232 if (((byte1 = *t++) & 0xc0) == 0x80) {
1233 /* valid 2-byte UTF-8 */
1234 *buffer++ = ((byte & 0x1f) << 6)
1235 | ((byte1 & 0x3f) );
1238 *buffer++ = UNICODE_REPLACEMENT;
1242 else if ((byte & 0xf0) == 0xe0) {
1243 /* 3-byte: should be 1110.... 10...... 10...... */
1245 if (t + 2 > tlimit) {
1246 *buffer++ = UNICODE_REPLACEMENT;
1250 if (((byte1 = *t++) & 0xc0) == 0x80) {
1251 if (((byte2 = *t++) & 0xc0) == 0x80) {
1252 /* valid 3-byte UTF-8 */
1253 *buffer++ = ((byte & 0x0f) << 12)
1254 | ((byte1 & 0x3f) << 6)
1255 | ((byte2 & 0x3f) );
1258 *buffer++ = UNICODE_REPLACEMENT;
1263 *buffer++ = UNICODE_REPLACEMENT;
1267 else if ((byte & 0xf8) == 0xf0) {
1268 /* 4-byte: should be 11110... 10...... 10...... 10...... */
1270 if (t + 3 > tlimit) {
1271 *buffer++ = UNICODE_REPLACEMENT;
1275 if (((byte1 = *t++) & 0xc0) == 0x80) {
1276 if (((byte2 = *t++) & 0xc0) == 0x80) {
1277 if (((byte3 = *t++) & 0xc0) == 0x80) {
1278 /* valid 4-byte UTF-8? */
1279 value = ((byte & 0x07) << 18)
1280 | ((byte1 & 0x3f) << 12)
1281 | ((byte2 & 0x3f) << 6)
1282 | ((byte3 & 0x3f) );
1284 if (value > 0x10FFFF) {
1285 *buffer++ = UNICODE_REPLACEMENT;
1287 else if (value > 0xFFFF) {
1288 /* we need surrogates */
1289 *buffer++ = 0xd800 | ((value >> 10) - 0x40);
1290 *buffer++ = 0xdc00 | (value & 0x03ff);
1293 *buffer++ = value; /* 16bit suffice */
1296 *buffer++ = UNICODE_REPLACEMENT;
1301 *buffer++ = UNICODE_REPLACEMENT;
1306 *buffer++ = UNICODE_REPLACEMENT;
1310 else if ((byte & 0xfc) == 0xf8) {
1311 if (t + 4 > tlimit) {
1312 *buffer++ = UNICODE_REPLACEMENT;
1317 for (; skip && ((*t & 0xc0) == 0x80); --skip)
1319 *buffer++ = UNICODE_REPLACEMENT;
1321 else if ((byte & 0xfe) == 0xfc) {
1322 if (t + 5 > tlimit) {
1323 *buffer++ = UNICODE_REPLACEMENT;
1328 for (; skip && ((*t & 0xc0) == 0x80); --skip)
1330 *buffer++ = UNICODE_REPLACEMENT;
1333 *buffer++ = UNICODE_REPLACEMENT;
1341 /* ASCII character, common case */
1349 /* u2_utflength ****************************************************************
1351 Returns the utf length in bytes of a u2 array.
1353 *******************************************************************************/
1355 u4 u2_utflength(u2 *text, u4 u2_length)
1357 u4 result_len = 0; /* utf length in bytes */
1358 u2 ch; /* current unicode character */
1361 for (len = 0; len < u2_length; len++) {
1362 /* next unicode character */
1365 /* determine bytes required to store unicode character as utf */
1366 if (ch && (ch < 0x80))
1368 else if (ch < 0x800)
1378 /* utf_copy ********************************************************************
1380 Copy the given utf string byte-for-byte to a buffer.
1383 buffer.......the buffer
1384 u............the utf string
1386 *******************************************************************************/
1388 void utf_copy(char *buffer, utf *u)
1390 /* our utf strings are zero-terminated (done by utf_new) */
1391 MCOPY(buffer, u->text, char, u->blength + 1);
1395 /* utf_cat *********************************************************************
1397 Append the given utf string byte-for-byte to a buffer.
1400 buffer.......the buffer
1401 u............the utf string
1403 *******************************************************************************/
1405 void utf_cat(char *buffer, utf *u)
1407 /* our utf strings are zero-terminated (done by utf_new) */
1408 MCOPY(buffer + strlen(buffer), u->text, char, u->blength + 1);
1412 /* utf_copy_classname **********************************************************
1414 Copy the given utf classname byte-for-byte to a buffer.
1415 '/' is replaced by '.'
1418 buffer.......the buffer
1419 u............the utf string
1421 *******************************************************************************/
1423 void utf_copy_classname(char *buffer, utf *u)
1432 endptr = UTF_END(u) + 1; /* utfs are zero-terminared by utf_new */
1434 while (srcptr != endptr) {
1443 /* utf_cat *********************************************************************
1445 Append the given utf classname byte-for-byte to a buffer.
1446 '/' is replaced by '.'
1449 buffer.......the buffer
1450 u............the utf string
1452 *******************************************************************************/
1454 void utf_cat_classname(char *buffer, utf *u)
1456 utf_copy_classname(buffer + strlen(buffer), u);
1459 /* utf_display_printable_ascii *************************************************
1461 Write utf symbol to stdout (for debugging purposes).
1462 Non-printable and non-ASCII characters are printed as '?'.
1464 *******************************************************************************/
1466 void utf_display_printable_ascii(utf *u)
1468 char *endpos; /* points behind utf string */
1469 char *utf_ptr; /* current position in utf text */
1477 endpos = UTF_END(u);
1480 while (utf_ptr < endpos) {
1481 /* read next unicode character */
1483 u2 c = utf_nextu2(&utf_ptr);
1485 if ((c >= 32) && (c <= 127))
1495 /* utf_display_printable_ascii_classname ***************************************
1497 Write utf symbol to stdout with `/' converted to `.' (for debugging
1499 Non-printable and non-ASCII characters are printed as '?'.
1501 *******************************************************************************/
1503 void utf_display_printable_ascii_classname(utf *u)
1505 char *endpos; /* points behind utf string */
1506 char *utf_ptr; /* current position in utf text */
1514 endpos = UTF_END(u);
1517 while (utf_ptr < endpos) {
1518 /* read next unicode character */
1520 u2 c = utf_nextu2(&utf_ptr);
1525 if ((c >= 32) && (c <= 127))
1535 /* utf_sprint_convert_to_latin1 ************************************************
1537 Write utf symbol into c-string (for debugging purposes).
1538 Characters are converted to 8-bit Latin-1, non-Latin-1 characters yield
1541 *******************************************************************************/
1543 void utf_sprint_convert_to_latin1(char *buffer, utf *u)
1545 char *endpos; /* points behind utf string */
1546 char *utf_ptr; /* current position in utf text */
1547 u2 pos = 0; /* position in c-string */
1550 strcpy(buffer, "NULL");
1554 endpos = UTF_END(u);
1557 while (utf_ptr < endpos)
1558 /* copy next unicode character */
1559 buffer[pos++] = utf_nextu2(&utf_ptr);
1561 /* terminate string */
1566 /* utf_sprint_convert_to_latin1_classname **************************************
1568 Write utf symbol into c-string with `/' converted to `.' (for debugging
1570 Characters are converted to 8-bit Latin-1, non-Latin-1 characters yield
1573 *******************************************************************************/
1575 void utf_sprint_convert_to_latin1_classname(char *buffer, utf *u)
1577 char *endpos; /* points behind utf string */
1578 char *utf_ptr; /* current position in utf text */
1579 u2 pos = 0; /* position in c-string */
1582 strcpy(buffer, "NULL");
1586 endpos = UTF_END(u);
1589 while (utf_ptr < endpos) {
1590 /* copy next unicode character */
1591 u2 c = utf_nextu2(&utf_ptr);
1592 if (c == '/') c = '.';
1596 /* terminate string */
1601 /* utf_strcat_convert_to_latin1 ************************************************
1603 Like libc strcat, but uses an utf8 string.
1604 Characters are converted to 8-bit Latin-1, non-Latin-1 characters yield
1607 *******************************************************************************/
1609 void utf_strcat_convert_to_latin1(char *buffer, utf *u)
1611 utf_sprint_convert_to_latin1(buffer + strlen(buffer), u);
1615 /* utf_strcat_convert_to_latin1_classname **************************************
1617 Like libc strcat, but uses an utf8 string.
1618 Characters are converted to 8-bit Latin-1, non-Latin-1 characters yield
1621 *******************************************************************************/
1623 void utf_strcat_convert_to_latin1_classname(char *buffer, utf *u)
1625 utf_sprint_convert_to_latin1_classname(buffer + strlen(buffer), u);
1629 /* utf_fprint_printable_ascii **************************************************
1631 Write utf symbol into file.
1632 Non-printable and non-ASCII characters are printed as '?'.
1634 *******************************************************************************/
1636 void utf_fprint_printable_ascii(FILE *file, utf *u)
1638 char *endpos; /* points behind utf string */
1639 char *utf_ptr; /* current position in utf text */
1644 endpos = UTF_END(u);
1647 while (utf_ptr < endpos) {
1648 /* read next unicode character */
1649 u2 c = utf_nextu2(&utf_ptr);
1651 if (c >= 32 && c <= 127) fprintf(file, "%c", c);
1652 else fprintf(file, "?");
1657 /* utf_fprint_printable_ascii_classname ****************************************
1659 Write utf symbol into file with `/' converted to `.'.
1660 Non-printable and non-ASCII characters are printed as '?'.
1662 *******************************************************************************/
1664 void utf_fprint_printable_ascii_classname(FILE *file, utf *u)
1666 char *endpos; /* points behind utf string */
1667 char *utf_ptr; /* current position in utf text */
1672 endpos = UTF_END(u);
1675 while (utf_ptr < endpos) {
1676 /* read next unicode character */
1677 u2 c = utf_nextu2(&utf_ptr);
1678 if (c == '/') c = '.';
1680 if (c >= 32 && c <= 127) fprintf(file, "%c", c);
1681 else fprintf(file, "?");
/* is_valid_utf ****************************************************************

   Checks whether a byte range is an acceptable UTF-8 string.

   Accepted are non-zero 7-bit bytes and well-formed 2-byte and 3-byte
   sequences. Rejected are:
      - a 0x00 byte,
      - lead bytes announcing 4, 5 or 6 byte sequences (Java limitation),
      - bytes that cannot start a sequence,
      - sequences cut short by the end of the string,
      - continuation bytes not of the form 10xx xxxx,
      - the value zero encoded in three bytes (the 2-byte form is the
        only accepted encoding of zero).

   Deliberately NOT rejected, because Sun Java seems to allow them in
   classfiles: other overlong encodings, surrogates (0xd800..0xdfff) and
   the codepoints 0xfffe and 0xffff.

   IN:
      utf_ptr...points to first character
      end_pos...points after last character

   RETURN VALUE:
      true......the string is valid
      false.....the string is invalid, or end_pos lies before utf_ptr

*******************************************************************************/

bool is_valid_utf(char *utf_ptr, char *end_pos)
{
	int           remaining;            /* bytes not yet consumed             */
	int           ntrail;               /* continuation bytes of a sequence   */
	int           k;
	char          byte;
	unsigned long value;                /* decoded value of a sequence        */

	if (end_pos < utf_ptr)
		return false;

	remaining = end_pos - utf_ptr;

	while (remaining--) {
		byte = *utf_ptr++;

		if (!byte)
			return false;                   /* 0x00 is not allowed            */

		if ((byte & 0x80) == 0)
			continue;                       /* ASCII                          */

		if ((byte & 0xe0) == 0xc0)          /* 110x xxxx                      */
			ntrail = 1;
		else if ((byte & 0xf0) == 0xe0)     /* 1110 xxxx                      */
			ntrail = 2;
		else
			return false;                   /* longer than Java allows, or
			                                   not a lead byte at all         */

		/* payload bits of the lead byte */

		value = (unsigned long) byte & (0x3f >> ntrail);

		remaining -= ntrail;

		if (remaining < 0)
			return false;                   /* missing bytes                  */

		for (k = 0; k < ntrail; k++) {
			byte = *utf_ptr++;

			if ((byte & 0xc0) != 0x80)      /* 10xx xxxx                      */
				return false;

			value = (value << 6) | (byte & 0x3f);
		}

		/* Java special: zero may only be written as a 2-byte sequence */

		if ((value == 0) && (ntrail != 1))
			return false;
	}

	return true;
}
/* is_valid_name ***************************************************************

   Return true if the given string may be used as a class/field/method
   name. (Currently this only disallows empty strings, control
   characters below 0x20 and the encoded zero character 0xc0 0x80.)

   NOTE: The string is assumed to have passed is_valid_utf!

   IN:
      utf_ptr...points to first character
      end_pos...points after last character

   RETURN VALUE:
      true......the string may be used as a name
      false.....the string is empty or contains a disallowed character

*******************************************************************************/

bool is_valid_name(char *utf_ptr, char *end_pos)
{
	if (end_pos <= utf_ptr)
		return false;                   /* disallow empty names               */

	while (utf_ptr < end_pos) {
		unsigned char c = *utf_ptr++;

		if (c < 0x20)
			return false;               /* disallow control characters        */

		/* disallow zero (0xc0 0x80); the second byte is only inspected
		   while it still lies inside the string, so a trailing 0xc0 never
		   causes a read at end_pos */

		if ((c == 0xc0) && (utf_ptr < end_pos)
			&& ((unsigned char) *utf_ptr == 0x80))
			return false;
	}

	return true;
}
1781 bool is_valid_name_utf(utf *u)
1783 return is_valid_name(u->text, UTF_END(u));
1787 /* utf_show ********************************************************************

   Writes the utf symbols in the utf hashtable to stdout and then prints
   statistics on the hash chains, grouped according to the chain length
   (for debugging purposes).

   Output, in order:
      - the symbols, slot by slot,
      - the number of slots and entries of the table,
      - one line per chain length 1..CHAIN_LIMIT-2 with the number of
        chains of that length and the share of all entries they hold,
      - one ">=" line for the last bucket,
      - the maximum and the average chain length.

   Only compiled when NDEBUG is not defined.

*******************************************************************************/
1795 #if !defined(NDEBUG)
1799 #define CHAIN_LIMIT 20 /* number of buckets in chain_count; lengths are
                               enumerated separately below this limit */
1801 u4 chain_count[CHAIN_LIMIT]; /* number of chains per chain length; index
                                     0 counts the empty slots, the last
                                     index collects the clamped long chains */
1802 u4 max_chainlength = 0; /* maximum length of the chains */
1803 u4 sum_chainlength = 0; /* sum of the chainlengths */
1804 u4 beyond_limit = 0; /* number of utf-symbols in chains with
                             length >= CHAIN_LIMIT (see the review note at
                             the place where it is updated) */
1807 printf("UTF-HASH:\n");
1809 /* show the elements of the utf-hashtable, one hashtable slot at a time */
1811 for (i = 0; i < hashtable_utf->size; i++) {
1812 utf *u = hashtable_utf->ptr[i];
1815 printf("SLOT %d: ", (int) i);
1819 utf_display_printable_ascii(u);
/* summary line: table size and number of stored symbols */
1827 printf("UTF-HASH: %d slots for %d entries\n",
1828 (int) hashtable_utf->size, (int) hashtable_utf->entries );
/* special case: the table holds no entries; note that the percentages
   printed further down divide by hashtable_utf->entries */
1830 if (hashtable_utf->entries == 0)
1833 printf("chains:\n chainlength number of chains %% of utfstrings\n");
1835 for (i=0;i<CHAIN_LIMIT;i++)
1838 /* count the hash chains according to their length */
1839 for (i=0; i<hashtable_utf->size; i++) {
1841 utf *u = (utf*) hashtable_utf->ptr[i];
1842 u4 chain_length = 0;
1844 /* determine chainlength */
1850 /* update sum of all chainlengths */
1851 sum_chainlength+=chain_length;
1853 /* determine the maximum length of the chains */
1854 if (chain_length>max_chainlength)
1855 max_chainlength = chain_length;
1857 /* chains of CHAIN_LIMIT or more symbols: add their symbols to
        beyond_limit and clamp the length so that the chain is counted in
        the last bucket.
        NOTE(review): a chain of exactly CHAIN_LIMIT-1 symbols is counted in
        that same last bucket below but is not added to beyond_limit, so
        the percentage on the ">=" line leaves such chains out - confirm
        whether the test was meant to be >= CHAIN_LIMIT-1. */
1858 if (chain_length>=CHAIN_LIMIT) {
1859 beyond_limit+=chain_length;
1860 chain_length=CHAIN_LIMIT-1;
1863 /* update number of hashchains of current length */
1864 chain_count[chain_length]++;
1867 /* display results: chains of length i hold chain_count[i]*i symbols,
        shown as a percentage of all entries.
        NOTE(review): chain_count[] and max_chainlength are u4 but are
        printed with %d conversions - confirm this is intended. */
1868 for (i=1;i<CHAIN_LIMIT-1;i++)
1869 printf(" %2d %17d %18.2f%%\n",i,chain_count[i],(((float) chain_count[i]*i*100)/hashtable_utf->entries));
/* last bucket: number of clamped chains and the share of entries
   accumulated in beyond_limit */
1871 printf(" >=%2d %17d %18.2f%%\n",CHAIN_LIMIT-1,chain_count[CHAIN_LIMIT-1],((float) beyond_limit*100)/hashtable_utf->entries);
1874 printf("max. chainlength:%5d\n",max_chainlength);
1876 /* avg. chainlength = sum of chainlengths / number of chains, where the
        number of chains is the number of slots minus the empty slots
        counted in chain_count[0] */
1877 printf("avg. chainlength:%5.2f\n",(float) sum_chainlength / (hashtable_utf->size-chain_count[0]));
1879 #endif /* !defined(NDEBUG) */
1883 * These are local overrides for various environment variables in Emacs.
1884 * Please do not remove this and leave it at the end of the file, where
1885 * Emacs will automagically detect them.
1886 * ---------------------------------------------------------------------
1889 * indent-tabs-mode: t
1893 * vim:noexpandtab:sw=4:ts=4: