1 /* src/vm/utf.c - utf functions
3 Copyright (C) 1996-2005 R. Grafl, A. Krall, C. Kruegel, C. Oates,
4 R. Obermaisser, M. Platter, M. Probst, S. Ring, E. Steiner,
5 C. Thalinger, D. Thuernbeck, P. Tomsich, C. Ullrich, J. Wenninger,
6 Institut f. Computersprachen - TU Wien
8 This file is part of CACAO.
10 This program is free software; you can redistribute it and/or
11 modify it under the terms of the GNU General Public License as
12 published by the Free Software Foundation; either version 2, or (at
13 your option) any later version.
15 This program is distributed in the hope that it will be useful, but
16 WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
25 Contact: cacao@complang.tuwien.ac.at
27 Authors: Reinhard Grafl
33 $Id: utf8.c 2061 2005-03-23 11:10:33Z twisti $
40 #include "mm/memory.h"
41 #include "vm/exceptions.h"
42 #include "vm/options.h"
43 #include "vm/statistics.h"
44 #include "vm/tables.h"
48 hashtable utf_hash; /* hashtable for utf8-symbols */
51 /* utf-symbols for pointer comparison of frequently used strings **************/
/* Each pointer below is assigned by the initialization code in this file
   through utf_new_char(); the trailing comment gives the string that the
   symbol stands for. */
53 utf *utf_java_lang_Object; /* java/lang/Object */
55 utf *utf_java_lang_Class; /* java/lang/Class */
56 utf *utf_java_lang_ClassLoader; /* java/lang/ClassLoader */
57 utf *utf_java_lang_Cloneable; /* java/lang/Cloneable */
58 utf *utf_java_lang_SecurityManager; /* java/lang/SecurityManager */
59 utf *utf_java_lang_String; /* java/lang/String */
60 utf *utf_java_lang_System; /* java/lang/System */
61 utf *utf_java_io_Serializable; /* java/io/Serializable */
63 utf *utf_java_lang_Throwable; /* java/lang/Throwable */
64 utf *utf_java_lang_VMThrowable; /* java/lang/VMThrowable */
65 utf *utf_java_lang_Exception; /* java/lang/Exception */
66 utf *utf_java_lang_Error; /* java/lang/Error */
67 utf *utf_java_lang_OutOfMemoryError; /* java/lang/OutOfMemoryError */
69 utf* utf_java_lang_Void; /* java/lang/Void */
70 utf* utf_java_lang_Boolean; /* java/lang/Boolean */
71 utf* utf_java_lang_Byte; /* java/lang/Byte */
72 utf* utf_java_lang_Character; /* java/lang/Character */
73 utf* utf_java_lang_Short; /* java/lang/Short */
74 utf* utf_java_lang_Integer; /* java/lang/Integer */
75 utf* utf_java_lang_Long; /* java/lang/Long */
76 utf* utf_java_lang_Float; /* java/lang/Float */
77 utf* utf_java_lang_Double; /* java/lang/Double */
79 utf *utf_java_util_Vector; /* java/util/Vector */
81 utf *utf_InnerClasses; /* InnerClasses */
82 utf *utf_ConstantValue; /* ConstantValue */
83 utf *utf_Code; /* Code */
84 utf *utf_Exceptions; /* Exceptions */
85 utf *utf_LineNumberTable; /* LineNumberTable */
86 utf *utf_SourceFile; /* SourceFile */
88 utf *utf_init; /* <init> */
89 utf *utf_clinit; /* <clinit> */
90 utf *utf_finalize; /* finalize */
92 utf *utf_printStackTrace; /* printStackTrace */
93 utf *utf_fillInStackTrace; /* fillInStackTrace */
96 utf *utf_void__void; /* ()V */
97 utf *utf_boolean__void; /* (Z)V */
98 utf *utf_byte__void; /* (B)V */
99 utf *utf_char__void; /* (C)V */
100 utf *utf_short__void; /* (S)V */
101 utf *utf_int__void; /* (I)V */
102 utf *utf_long__void; /* (J)V */
103 utf *utf_float__void; /* (F)V */
104 utf *utf_double__void; /* (D)V */
105 utf *utf_void__java_lang_Throwable; /* ()Ljava/lang/Throwable; */
106 utf *utf_java_lang_String__void; /* (Ljava/lang/String;)V */
107 utf *utf_java_lang_String__java_lang_Class; /* (Ljava/lang/String;)Ljava/lang/Class; */
108 utf *utf_java_lang_Throwable__void; /* (Ljava/lang/Throwable;)V */
110 utf *array_packagename; /* <the array package> */
113 /* utf_init ********************************************************************
115 Initializes the utf8 subsystem.
Every assignment below interns one fixed string with utf_new_char() and
stores the returned symbol in the matching file-level pointer, so that
later code can compare symbols by pointer.
117 *******************************************************************************/
/* NOTE(review): the function's signature line and its braces are not visible
   in this listing; only the comment header above names the routine. */
121 /* create utf-symbols for pointer comparison of frequently used strings */
123 utf_java_lang_Object = utf_new_char("java/lang/Object");
125 utf_java_lang_Class = utf_new_char("java/lang/Class");
126 utf_java_lang_ClassLoader = utf_new_char("java/lang/ClassLoader");
127 utf_java_lang_Cloneable = utf_new_char("java/lang/Cloneable");
128 utf_java_lang_SecurityManager = utf_new_char("java/lang/SecurityManager");
129 utf_java_lang_String = utf_new_char("java/lang/String");
130 utf_java_lang_System = utf_new_char("java/lang/System");
131 utf_java_io_Serializable = utf_new_char("java/io/Serializable");
133 utf_java_lang_Throwable = utf_new_char("java/lang/Throwable");
134 utf_java_lang_VMThrowable = utf_new_char("java/lang/VMThrowable");
135 utf_java_lang_Exception = utf_new_char("java/lang/Exception");
136 utf_java_lang_Error = utf_new_char("java/lang/Error");
137 utf_java_lang_OutOfMemoryError = utf_new_char("java/lang/OutOfMemoryError");
139 utf_java_lang_Void = utf_new_char("java/lang/Void");
140 utf_java_lang_Boolean = utf_new_char("java/lang/Boolean");
141 utf_java_lang_Byte = utf_new_char("java/lang/Byte");
142 utf_java_lang_Character = utf_new_char("java/lang/Character");
143 utf_java_lang_Short = utf_new_char("java/lang/Short");
144 utf_java_lang_Integer = utf_new_char("java/lang/Integer");
145 utf_java_lang_Long = utf_new_char("java/lang/Long");
146 utf_java_lang_Float = utf_new_char("java/lang/Float");
147 utf_java_lang_Double = utf_new_char("java/lang/Double");
149 utf_java_util_Vector = utf_new_char("java/util/Vector");
/* unqualified names (no package prefix) */
151 utf_InnerClasses = utf_new_char("InnerClasses");
152 utf_ConstantValue = utf_new_char("ConstantValue");
153 utf_Code = utf_new_char("Code");
154 utf_Exceptions = utf_new_char("Exceptions");
155 utf_LineNumberTable = utf_new_char("LineNumberTable");
156 utf_SourceFile = utf_new_char("SourceFile");
158 utf_init = utf_new_char("<init>");
159 utf_clinit = utf_new_char("<clinit>");
160 utf_finalize = utf_new_char("finalize");
162 utf_printStackTrace = utf_new_char("printStackTrace");
163 utf_fillInStackTrace = utf_new_char("fillInStackTrace");
/* NOTE(review): utf_loadClass is assigned here, but no declaration of it is
   visible among the file-level symbols in this listing -- confirm that it is
   declared. */
164 utf_loadClass = utf_new_char("loadClass");
/* descriptor strings; each variable is named <parameters>__<result> */
166 utf_void__void = utf_new_char("()V");
167 utf_boolean__void = utf_new_char("(Z)V");
168 utf_byte__void = utf_new_char("(B)V");
169 utf_char__void = utf_new_char("(C)V");
170 utf_short__void = utf_new_char("(S)V");
171 utf_int__void = utf_new_char("(I)V");
172 utf_long__void = utf_new_char("(J)V");
173 utf_float__void = utf_new_char("(F)V");
174 utf_double__void = utf_new_char("(D)V");
175 utf_void__java_lang_Throwable = utf_new_char("()Ljava/lang/Throwable;");
176 utf_java_lang_String__void = utf_new_char("(Ljava/lang/String;)V");
178 utf_java_lang_String__java_lang_Class =
179 utf_new_char("(Ljava/lang/String;)Ljava/lang/Class;");
181 utf_java_lang_Throwable__void = utf_new_char("(Ljava/lang/Throwable;)V");
182 array_packagename = utf_new_char("<the array package>");
186 /* utf_hashkey *****************************************************************
188 The hashkey is computed from the utf-text by using up to 8
189 characters. For utf-symbols longer than 15 characters 3 characters
190 are taken from the beginning and the end, 2 characters are taken
from the middle.

text.....bytes to hash
length...number of bytes in text

Returns the u4 hash key; each selected byte is shifted left by a
position-dependent amount and the results are XORed together.
193 *******************************************************************************/
/* NOTE(review): the opening brace, the switch head (presumably on length) and
   the statements that build the accumulator `a` for lengths 9..15 and for the
   default case are not visible in this listing. */
/* nbs() pre-increments `text`, so every use consumes one more byte; fbs()
   reads the byte at the current position without advancing.
   NOTE(review): `text` is a `const char *`, so where plain char is signed,
   bytes >= 0x80 are sign-extended by the (u4) cast before the shift.
   NOTE(review): the macro argument `val` is not parenthesized in the
   expansion. */
195 #define nbs(val) ((u4) *(++text) << val) /* get next byte, left shift by val */
196 #define fbs(val) ((u4) *( text) << val) /* get first byte, left shift by val */
198 u4 utf_hashkey(const char *text, u4 length)
200 const char *start_pos = text; /* pointer to utf text */
204 case 0: /* empty string */
/* lengths 1..8: every byte of the text takes part in the key */
207 case 1: return fbs(0);
208 case 2: return fbs(0) ^ nbs(3);
209 case 3: return fbs(0) ^ nbs(3) ^ nbs(5);
210 case 4: return fbs(0) ^ nbs(2) ^ nbs(4) ^ nbs(6);
211 case 5: return fbs(0) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(6);
212 case 6: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(5) ^ nbs(6);
213 case 7: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(5) ^ nbs(6);
214 case 8: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(5) ^ nbs(6) ^ nbs(7);
/* the returns below close the cases whose earlier statements (not visible
   here) have already folded the leading bytes into `a` */
221 return a ^ nbs(4) ^ nbs(5) ^ nbs(6) ^ nbs(7) ^ nbs(8);
230 return a ^ nbs(6) ^ nbs(7) ^ nbs(8) ^ nbs(9);
239 return a ^ nbs(6) ^ nbs(7) ^ nbs(8) ^ nbs(9) ^ nbs(10);
251 return a ^ nbs(9) ^ nbs(10);
263 return a ^ nbs(9) ^ nbs(10);
274 return a ^ nbs(9) ^ nbs(10) ^ nbs(11);
285 return a ^ nbs(9) ^ nbs(10) ^ nbs(11);
287 default: /* 3 characters from beginning */
293 /* 2 characters from middle */
294 text = start_pos + (length / 2);
299 /* 3 characters from end */
/* nbs() pre-increments, so the next byte read after this assignment is the
   one at start_pos + length - 3 */
300 text = start_pos + length - 4;
305 return a ^ nbs(10) ^ nbs(11);
310 /* unicode_hashkey *************************************************************
312 Compute the hashkey of a unicode string.

text...u2 array to hash
len....passed on unchanged as the length argument of utf_hashkey()

Returns the value computed by utf_hashkey() over the u2 array viewed as
bytes.
314 *******************************************************************************/
316 u4 unicode_hashkey(u2 *text, u2 len)
/* NOTE(review): utf_hashkey() treats its second argument as a byte count; if
   len is the number of u2 elements, only the first len bytes (about half of
   the array) can influence the key, and the result depends on the byte order
   of u2 -- confirm this is intended. */
318 return utf_hashkey((char *) text, len);
322 /* utf_new *********************************************************************
324 Creates a new utf-symbol, the text of the symbol is passed as a
325 char array of `length` bytes. The function searches the utf-hashtable
326 for a utf-symbol with this text. On success the element is returned,
327 otherwise a new hashtable element is created.
329 If the number of entries in the hashtable exceeds twice the size of
330 the hashtable slots a reorganization of the hashtable is done and
331 the utf symbols are relinked into a new hashtable with doubled size.
333 *******************************************************************************/
/* forward declaration: utf_new_intern() does the actual lookup/insert */
335 utf *utf_new_intern(const char *text, u2 length);
337 utf *utf_new(const char *text, u2 length)
/* NOTE(review): the body of this conditional section is not visible in this
   listing -- presumably it acquires a lock around the table access; confirm. */
341 #if defined(USE_THREADS) && defined(NATIVE_THREADS)
345 r = utf_new_intern(text, length);
/* NOTE(review): body not visible -- presumably the matching release. */
347 #if defined(USE_THREADS) && defined(NATIVE_THREADS)
/* Looks up the `length` bytes at `text` in utf_hash and returns the existing
   symbol, or creates, links and returns a new one. After an insert the table
   is rebuilt with twice as many slots once entries exceed twice the slot
   count.
   NOTE(review): braces, the chain-walking loop head, the allocation of the
   new element and the return statements are not visible in this listing. */
355 utf *utf_new_intern(const char *text, u2 length)
357 u4 key; /* hashkey computed from utf-text */
358 u4 slot; /* slot in hashtable */
359 utf *u; /* hashtable element */
367 key = utf_hashkey(text, length);
/* NOTE(review): masking with size - 1 only addresses every slot when
   utf_hash.size is a power of two -- presumably guaranteed where the table is
   created; confirm. */
368 slot = key & (utf_hash.size - 1);
369 u = utf_hash.ptr[slot];
371 /* search external hash chain for utf-symbol */
373 if (u->blength == length) {
375 /* compare text of hashtable elements */
376 for (i = 0; i < length; i++)
377 if (text[i] != u->text[i]) goto nomatch;
381 count_utf_new_found++;
384 /* symbol found in hashtable */
388 u = u->hashlink; /* next element in external chain */
393 count_utf_len += sizeof(utf) + length;
396 /* location in hashtable found, create new utf element */
398 u->blength = length; /* length in bytes of utfstring */
/* the new element becomes the head of the slot's chain */
399 u->hashlink = utf_hash.ptr[slot]; /* link in external hashchain */
/* one extra byte so the stored text can be NUL-terminated below */
400 u->text = mem_alloc(length + 1);/* allocate memory for utf-text */
401 memcpy(u->text, text, length); /* copy utf-text */
402 u->text[length] = '\0';
403 utf_hash.ptr[slot] = u; /* insert symbol into table */
405 utf_hash.entries++; /* update number of entries */
407 if (utf_hash.entries > (utf_hash.size * 2)) {
409 /* reorganization of hashtable, average length of
410 the external chains is approx. 2 */
414 hashtable newhash; /* the new hashtable */
416 /* create new hashtable, double the size */
417 init_hashtable(&newhash, utf_hash.size * 2);
418 newhash.entries = utf_hash.entries;
422 count_utf_len += sizeof(utf*) * utf_hash.size;
425 /* transfer elements to new hashtable */
426 for (i = 0; i < utf_hash.size; i++) {
427 u = (utf *) utf_hash.ptr[i];
/* remember the successor before the element is relinked */
429 utf *nextu = u->hashlink;
/* NOTE(review): this `slot` shadows the function-level `slot` declared
   above. */
430 u4 slot = utf_hashkey(u->text, u->blength) & (newhash.size - 1);
/* existing elements are moved (not copied) to the head of their new chain */
432 u->hashlink = (utf *) newhash.ptr[slot];
433 newhash.ptr[slot] = u;
435 /* follow link in external hash chain */
440 /* dispose old table */
441 MFREE(utf_hash.ptr, void*, utf_hash.size);
449 /* utf_new_u2 ******************************************************************
451 Make utf symbol from u2 array, if isclassname is true '.' is
converted (the replacement is not visible in this listing; presumably
'/', as in utf_new_char_classname).

unicode_pos......first u2 character
unicode_length...number of u2 characters
isclassname......enables the '.' conversion

Returns the symbol obtained from utf_new() for the encoded bytes.
454 *******************************************************************************/
456 utf *utf_new_u2(u2 *unicode_pos, u4 unicode_length, bool isclassname)
458 char *buffer; /* memory buffer for unicode characters */
459 char *pos; /* pointer to current position in buffer */
/* NOTE(review): `left` is compared as (int) left < 0 below, so it presumably
   counts the bytes still free in buffer rather than characters; confirm. */
460 u4 left; /* unicode characters left */
461 u4 buflength; /* utf length in bytes of the u2 array */
462 utf *result; /* resulting utf-string */
465 /* determine utf length in bytes and allocate memory */
467 buflength = u2_utflength(unicode_pos, unicode_length);
468 buffer = MNEW(char, buflength);
/* the post-increment in the condition makes the loop run unicode_length
   times */
473 for (i = 0; i++ < unicode_length; unicode_pos++) {
474 /* next unicode character */
/* one-byte form; 0 is excluded here and therefore takes the two-byte branch */
477 if ((c != 0) && (c < 0x80)) {
480 if ((int) left < 0) break;
481 /* convert classname */
482 if (isclassname && c == '.')
/* two-byte form */
487 } else if (c < 0x800) {
489 unsigned char high = c >> 6;
490 unsigned char low = c & 0x3F;
492 if ((int) left < 0) break;
493 *pos++ = high | 0xC0;
/* three-byte form (remaining values) */
499 char mid = (c >> 6) & 0x3F;
502 if ((int) left < 0) break;
503 *pos++ = high | 0xE0;
509 /* insert utf-string into symbol-table */
/* NOTE(review): buflength is u4 but utf_new() takes a u2 length, so a value
   above 65535 would be narrowed at this call. */
510 result = utf_new(buffer,buflength);
512 MFREE(buffer, char, buflength);
518 /* utf_new_char ****************************************************************
520 Creates a new utf symbol, the text for this symbol is passed as a
521 c-string ( = char* ).

text...NUL-terminated string; its strlen() is used as the symbol length

Returns the symbol obtained from utf_new().
523 *******************************************************************************/
525 utf *utf_new_char(const char *text)
/* NOTE(review): utf_new() takes a u2 length, so the size_t from strlen() is
   narrowed; strings longer than 65535 bytes would be cut to the low 16 bits
   of their length. */
527 return utf_new(text, strlen(text));
531 /* utf_new_char_classname ******************************************************
533 Creates a new utf symbol, the text for this symbol is passed as a
534 c-string ( = char* ) "." characters are going to be replaced by
535 "/". Since the above function is used often, this is a separate
536 function, instead of an if.

text...NUL-terminated string; it is not modified, the replacement is
done on a private copy

Returns the symbol obtained from utf_new().
538 *******************************************************************************/
540 utf *utf_new_char_classname(const char *text)
/* only strings that actually contain a '.' are copied and rewritten */
542 if (strchr(text, '.')) {
/* NOTE(review): no check of the strdup() result is visible in this listing,
   and the release of txt is not visible either -- confirm both. */
543 char *txt = strdup(text);
544 char *end = txt + strlen(txt);
548 for (c = txt; c < end; c++)
549 if (*c == '.') *c = '/';
551 tmpRes = utf_new(txt, strlen(txt));
/* no '.' present: intern the caller's string directly */
557 return utf_new(text, strlen(text));
561 /* utf_nextu2 ******************************************************************
563 Read the next unicode character from the utf string and increment
564 the utf-string pointer accordingly.

utf_ptr...in/out: address of the current read position; advanced by the
number of bytes consumed

Returns the decoded u2 character.
566 *******************************************************************************/
/* NOTE(review): the function receives no end pointer; the visible code reads
   utf[1] and utf[2] depending only on the lead byte, so it relies on the
   caller passing well-formed, terminated text. Declarations of unicode_char
   and len, the 1-byte branch body, the fall-back paths and the return are not
   visible in this listing. */
568 u2 utf_nextu2(char **utf_ptr)
570 /* uncompressed unicode character */
572 /* current position in utf text */
573 unsigned char *utf = (unsigned char *) (*utf_ptr);
574 /* bytes representing the unicode character */
575 unsigned char ch1, ch2, ch3;
576 /* number of bytes used to represent the unicode character */
/* dispatch on the high nibble of the first byte */
579 switch ((ch1 = utf[0]) >> 4) {
580 default: /* 1 byte */
584 case 0xD: /* 2 bytes */
/* second byte must have the form 10xx xxxx */
585 if (((ch2 = utf[1]) & 0xC0) == 0x80) {
586 unsigned char high = ch1 & 0x1F;
587 unsigned char low = ch2 & 0x3F;
588 unicode_char = (high << 6) + low;
593 case 0xE: /* 2 or 3 bytes */
594 if (((ch2 = utf[1]) & 0xC0) == 0x80) {
595 if (((ch3 = utf[2]) & 0xC0) == 0x80) {
596 unsigned char low = ch3 & 0x3f;
597 unsigned char mid = ch2 & 0x3f;
598 unsigned char high = ch1 & 0x0f;
/* 4 + 6 + 6 payload bits */
599 unicode_char = (((high << 6) + mid) << 6) + low;
607 /* update position in utf-text */
608 *utf_ptr = (char *) (utf + len);
614 /* utf_strlen ******************************************************************
616 Determine number of unicode characters in the utf string.

u...symbol whose characters are counted

The count is obtained by decoding the text with utf_nextu2() until the end
position is reached.
618 *******************************************************************************/
620 u4 utf_strlen(utf *u)
622 char *endpos; /* points behind utf string */
623 char *utf_ptr; /* current position in utf text */
624 u4 len = 0; /* number of unicode characters */
/* NOTE(review): the guard for this statement is not visible in this listing
   -- presumably a NULL check on u; confirm. */
627 *exceptionptr = new_nullpointerexception();
634 while (utf_ptr < endpos) {
636 /* next unicode character */
637 utf_nextu2(&utf_ptr);
/* decoding stepped past endpos: the last sequence was cut short */
640 if (utf_ptr != endpos)
641 /* string ended abruptly */
642 throw_cacao_exception_exit(string_java_lang_InternalError,
643 "Illegal utf8 string");
649 /* u2_utflength ****************************************************************
651 Returns the utf length in bytes of a u2 array.

text........first u2 character
u2_length...number of u2 characters to measure
653 *******************************************************************************/
/* NOTE(review): the declaration of len, the fetch of ch and the branches that
   add to result_len are not visible in this listing. */
655 u4 u2_utflength(u2 *text, u4 u2_length)
657 u4 result_len = 0; /* utf length in bytes */
658 u2 ch; /* current unicode character */
661 for (len = 0; len < u2_length; len++) {
662 /* next unicode character */
665 /* determine bytes required to store unicode character as utf */
/* non-zero values below 0x80 form their own class; 0 is excluded from it */
666 if (ch && (ch < 0x80))
678 /* utf_display *****************************************************************
680 Write utf symbol to stdout (for debugging purposes).

u...symbol to print
682 *******************************************************************************/
/* NOTE(review): the setup of endpos/utf_ptr and the handling of characters
   outside the printed range are not visible in this listing. */
684 void utf_display(utf *u)
686 char *endpos; /* points behind utf string */
687 char *utf_ptr; /* current position in utf text */
698 while (utf_ptr < endpos) {
699 /* read next unicode character */
700 u2 c = utf_nextu2(&utf_ptr);
/* NOTE(review): the upper bound 127 lets DEL (0x7F) through as "printable". */
701 if (c >= 32 && c <= 127) printf("%c", c);
709 /* utf_display_classname *******************************************************
711 Write utf symbol to stdout with `/' converted to `.' (for debugging
purposes).

u...symbol to print
714 *******************************************************************************/
/* NOTE(review): the setup of endpos/utf_ptr and the handling of characters
   outside the printed range are not visible in this listing. */
716 void utf_display_classname(utf *u)
718 char *endpos; /* points behind utf string */
719 char *utf_ptr; /* current position in utf text */
730 while (utf_ptr < endpos) {
731 /* read next unicode character */
732 u2 c = utf_nextu2(&utf_ptr);
/* the conversion happens before the range test, so '.' is what is printed */
733 if (c == '/') c = '.';
734 if (c >= 32 && c <= 127) printf("%c", c);
742 /* utf_sprint ******************************************************************
744 Write utf symbol into c-string (for debugging purposes).

buffer...destination; no size is passed, so the caller must supply enough
room for every decoded character plus the terminator
u........symbol to write
746 *******************************************************************************/
748 void utf_sprint(char *buffer, utf *u)
750 char *endpos; /* points behind utf string */
751 char *utf_ptr; /* current position in utf text */
752 u2 pos = 0; /* position in c-string */
/* NOTE(review): the guard for this statement is not visible in this listing
   -- presumably taken when u is NULL; confirm. */
755 memcpy(buffer, "NULL", 5); /* 4 chars + terminating \0 */
762 while (utf_ptr < endpos)
763 /* copy next unicode character */
/* NOTE(review): the u2 result is stored into a char, so characters that do
   not fit a char are narrowed. */
764 buffer[pos++] = utf_nextu2(&utf_ptr);
766 /* terminate string */
771 /* utf_sprint_classname ********************************************************
773 Write utf symbol into c-string with `/' converted to `.' (for debugging
purposes).

buffer...destination; no size is passed, so the caller must supply enough
room
u........symbol to write
776 *******************************************************************************/
778 void utf_sprint_classname(char *buffer, utf *u)
780 char *endpos; /* points behind utf string */
781 char *utf_ptr; /* current position in utf text */
782 u2 pos = 0; /* position in c-string */
/* NOTE(review): the guard for this statement is not visible in this listing
   -- presumably taken when u is NULL; confirm. */
785 memcpy(buffer, "NULL", 5); /* 4 chars + terminating \0 */
792 while (utf_ptr < endpos) {
793 /* copy next unicode character */
794 u2 c = utf_nextu2(&utf_ptr);
795 if (c == '/') c = '.';
/* NOTE(review): the statement that stores c into buffer is not visible in
   this listing. */
799 /* terminate string */
804 /* utf_fprint ******************************************************************
806 Write utf symbol into file.

file...destination stream
u......symbol to write

Characters in the range 32..127 are written as they are; every other
character is written as "?".
808 *******************************************************************************/
/* NOTE(review): the setup of endpos/utf_ptr is not visible in this listing. */
810 void utf_fprint(FILE *file, utf *u)
812 char *endpos; /* points behind utf string */
813 char *utf_ptr; /* current position in utf text */
821 while (utf_ptr < endpos) {
822 /* read next unicode character */
823 u2 c = utf_nextu2(&utf_ptr);
/* NOTE(review): the upper bound 127 lets DEL (0x7F) through as "printable". */
825 if (c >= 32 && c <= 127) fprintf(file, "%c", c);
826 else fprintf(file, "?");
831 /* utf_fprint_classname ********************************************************
833 Write utf symbol into file with `/' converted to `.'.

file...destination stream
u......symbol to write

After the conversion, characters in the range 32..127 are written as they
are; every other character is written as "?".
835 *******************************************************************************/
/* NOTE(review): the setup of endpos/utf_ptr is not visible in this listing. */
837 void utf_fprint_classname(FILE *file, utf *u)
839 char *endpos; /* points behind utf string */
840 char *utf_ptr; /* current position in utf text */
848 while (utf_ptr < endpos) {
849 /* read next unicode character */
850 u2 c = utf_nextu2(&utf_ptr);
851 if (c == '/') c = '.';
853 if (c >= 32 && c <= 127) fprintf(file, "%c", c);
854 else fprintf(file, "?");
859 /* is_valid_utf ****************************************************************
861 Return true if the given string is a valid UTF-8 string.
863 utf_ptr...points to first character
864 end_pos...points after last character

The visible checks reject: a raw 0x00 byte, an invalid leading byte, a
leading byte that announces more than two continuation bytes, a sequence
that runs past end_pos, and a continuation byte that is not 10xx xxxx.
866 *******************************************************************************/
/* Indexed by the number of continuation bytes: the smallest value that
   needs that many. Values below it in such a sequence are overlong.
   Only indices 1 and 2 can be reached by the check further down, because
   longer sequences are rejected first. */
868 static unsigned long min_codepoint[6] = {0,1L<<7,1L<<11,1L<<16,1L<<21,1L<<26};
/* NOTE(review): local declarations, the loop head, the fetches of c and the
   final return are not visible in this listing. */
870 bool is_valid_utf(char *utf_ptr, char *end_pos)
877 if (end_pos < utf_ptr) return false;
878 bytes = end_pos - utf_ptr;
882 if (!c) return false; /* 0x00 is not allowed */
883 if ((c & 0x80) == 0) continue; /* ASCII */
/* len = number of continuation bytes announced by the leading byte */
885 if ((c & 0xe0) == 0xc0) len = 1; /* 110x xxxx */
886 else if ((c & 0xf0) == 0xe0) len = 2; /* 1110 xxxx */
887 else if ((c & 0xf8) == 0xf0) len = 3; /* 1111 0xxx */
888 else if ((c & 0xfc) == 0xf8) len = 4; /* 1111 10xx */
889 else if ((c & 0xfe) == 0xfc) len = 5; /* 1111 110x */
890 else return false; /* invalid leading byte */
892 if (len > 2) return false; /* Java limitation */
/* payload bits of the leading byte: mask 0x1f for len 1, 0x0f for len 2 */
894 v = (unsigned long)c & (0x3f >> len);
/* NOTE(review): this test only works if `bytes` has a signed type; its
   declaration is not visible in this listing. */
896 if ((bytes -= len) < 0) return false; /* missing bytes */
898 for (i = len; i--; ) {
900 if ((c & 0xc0) != 0x80) /* 10xx xxxx */
/* append the six payload bits of the continuation byte */
902 v = (v << 6) | (c & 0x3f);
/* NOTE(review): the condition guarding this statement is not visible in this
   listing -- presumably the decoded value 0, which is then accepted only in
   its two-byte form; confirm. */
906 if (len != 1) return false; /* Java special */
909 /* Sun Java seems to allow overlong UTF-8 encodings */
911 if (v < min_codepoint[len]) { /* overlong UTF-8 */
913 fprintf(stderr,"WARNING: Overlong UTF-8 sequence found.\n");
914 /* XXX change this to panic? */
918 /* surrogates in UTF-8 seem to be allowed in Java classfiles */
919 /* if (v >= 0xd800 && v <= 0xdfff) return false; */ /* surrogates */
921 /* even these seem to be allowed */
922 /* if (v == 0xfffe || v == 0xffff) return false; */ /* invalid codepoints */
929 /* is_valid_name ***************************************************************
931 Return true if the given string may be used as a class/field/method
932 name. (Currently this only disallows empty strings and control
characters, including a zero character encoded as 0xc0 0x80.)
935 NOTE: The string is assumed to have passed is_valid_utf!
937 utf_ptr...points to first character
938 end_pos...points after last character
940 *******************************************************************************/
942 bool is_valid_name(char *utf_ptr, char *end_pos)
944 if (end_pos <= utf_ptr) return false; /* disallow empty names */
946 while (utf_ptr < end_pos) {
947 unsigned char c = *utf_ptr++;
949 if (c < 0x20) return false; /* disallow control characters */
/* utf_ptr already points at the byte after c here.
   NOTE(review): when c is the last byte this reads the byte at end_pos;
   the action taken on a match is not visible in this listing. */
950 if (c == 0xc0 && (unsigned char) *utf_ptr == 0x80) /* disallow zero */
/* Applies is_valid_name() to the text of the symbol u, from u->text up to
   the position returned by utf_end(u) (presumably one past the last byte --
   confirm against utf_end). u is dereferenced without a NULL check. */
957 bool is_valid_name_utf(utf *u)
959 return is_valid_name(u->text,utf_end(u));
963 /* utf_show ********************************************************************
965 Writes the utf symbols in the utfhash to stdout and displays the
966 number of external hash chains grouped according to the chainlength
967 (for debugging purposes).
969 *******************************************************************************/
/* NOTE(review): the function's signature line, its braces, the declaration of
   i, the chain-walking loops and the bodies of several statements are not
   visible in this listing. */
974 #define CHAIN_LIMIT 20 /* limit for separated enumeration */
976 u4 chain_count[CHAIN_LIMIT]; /* numbers of chains */
977 u4 max_chainlength = 0; /* maximum length of the chains */
978 u4 sum_chainlength = 0; /* sum of the chainlengths */
979 u4 beyond_limit = 0; /* number of utf-symbols in chains with length>=CHAIN_LIMIT-1 */
982 printf ("UTF-HASH:\n");
984 /* show element of utf-hashtable */
985 for (i=0; i<utf_hash.size; i++) {
986 utf *u = utf_hash.ptr[i];
988 printf ("SLOT %d: ", (int) i);
1000 printf ("UTF-HASH: %d slots for %d entries\n",
1001 (int) utf_hash.size, (int) utf_hash.entries );
/* NOTE(review): the action for an empty table is not visible in this listing
   -- presumably an early return, which would also keep the divisions by
   utf_hash.entries below from dividing by zero; confirm. */
1004 if (utf_hash.entries == 0)
1007 printf("chains:\n chainlength number of chains %% of utfstrings\n");
1009 for (i=0;i<CHAIN_LIMIT;i++)
1012 /* count numbers of hashchains according to their length */
1013 for (i=0; i<utf_hash.size; i++) {
1015 utf *u = (utf*) utf_hash.ptr[i];
1016 u4 chain_length = 0;
1018 /* determine chainlength */
1024 /* update sum of all chainlengths */
1025 sum_chainlength+=chain_length;
1027 /* determine the maximum length of the chains */
1028 if (chain_length>max_chainlength)
1029 max_chainlength = chain_length;
1031 /* update number of utf-symbols in chains with length>=CHAIN_LIMIT-1 */
/* NOTE(review): the test uses >= CHAIN_LIMIT, while the comments and the
   ">=" output row speak of CHAIN_LIMIT-1. Chains of exactly CHAIN_LIMIT-1
   elements are counted in chain_count[CHAIN_LIMIT-1] but are not added to
   beyond_limit, so that row's percentage omits them -- confirm which bound
   is intended. */
1032 if (chain_length>=CHAIN_LIMIT) {
1033 beyond_limit+=chain_length;
1034 chain_length=CHAIN_LIMIT-1;
1037 /* update number of hashchains of current length */
1038 chain_count[chain_length]++;
1041 /* display results */
/* rows for lengths 1 .. CHAIN_LIMIT-2; the last bucket is printed separately.
   NOTE(review): chain_count[] and max_chainlength are u4 but are printed with
   %d. */
1042 for (i=1;i<CHAIN_LIMIT-1;i++)
1043 printf(" %2d %17d %18.2f%%\n",i,chain_count[i],(((float) chain_count[i]*i*100)/utf_hash.entries));
1045 printf(" >=%2d %17d %18.2f%%\n",CHAIN_LIMIT-1,chain_count[CHAIN_LIMIT-1],((float) beyond_limit*100)/utf_hash.entries);
1048 printf("max. chainlength:%5d\n",max_chainlength);
1050 /* avg. chainlength = sum of chainlengths / number of chains */
/* chain_count[0] is the number of empty slots, so the divisor is the number
   of non-empty chains */
1051 printf("avg. chainlength:%5.2f\n",(float) sum_chainlength / (utf_hash.size-chain_count[0]));
1056 * These are local overrides for various environment variables in Emacs.
1057 * Please do not remove this and leave it at the end of the file, where
1058 * Emacs will automagically detect them.
1059 * ---------------------------------------------------------------------
1062 * indent-tabs-mode: t