src/vm/utf8.c

   1 /* src/vm/utf8.c - utf functions
   2
   3    Copyright (C) 1996-2005, 2006 R. Grafl, A. Krall, C. Kruegel,
   4    C. Oates, R. Obermaisser, M. Platter, M. Probst, S. Ring,
   5    E. Steiner, C. Thalinger, D. Thuernbeck, P. Tomsich, C. Ullrich,
   6    J. Wenninger, Institut f. Computersprachen - TU Wien
   7
   8    This file is part of CACAO.
   9
  10    This program is free software; you can redistribute it and/or
  11    modify it under the terms of the GNU General Public License as
  12    published by the Free Software Foundation; either version 2, or (at
  13    your option) any later version.
  14
  15    This program is distributed in the hope that it will be useful, but
  16    WITHOUT ANY WARRANTY; without even the implied warranty of
  17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18    General Public License for more details.
  19
  20    You should have received a copy of the GNU General Public License
  21    along with this program; if not, write to the Free Software
  22    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  23    02110-1301, USA.
  24
  25    Contact: cacao@cacaojvm.org
  26
  27    Authors: Reinhard Grafl
  28
  29    Changes: Mark Probst
  30             Andreas Krall
  31             Christian Thalinger
  32                         Edwin Steiner
  33
  34    $Id: utf8.c 4879 2006-05-05 17:34:49Z edwin $
  35
  36 */
  37
  38
  39 #include <string.h>
  40 #include <assert.h>
  41
  42 #include "config.h"
  43 #include "vm/types.h"
  44
  45 #include "mm/memory.h"
  46
  47 #if defined(USE_THREADS)
  48 # if defined(NATIVE_THREADS)
  49 #  include "threads/native/threads.h"
  50 # else
  51 #  include "threads/green/threads.h"
  52 # endif
  53 #endif
  54
  55 #include "vm/builtin.h"
  56 #include "vm/exceptions.h"
  57 #include "vm/hashtable.h"
  58 #include "vm/options.h"
  59 #include "vm/statistics.h"
  60 #include "vm/stringlocal.h"
  61 #include "vm/utf8.h"
  62
  63 /* global variables ***********************************************************/
  64
  65 /* hashsize must be power of 2 */
  66
  67 #define HASHTABLE_UTF_SIZE    16384     /* initial size of utf-hash           */
  68
  69 hashtable hashtable_utf;                /* hashtable for utf8-symbols         */
  70
  71 #if defined(USE_THREADS)
  72 static java_objectheader *lock_hashtable_utf;
  73 #endif
  74
  75
  76 /* utf-symbols for pointer comparison of frequently used strings **************/
  77
  78 utf *utf_java_lang_Object;
  79
  80 utf *utf_java_lang_Class;
  81 utf *utf_java_lang_ClassLoader;
  82 utf *utf_java_lang_Cloneable;
  83 utf *utf_java_lang_SecurityManager;
  84 utf *utf_java_lang_String;
  85 utf *utf_java_lang_System;
  86 utf *utf_java_lang_ThreadGroup;
  87 utf *utf_java_io_Serializable;
  88
  89 utf *utf_java_lang_Throwable;
  90 utf *utf_java_lang_VMThrowable;
  91 utf *utf_java_lang_Error;
  92 utf *utf_java_lang_NoClassDefFoundError;
  93 utf *utf_java_lang_LinkageError;
  94 utf *utf_java_lang_NoSuchMethodError;
  95 utf *utf_java_lang_OutOfMemoryError;
  96
  97 utf *utf_java_lang_Exception;
  98 utf *utf_java_lang_ClassNotFoundException;
  99 utf *utf_java_lang_IllegalArgumentException;
 100 utf *utf_java_lang_IllegalMonitorStateException;
 101
 102 utf *utf_java_lang_NullPointerException;
 103
 104 utf* utf_java_lang_Void;
 105 utf* utf_java_lang_Boolean;
 106 utf* utf_java_lang_Byte;
 107 utf* utf_java_lang_Character;
 108 utf* utf_java_lang_Short;
 109 utf* utf_java_lang_Integer;
 110 utf* utf_java_lang_Long;
 111 utf* utf_java_lang_Float;
 112 utf* utf_java_lang_Double;
 113
 114 utf *utf_java_lang_StackTraceElement;
 115 utf *utf_java_lang_reflect_Constructor;
 116 utf *utf_java_lang_reflect_Field;
 117 utf *utf_java_lang_reflect_Method;
 118 utf *utf_java_util_Vector;
 119
 120 utf *utf_InnerClasses;                  /* InnerClasses                       */
 121 utf *utf_ConstantValue;                 /* ConstantValue                      */
 122 utf *utf_Code;                          /* Code                               */
 123 utf *utf_Exceptions;                    /* Exceptions                         */
 124 utf *utf_LineNumberTable;               /* LineNumberTable                    */
 125 utf *utf_SourceFile;                    /* SourceFile                         */
 126
 127 utf *utf_init;                          /* <init>                             */
 128 utf *utf_clinit;                        /* <clinit>                           */
 129 utf *utf_clone;                         /* clone                              */
 130 utf *utf_finalize;                      /* finalize                           */
 131 utf *utf_run;                           /* run                                */
 132
 133 utf *utf_add;                           /* add                                */
 134 utf *utf_remove;                        /* remove                             */
 135 utf *utf_put;                           /* put                                */
 136 utf *utf_get;                           /* get                                */
 137 utf *utf_value;                         /* value                              */
 138
 139 utf *utf_fillInStackTrace;
 140 utf *utf_getSystemClassLoader;
 141 utf *utf_loadClass;
 142 utf *utf_printStackTrace;
 143
 144 utf *utf_Z;                             /* Z                                  */
 145 utf *utf_B;                             /* B                                  */
 146 utf *utf_C;                             /* C                                  */
 147 utf *utf_S;                             /* S                                  */
 148 utf *utf_I;                             /* I                                  */
 149 utf *utf_J;                             /* J                                  */
 150 utf *utf_F;                             /* F                                  */
 151 utf *utf_D;                             /* D                                  */
 152
 153 utf *utf_void__void;                    /* ()V                                */
 154 utf *utf_boolean__void;                 /* (Z)V                               */
 155 utf *utf_byte__void;                    /* (B)V                               */
 156 utf *utf_char__void;                    /* (C)V                               */
 157 utf *utf_short__void;                   /* (S)V                               */
 158 utf *utf_int__void;                     /* (I)V                               */
 159 utf *utf_long__void;                    /* (J)V                               */
 160 utf *utf_float__void;                   /* (F)V                               */
 161 utf *utf_double__void;                  /* (D)V                               */
 162
 163 utf *utf_void__java_lang_ClassLoader;   /* ()Ljava/lang/ClassLoader;          */
 164 utf *utf_void__java_lang_Object;        /* ()Ljava/lang/Object;               */
 165 utf *utf_void__java_lang_Throwable;     /* ()Ljava/lang/Throwable;            */
 166 utf *utf_java_lang_Object__java_lang_Object;
 167 utf *utf_java_lang_String__void;        /* (Ljava/lang/String;)V              */
 168 utf *utf_java_lang_String__java_lang_Class;
 169 utf *utf_java_lang_Throwable__void;     /* (Ljava/lang/Throwable;)V           */
 170
 171 utf *utf_not_named_yet;                 /* special name for unnamed classes   */
 172
 173 utf *array_packagename;
 174
 175
 176 /* utf_init ********************************************************************
 177
 178    Initializes the utf8 subsystem.
 179
 180 *******************************************************************************/
 181
 182 bool utf8_init(void)
 183 {
 184         /* create utf8 hashtable */
 185
 186         hashtable_create(&hashtable_utf, HASHTABLE_UTF_SIZE);
 187
 188 #if defined(ENABLE_STATISTICS)
 189         if (opt_stat)
 190                 count_utf_len += sizeof(utf*) * hashtable_utf.size;
 191 #endif
 192
 193 #if defined(USE_THREADS)
 194         /* create utf hashtable lock object */
 195
 196         lock_hashtable_utf = NEW(java_objectheader);
 197
 198 # if defined(NATIVE_THREADS)
 199         initObjectLock(lock_hashtable_utf);
 200 # endif
 201 #endif
 202
 203         /* create utf-symbols for pointer comparison of frequently used strings */
 204
 205         utf_java_lang_Object           = utf_new_char("java/lang/Object");
 206
 207         utf_java_lang_Class            = utf_new_char("java/lang/Class");
 208         utf_java_lang_ClassLoader      = utf_new_char("java/lang/ClassLoader");
 209         utf_java_lang_Cloneable        = utf_new_char("java/lang/Cloneable");
 210         utf_java_lang_SecurityManager  = utf_new_char("java/lang/SecurityManager");
 211         utf_java_lang_String           = utf_new_char("java/lang/String");
 212         utf_java_lang_System           = utf_new_char("java/lang/System");
 213         utf_java_lang_ThreadGroup      = utf_new_char("java/lang/ThreadGroup");
 214         utf_java_io_Serializable       = utf_new_char("java/io/Serializable");
 215
 216         utf_java_lang_Throwable        = utf_new_char(string_java_lang_Throwable);
 217         utf_java_lang_VMThrowable      = utf_new_char(string_java_lang_VMThrowable);
 218         utf_java_lang_Error            = utf_new_char(string_java_lang_Error);
 219
 220         utf_java_lang_NoClassDefFoundError =
 221                 utf_new_char(string_java_lang_NoClassDefFoundError);
 222
 223         utf_java_lang_LinkageError =
 224                 utf_new_char(string_java_lang_LinkageError);
 225
 226         utf_java_lang_NoSuchMethodError =
 227                 utf_new_char(string_java_lang_NoSuchMethodError);
 228
 229         utf_java_lang_OutOfMemoryError =
 230                 utf_new_char(string_java_lang_OutOfMemoryError);
 231
 232         utf_java_lang_Exception        = utf_new_char(string_java_lang_Exception);
 233
 234         utf_java_lang_ClassNotFoundException =
 235                 utf_new_char(string_java_lang_ClassNotFoundException);
 236
 237         utf_java_lang_IllegalArgumentException =
 238                 utf_new_char(string_java_lang_IllegalArgumentException);
 239
 240         utf_java_lang_IllegalMonitorStateException =
 241                 utf_new_char(string_java_lang_IllegalMonitorStateException);
 242
 243         utf_java_lang_NullPointerException =
 244                 utf_new_char(string_java_lang_NullPointerException);
 245
 246         utf_java_lang_Void             = utf_new_char("java/lang/Void");
 247         utf_java_lang_Boolean          = utf_new_char("java/lang/Boolean");
 248         utf_java_lang_Byte             = utf_new_char("java/lang/Byte");
 249         utf_java_lang_Character        = utf_new_char("java/lang/Character");
 250         utf_java_lang_Short            = utf_new_char("java/lang/Short");
 251         utf_java_lang_Integer          = utf_new_char("java/lang/Integer");
 252         utf_java_lang_Long             = utf_new_char("java/lang/Long");
 253         utf_java_lang_Float            = utf_new_char("java/lang/Float");
 254         utf_java_lang_Double           = utf_new_char("java/lang/Double");
 255
 256         utf_java_lang_StackTraceElement =
 257                 utf_new_char("java/lang/StackTraceElement");
 258
 259         utf_java_lang_reflect_Constructor =
 260                 utf_new_char("java/lang/reflect/Constructor");
 261
 262         utf_java_lang_reflect_Field    = utf_new_char("java/lang/reflect/Field");
 263         utf_java_lang_reflect_Method   = utf_new_char("java/lang/reflect/Method");
 264         utf_java_util_Vector           = utf_new_char("java/util/Vector");
 265
 266         utf_InnerClasses               = utf_new_char("InnerClasses");
 267         utf_ConstantValue              = utf_new_char("ConstantValue");
 268         utf_Code                       = utf_new_char("Code");
 269         utf_Exceptions                 = utf_new_char("Exceptions");
 270         utf_LineNumberTable            = utf_new_char("LineNumberTable");
 271         utf_SourceFile                 = utf_new_char("SourceFile");
 272
 273         utf_init                           = utf_new_char("<init>");
 274         utf_clinit                         = utf_new_char("<clinit>");
 275         utf_clone                      = utf_new_char("clone");
 276         utf_finalize                   = utf_new_char("finalize");
 277         utf_run                        = utf_new_char("run");
 278
 279         utf_add                        = utf_new_char("add");
 280         utf_remove                     = utf_new_char("remove");
 281         utf_put                        = utf_new_char("put");
 282         utf_get                        = utf_new_char("get");
 283         utf_value                      = utf_new_char("value");
 284
 285         utf_printStackTrace            = utf_new_char("printStackTrace");
 286         utf_fillInStackTrace           = utf_new_char("fillInStackTrace");
 287         utf_loadClass                  = utf_new_char("loadClass");
 288         utf_getSystemClassLoader       = utf_new_char("getSystemClassLoader");
 289
 290         utf_Z                          = utf_new_char("Z");
 291         utf_B                          = utf_new_char("B");
 292         utf_C                          = utf_new_char("C");
 293         utf_S                          = utf_new_char("S");
 294         utf_I                          = utf_new_char("I");
 295         utf_J                          = utf_new_char("J");
 296         utf_F                          = utf_new_char("F");
 297         utf_D                          = utf_new_char("D");
 298
 299         utf_void__void                 = utf_new_char("()V");
 300         utf_boolean__void              = utf_new_char("(Z)V");
 301         utf_byte__void                 = utf_new_char("(B)V");
 302         utf_char__void                 = utf_new_char("(C)V");
 303         utf_short__void                = utf_new_char("(S)V");
 304         utf_int__void                  = utf_new_char("(I)V");
 305         utf_long__void                 = utf_new_char("(J)V");
 306         utf_float__void                = utf_new_char("(F)V");
 307         utf_double__void               = utf_new_char("(D)V");
 308         utf_void__java_lang_Object     = utf_new_char("()Ljava/lang/Object;");
 309         utf_void__java_lang_Throwable  = utf_new_char("()Ljava/lang/Throwable;");
 310
 311         utf_void__java_lang_ClassLoader =
 312                 utf_new_char("()Ljava/lang/ClassLoader;");
 313
 314         utf_java_lang_Object__java_lang_Object =
 315                 utf_new_char("(Ljava/lang/Object;)Ljava/lang/Object;");
 316
 317         utf_java_lang_String__void     = utf_new_char("(Ljava/lang/String;)V");
 318
 319         utf_java_lang_String__java_lang_Class =
 320                 utf_new_char("(Ljava/lang/String;)Ljava/lang/Class;");
 321
 322         utf_java_lang_Throwable__void  = utf_new_char("(Ljava/lang/Throwable;)V");
 323
 324         utf_not_named_yet              = utf_new_char("\t<not_named_yet>");
 325
 326         array_packagename              = utf_new_char("\t<the array package>");
 327
 328         /* everything's ok */
 329
 330         return true;
 331 }
 332
 333
 334 /* utf_hashkey *****************************************************************
 335
 336    The hashkey is computed from the utf-text by using up to 8
 337    characters.  For utf-symbols longer than 15 characters 3 characters
 338    are taken from the beginning and the end, 2 characters are taken
 339    from the middle.
 340
 341 *******************************************************************************/
 342
 343 #define nbs(val) ((u4) *(++text) << val) /* get next byte, left shift by val  */
 344 #define fbs(val) ((u4) *(  text) << val) /* get first byte, left shift by val */
 345
 346 u4 utf_hashkey(const char *text, u4 length)
 347 {
 348         const char *start_pos = text;       /* pointer to utf text                */
 349         u4 a;
 350
 351         switch (length) {
 352         case 0: /* empty string */
 353                 return 0;
 354
 355         case 1: return fbs(0);
 356         case 2: return fbs(0) ^ nbs(3);
 357         case 3: return fbs(0) ^ nbs(3) ^ nbs(5);
 358         case 4: return fbs(0) ^ nbs(2) ^ nbs(4) ^ nbs(6);
 359         case 5: return fbs(0) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(6);
 360         case 6: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(5) ^ nbs(6);
 361         case 7: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(5) ^ nbs(6);
 362         case 8: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(5) ^ nbs(6) ^ nbs(7);
 363
 364         case 9:
 365                 a = fbs(0);
 366                 a ^= nbs(1);
 367                 a ^= nbs(2);
 368                 text++;
 369                 return a ^ nbs(4) ^ nbs(5) ^ nbs(6) ^ nbs(7) ^ nbs(8);
 370
 371         case 10:
 372                 a = fbs(0);
 373                 text++;
 374                 a ^= nbs(2);
 375                 a ^= nbs(3);
 376                 a ^= nbs(4);
 377                 text++;
 378                 return a ^ nbs(6) ^ nbs(7) ^ nbs(8) ^ nbs(9);
 379
 380         case 11:
 381                 a = fbs(0);
 382                 text++;
 383                 a ^= nbs(2);
 384                 a ^= nbs(3);
 385                 a ^= nbs(4);
 386                 text++;
 387                 return a ^ nbs(6) ^ nbs(7) ^ nbs(8) ^ nbs(9) ^ nbs(10);
 388
 389         case 12:
 390                 a = fbs(0);
 391                 text += 2;
 392                 a ^= nbs(2);
 393                 a ^= nbs(3);
 394                 text++;
 395                 a ^= nbs(5);
 396                 a ^= nbs(6);
 397                 a ^= nbs(7);
 398                 text++;
 399                 return a ^ nbs(9) ^ nbs(10);
 400
 401         case 13:
 402                 a = fbs(0);
 403                 a ^= nbs(1);
 404                 text++;
 405                 a ^= nbs(3);
 406                 a ^= nbs(4);
 407                 text += 2;
 408                 a ^= nbs(7);
 409                 a ^= nbs(8);
 410                 text += 2;
 411                 return a ^ nbs(9) ^ nbs(10);
 412
 413         case 14:
 414                 a = fbs(0);
 415                 text += 2;
 416                 a ^= nbs(3);
 417                 a ^= nbs(4);
 418                 text += 2;
 419                 a ^= nbs(7);
 420                 a ^= nbs(8);
 421                 text += 2;
 422                 return a ^ nbs(9) ^ nbs(10) ^ nbs(11);
 423
 424         case 15:
 425                 a = fbs(0);
 426                 text += 2;
 427                 a ^= nbs(3);
 428                 a ^= nbs(4);
 429                 text += 2;
 430                 a ^= nbs(7);
 431                 a ^= nbs(8);
 432                 text += 2;
 433                 return a ^ nbs(9) ^ nbs(10) ^ nbs(11);
 434
 435         default:  /* 3 characters from beginning */
 436                 a = fbs(0);
 437                 text += 2;
 438                 a ^= nbs(3);
 439                 a ^= nbs(4);
 440
 441                 /* 2 characters from middle */
 442                 text = start_pos + (length / 2);
 443                 a ^= fbs(5);
 444                 text += 2;
 445                 a ^= nbs(6);
 446
 447                 /* 3 characters from end */
 448                 text = start_pos + length - 4;
 449
 450                 a ^= fbs(7);
 451                 text++;
 452
 453                 return a ^ nbs(10) ^ nbs(11);
 454     }
 455 }
 456
 457 /* utf_full_hashkey ************************************************************
 458
 459    This function computes a hash value using all bytes in the string.
 460
 461    The algorithm is the "One-at-a-time" algorithm as published
 462    by Bob Jenkins on http://burtleburtle.net/bob/hash/doobs.html.
 463
 464 *******************************************************************************/
 465
 466 u4 utf_full_hashkey(const char *text, u4 length)
 467 {
 468         register const unsigned char *p = (const unsigned char *) text;
 469         register u4 hash;
 470         register u4 i;
 471
 472         hash = 0;
 473         for (i=length; i--;)
 474         {
 475             hash += *p++;
 476             hash += (hash << 10);
 477             hash ^= (hash >> 6);
 478         }
 479         hash += (hash << 3);
 480         hash ^= (hash >> 11);
 481         hash += (hash << 15);
 482
 483         return hash;
 484 }
 485
 486 /* unicode_hashkey *************************************************************
 487
 488    Compute the hashkey of a unicode string.
 489
 490 *******************************************************************************/
 491
 492 u4 unicode_hashkey(u2 *text, u2 len)
 493 {
 494         return utf_hashkey((char *) text, len);
 495 }
 496
 497
 498 /* utf_new *********************************************************************
 499
 500    Creates a new utf-symbol, the text of the symbol is passed as a
 501    u1-array. The function searches the utf-hashtable for a utf-symbol
 502    with this text. On success the element returned, otherwise a new
 503    hashtable element is created.
 504
 505    If the number of entries in the hashtable exceeds twice the size of
 506    the hashtable slots a reorganization of the hashtable is done and
 507    the utf symbols are copied to a new hashtable with doubled size.
 508
 509 *******************************************************************************/
 510
 511 utf *utf_new(const char *text, u2 length)
 512 {
 513         u4 key;                             /* hashkey computed from utf-text     */
 514         u4 slot;                            /* slot in hashtable                  */
 515         utf *u;                             /* hashtable element                  */
 516         u2 i;
 517
 518 #if defined(USE_THREADS)
 519         builtin_monitorenter(lock_hashtable_utf);
 520 #endif
 521
 522 #if defined(ENABLE_STATISTICS)
 523         if (opt_stat)
 524                 count_utf_new++;
 525 #endif
 526
 527         key  = utf_hashkey(text, length);
 528         slot = key & (hashtable_utf.size - 1);
 529         u    = hashtable_utf.ptr[slot];
 530
 531         /* search external hash chain for utf-symbol */
 532
 533         while (u) {
 534                 if (u->blength == length) {
 535                         /* compare text of hashtable elements */
 536
 537                         for (i = 0; i < length; i++)
 538                                 if (text[i] != u->text[i])
 539                                         goto nomatch;
 540
 541 #if defined(ENABLE_STATISTICS)
 542                         if (opt_stat)
 543                                 count_utf_new_found++;
 544 #endif
 545
 546                         /* symbol found in hashtable */
 547
 548 #if defined(USE_THREADS)
 549                         builtin_monitorexit(lock_hashtable_utf);
 550 #endif
 551
 552                         return u;
 553                 }
 554
 555         nomatch:
 556                 u = u->hashlink; /* next element in external chain */
 557         }
 558
 559 #if defined(ENABLE_STATISTICS)
 560         if (opt_stat)
 561                 count_utf_len += sizeof(utf) + length + 1;
 562 #endif
 563
 564         /* location in hashtable found, create new utf element */
 565         u = NEW(utf);
 566         u->blength  = length;               /* length in bytes of utfstring       */
 567         u->hashlink = hashtable_utf.ptr[slot]; /* link in external hashchain      */
 568         u->text     = mem_alloc(length + 1);/* allocate memory for utf-text       */
 569
 570         memcpy(u->text, text, length);      /* copy utf-text                      */
 571         u->text[length] = '\0';
 572
 573         hashtable_utf.ptr[slot] = u;        /* insert symbol into table           */
 574         hashtable_utf.entries++;            /* update number of entries           */
 575
 576         if (hashtable_utf.entries > (hashtable_utf.size * 2)) {
 577
 578         /* reorganization of hashtable, average length of the external
 579            chains is approx. 2 */
 580
 581                 hashtable  newhash;                              /* the new hashtable */
 582                 u4         i;
 583                 utf       *u;
 584                 utf       *nextu;
 585                 u4         slot;
 586
 587                 /* create new hashtable, double the size */
 588
 589                 hashtable_create(&newhash, hashtable_utf.size * 2);
 590                 newhash.entries = hashtable_utf.entries;
 591
 592 #if defined(ENABLE_STATISTICS)
 593                 if (opt_stat)
 594                         count_utf_len += sizeof(utf*) * hashtable_utf.size;
 595 #endif
 596
 597                 /* transfer elements to new hashtable */
 598
 599                 for (i = 0; i < hashtable_utf.size; i++) {
 600                         u = hashtable_utf.ptr[i];
 601
 602                         while (u) {
 603                                 nextu = u->hashlink;
 604                                 slot  = utf_hashkey(u->text, u->blength) & (newhash.size - 1);
 605
 606                                 u->hashlink = (utf *) newhash.ptr[slot];
 607                                 newhash.ptr[slot] = u;
 608
 609                                 /* follow link in external hash chain */
 610
 611                                 u = nextu;
 612                         }
 613                 }
 614
 615                 /* dispose old table */
 616
 617                 MFREE(hashtable_utf.ptr, void*, hashtable_utf.size);
 618                 hashtable_utf = newhash;
 619         }
 620
 621 #if defined(USE_THREADS)
 622         builtin_monitorexit(lock_hashtable_utf);
 623 #endif
 624
 625         return u;
 626 }
 627
 628
 629 /* utf_new_u2 ******************************************************************
 630
 631    Make utf symbol from u2 array, if isclassname is true '.' is
 632    replaced by '/'.
 633
 634 *******************************************************************************/
 635
 636 utf *utf_new_u2(u2 *unicode_pos, u4 unicode_length, bool isclassname)
 637 {
 638         char *buffer;                   /* memory buffer for  unicode characters  */
 639         char *pos;                      /* pointer to current position in buffer  */
 640         u4 left;                        /* unicode characters left                */
 641         u4 buflength;                   /* utf length in bytes of the u2 array    */
 642         utf *result;                    /* resulting utf-string                   */
 643         int i;
 644
 645         /* determine utf length in bytes and allocate memory */
 646
 647         buflength = u2_utflength(unicode_pos, unicode_length);
 648         buffer    = MNEW(char, buflength);
 649
 650         left = buflength;
 651         pos  = buffer;
 652
 653         for (i = 0; i++ < unicode_length; unicode_pos++) {
 654                 /* next unicode character */
 655                 u2 c = *unicode_pos;
 656
 657                 if ((c != 0) && (c < 0x80)) {
 658                         /* 1 character */
 659                         left--;
 660                 if ((int) left < 0) break;
 661                         /* convert classname */
 662                         if (isclassname && c == '.')
 663                                 *pos++ = '/';
 664                         else
 665                                 *pos++ = (char) c;
 666
 667                 } else if (c < 0x800) {
 668                         /* 2 characters */
 669                 unsigned char high = c >> 6;
 670                 unsigned char low  = c & 0x3F;
 671                         left = left - 2;
 672                 if ((int) left < 0) break;
 673                 *pos++ = high | 0xC0;
 674                 *pos++ = low  | 0x80;
 675
 676                 } else {
 677                 /* 3 characters */
 678                 char low  = c & 0x3f;
 679                 char mid  = (c >> 6) & 0x3F;
 680                 char high = c >> 12;
 681                         left = left - 3;
 682                 if ((int) left < 0) break;
 683                 *pos++ = high | 0xE0;
 684                 *pos++ = mid  | 0x80;
 685                 *pos++ = low  | 0x80;
 686                 }
 687         }
 688
 689         /* insert utf-string into symbol-table */
 690         result = utf_new(buffer,buflength);
 691
 692         MFREE(buffer, char, buflength);
 693
 694         return result;
 695 }
 696
 697
 698 /* utf_new_char ****************************************************************
 699
 700    Creates a new utf symbol, the text for this symbol is passed as a
 701    c-string ( = char* ).
 702
 703 *******************************************************************************/
 704
 705 utf *utf_new_char(const char *text)
 706 {
 707         return utf_new(text, strlen(text));
 708 }
 709
 710
 711 /* utf_new_char_classname ******************************************************
 712
 713    Creates a new utf symbol, the text for this symbol is passed as a
 714    c-string ( = char* ) "." characters are going to be replaced by
 715    "/". Since the above function is used often, this is a separte
 716    function, instead of an if.
 717
 718 *******************************************************************************/
 719
 720 utf *utf_new_char_classname(const char *text)
 721 {
 722         if (strchr(text, '.')) {
 723                 char *txt = strdup(text);
 724                 char *end = txt + strlen(txt);
 725                 char *c;
 726                 utf *tmpRes;
 727
 728                 for (c = txt; c < end; c++)
 729                         if (*c == '.') *c = '/';
 730
 731                 tmpRes = utf_new(txt, strlen(txt));
 732                 FREE(txt, 0);
 733
 734                 return tmpRes;
 735
 736         } else
 737                 return utf_new(text, strlen(text));
 738 }
 739
 740
 741 /* utf_nextu2 ******************************************************************
 742
 743    Read the next unicode character from the utf string and increment
 744    the utf-string pointer accordingly.
 745
 746 *******************************************************************************/
 747
 748 u2 utf_nextu2(char **utf_ptr)
 749 {
 750     /* uncompressed unicode character */
 751     u2 unicode_char = 0;
 752     /* current position in utf text */
 753     unsigned char *utf = (unsigned char *) (*utf_ptr);
 754     /* bytes representing the unicode character */
 755     unsigned char ch1, ch2, ch3;
 756     /* number of bytes used to represent the unicode character */
 757     int len = 0;
 758
 759     switch ((ch1 = utf[0]) >> 4) {
 760         default: /* 1 byte */
 761                 (*utf_ptr)++;
 762                 return (u2) ch1;
 763         case 0xC:
 764         case 0xD: /* 2 bytes */
 765                 if (((ch2 = utf[1]) & 0xC0) == 0x80) {
 766                         unsigned char high = ch1 & 0x1F;
 767                         unsigned char low  = ch2 & 0x3F;
 768                         unicode_char = (high << 6) + low;
 769                         len = 2;
 770                 }
 771                 break;
 772
 773         case 0xE: /* 2 or 3 bytes */
 774                 if (((ch2 = utf[1]) & 0xC0) == 0x80) {
 775                         if (((ch3 = utf[2]) & 0xC0) == 0x80) {
 776                                 unsigned char low  = ch3 & 0x3f;
 777                                 unsigned char mid  = ch2 & 0x3f;
 778                                 unsigned char high = ch1 & 0x0f;
 779                                 unicode_char = (((high << 6) + mid) << 6) + low;
 780                                 len = 3;
 781                         } else
 782                                 len = 2;
 783                 }
 784                 break;
 785     }
 786
 787     /* update position in utf-text */
 788     *utf_ptr = (char *) (utf + len);
 789
 790     return unicode_char;
 791 }
 792
 793
 794 /* utf_bytes *******************************************************************
 795
 796    Determine number of bytes (aka. octets) in the utf string.
 797
 798    IN:
 799       u............utf string
 800
 801    OUT:
 802       The number of octets of this utf string.
 803           There is _no_ terminating zero included in this count.
 804
 805 *******************************************************************************/
 806
 807 u4 utf_bytes(utf *u)
 808 {
 809         return u->blength;
 810 }
 811
 812 /* utf_get_number_of_u2s_for_buffer ********************************************
 813
 814    Determine number of UTF-16 u2s in the given UTF-8 buffer
 815
 816    CAUTION: Use this function *only* when you want to convert an UTF-8 buffer
 817    to an array of u2s (UTF-16) and want to know how many of them you will get.
 818    All other uses of this function are probably wrong.
 819
 820    IN:
 821       buffer........points to first char in buffer
 822           blength.......number of _bytes_ in the buffer
 823
 824    OUT:
 825       the number of u2s needed to hold this string in UTF-16 encoding.
 826           There is _no_ terminating zero included in this count.
 827
 828    NOTE: Unlike utf_get_number_of_u2s, this function never throws an
 829    exception.
 830
 831 *******************************************************************************/
 832
 833 u4 utf_get_number_of_u2s_for_buffer(const char *buffer, u4 blength)
 834 {
 835         const char *endpos;                 /* points behind utf string           */
 836         const char *utf_ptr;                /* current position in utf text       */
 837         u4 len = 0;                         /* number of unicode characters       */
 838
 839         utf_ptr = buffer;
 840         endpos = utf_ptr + blength;
 841
 842         while (utf_ptr < endpos) {
 843                 len++;
 844                 /* next unicode character */
 845                 utf_nextu2((char **)&utf_ptr);
 846         }
 847
 848         assert(utf_ptr == endpos);
 849
 850         return len;
 851 }
 852
 853
 854 /* utf_get_number_of_u2s *******************************************************
 855
 856    Determine number of UTF-16 u2s in the utf string.
 857
 858    CAUTION: Use this function *only* when you want to convert a utf string
 859    to an array of u2s and want to know how many of them you will get.
 860    All other uses of this function are probably wrong.
 861
 862    IN:
 863       u............utf string
 864
 865    OUT:
 866       the number of u2s needed to hold this string in UTF-16 encoding.
 867           There is _no_ terminating zero included in this count.
 868           XXX 0 if a NullPointerException has been thrown (see below)
 869
 870 *******************************************************************************/
 871
 872 u4 utf_get_number_of_u2s(utf *u)
 873 {
 874         char *endpos;                       /* points behind utf string           */
 875         char *utf_ptr;                      /* current position in utf text       */
 876         u4 len = 0;                         /* number of unicode characters       */
 877
 878         /* XXX this is probably not checked by most callers! Review this after */
 879         /* the invalid uses of this function have been eliminated */
 880         if (!u) {
 881                 exceptions_throw_nullpointerexception();
 882                 return 0;
 883         }
 884
 885         endpos = UTF_END(u);
 886         utf_ptr = u->text;
 887
 888         while (utf_ptr < endpos) {
 889                 len++;
 890                 /* next unicode character */
 891                 utf_nextu2(&utf_ptr);
 892         }
 893
 894         if (utf_ptr != endpos)
 895                 /* string ended abruptly */
 896                 throw_cacao_exception_exit(string_java_lang_InternalError,
 897                                                                    "Illegal utf8 string");
 898
 899         return len;
 900 }
 901
 902
 903 /* u2_utflength ****************************************************************
 904
 905    Returns the utf length in bytes of a u2 array.
 906
 907 *******************************************************************************/
 908
 909 u4 u2_utflength(u2 *text, u4 u2_length)
 910 {
 911         u4 result_len = 0;                  /* utf length in bytes                */
 912         u2 ch;                              /* current unicode character          */
 913         u4 len;
 914
 915         for (len = 0; len < u2_length; len++) {
 916                 /* next unicode character */
 917                 ch = *text++;
 918
 919                 /* determine bytes required to store unicode character as utf */
 920                 if (ch && (ch < 0x80))
 921                         result_len++;
 922                 else if (ch < 0x800)
 923                         result_len += 2;
 924                 else
 925                         result_len += 3;
 926         }
 927
 928     return result_len;
 929 }
 930
 931
 932 /* utf_copy ********************************************************************
 933
 934    Copy the given utf string byte-for-byte to a buffer.
 935
 936    IN:
 937       buffer.......the buffer
 938           u............the utf string
 939
 940 *******************************************************************************/
 941
 942 void utf_copy(char *buffer, utf *u)
 943 {
 944         /* our utf strings are zero-terminated (done by utf_new) */
 945         MCOPY(buffer, u->text, char, u->blength + 1);
 946 }
 947
 948
 949 /* utf_cat *********************************************************************
 950
 951    Append the given utf string byte-for-byte to a buffer.
 952
 953    IN:
 954       buffer.......the buffer
 955           u............the utf string
 956
 957 *******************************************************************************/
 958
 959 void utf_cat(char *buffer, utf *u)
 960 {
 961         /* our utf strings are zero-terminated (done by utf_new) */
 962         MCOPY(buffer + strlen(buffer), u->text, char, u->blength + 1);
 963 }
 964
 965
 966 /* utf_copy_classname **********************************************************
 967
 968    Copy the given utf classname byte-for-byte to a buffer.
 969    '/' is replaced by '.'
 970
 971    IN:
 972       buffer.......the buffer
 973           u............the utf string
 974
 975 *******************************************************************************/
 976
 977 void utf_copy_classname(char *buffer, utf *u)
 978 {
 979         char *bufptr;
 980         char *srcptr;
 981         char *endptr;
 982         char ch;
 983
 984         bufptr = buffer;
 985         srcptr = u->text;
 986         endptr = UTF_END(u) + 1; /* utfs are zero-terminared by utf_new */
 987
 988         while (srcptr != endptr) {
 989                 ch = *srcptr++;
 990                 if (ch == '/')
 991                         ch = '.';
 992                 *bufptr++ = ch;
 993         }
 994 }
 995
 996
 997 /* utf_cat *********************************************************************
 998
 999    Append the given utf classname byte-for-byte to a buffer.
1000    '/' is replaced by '.'
1001
1002    IN:
1003       buffer.......the buffer
1004           u............the utf string
1005
1006 *******************************************************************************/
1007
1008 void utf_cat_classname(char *buffer, utf *u)
1009 {
1010         utf_copy_classname(buffer + strlen(buffer), u);
1011 }
1012
1013 /* utf_display_printable_ascii *************************************************
1014
1015    Write utf symbol to stdout (for debugging purposes).
1016    Non-printable and non-ASCII characters are printed as '?'.
1017
1018 *******************************************************************************/
1019
1020 void utf_display_printable_ascii(utf *u)
1021 {
1022         char *endpos;                       /* points behind utf string           */
1023         char *utf_ptr;                      /* current position in utf text       */
1024
1025         if (u == NULL) {
1026                 printf("NULL");
1027                 fflush(stdout);
1028                 return;
1029         }
1030
1031         endpos = UTF_END(u);
1032         utf_ptr = u->text;
1033
1034         while (utf_ptr < endpos) {
1035                 /* read next unicode character */
1036
1037                 u2 c = utf_nextu2(&utf_ptr);
1038
1039                 if ((c >= 32) && (c <= 127))
1040                         printf("%c", c);
1041                 else
1042                         printf("?");
1043         }
1044
1045         fflush(stdout);
1046 }
1047
1048
1049 /* utf_display_printable_ascii_classname ***************************************
1050
1051    Write utf symbol to stdout with `/' converted to `.' (for debugging
1052    purposes).
1053    Non-printable and non-ASCII characters are printed as '?'.
1054
1055 *******************************************************************************/
1056
1057 void utf_display_printable_ascii_classname(utf *u)
1058 {
1059         char *endpos;                       /* points behind utf string           */
1060         char *utf_ptr;                      /* current position in utf text       */
1061
1062         if (u == NULL) {
1063                 printf("NULL");
1064                 fflush(stdout);
1065                 return;
1066         }
1067
1068         endpos = UTF_END(u);
1069         utf_ptr = u->text;
1070
1071         while (utf_ptr < endpos) {
1072                 /* read next unicode character */
1073
1074                 u2 c = utf_nextu2(&utf_ptr);
1075
1076                 if (c == '/')
1077                         c = '.';
1078
1079                 if ((c >= 32) && (c <= 127))
1080                         printf("%c", c);
1081                 else
1082                         printf("?");
1083         }
1084
1085         fflush(stdout);
1086 }
1087
1088
1089 /* utf_sprint_convert_to_latin1 ************************************************
1090
1091    Write utf symbol into c-string (for debugging purposes).
1092    Characters are converted to 8-bit Latin-1, non-Latin-1 characters yield
1093    invalid results.
1094
1095 *******************************************************************************/
1096
1097 void utf_sprint_convert_to_latin1(char *buffer, utf *u)
1098 {
1099         char *endpos;                       /* points behind utf string           */
1100         char *utf_ptr;                      /* current position in utf text       */
1101         u2 pos = 0;                         /* position in c-string               */
1102
1103         if (!u) {
1104                 strcpy(buffer, "NULL");
1105                 return;
1106         }
1107
1108         endpos = UTF_END(u);
1109         utf_ptr = u->text;
1110
1111         while (utf_ptr < endpos)
1112                 /* copy next unicode character */
1113                 buffer[pos++] = utf_nextu2(&utf_ptr);
1114
1115         /* terminate string */
1116         buffer[pos] = '\0';
1117 }
1118
1119
1120 /* utf_sprint_convert_to_latin1_classname **************************************
1121
1122    Write utf symbol into c-string with `/' converted to `.' (for debugging
1123    purposes).
1124    Characters are converted to 8-bit Latin-1, non-Latin-1 characters yield
1125    invalid results.
1126
1127 *******************************************************************************/
1128
1129 void utf_sprint_convert_to_latin1_classname(char *buffer, utf *u)
1130 {
1131         char *endpos;                       /* points behind utf string           */
1132         char *utf_ptr;                      /* current position in utf text       */
1133         u2 pos = 0;                         /* position in c-string               */
1134
1135         if (!u) {
1136                 strcpy(buffer, "NULL");
1137                 return;
1138         }
1139
1140         endpos = UTF_END(u);
1141         utf_ptr = u->text;
1142
1143         while (utf_ptr < endpos) {
1144                 /* copy next unicode character */
1145                 u2 c = utf_nextu2(&utf_ptr);
1146                 if (c == '/') c = '.';
1147                 buffer[pos++] = c;
1148         }
1149
1150         /* terminate string */
1151         buffer[pos] = '\0';
1152 }
1153
1154
1155 /* utf_strcat_convert_to_latin1 ************************************************
1156
1157    Like libc strcat, but uses an utf8 string.
1158    Characters are converted to 8-bit Latin-1, non-Latin-1 characters yield
1159    invalid results.
1160
1161 *******************************************************************************/
1162
1163 void utf_strcat_convert_to_latin1(char *buffer, utf *u)
1164 {
1165         utf_sprint_convert_to_latin1(buffer + strlen(buffer), u);
1166 }
1167
1168
1169 /* utf_strcat_convert_to_latin1_classname **************************************
1170
1171    Like libc strcat, but uses an utf8 string.
1172    Characters are converted to 8-bit Latin-1, non-Latin-1 characters yield
1173    invalid results.
1174
1175 *******************************************************************************/
1176
1177 void utf_strcat_convert_to_latin1_classname(char *buffer, utf *u)
1178 {
1179         utf_sprint_convert_to_latin1_classname(buffer + strlen(buffer), u);
1180 }
1181
1182
1183 /* utf_fprint_printable_ascii **************************************************
1184
1185    Write utf symbol into file.
1186    Non-printable and non-ASCII characters are printed as '?'.
1187
1188 *******************************************************************************/
1189
1190 void utf_fprint_printable_ascii(FILE *file, utf *u)
1191 {
1192         char *endpos;                       /* points behind utf string           */
1193         char *utf_ptr;                      /* current position in utf text       */
1194
1195         if (!u)
1196                 return;
1197
1198         endpos = UTF_END(u);
1199         utf_ptr = u->text;
1200
1201         while (utf_ptr < endpos) {
1202                 /* read next unicode character */
1203                 u2 c = utf_nextu2(&utf_ptr);
1204
1205                 if (c >= 32 && c <= 127) fprintf(file, "%c", c);
1206                 else fprintf(file, "?");
1207         }
1208 }
1209
1210
1211 /* utf_fprint_printable_ascii_classname ****************************************
1212
1213    Write utf symbol into file with `/' converted to `.'.
1214    Non-printable and non-ASCII characters are printed as '?'.
1215
1216 *******************************************************************************/
1217
1218 void utf_fprint_printable_ascii_classname(FILE *file, utf *u)
1219 {
1220         char *endpos;                       /* points behind utf string           */
1221         char *utf_ptr;                      /* current position in utf text       */
1222
1223     if (!u)
1224                 return;
1225
1226         endpos = UTF_END(u);
1227         utf_ptr = u->text;
1228
1229         while (utf_ptr < endpos) {
1230                 /* read next unicode character */
1231                 u2 c = utf_nextu2(&utf_ptr);
1232                 if (c == '/') c = '.';
1233
1234                 if (c >= 32 && c <= 127) fprintf(file, "%c", c);
1235                 else fprintf(file, "?");
1236         }
1237 }
1238
1239
/* is_valid_utf ****************************************************************

   Return true if the given byte range is a valid UTF-8 string in the
   sense accepted here:

     - a raw 0x00 byte is rejected; zero has to be written as the
       two-byte sequence 0xc0 0x80
     - only 1-, 2- and 3-byte sequences are accepted; every other lead
       byte (including a stray continuation byte) is rejected
     - every lead byte must be followed, inside the range, by the
       required number of continuation bytes (10xx xxxx)
     - a 3-byte sequence decoding to zero is rejected

   Deliberately _not_ rejected: overlong encodings of non-zero values,
   surrogates (0xd800..0xdfff) and the codepoints 0xfffe / 0xffff.

   utf_ptr...points to first character
   end_pos...points after last character

   An empty range is valid; end_pos < utf_ptr is not.

*******************************************************************************/

bool is_valid_utf(char *utf_ptr, char *end_pos)
{
	int remaining;                      /* bytes not yet consumed             */

	if (end_pos < utf_ptr)
		return false;

	remaining = end_pos - utf_ptr;

	while (remaining != 0) {
		unsigned char lead = (unsigned char) *utf_ptr++;
		unsigned long value;            /* decoded value of the sequence      */
		int           extra;            /* continuation bytes that must follow */
		int           k;

		remaining--;

		if (lead == 0)
			return false;               /* 0x00 is not allowed                */

		if (lead < 0x80)
			continue;                   /* ASCII                              */

		if ((lead & 0xe0) == 0xc0)
			extra = 1;                  /* 110x xxxx                          */
		else if ((lead & 0xf0) == 0xe0)
			extra = 2;                  /* 1110 xxxx                          */
		else
			return false;               /* longer forms and invalid leads     */

		/* payload bits of the lead byte: 5 for two bytes, 4 for three */
		value = (unsigned long) (lead & (0x3f >> extra));

		remaining -= extra;

		if (remaining < 0)
			return false;               /* missing bytes                      */

		for (k = 0; k < extra; k++) {
			unsigned char cont = (unsigned char) *utf_ptr++;

			if ((cont & 0xc0) != 0x80)  /* 10xx xxxx                          */
				return false;

			value = (value << 6) | (unsigned long) (cont & 0x3f);
		}

		/* Java special: zero may only appear in its two-byte form */
		if ((value == 0) && (extra != 1))
			return false;
	}

	return true;
}
1305
1306
/* is_valid_name ***************************************************************

   Return true if the given string may be used as a class/field/method
   name. (Currently this only disallows empty strings, control
   characters below 0x20 and the two-byte encoding of zero, 0xc0 0x80.)

   NOTE: The string is assumed to have passed is_valid_utf!

   utf_ptr...points to first character
   end_pos...points after last character

   Only bytes inside [utf_ptr, end_pos) are examined. The byte that
   follows a 0xc0 is inspected only if it still lies inside the range.

*******************************************************************************/

bool is_valid_name(char *utf_ptr, char *end_pos)
{
	if (end_pos <= utf_ptr)
		return false;                   /* disallow empty names               */

	while (utf_ptr < end_pos) {
		unsigned char c = (unsigned char) *utf_ptr++;

		if (c < 0x20)
			return false;               /* disallow control characters        */

		/* disallow zero (0xc0 0x80) without reading at or beyond end_pos */
		if ((c == 0xc0) && (utf_ptr < end_pos)
			&& ((unsigned char) *utf_ptr == 0x80))
			return false;
	}

	return true;
}
1334
1335 bool is_valid_name_utf(utf *u)
1336 {
1337         return is_valid_name(u->text, UTF_END(u));
1338 }
1339
1340
1341 /* utf_show ********************************************************************
1342
1343    Writes the utf symbols in the utfhash to stdout and displays the
1344    number of external hash chains grouped according to the chainlength
1345    (for debugging purposes).
1346
1347 *******************************************************************************/
1348
1349 #if !defined(NDEBUG)
1350 void utf_show(void)
1351 {
1352
1353 #define CHAIN_LIMIT 20               /* limit for seperated enumeration */
1354
1355         u4 chain_count[CHAIN_LIMIT]; /* numbers of chains */
1356         u4 max_chainlength = 0;      /* maximum length of the chains */
1357         u4 sum_chainlength = 0;      /* sum of the chainlengths */
1358         u4 beyond_limit = 0;         /* number of utf-symbols in chains with length>=CHAIN_LIMIT-1 */
1359         u4 i;
1360
1361         printf("UTF-HASH:\n");
1362
1363         /* show element of utf-hashtable */
1364
1365         for (i = 0; i < hashtable_utf.size; i++) {
1366                 utf *u = hashtable_utf.ptr[i];
1367
1368                 if (u) {
1369                         printf("SLOT %d: ", (int) i);
1370
1371                         while (u) {
1372                                 printf("'");
1373                                 utf_display_printable_ascii(u);
1374                                 printf("' ");
1375                                 u = u->hashlink;
1376                         }
1377                         printf("\n");
1378                 }
1379         }
1380
1381         printf("UTF-HASH: %d slots for %d entries\n",
1382                    (int) hashtable_utf.size, (int) hashtable_utf.entries );
1383
1384         if (hashtable_utf.entries == 0)
1385                 return;
1386
1387         printf("chains:\n  chainlength    number of chains    %% of utfstrings\n");
1388
1389         for (i=0;i<CHAIN_LIMIT;i++)
1390                 chain_count[i]=0;
1391
1392         /* count numbers of hashchains according to their length */
1393         for (i=0; i<hashtable_utf.size; i++) {
1394
1395                 utf *u = (utf*) hashtable_utf.ptr[i];
1396                 u4 chain_length = 0;
1397
1398                 /* determine chainlength */
1399                 while (u) {
1400                         u = u->hashlink;
1401                         chain_length++;
1402                 }
1403
1404                 /* update sum of all chainlengths */
1405                 sum_chainlength+=chain_length;
1406
1407                 /* determine the maximum length of the chains */
1408                 if (chain_length>max_chainlength)
1409                         max_chainlength = chain_length;
1410
1411                 /* update number of utf-symbols in chains with length>=CHAIN_LIMIT-1 */
1412                 if (chain_length>=CHAIN_LIMIT) {
1413                         beyond_limit+=chain_length;
1414                         chain_length=CHAIN_LIMIT-1;
1415                 }
1416
1417                 /* update number of hashchains of current length */
1418                 chain_count[chain_length]++;
1419         }
1420
1421         /* display results */
1422         for (i=1;i<CHAIN_LIMIT-1;i++)
1423                 printf("       %2d %17d %18.2f%%\n",i,chain_count[i],(((float) chain_count[i]*i*100)/hashtable_utf.entries));
1424
1425         printf("     >=%2d %17d %18.2f%%\n",CHAIN_LIMIT-1,chain_count[CHAIN_LIMIT-1],((float) beyond_limit*100)/hashtable_utf.entries);
1426
1427
1428         printf("max. chainlength:%5d\n",max_chainlength);
1429
1430         /* avg. chainlength = sum of chainlengths / number of chains */
1431         printf("avg. chainlength:%5.2f\n",(float) sum_chainlength / (hashtable_utf.size-chain_count[0]));
1432 }
1433 #endif /* !defined(NDEBUG) */
1434
1435
1436 /*
1437  * These are local overrides for various environment variables in Emacs.
1438  * Please do not remove this and leave it at the end of the file, where
1439  * Emacs will automagically detect them.
1440  * ---------------------------------------------------------------------
1441  * Local variables:
1442  * mode: c
1443  * indent-tabs-mode: t
1444  * c-basic-offset: 4
1445  * tab-width: 4
1446  * End:
1447  * vim:noexpandtab:sw=4:ts=4:
1448  */