src/vm/utf8.c

   1 /* src/vm/utf8.c - utf8 string functions
   2
   3    Copyright (C) 1996-2005, 2006 R. Grafl, A. Krall, C. Kruegel,
   4    C. Oates, R. Obermaisser, M. Platter, M. Probst, S. Ring,
   5    E. Steiner, C. Thalinger, D. Thuernbeck, P. Tomsich, C. Ullrich,
   6    J. Wenninger, Institut f. Computersprachen - TU Wien
   7
   8    This file is part of CACAO.
   9
  10    This program is free software; you can redistribute it and/or
  11    modify it under the terms of the GNU General Public License as
  12    published by the Free Software Foundation; either version 2, or (at
  13    your option) any later version.
  14
  15    This program is distributed in the hope that it will be useful, but
  16    WITHOUT ANY WARRANTY; without even the implied warranty of
  17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18    General Public License for more details.
  19
  20    You should have received a copy of the GNU General Public License
  21    along with this program; if not, write to the Free Software
  22    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  23    02110-1301, USA.
  24
  25    Contact: cacao@cacaojvm.org
  26
  27    Authors: Reinhard Grafl
  28
  29    Changes: Mark Probst
  30             Andreas Krall
  31             Christian Thalinger
  32                         Edwin Steiner
  33
  34    $Id: utf8.c 5123 2006-07-12 21:45:34Z twisti $
  35
  36 */
  37
  38
  39 #include "config.h"
  40
  41 #include <string.h>
  42 #include <assert.h>
  43
  44 #include "vm/types.h"
  45
  46 #include "mm/memory.h"
  47
  48 #if defined(ENABLE_THREADS)
  49 # include "threads/native/lock.h"
  50 #else
  51 # include "threads/none/lock.h"
  52 #endif
  53
  54 #include "vm/builtin.h"
  55 #include "vm/exceptions.h"
  56 #include "vm/hashtable.h"
  57 #include "vm/options.h"
  58 #include "vm/statistics.h"
  59 #include "vm/stringlocal.h"
  60 #include "vm/utf8.h"
  61
  62
  63 /* global variables ***********************************************************/
  64
  65 /* hashsize must be power of 2 */
  66
  67 #define HASHTABLE_UTF_SIZE    16384     /* initial size of utf-hash           */
  68
  69 hashtable *hashtable_utf;               /* hashtable for utf8-symbols         */
  70
  71
  72 /* utf-symbols for pointer comparison of frequently used strings **************/
  73
  74 utf *utf_java_lang_Object;
  75
  76 utf *utf_java_lang_Class;
  77 utf *utf_java_lang_ClassLoader;
  78 utf *utf_java_lang_Cloneable;
  79 utf *utf_java_lang_SecurityManager;
  80 utf *utf_java_lang_String;
  81 utf *utf_java_lang_System;
  82 utf *utf_java_lang_ThreadGroup;
  83 utf *utf_java_io_Serializable;
  84
  85 utf *utf_java_lang_Throwable;
  86 utf *utf_java_lang_VMThrowable;
  87 utf *utf_java_lang_Error;
  88 utf *utf_java_lang_AbstractMethodError;
  89 utf *utf_java_lang_LinkageError;
  90 utf *utf_java_lang_NoClassDefFoundError;
  91 utf *utf_java_lang_NoSuchMethodError;
  92 utf *utf_java_lang_OutOfMemoryError;
  93
  94 utf *utf_java_lang_Exception;
  95 utf *utf_java_lang_ClassCastException;
  96 utf *utf_java_lang_ClassNotFoundException;
  97 utf *utf_java_lang_IllegalArgumentException;
  98 utf *utf_java_lang_IllegalMonitorStateException;
  99
 100 utf *utf_java_lang_NullPointerException;
 101
 102 utf* utf_java_lang_Void;
 103 utf* utf_java_lang_Boolean;
 104 utf* utf_java_lang_Byte;
 105 utf* utf_java_lang_Character;
 106 utf* utf_java_lang_Short;
 107 utf* utf_java_lang_Integer;
 108 utf* utf_java_lang_Long;
 109 utf* utf_java_lang_Float;
 110 utf* utf_java_lang_Double;
 111
 112 utf *utf_java_lang_StackTraceElement;
 113 utf *utf_java_lang_reflect_Constructor;
 114 utf *utf_java_lang_reflect_Field;
 115 utf *utf_java_lang_reflect_Method;
 116 utf *utf_java_util_Vector;
 117
 118 utf *utf_InnerClasses;                  /* InnerClasses                       */
 119 utf *utf_ConstantValue;                 /* ConstantValue                      */
 120 utf *utf_Code;                          /* Code                               */
 121 utf *utf_Exceptions;                    /* Exceptions                         */
 122 utf *utf_LineNumberTable;               /* LineNumberTable                    */
 123 utf *utf_SourceFile;                    /* SourceFile                         */
 124
 125 utf *utf_init;                          /* <init>                             */
 126 utf *utf_clinit;                        /* <clinit>                           */
 127 utf *utf_clone;                         /* clone                              */
 128 utf *utf_finalize;                      /* finalize                           */
 129 utf *utf_run;                           /* run                                */
 130
 131 utf *utf_add;                           /* add                                */
 132 utf *utf_remove;                        /* remove                             */
 133 utf *utf_put;                           /* put                                */
 134 utf *utf_get;                           /* get                                */
 135 utf *utf_value;                         /* value                              */
 136
 137 utf *utf_fillInStackTrace;
 138 utf *utf_getSystemClassLoader;
 139 utf *utf_loadClass;
 140 utf *utf_printStackTrace;
 141
 142 utf *utf_Z;                             /* Z                                  */
 143 utf *utf_B;                             /* B                                  */
 144 utf *utf_C;                             /* C                                  */
 145 utf *utf_S;                             /* S                                  */
 146 utf *utf_I;                             /* I                                  */
 147 utf *utf_J;                             /* J                                  */
 148 utf *utf_F;                             /* F                                  */
 149 utf *utf_D;                             /* D                                  */
 150
 151 utf *utf_void__void;                    /* ()V                                */
 152 utf *utf_boolean__void;                 /* (Z)V                               */
 153 utf *utf_byte__void;                    /* (B)V                               */
 154 utf *utf_char__void;                    /* (C)V                               */
 155 utf *utf_short__void;                   /* (S)V                               */
 156 utf *utf_int__void;                     /* (I)V                               */
 157 utf *utf_long__void;                    /* (J)V                               */
 158 utf *utf_float__void;                   /* (F)V                               */
 159 utf *utf_double__void;                  /* (D)V                               */
 160
 161 utf *utf_void__java_lang_ClassLoader;   /* ()Ljava/lang/ClassLoader;          */
 162 utf *utf_void__java_lang_Object;        /* ()Ljava/lang/Object;               */
 163 utf *utf_void__java_lang_Throwable;     /* ()Ljava/lang/Throwable;            */
 164 utf *utf_java_lang_Object__java_lang_Object;
 165 utf *utf_java_lang_String__void;        /* (Ljava/lang/String;)V              */
 166 utf *utf_java_lang_String__java_lang_Class;
 167 utf *utf_java_lang_Throwable__void;     /* (Ljava/lang/Throwable;)V           */
 168
 169 utf *utf_not_named_yet;                 /* special name for unnamed classes   */
 170 utf *utf_null;
 171 utf *array_packagename;
 172
 173
 174 /* utf_init ********************************************************************
 175
 176    Initializes the utf8 subsystem.
 177
 178 *******************************************************************************/
 179
 180 bool utf8_init(void)
 181 {
 182         /* create utf8 hashtable */
 183
 184         hashtable_utf = NEW(hashtable);
 185
 186         hashtable_create(hashtable_utf, HASHTABLE_UTF_SIZE);
 187
 188 #if defined(ENABLE_STATISTICS)
 189         if (opt_stat)
 190                 count_utf_len += sizeof(utf*) * hashtable_utf->size;
 191 #endif
 192
 193         /* create utf-symbols for pointer comparison of frequently used strings */
 194
 195         utf_java_lang_Object           = utf_new_char("java/lang/Object");
 196
 197         utf_java_lang_Class            = utf_new_char("java/lang/Class");
 198         utf_java_lang_ClassLoader      = utf_new_char("java/lang/ClassLoader");
 199         utf_java_lang_Cloneable        = utf_new_char("java/lang/Cloneable");
 200         utf_java_lang_SecurityManager  = utf_new_char("java/lang/SecurityManager");
 201         utf_java_lang_String           = utf_new_char("java/lang/String");
 202         utf_java_lang_System           = utf_new_char("java/lang/System");
 203         utf_java_lang_ThreadGroup      = utf_new_char("java/lang/ThreadGroup");
 204         utf_java_io_Serializable       = utf_new_char("java/io/Serializable");
 205
 206         utf_java_lang_Throwable        = utf_new_char(string_java_lang_Throwable);
 207         utf_java_lang_VMThrowable      = utf_new_char(string_java_lang_VMThrowable);
 208         utf_java_lang_Error            = utf_new_char(string_java_lang_Error);
 209
 210         utf_java_lang_AbstractMethodError =
 211                 utf_new_char(string_java_lang_AbstractMethodError);
 212
 213         utf_java_lang_LinkageError =
 214                 utf_new_char(string_java_lang_LinkageError);
 215
 216         utf_java_lang_NoClassDefFoundError =
 217                 utf_new_char(string_java_lang_NoClassDefFoundError);
 218
 219         utf_java_lang_NoSuchMethodError =
 220                 utf_new_char(string_java_lang_NoSuchMethodError);
 221
 222         utf_java_lang_OutOfMemoryError =
 223                 utf_new_char(string_java_lang_OutOfMemoryError);
 224
 225         utf_java_lang_Exception        = utf_new_char(string_java_lang_Exception);
 226
 227         utf_java_lang_ClassCastException =
 228                 utf_new_char(string_java_lang_ClassCastException);
 229
 230         utf_java_lang_ClassNotFoundException =
 231                 utf_new_char(string_java_lang_ClassNotFoundException);
 232
 233         utf_java_lang_IllegalArgumentException =
 234                 utf_new_char(string_java_lang_IllegalArgumentException);
 235
 236         utf_java_lang_IllegalMonitorStateException =
 237                 utf_new_char(string_java_lang_IllegalMonitorStateException);
 238
 239         utf_java_lang_NullPointerException =
 240                 utf_new_char(string_java_lang_NullPointerException);
 241
 242         utf_java_lang_Void             = utf_new_char("java/lang/Void");
 243         utf_java_lang_Boolean          = utf_new_char("java/lang/Boolean");
 244         utf_java_lang_Byte             = utf_new_char("java/lang/Byte");
 245         utf_java_lang_Character        = utf_new_char("java/lang/Character");
 246         utf_java_lang_Short            = utf_new_char("java/lang/Short");
 247         utf_java_lang_Integer          = utf_new_char("java/lang/Integer");
 248         utf_java_lang_Long             = utf_new_char("java/lang/Long");
 249         utf_java_lang_Float            = utf_new_char("java/lang/Float");
 250         utf_java_lang_Double           = utf_new_char("java/lang/Double");
 251
 252         utf_java_lang_StackTraceElement =
 253                 utf_new_char("java/lang/StackTraceElement");
 254
 255         utf_java_lang_reflect_Constructor =
 256                 utf_new_char("java/lang/reflect/Constructor");
 257
 258         utf_java_lang_reflect_Field    = utf_new_char("java/lang/reflect/Field");
 259         utf_java_lang_reflect_Method   = utf_new_char("java/lang/reflect/Method");
 260         utf_java_util_Vector           = utf_new_char("java/util/Vector");
 261
 262         utf_InnerClasses               = utf_new_char("InnerClasses");
 263         utf_ConstantValue              = utf_new_char("ConstantValue");
 264         utf_Code                       = utf_new_char("Code");
 265         utf_Exceptions                 = utf_new_char("Exceptions");
 266         utf_LineNumberTable            = utf_new_char("LineNumberTable");
 267         utf_SourceFile                 = utf_new_char("SourceFile");
 268
 269         utf_init                           = utf_new_char("<init>");
 270         utf_clinit                         = utf_new_char("<clinit>");
 271         utf_clone                      = utf_new_char("clone");
 272         utf_finalize                   = utf_new_char("finalize");
 273         utf_run                        = utf_new_char("run");
 274
 275         utf_add                        = utf_new_char("add");
 276         utf_remove                     = utf_new_char("remove");
 277         utf_put                        = utf_new_char("put");
 278         utf_get                        = utf_new_char("get");
 279         utf_value                      = utf_new_char("value");
 280
 281         utf_printStackTrace            = utf_new_char("printStackTrace");
 282         utf_fillInStackTrace           = utf_new_char("fillInStackTrace");
 283         utf_loadClass                  = utf_new_char("loadClass");
 284         utf_getSystemClassLoader       = utf_new_char("getSystemClassLoader");
 285
 286         utf_Z                          = utf_new_char("Z");
 287         utf_B                          = utf_new_char("B");
 288         utf_C                          = utf_new_char("C");
 289         utf_S                          = utf_new_char("S");
 290         utf_I                          = utf_new_char("I");
 291         utf_J                          = utf_new_char("J");
 292         utf_F                          = utf_new_char("F");
 293         utf_D                          = utf_new_char("D");
 294
 295         utf_void__void                 = utf_new_char("()V");
 296         utf_boolean__void              = utf_new_char("(Z)V");
 297         utf_byte__void                 = utf_new_char("(B)V");
 298         utf_char__void                 = utf_new_char("(C)V");
 299         utf_short__void                = utf_new_char("(S)V");
 300         utf_int__void                  = utf_new_char("(I)V");
 301         utf_long__void                 = utf_new_char("(J)V");
 302         utf_float__void                = utf_new_char("(F)V");
 303         utf_double__void               = utf_new_char("(D)V");
 304         utf_void__java_lang_Object     = utf_new_char("()Ljava/lang/Object;");
 305         utf_void__java_lang_Throwable  = utf_new_char("()Ljava/lang/Throwable;");
 306
 307         utf_void__java_lang_ClassLoader =
 308                 utf_new_char("()Ljava/lang/ClassLoader;");
 309
 310         utf_java_lang_Object__java_lang_Object =
 311                 utf_new_char("(Ljava/lang/Object;)Ljava/lang/Object;");
 312
 313         utf_java_lang_String__void     = utf_new_char("(Ljava/lang/String;)V");
 314
 315         utf_java_lang_String__java_lang_Class =
 316                 utf_new_char("(Ljava/lang/String;)Ljava/lang/Class;");
 317
 318         utf_java_lang_Throwable__void  = utf_new_char("(Ljava/lang/Throwable;)V");
 319
 320         utf_null                       = utf_new_char("null");
 321         utf_not_named_yet              = utf_new_char("\t<not_named_yet>");
 322         array_packagename              = utf_new_char("\t<the array package>");
 323
 324         /* everything's ok */
 325
 326         return true;
 327 }
 328
 329
 330 /* utf_hashkey *****************************************************************
 331
 332    The hashkey is computed from the utf-text by using up to 8
 333    characters.  For utf-symbols longer than 15 characters 3 characters
 334    are taken from the beginning and the end, 2 characters are taken
 335    from the middle.
 336
 337 *******************************************************************************/
 338
 339 #define nbs(val) ((u4) *(++text) << val) /* get next byte, left shift by val  */
 340 #define fbs(val) ((u4) *(  text) << val) /* get first byte, left shift by val */
 341
 342 u4 utf_hashkey(const char *text, u4 length)
 343 {
 344         const char *start_pos = text;       /* pointer to utf text                */
 345         u4 a;
 346
 347         switch (length) {
 348         case 0: /* empty string */
 349                 return 0;
 350
 351         case 1: return fbs(0);
 352         case 2: return fbs(0) ^ nbs(3);
 353         case 3: return fbs(0) ^ nbs(3) ^ nbs(5);
 354         case 4: return fbs(0) ^ nbs(2) ^ nbs(4) ^ nbs(6);
 355         case 5: return fbs(0) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(6);
 356         case 6: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(5) ^ nbs(6);
 357         case 7: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(5) ^ nbs(6);
 358         case 8: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(5) ^ nbs(6) ^ nbs(7);
 359
 360         case 9:
 361                 a = fbs(0);
 362                 a ^= nbs(1);
 363                 a ^= nbs(2);
 364                 text++;
 365                 return a ^ nbs(4) ^ nbs(5) ^ nbs(6) ^ nbs(7) ^ nbs(8);
 366
 367         case 10:
 368                 a = fbs(0);
 369                 text++;
 370                 a ^= nbs(2);
 371                 a ^= nbs(3);
 372                 a ^= nbs(4);
 373                 text++;
 374                 return a ^ nbs(6) ^ nbs(7) ^ nbs(8) ^ nbs(9);
 375
 376         case 11:
 377                 a = fbs(0);
 378                 text++;
 379                 a ^= nbs(2);
 380                 a ^= nbs(3);
 381                 a ^= nbs(4);
 382                 text++;
 383                 return a ^ nbs(6) ^ nbs(7) ^ nbs(8) ^ nbs(9) ^ nbs(10);
 384
 385         case 12:
 386                 a = fbs(0);
 387                 text += 2;
 388                 a ^= nbs(2);
 389                 a ^= nbs(3);
 390                 text++;
 391                 a ^= nbs(5);
 392                 a ^= nbs(6);
 393                 a ^= nbs(7);
 394                 text++;
 395                 return a ^ nbs(9) ^ nbs(10);
 396
 397         case 13:
 398                 a = fbs(0);
 399                 a ^= nbs(1);
 400                 text++;
 401                 a ^= nbs(3);
 402                 a ^= nbs(4);
 403                 text += 2;
 404                 a ^= nbs(7);
 405                 a ^= nbs(8);
 406                 text += 2;
 407                 return a ^ nbs(9) ^ nbs(10);
 408
 409         case 14:
 410                 a = fbs(0);
 411                 text += 2;
 412                 a ^= nbs(3);
 413                 a ^= nbs(4);
 414                 text += 2;
 415                 a ^= nbs(7);
 416                 a ^= nbs(8);
 417                 text += 2;
 418                 return a ^ nbs(9) ^ nbs(10) ^ nbs(11);
 419
 420         case 15:
 421                 a = fbs(0);
 422                 text += 2;
 423                 a ^= nbs(3);
 424                 a ^= nbs(4);
 425                 text += 2;
 426                 a ^= nbs(7);
 427                 a ^= nbs(8);
 428                 text += 2;
 429                 return a ^ nbs(9) ^ nbs(10) ^ nbs(11);
 430
 431         default:  /* 3 characters from beginning */
 432                 a = fbs(0);
 433                 text += 2;
 434                 a ^= nbs(3);
 435                 a ^= nbs(4);
 436
 437                 /* 2 characters from middle */
 438                 text = start_pos + (length / 2);
 439                 a ^= fbs(5);
 440                 text += 2;
 441                 a ^= nbs(6);
 442
 443                 /* 3 characters from end */
 444                 text = start_pos + length - 4;
 445
 446                 a ^= fbs(7);
 447                 text++;
 448
 449                 return a ^ nbs(10) ^ nbs(11);
 450     }
 451 }
 452
 453 /* utf_full_hashkey ************************************************************
 454
 455    This function computes a hash value using all bytes in the string.
 456
 457    The algorithm is the "One-at-a-time" algorithm as published
 458    by Bob Jenkins on http://burtleburtle.net/bob/hash/doobs.html.
 459
 460 *******************************************************************************/
 461
 462 u4 utf_full_hashkey(const char *text, u4 length)
 463 {
 464         register const unsigned char *p = (const unsigned char *) text;
 465         register u4 hash;
 466         register u4 i;
 467
 468         hash = 0;
 469         for (i=length; i--;)
 470         {
 471             hash += *p++;
 472             hash += (hash << 10);
 473             hash ^= (hash >> 6);
 474         }
 475         hash += (hash << 3);
 476         hash ^= (hash >> 11);
 477         hash += (hash << 15);
 478
 479         return hash;
 480 }
 481
 482 /* unicode_hashkey *************************************************************
 483
 484    Compute the hashkey of a unicode string.
 485
 486 *******************************************************************************/
 487
 488 u4 unicode_hashkey(u2 *text, u2 len)
 489 {
 490         return utf_hashkey((char *) text, len);
 491 }
 492
 493
 494 /* utf_new *********************************************************************
 495
 496    Creates a new utf-symbol, the text of the symbol is passed as a
 497    u1-array. The function searches the utf-hashtable for a utf-symbol
 498    with this text. On success the element returned, otherwise a new
 499    hashtable element is created.
 500
 501    If the number of entries in the hashtable exceeds twice the size of
 502    the hashtable slots a reorganization of the hashtable is done and
 503    the utf symbols are copied to a new hashtable with doubled size.
 504
 505 *******************************************************************************/
 506
 507 utf *utf_new(const char *text, u2 length)
 508 {
 509         u4 key;                             /* hashkey computed from utf-text     */
 510         u4 slot;                            /* slot in hashtable                  */
 511         utf *u;                             /* hashtable element                  */
 512         u2 i;
 513
 514         LOCK_MONITOR_ENTER(hashtable_utf->header);
 515
 516 #if defined(ENABLE_STATISTICS)
 517         if (opt_stat)
 518                 count_utf_new++;
 519 #endif
 520
 521         key  = utf_hashkey(text, length);
 522         slot = key & (hashtable_utf->size - 1);
 523         u    = hashtable_utf->ptr[slot];
 524
 525         /* search external hash chain for utf-symbol */
 526
 527         while (u) {
 528                 if (u->blength == length) {
 529                         /* compare text of hashtable elements */
 530
 531                         for (i = 0; i < length; i++)
 532                                 if (text[i] != u->text[i])
 533                                         goto nomatch;
 534
 535 #if defined(ENABLE_STATISTICS)
 536                         if (opt_stat)
 537                                 count_utf_new_found++;
 538 #endif
 539
 540                         /* symbol found in hashtable */
 541
 542                         LOCK_MONITOR_EXIT(hashtable_utf->header);
 543
 544                         return u;
 545                 }
 546
 547         nomatch:
 548                 u = u->hashlink; /* next element in external chain */
 549         }
 550
 551 #if defined(ENABLE_STATISTICS)
 552         if (opt_stat)
 553                 count_utf_len += sizeof(utf) + length + 1;
 554 #endif
 555
 556         /* location in hashtable found, create new utf element */
 557         u = NEW(utf);
 558         u->blength  = length;               /* length in bytes of utfstring       */
 559         u->hashlink = hashtable_utf->ptr[slot]; /* link in external hashchain     */
 560         u->text     = mem_alloc(length + 1);/* allocate memory for utf-text       */
 561
 562         memcpy(u->text, text, length);      /* copy utf-text                      */
 563         u->text[length] = '\0';
 564
 565         hashtable_utf->ptr[slot] = u;       /* insert symbol into table           */
 566         hashtable_utf->entries++;           /* update number of entries           */
 567
 568         if (hashtable_utf->entries > (hashtable_utf->size * 2)) {
 569
 570         /* reorganization of hashtable, average length of the external
 571            chains is approx. 2 */
 572
 573                 hashtable *newhash;                              /* the new hashtable */
 574                 u4         i;
 575                 utf       *u;
 576                 utf       *nextu;
 577                 u4         slot;
 578
 579                 /* create new hashtable, double the size */
 580
 581                 newhash = hashtable_resize(hashtable_utf, hashtable_utf->size * 2);
 582
 583 #if defined(ENABLE_STATISTICS)
 584                 if (opt_stat)
 585                         count_utf_len += sizeof(utf*) * hashtable_utf->size;
 586 #endif
 587
 588                 /* transfer elements to new hashtable */
 589
 590                 for (i = 0; i < hashtable_utf->size; i++) {
 591                         u = hashtable_utf->ptr[i];
 592
 593                         while (u) {
 594                                 nextu = u->hashlink;
 595                                 slot  = utf_hashkey(u->text, u->blength) & (newhash->size - 1);
 596
 597                                 u->hashlink = (utf *) newhash->ptr[slot];
 598                                 newhash->ptr[slot] = u;
 599
 600                                 /* follow link in external hash chain */
 601
 602                                 u = nextu;
 603                         }
 604                 }
 605
 606                 /* dispose old table */
 607
 608                 hashtable_free(hashtable_utf);
 609
 610                 hashtable_utf = newhash;
 611         }
 612
 613         LOCK_MONITOR_EXIT(hashtable_utf->header);
 614
 615         return u;
 616 }
 617
 618
 619 /* utf_new_u2 ******************************************************************
 620
 621    Make utf symbol from u2 array, if isclassname is true '.' is
 622    replaced by '/'.
 623
 624 *******************************************************************************/
 625
 626 utf *utf_new_u2(u2 *unicode_pos, u4 unicode_length, bool isclassname)
 627 {
 628         char *buffer;                   /* memory buffer for  unicode characters  */
 629         char *pos;                      /* pointer to current position in buffer  */
 630         u4 left;                        /* unicode characters left                */
 631         u4 buflength;                   /* utf length in bytes of the u2 array    */
 632         utf *result;                    /* resulting utf-string                   */
 633         int i;
 634
 635         /* determine utf length in bytes and allocate memory */
 636
 637         buflength = u2_utflength(unicode_pos, unicode_length);
 638         buffer    = MNEW(char, buflength);
 639
 640         left = buflength;
 641         pos  = buffer;
 642
 643         for (i = 0; i++ < unicode_length; unicode_pos++) {
 644                 /* next unicode character */
 645                 u2 c = *unicode_pos;
 646
 647                 if ((c != 0) && (c < 0x80)) {
 648                         /* 1 character */
 649                         left--;
 650                 if ((int) left < 0) break;
 651                         /* convert classname */
 652                         if (isclassname && c == '.')
 653                                 *pos++ = '/';
 654                         else
 655                                 *pos++ = (char) c;
 656
 657                 } else if (c < 0x800) {
 658                         /* 2 characters */
 659                 unsigned char high = c >> 6;
 660                 unsigned char low  = c & 0x3F;
 661                         left = left - 2;
 662                 if ((int) left < 0) break;
 663                 *pos++ = high | 0xC0;
 664                 *pos++ = low  | 0x80;
 665
 666                 } else {
 667                 /* 3 characters */
 668                 char low  = c & 0x3f;
 669                 char mid  = (c >> 6) & 0x3F;
 670                 char high = c >> 12;
 671                         left = left - 3;
 672                 if ((int) left < 0) break;
 673                 *pos++ = high | 0xE0;
 674                 *pos++ = mid  | 0x80;
 675                 *pos++ = low  | 0x80;
 676                 }
 677         }
 678
 679         /* insert utf-string into symbol-table */
 680         result = utf_new(buffer,buflength);
 681
 682         MFREE(buffer, char, buflength);
 683
 684         return result;
 685 }
 686
 687
 688 /* utf_new_char ****************************************************************
 689
 690    Creates a new utf symbol, the text for this symbol is passed as a
 691    c-string ( = char* ).
 692
 693 *******************************************************************************/
 694
 695 utf *utf_new_char(const char *text)
 696 {
 697         return utf_new(text, strlen(text));
 698 }
 699
 700
 701 /* utf_new_char_classname ******************************************************
 702
 703    Creates a new utf symbol, the text for this symbol is passed as a
 704    c-string ( = char* ) "." characters are going to be replaced by
 705    "/". Since the above function is used often, this is a separte
 706    function, instead of an if.
 707
 708 *******************************************************************************/
 709
 710 utf *utf_new_char_classname(const char *text)
 711 {
 712         if (strchr(text, '.')) {
 713                 char *txt = strdup(text);
 714                 char *end = txt + strlen(txt);
 715                 char *c;
 716                 utf *tmpRes;
 717
 718                 for (c = txt; c < end; c++)
 719                         if (*c == '.') *c = '/';
 720
 721                 tmpRes = utf_new(txt, strlen(txt));
 722                 FREE(txt, 0);
 723
 724                 return tmpRes;
 725
 726         } else
 727                 return utf_new(text, strlen(text));
 728 }
 729
 730
 731 /* utf_nextu2 ******************************************************************
 732
 733    Read the next unicode character from the utf string and increment
 734    the utf-string pointer accordingly.
 735
 736 *******************************************************************************/
 737
 738 u2 utf_nextu2(char **utf_ptr)
 739 {
 740     /* uncompressed unicode character */
 741     u2 unicode_char = 0;
 742     /* current position in utf text */
 743     unsigned char *utf = (unsigned char *) (*utf_ptr);
 744     /* bytes representing the unicode character */
 745     unsigned char ch1, ch2, ch3;
 746     /* number of bytes used to represent the unicode character */
 747     int len = 0;
 748
 749     switch ((ch1 = utf[0]) >> 4) {
 750         default: /* 1 byte */
 751                 (*utf_ptr)++;
 752                 return (u2) ch1;
 753         case 0xC:
 754         case 0xD: /* 2 bytes */
 755                 if (((ch2 = utf[1]) & 0xC0) == 0x80) {
 756                         unsigned char high = ch1 & 0x1F;
 757                         unsigned char low  = ch2 & 0x3F;
 758                         unicode_char = (high << 6) + low;
 759                         len = 2;
 760                 }
 761                 break;
 762
 763         case 0xE: /* 2 or 3 bytes */
 764                 if (((ch2 = utf[1]) & 0xC0) == 0x80) {
 765                         if (((ch3 = utf[2]) & 0xC0) == 0x80) {
 766                                 unsigned char low  = ch3 & 0x3f;
 767                                 unsigned char mid  = ch2 & 0x3f;
 768                                 unsigned char high = ch1 & 0x0f;
 769                                 unicode_char = (((high << 6) + mid) << 6) + low;
 770                                 len = 3;
 771                         } else
 772                                 len = 2;
 773                 }
 774                 break;
 775     }
 776
 777     /* update position in utf-text */
 778     *utf_ptr = (char *) (utf + len);
 779
 780     return unicode_char;
 781 }
 782
 783
 784 /* utf_bytes *******************************************************************
 785
 786    Determine number of bytes (aka. octets) in the utf string.
 787
 788    IN:
 789       u............utf string
 790
 791    OUT:
 792       The number of octets of this utf string.
 793           There is _no_ terminating zero included in this count.
 794
 795 *******************************************************************************/
 796
 797 u4 utf_bytes(utf *u)
 798 {
 799         return u->blength;
 800 }
 801
 802 /* utf_get_number_of_u2s_for_buffer ********************************************
 803
 804    Determine number of UTF-16 u2s in the given UTF-8 buffer
 805
 806    CAUTION: Use this function *only* when you want to convert an UTF-8 buffer
 807    to an array of u2s (UTF-16) and want to know how many of them you will get.
 808    All other uses of this function are probably wrong.
 809
 810    IN:
 811       buffer........points to first char in buffer
 812           blength.......number of _bytes_ in the buffer
 813
 814    OUT:
 815       the number of u2s needed to hold this string in UTF-16 encoding.
 816           There is _no_ terminating zero included in this count.
 817
 818    NOTE: Unlike utf_get_number_of_u2s, this function never throws an
 819    exception.
 820
 821 *******************************************************************************/
 822
 823 u4 utf_get_number_of_u2s_for_buffer(const char *buffer, u4 blength)
 824 {
 825         const char *endpos;                 /* points behind utf string           */
 826         const char *utf_ptr;                /* current position in utf text       */
 827         u4 len = 0;                         /* number of unicode characters       */
 828
 829         utf_ptr = buffer;
 830         endpos = utf_ptr + blength;
 831
 832         while (utf_ptr < endpos) {
 833                 len++;
 834                 /* next unicode character */
 835                 utf_nextu2((char **)&utf_ptr);
 836         }
 837
 838         assert(utf_ptr == endpos);
 839
 840         return len;
 841 }
 842
 843
 844 /* utf_get_number_of_u2s *******************************************************
 845
 846    Determine number of UTF-16 u2s in the utf string.
 847
 848    CAUTION: Use this function *only* when you want to convert a utf string
 849    to an array of u2s and want to know how many of them you will get.
 850    All other uses of this function are probably wrong.
 851
 852    IN:
 853       u............utf string
 854
 855    OUT:
 856       the number of u2s needed to hold this string in UTF-16 encoding.
 857           There is _no_ terminating zero included in this count.
 858           XXX 0 if a NullPointerException has been thrown (see below)
 859
 860 *******************************************************************************/
 861
 862 u4 utf_get_number_of_u2s(utf *u)
 863 {
 864         char *endpos;                       /* points behind utf string           */
 865         char *utf_ptr;                      /* current position in utf text       */
 866         u4 len = 0;                         /* number of unicode characters       */
 867
 868         /* XXX this is probably not checked by most callers! Review this after */
 869         /* the invalid uses of this function have been eliminated */
 870         if (!u) {
 871                 exceptions_throw_nullpointerexception();
 872                 return 0;
 873         }
 874
 875         endpos = UTF_END(u);
 876         utf_ptr = u->text;
 877
 878         while (utf_ptr < endpos) {
 879                 len++;
 880                 /* next unicode character */
 881                 utf_nextu2(&utf_ptr);
 882         }
 883
 884         if (utf_ptr != endpos)
 885                 /* string ended abruptly */
 886                 throw_cacao_exception_exit(string_java_lang_InternalError,
 887                                                                    "Illegal utf8 string");
 888
 889         return len;
 890 }
 891
 892
 893 /* u2_utflength ****************************************************************
 894
 895    Returns the utf length in bytes of a u2 array.
 896
 897 *******************************************************************************/
 898
 899 u4 u2_utflength(u2 *text, u4 u2_length)
 900 {
 901         u4 result_len = 0;                  /* utf length in bytes                */
 902         u2 ch;                              /* current unicode character          */
 903         u4 len;
 904
 905         for (len = 0; len < u2_length; len++) {
 906                 /* next unicode character */
 907                 ch = *text++;
 908
 909                 /* determine bytes required to store unicode character as utf */
 910                 if (ch && (ch < 0x80))
 911                         result_len++;
 912                 else if (ch < 0x800)
 913                         result_len += 2;
 914                 else
 915                         result_len += 3;
 916         }
 917
 918     return result_len;
 919 }
 920
 921
 922 /* utf_copy ********************************************************************
 923
 924    Copy the given utf string byte-for-byte to a buffer.
 925
 926    IN:
 927       buffer.......the buffer
 928           u............the utf string
 929
 930 *******************************************************************************/
 931
 932 void utf_copy(char *buffer, utf *u)
 933 {
 934         /* our utf strings are zero-terminated (done by utf_new) */
 935         MCOPY(buffer, u->text, char, u->blength + 1);
 936 }
 937
 938
 939 /* utf_cat *********************************************************************
 940
 941    Append the given utf string byte-for-byte to a buffer.
 942
 943    IN:
 944       buffer.......the buffer
 945           u............the utf string
 946
 947 *******************************************************************************/
 948
 949 void utf_cat(char *buffer, utf *u)
 950 {
 951         /* our utf strings are zero-terminated (done by utf_new) */
 952         MCOPY(buffer + strlen(buffer), u->text, char, u->blength + 1);
 953 }
 954
 955
 956 /* utf_copy_classname **********************************************************
 957
 958    Copy the given utf classname byte-for-byte to a buffer.
 959    '/' is replaced by '.'
 960
 961    IN:
 962       buffer.......the buffer
 963           u............the utf string
 964
 965 *******************************************************************************/
 966
 967 void utf_copy_classname(char *buffer, utf *u)
 968 {
 969         char *bufptr;
 970         char *srcptr;
 971         char *endptr;
 972         char ch;
 973
 974         bufptr = buffer;
 975         srcptr = u->text;
 976         endptr = UTF_END(u) + 1; /* utfs are zero-terminared by utf_new */
 977
 978         while (srcptr != endptr) {
 979                 ch = *srcptr++;
 980                 if (ch == '/')
 981                         ch = '.';
 982                 *bufptr++ = ch;
 983         }
 984 }
 985
 986
 987 /* utf_cat *********************************************************************
 988
 989    Append the given utf classname byte-for-byte to a buffer.
 990    '/' is replaced by '.'
 991
 992    IN:
 993       buffer.......the buffer
 994           u............the utf string
 995
 996 *******************************************************************************/
 997
 998 void utf_cat_classname(char *buffer, utf *u)
 999 {
1000         utf_copy_classname(buffer + strlen(buffer), u);
1001 }
1002
1003 /* utf_display_printable_ascii *************************************************
1004
1005    Write utf symbol to stdout (for debugging purposes).
1006    Non-printable and non-ASCII characters are printed as '?'.
1007
1008 *******************************************************************************/
1009
1010 void utf_display_printable_ascii(utf *u)
1011 {
1012         char *endpos;                       /* points behind utf string           */
1013         char *utf_ptr;                      /* current position in utf text       */
1014
1015         if (u == NULL) {
1016                 printf("NULL");
1017                 fflush(stdout);
1018                 return;
1019         }
1020
1021         endpos = UTF_END(u);
1022         utf_ptr = u->text;
1023
1024         while (utf_ptr < endpos) {
1025                 /* read next unicode character */
1026
1027                 u2 c = utf_nextu2(&utf_ptr);
1028
1029                 if ((c >= 32) && (c <= 127))
1030                         printf("%c", c);
1031                 else
1032                         printf("?");
1033         }
1034
1035         fflush(stdout);
1036 }
1037
1038
1039 /* utf_display_printable_ascii_classname ***************************************
1040
1041    Write utf symbol to stdout with `/' converted to `.' (for debugging
1042    purposes).
1043    Non-printable and non-ASCII characters are printed as '?'.
1044
1045 *******************************************************************************/
1046
1047 void utf_display_printable_ascii_classname(utf *u)
1048 {
1049         char *endpos;                       /* points behind utf string           */
1050         char *utf_ptr;                      /* current position in utf text       */
1051
1052         if (u == NULL) {
1053                 printf("NULL");
1054                 fflush(stdout);
1055                 return;
1056         }
1057
1058         endpos = UTF_END(u);
1059         utf_ptr = u->text;
1060
1061         while (utf_ptr < endpos) {
1062                 /* read next unicode character */
1063
1064                 u2 c = utf_nextu2(&utf_ptr);
1065
1066                 if (c == '/')
1067                         c = '.';
1068
1069                 if ((c >= 32) && (c <= 127))
1070                         printf("%c", c);
1071                 else
1072                         printf("?");
1073         }
1074
1075         fflush(stdout);
1076 }
1077
1078
1079 /* utf_sprint_convert_to_latin1 ************************************************
1080
1081    Write utf symbol into c-string (for debugging purposes).
1082    Characters are converted to 8-bit Latin-1, non-Latin-1 characters yield
1083    invalid results.
1084
1085 *******************************************************************************/
1086
1087 void utf_sprint_convert_to_latin1(char *buffer, utf *u)
1088 {
1089         char *endpos;                       /* points behind utf string           */
1090         char *utf_ptr;                      /* current position in utf text       */
1091         u2 pos = 0;                         /* position in c-string               */
1092
1093         if (!u) {
1094                 strcpy(buffer, "NULL");
1095                 return;
1096         }
1097
1098         endpos = UTF_END(u);
1099         utf_ptr = u->text;
1100
1101         while (utf_ptr < endpos)
1102                 /* copy next unicode character */
1103                 buffer[pos++] = utf_nextu2(&utf_ptr);
1104
1105         /* terminate string */
1106         buffer[pos] = '\0';
1107 }
1108
1109
1110 /* utf_sprint_convert_to_latin1_classname **************************************
1111
1112    Write utf symbol into c-string with `/' converted to `.' (for debugging
1113    purposes).
1114    Characters are converted to 8-bit Latin-1, non-Latin-1 characters yield
1115    invalid results.
1116
1117 *******************************************************************************/
1118
1119 void utf_sprint_convert_to_latin1_classname(char *buffer, utf *u)
1120 {
1121         char *endpos;                       /* points behind utf string           */
1122         char *utf_ptr;                      /* current position in utf text       */
1123         u2 pos = 0;                         /* position in c-string               */
1124
1125         if (!u) {
1126                 strcpy(buffer, "NULL");
1127                 return;
1128         }
1129
1130         endpos = UTF_END(u);
1131         utf_ptr = u->text;
1132
1133         while (utf_ptr < endpos) {
1134                 /* copy next unicode character */
1135                 u2 c = utf_nextu2(&utf_ptr);
1136                 if (c == '/') c = '.';
1137                 buffer[pos++] = c;
1138         }
1139
1140         /* terminate string */
1141         buffer[pos] = '\0';
1142 }
1143
1144
1145 /* utf_strcat_convert_to_latin1 ************************************************
1146
1147    Like libc strcat, but uses an utf8 string.
1148    Characters are converted to 8-bit Latin-1, non-Latin-1 characters yield
1149    invalid results.
1150
1151 *******************************************************************************/
1152
1153 void utf_strcat_convert_to_latin1(char *buffer, utf *u)
1154 {
1155         utf_sprint_convert_to_latin1(buffer + strlen(buffer), u);
1156 }
1157
1158
1159 /* utf_strcat_convert_to_latin1_classname **************************************
1160
1161    Like libc strcat, but uses an utf8 string.
1162    Characters are converted to 8-bit Latin-1, non-Latin-1 characters yield
1163    invalid results.
1164
1165 *******************************************************************************/
1166
1167 void utf_strcat_convert_to_latin1_classname(char *buffer, utf *u)
1168 {
1169         utf_sprint_convert_to_latin1_classname(buffer + strlen(buffer), u);
1170 }
1171
1172
1173 /* utf_fprint_printable_ascii **************************************************
1174
1175    Write utf symbol into file.
1176    Non-printable and non-ASCII characters are printed as '?'.
1177
1178 *******************************************************************************/
1179
1180 void utf_fprint_printable_ascii(FILE *file, utf *u)
1181 {
1182         char *endpos;                       /* points behind utf string           */
1183         char *utf_ptr;                      /* current position in utf text       */
1184
1185         if (!u)
1186                 return;
1187
1188         endpos = UTF_END(u);
1189         utf_ptr = u->text;
1190
1191         while (utf_ptr < endpos) {
1192                 /* read next unicode character */
1193                 u2 c = utf_nextu2(&utf_ptr);
1194
1195                 if (c >= 32 && c <= 127) fprintf(file, "%c", c);
1196                 else fprintf(file, "?");
1197         }
1198 }
1199
1200
1201 /* utf_fprint_printable_ascii_classname ****************************************
1202
1203    Write utf symbol into file with `/' converted to `.'.
1204    Non-printable and non-ASCII characters are printed as '?'.
1205
1206 *******************************************************************************/
1207
1208 void utf_fprint_printable_ascii_classname(FILE *file, utf *u)
1209 {
1210         char *endpos;                       /* points behind utf string           */
1211         char *utf_ptr;                      /* current position in utf text       */
1212
1213     if (!u)
1214                 return;
1215
1216         endpos = UTF_END(u);
1217         utf_ptr = u->text;
1218
1219         while (utf_ptr < endpos) {
1220                 /* read next unicode character */
1221                 u2 c = utf_nextu2(&utf_ptr);
1222                 if (c == '/') c = '.';
1223
1224                 if (c >= 32 && c <= 127) fprintf(file, "%c", c);
1225                 else fprintf(file, "?");
1226         }
1227 }
1228
1229
/* is_valid_utf ****************************************************************

   Check whether a byte range holds a UTF-8 string as accepted for Java.

   utf_ptr...points to first character
   end_pos...points after last character

   Returns false if
      - end_pos lies before utf_ptr,
      - a 0x00 byte occurs,
      - a lead byte is neither ASCII, 110x xxxx nor 1110 xxxx (longer
        sequences are a Java limitation and rejected as well),
      - the range ends in the middle of a sequence,
      - a continuation byte does not look like 10xx xxxx,
      - the value zero is encoded with three bytes (only the two-byte
        form of zero is accepted, a Java special).
   Everything else, including the empty range, yields true.

   Per the notes left in the code, overlong encodings, surrogates and
   the code points 0xfffe/0xffff are deliberately _not_ rejected because
   Sun Java seems to allow them in classfiles.

*******************************************************************************/

bool is_valid_utf(char *utf_ptr, char *end_pos)
{
	int           remaining;            /* bytes not yet consumed             */
	int           trail;                /* continuation bytes to follow       */
	int           k;
	char          byte;
	unsigned long codepoint;

	if (end_pos < utf_ptr)
		return false;

	remaining = end_pos - utf_ptr;

	while (remaining != 0) {
		remaining--;
		byte = *utf_ptr++;

		if (byte == 0)
			return false;                         /* 0x00 is not allowed      */

		if (!(byte & 0x80))
			continue;                             /* ASCII                    */

		if ((byte & 0xe0) == 0xc0)
			trail = 1;                            /* 110x xxxx                */
		else if ((byte & 0xf0) == 0xe0)
			trail = 2;                            /* 1110 xxxx                */
		else
			return false;       /* invalid lead byte or more than 3 bytes     */

		/* payload bits of the lead byte */
		codepoint = (unsigned long) byte & (0x3f >> trail);

		remaining -= trail;
		if (remaining < 0)
			return false;                         /* missing bytes            */

		for (k = 0; k < trail; k++) {
			byte = *utf_ptr++;

			if ((byte & 0xc0) != 0x80)            /* 10xx xxxx                */
				return false;

			codepoint = (codepoint << 6) | (byte & 0x3f);
		}

		/* zero may only appear in its two-byte form (Java special) */
		if ((codepoint == 0) && (trail != 1))
			return false;
	}

	return true;
}
1295
1296
/* is_valid_name ***************************************************************

   Return true if the given string may be used as a class/field/method
   name. (Currently this only disallows empty strings, control
   characters below 0x20 and the two-byte encoding of zero, 0xc0 0x80.)

   NOTE: The string is assumed to have passed is_valid_utf!

   utf_ptr...points to first character
   end_pos...points after last character

   No byte at or behind end_pos is examined: the look-ahead for the
   encoded zero is only done while the second byte is inside the range.

*******************************************************************************/

bool is_valid_name(char *utf_ptr, char *end_pos)
{
	if (end_pos <= utf_ptr)
		return false;                   /* disallow empty names               */

	while (utf_ptr < end_pos) {
		unsigned char c = *utf_ptr++;

		if (c < 0x20)
			return false;               /* disallow control characters        */

		/* disallow zero (0xc0 0x80); utf_ptr now addresses the byte after c */
		if ((c == 0xc0) && (utf_ptr < end_pos)
			&& ((unsigned char) *utf_ptr == 0x80))
			return false;
	}

	return true;
}
1324
1325 bool is_valid_name_utf(utf *u)
1326 {
1327         return is_valid_name(u->text, UTF_END(u));
1328 }
1329
1330
1331 /* utf_show ********************************************************************
1332
1333    Writes the utf symbols in the utfhash to stdout and displays the
1334    number of external hash chains grouped according to the chainlength
1335    (for debugging purposes).
1336
1337 *******************************************************************************/
1338
1339 #if !defined(NDEBUG)
1340 void utf_show(void)
1341 {
1342
1343 #define CHAIN_LIMIT 20               /* limit for seperated enumeration */
1344
1345         u4 chain_count[CHAIN_LIMIT]; /* numbers of chains */
1346         u4 max_chainlength = 0;      /* maximum length of the chains */
1347         u4 sum_chainlength = 0;      /* sum of the chainlengths */
1348         u4 beyond_limit = 0;         /* number of utf-symbols in chains with length>=CHAIN_LIMIT-1 */
1349         u4 i;
1350
1351         printf("UTF-HASH:\n");
1352
1353         /* show element of utf-hashtable */
1354
1355         for (i = 0; i < hashtable_utf->size; i++) {
1356                 utf *u = hashtable_utf->ptr[i];
1357
1358                 if (u) {
1359                         printf("SLOT %d: ", (int) i);
1360
1361                         while (u) {
1362                                 printf("'");
1363                                 utf_display_printable_ascii(u);
1364                                 printf("' ");
1365                                 u = u->hashlink;
1366                         }
1367                         printf("\n");
1368                 }
1369         }
1370
1371         printf("UTF-HASH: %d slots for %d entries\n",
1372                    (int) hashtable_utf->size, (int) hashtable_utf->entries );
1373
1374         if (hashtable_utf->entries == 0)
1375                 return;
1376
1377         printf("chains:\n  chainlength    number of chains    %% of utfstrings\n");
1378
1379         for (i=0;i<CHAIN_LIMIT;i++)
1380                 chain_count[i]=0;
1381
1382         /* count numbers of hashchains according to their length */
1383         for (i=0; i<hashtable_utf->size; i++) {
1384
1385                 utf *u = (utf*) hashtable_utf->ptr[i];
1386                 u4 chain_length = 0;
1387
1388                 /* determine chainlength */
1389                 while (u) {
1390                         u = u->hashlink;
1391                         chain_length++;
1392                 }
1393
1394                 /* update sum of all chainlengths */
1395                 sum_chainlength+=chain_length;
1396
1397                 /* determine the maximum length of the chains */
1398                 if (chain_length>max_chainlength)
1399                         max_chainlength = chain_length;
1400
1401                 /* update number of utf-symbols in chains with length>=CHAIN_LIMIT-1 */
1402                 if (chain_length>=CHAIN_LIMIT) {
1403                         beyond_limit+=chain_length;
1404                         chain_length=CHAIN_LIMIT-1;
1405                 }
1406
1407                 /* update number of hashchains of current length */
1408                 chain_count[chain_length]++;
1409         }
1410
1411         /* display results */
1412         for (i=1;i<CHAIN_LIMIT-1;i++)
1413                 printf("       %2d %17d %18.2f%%\n",i,chain_count[i],(((float) chain_count[i]*i*100)/hashtable_utf->entries));
1414
1415         printf("     >=%2d %17d %18.2f%%\n",CHAIN_LIMIT-1,chain_count[CHAIN_LIMIT-1],((float) beyond_limit*100)/hashtable_utf->entries);
1416
1417
1418         printf("max. chainlength:%5d\n",max_chainlength);
1419
1420         /* avg. chainlength = sum of chainlengths / number of chains */
1421         printf("avg. chainlength:%5.2f\n",(float) sum_chainlength / (hashtable_utf->size-chain_count[0]));
1422 }
1423 #endif /* !defined(NDEBUG) */
1424
1425
1426 /*
1427  * These are local overrides for various environment variables in Emacs.
1428  * Please do not remove this and leave it at the end of the file, where
1429  * Emacs will automagically detect them.
1430  * ---------------------------------------------------------------------
1431  * Local variables:
1432  * mode: c
1433  * indent-tabs-mode: t
1434  * c-basic-offset: 4
1435  * tab-width: 4
1436  * End:
1437  * vim:noexpandtab:sw=4:ts=4:
1438  */