src/vm/utf8.c

   1 /* src/vm/utf8.c - utf8 string functions
   2
   3    Copyright (C) 1996-2005, 2006 R. Grafl, A. Krall, C. Kruegel,
   4    C. Oates, R. Obermaisser, M. Platter, M. Probst, S. Ring,
   5    E. Steiner, C. Thalinger, D. Thuernbeck, P. Tomsich, C. Ullrich,
   6    J. Wenninger, Institut f. Computersprachen - TU Wien
   7
   8    This file is part of CACAO.
   9
  10    This program is free software; you can redistribute it and/or
  11    modify it under the terms of the GNU General Public License as
  12    published by the Free Software Foundation; either version 2, or (at
  13    your option) any later version.
  14
  15    This program is distributed in the hope that it will be useful, but
  16    WITHOUT ANY WARRANTY; without even the implied warranty of
  17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18    General Public License for more details.
  19
  20    You should have received a copy of the GNU General Public License
  21    along with this program; if not, write to the Free Software
  22    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  23    02110-1301, USA.
  24
  25    Contact: cacao@cacaojvm.org
  26
  27    Authors: Reinhard Grafl
  28
  29    Changes: Mark Probst
  30             Andreas Krall
  31             Christian Thalinger
  32                         Edwin Steiner
  33
  34    $Id: utf8.c 5088 2006-07-08 20:16:05Z twisti $
  35
  36 */
  37
  38
  39 #include "config.h"
  40
  41 #include <string.h>
  42 #include <assert.h>
  43
  44 #include "vm/types.h"
  45
  46 #include "mm/memory.h"
  47
  48 #if defined(ENABLE_THREADS)
  49 # include "threads/native/threads.h"
  50 #endif
  51
  52 #include "vm/builtin.h"
  53 #include "vm/exceptions.h"
  54 #include "vm/hashtable.h"
  55 #include "vm/options.h"
  56 #include "vm/statistics.h"
  57 #include "vm/stringlocal.h"
  58 #include "vm/utf8.h"
  59
  60
  61 /* global variables ***********************************************************/
  62
  63 /* hashsize must be power of 2 */
  64
  65 #define HASHTABLE_UTF_SIZE    16384     /* initial size of utf-hash           */
  66
  67 hashtable *hashtable_utf;               /* hashtable for utf8-symbols         */
  68
  69
  70 /* utf-symbols for pointer comparison of frequently used strings **************/
  71
  72 utf *utf_java_lang_Object;
  73
  74 utf *utf_java_lang_Class;
  75 utf *utf_java_lang_ClassLoader;
  76 utf *utf_java_lang_Cloneable;
  77 utf *utf_java_lang_SecurityManager;
  78 utf *utf_java_lang_String;
  79 utf *utf_java_lang_System;
  80 utf *utf_java_lang_ThreadGroup;
  81 utf *utf_java_io_Serializable;
  82
  83 utf *utf_java_lang_Throwable;
  84 utf *utf_java_lang_VMThrowable;
  85 utf *utf_java_lang_Error;
  86 utf *utf_java_lang_AbstractMethodError;
  87 utf *utf_java_lang_LinkageError;
  88 utf *utf_java_lang_NoClassDefFoundError;
  89 utf *utf_java_lang_NoSuchMethodError;
  90 utf *utf_java_lang_OutOfMemoryError;
  91
  92 utf *utf_java_lang_Exception;
  93 utf *utf_java_lang_ClassCastException;
  94 utf *utf_java_lang_ClassNotFoundException;
  95 utf *utf_java_lang_IllegalArgumentException;
  96 utf *utf_java_lang_IllegalMonitorStateException;
  97
  98 utf *utf_java_lang_NullPointerException;
  99
 100 utf* utf_java_lang_Void;
 101 utf* utf_java_lang_Boolean;
 102 utf* utf_java_lang_Byte;
 103 utf* utf_java_lang_Character;
 104 utf* utf_java_lang_Short;
 105 utf* utf_java_lang_Integer;
 106 utf* utf_java_lang_Long;
 107 utf* utf_java_lang_Float;
 108 utf* utf_java_lang_Double;
 109
 110 utf *utf_java_lang_StackTraceElement;
 111 utf *utf_java_lang_reflect_Constructor;
 112 utf *utf_java_lang_reflect_Field;
 113 utf *utf_java_lang_reflect_Method;
 114 utf *utf_java_util_Vector;
 115
 116 utf *utf_InnerClasses;                  /* InnerClasses                       */
 117 utf *utf_ConstantValue;                 /* ConstantValue                      */
 118 utf *utf_Code;                          /* Code                               */
 119 utf *utf_Exceptions;                    /* Exceptions                         */
 120 utf *utf_LineNumberTable;               /* LineNumberTable                    */
 121 utf *utf_SourceFile;                    /* SourceFile                         */
 122
 123 utf *utf_init;                          /* <init>                             */
 124 utf *utf_clinit;                        /* <clinit>                           */
 125 utf *utf_clone;                         /* clone                              */
 126 utf *utf_finalize;                      /* finalize                           */
 127 utf *utf_run;                           /* run                                */
 128
 129 utf *utf_add;                           /* add                                */
 130 utf *utf_remove;                        /* remove                             */
 131 utf *utf_put;                           /* put                                */
 132 utf *utf_get;                           /* get                                */
 133 utf *utf_value;                         /* value                              */
 134
 135 utf *utf_fillInStackTrace;
 136 utf *utf_getSystemClassLoader;
 137 utf *utf_loadClass;
 138 utf *utf_printStackTrace;
 139
 140 utf *utf_Z;                             /* Z                                  */
 141 utf *utf_B;                             /* B                                  */
 142 utf *utf_C;                             /* C                                  */
 143 utf *utf_S;                             /* S                                  */
 144 utf *utf_I;                             /* I                                  */
 145 utf *utf_J;                             /* J                                  */
 146 utf *utf_F;                             /* F                                  */
 147 utf *utf_D;                             /* D                                  */
 148
 149 utf *utf_void__void;                    /* ()V                                */
 150 utf *utf_boolean__void;                 /* (Z)V                               */
 151 utf *utf_byte__void;                    /* (B)V                               */
 152 utf *utf_char__void;                    /* (C)V                               */
 153 utf *utf_short__void;                   /* (S)V                               */
 154 utf *utf_int__void;                     /* (I)V                               */
 155 utf *utf_long__void;                    /* (J)V                               */
 156 utf *utf_float__void;                   /* (F)V                               */
 157 utf *utf_double__void;                  /* (D)V                               */
 158
 159 utf *utf_void__java_lang_ClassLoader;   /* ()Ljava/lang/ClassLoader;          */
 160 utf *utf_void__java_lang_Object;        /* ()Ljava/lang/Object;               */
 161 utf *utf_void__java_lang_Throwable;     /* ()Ljava/lang/Throwable;            */
 162 utf *utf_java_lang_Object__java_lang_Object;
 163 utf *utf_java_lang_String__void;        /* (Ljava/lang/String;)V              */
 164 utf *utf_java_lang_String__java_lang_Class;
 165 utf *utf_java_lang_Throwable__void;     /* (Ljava/lang/Throwable;)V           */
 166
 167 utf *utf_not_named_yet;                 /* special name for unnamed classes   */
 168 utf *utf_null;
 169 utf *array_packagename;
 170
 171
 172 /* utf_init ********************************************************************
 173
 174    Initializes the utf8 subsystem.
 175
 176 *******************************************************************************/
 177
 178 bool utf8_init(void)
 179 {
 180         /* create utf8 hashtable */
 181
 182         hashtable_utf = NEW(hashtable);
 183
 184         hashtable_create(hashtable_utf, HASHTABLE_UTF_SIZE);
 185
 186 #if defined(ENABLE_STATISTICS)
 187         if (opt_stat)
 188                 count_utf_len += sizeof(utf*) * hashtable_utf->size;
 189 #endif
 190
 191         /* create utf-symbols for pointer comparison of frequently used strings */
 192
 193         utf_java_lang_Object           = utf_new_char("java/lang/Object");
 194
 195         utf_java_lang_Class            = utf_new_char("java/lang/Class");
 196         utf_java_lang_ClassLoader      = utf_new_char("java/lang/ClassLoader");
 197         utf_java_lang_Cloneable        = utf_new_char("java/lang/Cloneable");
 198         utf_java_lang_SecurityManager  = utf_new_char("java/lang/SecurityManager");
 199         utf_java_lang_String           = utf_new_char("java/lang/String");
 200         utf_java_lang_System           = utf_new_char("java/lang/System");
 201         utf_java_lang_ThreadGroup      = utf_new_char("java/lang/ThreadGroup");
 202         utf_java_io_Serializable       = utf_new_char("java/io/Serializable");
 203
 204         utf_java_lang_Throwable        = utf_new_char(string_java_lang_Throwable);
 205         utf_java_lang_VMThrowable      = utf_new_char(string_java_lang_VMThrowable);
 206         utf_java_lang_Error            = utf_new_char(string_java_lang_Error);
 207
 208         utf_java_lang_AbstractMethodError =
 209                 utf_new_char(string_java_lang_AbstractMethodError);
 210
 211         utf_java_lang_LinkageError =
 212                 utf_new_char(string_java_lang_LinkageError);
 213
 214         utf_java_lang_NoClassDefFoundError =
 215                 utf_new_char(string_java_lang_NoClassDefFoundError);
 216
 217         utf_java_lang_NoSuchMethodError =
 218                 utf_new_char(string_java_lang_NoSuchMethodError);
 219
 220         utf_java_lang_OutOfMemoryError =
 221                 utf_new_char(string_java_lang_OutOfMemoryError);
 222
 223         utf_java_lang_Exception        = utf_new_char(string_java_lang_Exception);
 224
 225         utf_java_lang_ClassCastException =
 226                 utf_new_char(string_java_lang_ClassCastException);
 227
 228         utf_java_lang_ClassNotFoundException =
 229                 utf_new_char(string_java_lang_ClassNotFoundException);
 230
 231         utf_java_lang_IllegalArgumentException =
 232                 utf_new_char(string_java_lang_IllegalArgumentException);
 233
 234         utf_java_lang_IllegalMonitorStateException =
 235                 utf_new_char(string_java_lang_IllegalMonitorStateException);
 236
 237         utf_java_lang_NullPointerException =
 238                 utf_new_char(string_java_lang_NullPointerException);
 239
 240         utf_java_lang_Void             = utf_new_char("java/lang/Void");
 241         utf_java_lang_Boolean          = utf_new_char("java/lang/Boolean");
 242         utf_java_lang_Byte             = utf_new_char("java/lang/Byte");
 243         utf_java_lang_Character        = utf_new_char("java/lang/Character");
 244         utf_java_lang_Short            = utf_new_char("java/lang/Short");
 245         utf_java_lang_Integer          = utf_new_char("java/lang/Integer");
 246         utf_java_lang_Long             = utf_new_char("java/lang/Long");
 247         utf_java_lang_Float            = utf_new_char("java/lang/Float");
 248         utf_java_lang_Double           = utf_new_char("java/lang/Double");
 249
 250         utf_java_lang_StackTraceElement =
 251                 utf_new_char("java/lang/StackTraceElement");
 252
 253         utf_java_lang_reflect_Constructor =
 254                 utf_new_char("java/lang/reflect/Constructor");
 255
 256         utf_java_lang_reflect_Field    = utf_new_char("java/lang/reflect/Field");
 257         utf_java_lang_reflect_Method   = utf_new_char("java/lang/reflect/Method");
 258         utf_java_util_Vector           = utf_new_char("java/util/Vector");
 259
 260         utf_InnerClasses               = utf_new_char("InnerClasses");
 261         utf_ConstantValue              = utf_new_char("ConstantValue");
 262         utf_Code                       = utf_new_char("Code");
 263         utf_Exceptions                 = utf_new_char("Exceptions");
 264         utf_LineNumberTable            = utf_new_char("LineNumberTable");
 265         utf_SourceFile                 = utf_new_char("SourceFile");
 266
 267         utf_init                           = utf_new_char("<init>");
 268         utf_clinit                         = utf_new_char("<clinit>");
 269         utf_clone                      = utf_new_char("clone");
 270         utf_finalize                   = utf_new_char("finalize");
 271         utf_run                        = utf_new_char("run");
 272
 273         utf_add                        = utf_new_char("add");
 274         utf_remove                     = utf_new_char("remove");
 275         utf_put                        = utf_new_char("put");
 276         utf_get                        = utf_new_char("get");
 277         utf_value                      = utf_new_char("value");
 278
 279         utf_printStackTrace            = utf_new_char("printStackTrace");
 280         utf_fillInStackTrace           = utf_new_char("fillInStackTrace");
 281         utf_loadClass                  = utf_new_char("loadClass");
 282         utf_getSystemClassLoader       = utf_new_char("getSystemClassLoader");
 283
 284         utf_Z                          = utf_new_char("Z");
 285         utf_B                          = utf_new_char("B");
 286         utf_C                          = utf_new_char("C");
 287         utf_S                          = utf_new_char("S");
 288         utf_I                          = utf_new_char("I");
 289         utf_J                          = utf_new_char("J");
 290         utf_F                          = utf_new_char("F");
 291         utf_D                          = utf_new_char("D");
 292
 293         utf_void__void                 = utf_new_char("()V");
 294         utf_boolean__void              = utf_new_char("(Z)V");
 295         utf_byte__void                 = utf_new_char("(B)V");
 296         utf_char__void                 = utf_new_char("(C)V");
 297         utf_short__void                = utf_new_char("(S)V");
 298         utf_int__void                  = utf_new_char("(I)V");
 299         utf_long__void                 = utf_new_char("(J)V");
 300         utf_float__void                = utf_new_char("(F)V");
 301         utf_double__void               = utf_new_char("(D)V");
 302         utf_void__java_lang_Object     = utf_new_char("()Ljava/lang/Object;");
 303         utf_void__java_lang_Throwable  = utf_new_char("()Ljava/lang/Throwable;");
 304
 305         utf_void__java_lang_ClassLoader =
 306                 utf_new_char("()Ljava/lang/ClassLoader;");
 307
 308         utf_java_lang_Object__java_lang_Object =
 309                 utf_new_char("(Ljava/lang/Object;)Ljava/lang/Object;");
 310
 311         utf_java_lang_String__void     = utf_new_char("(Ljava/lang/String;)V");
 312
 313         utf_java_lang_String__java_lang_Class =
 314                 utf_new_char("(Ljava/lang/String;)Ljava/lang/Class;");
 315
 316         utf_java_lang_Throwable__void  = utf_new_char("(Ljava/lang/Throwable;)V");
 317
 318         utf_null                       = utf_new_char("null");
 319         utf_not_named_yet              = utf_new_char("\t<not_named_yet>");
 320         array_packagename              = utf_new_char("\t<the array package>");
 321
 322         /* everything's ok */
 323
 324         return true;
 325 }
 326
 327
 328 /* utf_hashkey *****************************************************************
 329
 330    The hashkey is computed from the utf-text by using up to 8
 331    characters.  For utf-symbols longer than 15 characters 3 characters
 332    are taken from the beginning and the end, 2 characters are taken
 333    from the middle.
 334
 335 *******************************************************************************/
 336
 337 #define nbs(val) ((u4) *(++text) << val) /* get next byte, left shift by val  */
 338 #define fbs(val) ((u4) *(  text) << val) /* get first byte, left shift by val */
 339
 340 u4 utf_hashkey(const char *text, u4 length)
 341 {
 342         const char *start_pos = text;       /* pointer to utf text                */
 343         u4 a;
 344
 345         switch (length) {
 346         case 0: /* empty string */
 347                 return 0;
 348
 349         case 1: return fbs(0);
 350         case 2: return fbs(0) ^ nbs(3);
 351         case 3: return fbs(0) ^ nbs(3) ^ nbs(5);
 352         case 4: return fbs(0) ^ nbs(2) ^ nbs(4) ^ nbs(6);
 353         case 5: return fbs(0) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(6);
 354         case 6: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(5) ^ nbs(6);
 355         case 7: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(5) ^ nbs(6);
 356         case 8: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(5) ^ nbs(6) ^ nbs(7);
 357
 358         case 9:
 359                 a = fbs(0);
 360                 a ^= nbs(1);
 361                 a ^= nbs(2);
 362                 text++;
 363                 return a ^ nbs(4) ^ nbs(5) ^ nbs(6) ^ nbs(7) ^ nbs(8);
 364
 365         case 10:
 366                 a = fbs(0);
 367                 text++;
 368                 a ^= nbs(2);
 369                 a ^= nbs(3);
 370                 a ^= nbs(4);
 371                 text++;
 372                 return a ^ nbs(6) ^ nbs(7) ^ nbs(8) ^ nbs(9);
 373
 374         case 11:
 375                 a = fbs(0);
 376                 text++;
 377                 a ^= nbs(2);
 378                 a ^= nbs(3);
 379                 a ^= nbs(4);
 380                 text++;
 381                 return a ^ nbs(6) ^ nbs(7) ^ nbs(8) ^ nbs(9) ^ nbs(10);
 382
 383         case 12:
 384                 a = fbs(0);
 385                 text += 2;
 386                 a ^= nbs(2);
 387                 a ^= nbs(3);
 388                 text++;
 389                 a ^= nbs(5);
 390                 a ^= nbs(6);
 391                 a ^= nbs(7);
 392                 text++;
 393                 return a ^ nbs(9) ^ nbs(10);
 394
 395         case 13:
 396                 a = fbs(0);
 397                 a ^= nbs(1);
 398                 text++;
 399                 a ^= nbs(3);
 400                 a ^= nbs(4);
 401                 text += 2;
 402                 a ^= nbs(7);
 403                 a ^= nbs(8);
 404                 text += 2;
 405                 return a ^ nbs(9) ^ nbs(10);
 406
 407         case 14:
 408                 a = fbs(0);
 409                 text += 2;
 410                 a ^= nbs(3);
 411                 a ^= nbs(4);
 412                 text += 2;
 413                 a ^= nbs(7);
 414                 a ^= nbs(8);
 415                 text += 2;
 416                 return a ^ nbs(9) ^ nbs(10) ^ nbs(11);
 417
 418         case 15:
 419                 a = fbs(0);
 420                 text += 2;
 421                 a ^= nbs(3);
 422                 a ^= nbs(4);
 423                 text += 2;
 424                 a ^= nbs(7);
 425                 a ^= nbs(8);
 426                 text += 2;
 427                 return a ^ nbs(9) ^ nbs(10) ^ nbs(11);
 428
 429         default:  /* 3 characters from beginning */
 430                 a = fbs(0);
 431                 text += 2;
 432                 a ^= nbs(3);
 433                 a ^= nbs(4);
 434
 435                 /* 2 characters from middle */
 436                 text = start_pos + (length / 2);
 437                 a ^= fbs(5);
 438                 text += 2;
 439                 a ^= nbs(6);
 440
 441                 /* 3 characters from end */
 442                 text = start_pos + length - 4;
 443
 444                 a ^= fbs(7);
 445                 text++;
 446
 447                 return a ^ nbs(10) ^ nbs(11);
 448     }
 449 }
 450
 451 /* utf_full_hashkey ************************************************************
 452
 453    This function computes a hash value using all bytes in the string.
 454
 455    The algorithm is the "One-at-a-time" algorithm as published
 456    by Bob Jenkins on http://burtleburtle.net/bob/hash/doobs.html.
 457
 458 *******************************************************************************/
 459
 460 u4 utf_full_hashkey(const char *text, u4 length)
 461 {
 462         register const unsigned char *p = (const unsigned char *) text;
 463         register u4 hash;
 464         register u4 i;
 465
 466         hash = 0;
 467         for (i=length; i--;)
 468         {
 469             hash += *p++;
 470             hash += (hash << 10);
 471             hash ^= (hash >> 6);
 472         }
 473         hash += (hash << 3);
 474         hash ^= (hash >> 11);
 475         hash += (hash << 15);
 476
 477         return hash;
 478 }
 479
 480 /* unicode_hashkey *************************************************************
 481
 482    Compute the hashkey of a unicode string.
 483
 484 *******************************************************************************/
 485
 486 u4 unicode_hashkey(u2 *text, u2 len)
 487 {
 488         return utf_hashkey((char *) text, len);
 489 }
 490
 491
 492 /* utf_new *********************************************************************
 493
 494    Creates a new utf-symbol, the text of the symbol is passed as a
 495    u1-array. The function searches the utf-hashtable for a utf-symbol
 496    with this text. On success the element returned, otherwise a new
 497    hashtable element is created.
 498
 499    If the number of entries in the hashtable exceeds twice the size of
 500    the hashtable slots a reorganization of the hashtable is done and
 501    the utf symbols are copied to a new hashtable with doubled size.
 502
 503 *******************************************************************************/
 504
 505 utf *utf_new(const char *text, u2 length)
 506 {
 507         u4 key;                             /* hashkey computed from utf-text     */
 508         u4 slot;                            /* slot in hashtable                  */
 509         utf *u;                             /* hashtable element                  */
 510         u2 i;
 511
 512 #if defined(ENABLE_THREADS)
 513         builtin_monitorenter(hashtable_utf->header);
 514 #endif
 515
 516 #if defined(ENABLE_STATISTICS)
 517         if (opt_stat)
 518                 count_utf_new++;
 519 #endif
 520
 521         key  = utf_hashkey(text, length);
 522         slot = key & (hashtable_utf->size - 1);
 523         u    = hashtable_utf->ptr[slot];
 524
 525         /* search external hash chain for utf-symbol */
 526
 527         while (u) {
 528                 if (u->blength == length) {
 529                         /* compare text of hashtable elements */
 530
 531                         for (i = 0; i < length; i++)
 532                                 if (text[i] != u->text[i])
 533                                         goto nomatch;
 534
 535 #if defined(ENABLE_STATISTICS)
 536                         if (opt_stat)
 537                                 count_utf_new_found++;
 538 #endif
 539
 540                         /* symbol found in hashtable */
 541
 542 #if defined(ENABLE_THREADS)
 543                         builtin_monitorexit(hashtable_utf->header);
 544 #endif
 545
 546                         return u;
 547                 }
 548
 549         nomatch:
 550                 u = u->hashlink; /* next element in external chain */
 551         }
 552
 553 #if defined(ENABLE_STATISTICS)
 554         if (opt_stat)
 555                 count_utf_len += sizeof(utf) + length + 1;
 556 #endif
 557
 558         /* location in hashtable found, create new utf element */
 559         u = NEW(utf);
 560         u->blength  = length;               /* length in bytes of utfstring       */
 561         u->hashlink = hashtable_utf->ptr[slot]; /* link in external hashchain     */
 562         u->text     = mem_alloc(length + 1);/* allocate memory for utf-text       */
 563
 564         memcpy(u->text, text, length);      /* copy utf-text                      */
 565         u->text[length] = '\0';
 566
 567         hashtable_utf->ptr[slot] = u;       /* insert symbol into table           */
 568         hashtable_utf->entries++;           /* update number of entries           */
 569
 570         if (hashtable_utf->entries > (hashtable_utf->size * 2)) {
 571
 572         /* reorganization of hashtable, average length of the external
 573            chains is approx. 2 */
 574
 575                 hashtable *newhash;                              /* the new hashtable */
 576                 u4         i;
 577                 utf       *u;
 578                 utf       *nextu;
 579                 u4         slot;
 580
 581                 /* create new hashtable, double the size */
 582
 583                 newhash = hashtable_resize(hashtable_utf, hashtable_utf->size * 2);
 584
 585 #if defined(ENABLE_STATISTICS)
 586                 if (opt_stat)
 587                         count_utf_len += sizeof(utf*) * hashtable_utf->size;
 588 #endif
 589
 590                 /* transfer elements to new hashtable */
 591
 592                 for (i = 0; i < hashtable_utf->size; i++) {
 593                         u = hashtable_utf->ptr[i];
 594
 595                         while (u) {
 596                                 nextu = u->hashlink;
 597                                 slot  = utf_hashkey(u->text, u->blength) & (newhash->size - 1);
 598
 599                                 u->hashlink = (utf *) newhash->ptr[slot];
 600                                 newhash->ptr[slot] = u;
 601
 602                                 /* follow link in external hash chain */
 603
 604                                 u = nextu;
 605                         }
 606                 }
 607
 608                 /* dispose old table */
 609
 610                 hashtable_free(hashtable_utf);
 611
 612                 hashtable_utf = newhash;
 613         }
 614
 615 #if defined(ENABLE_THREADS)
 616         builtin_monitorexit(hashtable_utf->header);
 617 #endif
 618
 619         return u;
 620 }
 621
 622
 623 /* utf_new_u2 ******************************************************************
 624
 625    Make utf symbol from u2 array, if isclassname is true '.' is
 626    replaced by '/'.
 627
 628 *******************************************************************************/
 629
 630 utf *utf_new_u2(u2 *unicode_pos, u4 unicode_length, bool isclassname)
 631 {
 632         char *buffer;                   /* memory buffer for  unicode characters  */
 633         char *pos;                      /* pointer to current position in buffer  */
 634         u4 left;                        /* unicode characters left                */
 635         u4 buflength;                   /* utf length in bytes of the u2 array    */
 636         utf *result;                    /* resulting utf-string                   */
 637         int i;
 638
 639         /* determine utf length in bytes and allocate memory */
 640
 641         buflength = u2_utflength(unicode_pos, unicode_length);
 642         buffer    = MNEW(char, buflength);
 643
 644         left = buflength;
 645         pos  = buffer;
 646
 647         for (i = 0; i++ < unicode_length; unicode_pos++) {
 648                 /* next unicode character */
 649                 u2 c = *unicode_pos;
 650
 651                 if ((c != 0) && (c < 0x80)) {
 652                         /* 1 character */
 653                         left--;
 654                 if ((int) left < 0) break;
 655                         /* convert classname */
 656                         if (isclassname && c == '.')
 657                                 *pos++ = '/';
 658                         else
 659                                 *pos++ = (char) c;
 660
 661                 } else if (c < 0x800) {
 662                         /* 2 characters */
 663                 unsigned char high = c >> 6;
 664                 unsigned char low  = c & 0x3F;
 665                         left = left - 2;
 666                 if ((int) left < 0) break;
 667                 *pos++ = high | 0xC0;
 668                 *pos++ = low  | 0x80;
 669
 670                 } else {
 671                 /* 3 characters */
 672                 char low  = c & 0x3f;
 673                 char mid  = (c >> 6) & 0x3F;
 674                 char high = c >> 12;
 675                         left = left - 3;
 676                 if ((int) left < 0) break;
 677                 *pos++ = high | 0xE0;
 678                 *pos++ = mid  | 0x80;
 679                 *pos++ = low  | 0x80;
 680                 }
 681         }
 682
 683         /* insert utf-string into symbol-table */
 684         result = utf_new(buffer,buflength);
 685
 686         MFREE(buffer, char, buflength);
 687
 688         return result;
 689 }
 690
 691
 692 /* utf_new_char ****************************************************************
 693
 694    Creates a new utf symbol, the text for this symbol is passed as a
 695    c-string ( = char* ).
 696
 697 *******************************************************************************/
 698
 699 utf *utf_new_char(const char *text)
 700 {
 701         return utf_new(text, strlen(text));
 702 }
 703
 704
 705 /* utf_new_char_classname ******************************************************
 706
 707    Creates a new utf symbol, the text for this symbol is passed as a
 708    c-string ( = char* ) "." characters are going to be replaced by
 709    "/". Since the above function is used often, this is a separte
 710    function, instead of an if.
 711
 712 *******************************************************************************/
 713
 714 utf *utf_new_char_classname(const char *text)
 715 {
 716         if (strchr(text, '.')) {
 717                 char *txt = strdup(text);
 718                 char *end = txt + strlen(txt);
 719                 char *c;
 720                 utf *tmpRes;
 721
 722                 for (c = txt; c < end; c++)
 723                         if (*c == '.') *c = '/';
 724
 725                 tmpRes = utf_new(txt, strlen(txt));
 726                 FREE(txt, 0);
 727
 728                 return tmpRes;
 729
 730         } else
 731                 return utf_new(text, strlen(text));
 732 }
 733
 734
 735 /* utf_nextu2 ******************************************************************
 736
 737    Read the next unicode character from the utf string and increment
 738    the utf-string pointer accordingly.
 739
 740 *******************************************************************************/
 741
 742 u2 utf_nextu2(char **utf_ptr)
 743 {
 744     /* uncompressed unicode character */
 745     u2 unicode_char = 0;
 746     /* current position in utf text */
 747     unsigned char *utf = (unsigned char *) (*utf_ptr);
 748     /* bytes representing the unicode character */
 749     unsigned char ch1, ch2, ch3;
 750     /* number of bytes used to represent the unicode character */
 751     int len = 0;
 752
 753     switch ((ch1 = utf[0]) >> 4) {
 754         default: /* 1 byte */
 755                 (*utf_ptr)++;
 756                 return (u2) ch1;
 757         case 0xC:
 758         case 0xD: /* 2 bytes */
 759                 if (((ch2 = utf[1]) & 0xC0) == 0x80) {
 760                         unsigned char high = ch1 & 0x1F;
 761                         unsigned char low  = ch2 & 0x3F;
 762                         unicode_char = (high << 6) + low;
 763                         len = 2;
 764                 }
 765                 break;
 766
 767         case 0xE: /* 2 or 3 bytes */
 768                 if (((ch2 = utf[1]) & 0xC0) == 0x80) {
 769                         if (((ch3 = utf[2]) & 0xC0) == 0x80) {
 770                                 unsigned char low  = ch3 & 0x3f;
 771                                 unsigned char mid  = ch2 & 0x3f;
 772                                 unsigned char high = ch1 & 0x0f;
 773                                 unicode_char = (((high << 6) + mid) << 6) + low;
 774                                 len = 3;
 775                         } else
 776                                 len = 2;
 777                 }
 778                 break;
 779     }
 780
 781     /* update position in utf-text */
 782     *utf_ptr = (char *) (utf + len);
 783
 784     return unicode_char;
 785 }
 786
 787
 788 /* utf_bytes *******************************************************************
 789
 790    Determine number of bytes (aka. octets) in the utf string.
 791
 792    IN:
 793       u............utf string
 794
 795    OUT:
 796       The number of octets of this utf string.
 797           There is _no_ terminating zero included in this count.
 798
 799 *******************************************************************************/
 800
 801 u4 utf_bytes(utf *u)
 802 {
 803         return u->blength;
 804 }
 805
 806 /* utf_get_number_of_u2s_for_buffer ********************************************
 807
 808    Determine number of UTF-16 u2s in the given UTF-8 buffer
 809
 810    CAUTION: Use this function *only* when you want to convert an UTF-8 buffer
 811    to an array of u2s (UTF-16) and want to know how many of them you will get.
 812    All other uses of this function are probably wrong.
 813
 814    IN:
 815       buffer........points to first char in buffer
 816           blength.......number of _bytes_ in the buffer
 817
 818    OUT:
 819       the number of u2s needed to hold this string in UTF-16 encoding.
 820           There is _no_ terminating zero included in this count.
 821
 822    NOTE: Unlike utf_get_number_of_u2s, this function never throws an
 823    exception.
 824
 825 *******************************************************************************/
 826
 827 u4 utf_get_number_of_u2s_for_buffer(const char *buffer, u4 blength)
 828 {
 829         const char *endpos;                 /* points behind utf string           */
 830         const char *utf_ptr;                /* current position in utf text       */
 831         u4 len = 0;                         /* number of unicode characters       */
 832
 833         utf_ptr = buffer;
 834         endpos = utf_ptr + blength;
 835
 836         while (utf_ptr < endpos) {
 837                 len++;
 838                 /* next unicode character */
 839                 utf_nextu2((char **)&utf_ptr);
 840         }
 841
 842         assert(utf_ptr == endpos);
 843
 844         return len;
 845 }
 846
 847
 848 /* utf_get_number_of_u2s *******************************************************
 849
 850    Determine number of UTF-16 u2s in the utf string.
 851
 852    CAUTION: Use this function *only* when you want to convert a utf string
 853    to an array of u2s and want to know how many of them you will get.
 854    All other uses of this function are probably wrong.
 855
 856    IN:
 857       u............utf string
 858
 859    OUT:
 860       the number of u2s needed to hold this string in UTF-16 encoding.
 861           There is _no_ terminating zero included in this count.
 862           XXX 0 if a NullPointerException has been thrown (see below)
 863
 864 *******************************************************************************/
 865
 866 u4 utf_get_number_of_u2s(utf *u)
 867 {
 868         char *endpos;                       /* points behind utf string           */
 869         char *utf_ptr;                      /* current position in utf text       */
 870         u4 len = 0;                         /* number of unicode characters       */
 871
 872         /* XXX this is probably not checked by most callers! Review this after */
 873         /* the invalid uses of this function have been eliminated */
 874         if (!u) {
 875                 exceptions_throw_nullpointerexception();
 876                 return 0;
 877         }
 878
 879         endpos = UTF_END(u);
 880         utf_ptr = u->text;
 881
 882         while (utf_ptr < endpos) {
 883                 len++;
 884                 /* next unicode character */
 885                 utf_nextu2(&utf_ptr);
 886         }
 887
 888         if (utf_ptr != endpos)
 889                 /* string ended abruptly */
 890                 throw_cacao_exception_exit(string_java_lang_InternalError,
 891                                                                    "Illegal utf8 string");
 892
 893         return len;
 894 }
 895
 896
 897 /* u2_utflength ****************************************************************
 898
 899    Returns the utf length in bytes of a u2 array.
 900
 901 *******************************************************************************/
 902
 903 u4 u2_utflength(u2 *text, u4 u2_length)
 904 {
 905         u4 result_len = 0;                  /* utf length in bytes                */
 906         u2 ch;                              /* current unicode character          */
 907         u4 len;
 908
 909         for (len = 0; len < u2_length; len++) {
 910                 /* next unicode character */
 911                 ch = *text++;
 912
 913                 /* determine bytes required to store unicode character as utf */
 914                 if (ch && (ch < 0x80))
 915                         result_len++;
 916                 else if (ch < 0x800)
 917                         result_len += 2;
 918                 else
 919                         result_len += 3;
 920         }
 921
 922     return result_len;
 923 }
 924
 925
 926 /* utf_copy ********************************************************************
 927
 928    Copy the given utf string byte-for-byte to a buffer.
 929
 930    IN:
 931       buffer.......the buffer
 932           u............the utf string
 933
 934 *******************************************************************************/
 935
 936 void utf_copy(char *buffer, utf *u)
 937 {
 938         /* our utf strings are zero-terminated (done by utf_new) */
 939         MCOPY(buffer, u->text, char, u->blength + 1);
 940 }
 941
 942
 943 /* utf_cat *********************************************************************
 944
 945    Append the given utf string byte-for-byte to a buffer.
 946
 947    IN:
 948       buffer.......the buffer
 949           u............the utf string
 950
 951 *******************************************************************************/
 952
 953 void utf_cat(char *buffer, utf *u)
 954 {
 955         /* our utf strings are zero-terminated (done by utf_new) */
 956         MCOPY(buffer + strlen(buffer), u->text, char, u->blength + 1);
 957 }
 958
 959
 960 /* utf_copy_classname **********************************************************
 961
 962    Copy the given utf classname byte-for-byte to a buffer.
 963    '/' is replaced by '.'
 964
 965    IN:
 966       buffer.......the buffer
 967           u............the utf string
 968
 969 *******************************************************************************/
 970
 971 void utf_copy_classname(char *buffer, utf *u)
 972 {
 973         char *bufptr;
 974         char *srcptr;
 975         char *endptr;
 976         char ch;
 977
 978         bufptr = buffer;
 979         srcptr = u->text;
 980         endptr = UTF_END(u) + 1; /* utfs are zero-terminared by utf_new */
 981
 982         while (srcptr != endptr) {
 983                 ch = *srcptr++;
 984                 if (ch == '/')
 985                         ch = '.';
 986                 *bufptr++ = ch;
 987         }
 988 }
 989
 990
 991 /* utf_cat *********************************************************************
 992
 993    Append the given utf classname byte-for-byte to a buffer.
 994    '/' is replaced by '.'
 995
 996    IN:
 997       buffer.......the buffer
 998           u............the utf string
 999
1000 *******************************************************************************/
1001
1002 void utf_cat_classname(char *buffer, utf *u)
1003 {
1004         utf_copy_classname(buffer + strlen(buffer), u);
1005 }
1006
1007 /* utf_display_printable_ascii *************************************************
1008
1009    Write utf symbol to stdout (for debugging purposes).
1010    Non-printable and non-ASCII characters are printed as '?'.
1011
1012 *******************************************************************************/
1013
1014 void utf_display_printable_ascii(utf *u)
1015 {
1016         char *endpos;                       /* points behind utf string           */
1017         char *utf_ptr;                      /* current position in utf text       */
1018
1019         if (u == NULL) {
1020                 printf("NULL");
1021                 fflush(stdout);
1022                 return;
1023         }
1024
1025         endpos = UTF_END(u);
1026         utf_ptr = u->text;
1027
1028         while (utf_ptr < endpos) {
1029                 /* read next unicode character */
1030
1031                 u2 c = utf_nextu2(&utf_ptr);
1032
1033                 if ((c >= 32) && (c <= 127))
1034                         printf("%c", c);
1035                 else
1036                         printf("?");
1037         }
1038
1039         fflush(stdout);
1040 }
1041
1042
1043 /* utf_display_printable_ascii_classname ***************************************
1044
1045    Write utf symbol to stdout with `/' converted to `.' (for debugging
1046    purposes).
1047    Non-printable and non-ASCII characters are printed as '?'.
1048
1049 *******************************************************************************/
1050
1051 void utf_display_printable_ascii_classname(utf *u)
1052 {
1053         char *endpos;                       /* points behind utf string           */
1054         char *utf_ptr;                      /* current position in utf text       */
1055
1056         if (u == NULL) {
1057                 printf("NULL");
1058                 fflush(stdout);
1059                 return;
1060         }
1061
1062         endpos = UTF_END(u);
1063         utf_ptr = u->text;
1064
1065         while (utf_ptr < endpos) {
1066                 /* read next unicode character */
1067
1068                 u2 c = utf_nextu2(&utf_ptr);
1069
1070                 if (c == '/')
1071                         c = '.';
1072
1073                 if ((c >= 32) && (c <= 127))
1074                         printf("%c", c);
1075                 else
1076                         printf("?");
1077         }
1078
1079         fflush(stdout);
1080 }
1081
1082
1083 /* utf_sprint_convert_to_latin1 ************************************************
1084
1085    Write utf symbol into c-string (for debugging purposes).
1086    Characters are converted to 8-bit Latin-1, non-Latin-1 characters yield
1087    invalid results.
1088
1089 *******************************************************************************/
1090
1091 void utf_sprint_convert_to_latin1(char *buffer, utf *u)
1092 {
1093         char *endpos;                       /* points behind utf string           */
1094         char *utf_ptr;                      /* current position in utf text       */
1095         u2 pos = 0;                         /* position in c-string               */
1096
1097         if (!u) {
1098                 strcpy(buffer, "NULL");
1099                 return;
1100         }
1101
1102         endpos = UTF_END(u);
1103         utf_ptr = u->text;
1104
1105         while (utf_ptr < endpos)
1106                 /* copy next unicode character */
1107                 buffer[pos++] = utf_nextu2(&utf_ptr);
1108
1109         /* terminate string */
1110         buffer[pos] = '\0';
1111 }
1112
1113
1114 /* utf_sprint_convert_to_latin1_classname **************************************
1115
1116    Write utf symbol into c-string with `/' converted to `.' (for debugging
1117    purposes).
1118    Characters are converted to 8-bit Latin-1, non-Latin-1 characters yield
1119    invalid results.
1120
1121 *******************************************************************************/
1122
1123 void utf_sprint_convert_to_latin1_classname(char *buffer, utf *u)
1124 {
1125         char *endpos;                       /* points behind utf string           */
1126         char *utf_ptr;                      /* current position in utf text       */
1127         u2 pos = 0;                         /* position in c-string               */
1128
1129         if (!u) {
1130                 strcpy(buffer, "NULL");
1131                 return;
1132         }
1133
1134         endpos = UTF_END(u);
1135         utf_ptr = u->text;
1136
1137         while (utf_ptr < endpos) {
1138                 /* copy next unicode character */
1139                 u2 c = utf_nextu2(&utf_ptr);
1140                 if (c == '/') c = '.';
1141                 buffer[pos++] = c;
1142         }
1143
1144         /* terminate string */
1145         buffer[pos] = '\0';
1146 }
1147
1148
1149 /* utf_strcat_convert_to_latin1 ************************************************
1150
1151    Like libc strcat, but uses an utf8 string.
1152    Characters are converted to 8-bit Latin-1, non-Latin-1 characters yield
1153    invalid results.
1154
1155 *******************************************************************************/
1156
1157 void utf_strcat_convert_to_latin1(char *buffer, utf *u)
1158 {
1159         utf_sprint_convert_to_latin1(buffer + strlen(buffer), u);
1160 }
1161
1162
1163 /* utf_strcat_convert_to_latin1_classname **************************************
1164
1165    Like libc strcat, but uses an utf8 string.
1166    Characters are converted to 8-bit Latin-1, non-Latin-1 characters yield
1167    invalid results.
1168
1169 *******************************************************************************/
1170
1171 void utf_strcat_convert_to_latin1_classname(char *buffer, utf *u)
1172 {
1173         utf_sprint_convert_to_latin1_classname(buffer + strlen(buffer), u);
1174 }
1175
1176
1177 /* utf_fprint_printable_ascii **************************************************
1178
1179    Write utf symbol into file.
1180    Non-printable and non-ASCII characters are printed as '?'.
1181
1182 *******************************************************************************/
1183
1184 void utf_fprint_printable_ascii(FILE *file, utf *u)
1185 {
1186         char *endpos;                       /* points behind utf string           */
1187         char *utf_ptr;                      /* current position in utf text       */
1188
1189         if (!u)
1190                 return;
1191
1192         endpos = UTF_END(u);
1193         utf_ptr = u->text;
1194
1195         while (utf_ptr < endpos) {
1196                 /* read next unicode character */
1197                 u2 c = utf_nextu2(&utf_ptr);
1198
1199                 if (c >= 32 && c <= 127) fprintf(file, "%c", c);
1200                 else fprintf(file, "?");
1201         }
1202 }
1203
1204
1205 /* utf_fprint_printable_ascii_classname ****************************************
1206
1207    Write utf symbol into file with `/' converted to `.'.
1208    Non-printable and non-ASCII characters are printed as '?'.
1209
1210 *******************************************************************************/
1211
1212 void utf_fprint_printable_ascii_classname(FILE *file, utf *u)
1213 {
1214         char *endpos;                       /* points behind utf string           */
1215         char *utf_ptr;                      /* current position in utf text       */
1216
1217     if (!u)
1218                 return;
1219
1220         endpos = UTF_END(u);
1221         utf_ptr = u->text;
1222
1223         while (utf_ptr < endpos) {
1224                 /* read next unicode character */
1225                 u2 c = utf_nextu2(&utf_ptr);
1226                 if (c == '/') c = '.';
1227
1228                 if (c >= 32 && c <= 127) fprintf(file, "%c", c);
1229                 else fprintf(file, "?");
1230         }
1231 }
1232
1233
/* is_valid_utf ****************************************************************

   Return true if the given string is a valid UTF-8 string in the sense
   accepted for Java.

   utf_ptr...points to first character
   end_pos...points after last character

   Rejected are:
      - end_pos lying before utf_ptr
      - a 0x00 byte
      - a first byte that is neither ASCII nor starts a 2- or 3-byte
        sequence (stray continuation bytes, longer sequences, 0xfe/0xff)
      - a sequence that is cut off by end_pos
      - a follow-up byte that is not of the form 10xx xxxx
      - a 3-byte encoding of zero (only the 2-byte form is accepted)

   Deliberately _not_ rejected, because Sun Java seems to allow them in
   classfiles: overlong encodings, surrogates (0xd800 - 0xdfff) and the
   codepoints 0xfffe and 0xffff. An empty string is valid.

*******************************************************************************/

bool is_valid_utf(char *utf_ptr, char *end_pos)
{
	unsigned char *cursor;              /* next byte to examine               */
	unsigned char *limit;               /* points behind the last byte        */

	if (end_pos < utf_ptr)
		return false;

	cursor = (unsigned char *) utf_ptr;
	limit  = (unsigned char *) end_pos;

	while (cursor != limit) {
		unsigned char lead = *cursor++;
		unsigned long value;            /* decoded codepoint                  */
		int           trailing;         /* number of follow-up bytes          */
		int           k;

		if (lead == 0x00)
			return false;               /* 0x00 is not allowed */

		if (lead < 0x80)
			continue;                   /* ASCII */

		if ((lead & 0xe0) == 0xc0)
			trailing = 1;               /* 110x xxxx */
		else if ((lead & 0xf0) == 0xe0)
			trailing = 2;               /* 1110 xxxx */
		else
			return false;               /* invalid or beyond the Java limit */

		if ((limit - cursor) < trailing)
			return false;               /* missing bytes */

		/* payload bits of the first byte */
		value = lead & (0x3f >> trailing);

		for (k = 0; k < trailing; k++) {
			unsigned char cont = *cursor++;

			if ((cont & 0xc0) != 0x80)  /* 10xx xxxx */
				return false;

			value = (value << 6) | (cont & 0x3f);
		}

		/* Java special: zero has to be encoded with exactly two bytes */
		if ((value == 0) && (trailing != 1))
			return false;
	}

	return true;
}
1299
1300
/* is_valid_name ***************************************************************

   Return true if the given string may be used as a class/field/method
   name. (Currently this only disallows empty strings, control
   characters and the two-byte encoding of zero, 0xc0 0x80.)

   NOTE: The string is assumed to have passed is_valid_utf!

   utf_ptr...points to first character
   end_pos...points after last character

   The byte behind a 0xc0 is only examined if it lies before end_pos, so
   nothing outside of the given range is ever read, even if the string
   did not pass is_valid_utf and ends with a lone 0xc0.

*******************************************************************************/

bool is_valid_name(char *utf_ptr, char *end_pos)
{
	if (end_pos <= utf_ptr)
		return false;                   /* disallow empty names */

	while (utf_ptr < end_pos) {
		unsigned char c = (unsigned char) *utf_ptr++;

		if (c < 0x20)
			return false;               /* disallow control characters */

		/* disallow zero; utf_ptr already points to the second byte */
		if ((c == 0xc0) && (utf_ptr < end_pos)
			&& ((unsigned char) *utf_ptr == 0x80))
			return false;
	}

	return true;
}
1328
1329 bool is_valid_name_utf(utf *u)
1330 {
1331         return is_valid_name(u->text, UTF_END(u));
1332 }
1333
1334
1335 /* utf_show ********************************************************************
1336
1337    Writes the utf symbols in the utfhash to stdout and displays the
1338    number of external hash chains grouped according to the chainlength
1339    (for debugging purposes).
1340
1341 *******************************************************************************/
1342
1343 #if !defined(NDEBUG)
1344 void utf_show(void)
1345 {
1346
1347 #define CHAIN_LIMIT 20               /* limit for seperated enumeration */
1348
1349         u4 chain_count[CHAIN_LIMIT]; /* numbers of chains */
1350         u4 max_chainlength = 0;      /* maximum length of the chains */
1351         u4 sum_chainlength = 0;      /* sum of the chainlengths */
1352         u4 beyond_limit = 0;         /* number of utf-symbols in chains with length>=CHAIN_LIMIT-1 */
1353         u4 i;
1354
1355         printf("UTF-HASH:\n");
1356
1357         /* show element of utf-hashtable */
1358
1359         for (i = 0; i < hashtable_utf->size; i++) {
1360                 utf *u = hashtable_utf->ptr[i];
1361
1362                 if (u) {
1363                         printf("SLOT %d: ", (int) i);
1364
1365                         while (u) {
1366                                 printf("'");
1367                                 utf_display_printable_ascii(u);
1368                                 printf("' ");
1369                                 u = u->hashlink;
1370                         }
1371                         printf("\n");
1372                 }
1373         }
1374
1375         printf("UTF-HASH: %d slots for %d entries\n",
1376                    (int) hashtable_utf->size, (int) hashtable_utf->entries );
1377
1378         if (hashtable_utf->entries == 0)
1379                 return;
1380
1381         printf("chains:\n  chainlength    number of chains    %% of utfstrings\n");
1382
1383         for (i=0;i<CHAIN_LIMIT;i++)
1384                 chain_count[i]=0;
1385
1386         /* count numbers of hashchains according to their length */
1387         for (i=0; i<hashtable_utf->size; i++) {
1388
1389                 utf *u = (utf*) hashtable_utf->ptr[i];
1390                 u4 chain_length = 0;
1391
1392                 /* determine chainlength */
1393                 while (u) {
1394                         u = u->hashlink;
1395                         chain_length++;
1396                 }
1397
1398                 /* update sum of all chainlengths */
1399                 sum_chainlength+=chain_length;
1400
1401                 /* determine the maximum length of the chains */
1402                 if (chain_length>max_chainlength)
1403                         max_chainlength = chain_length;
1404
1405                 /* update number of utf-symbols in chains with length>=CHAIN_LIMIT-1 */
1406                 if (chain_length>=CHAIN_LIMIT) {
1407                         beyond_limit+=chain_length;
1408                         chain_length=CHAIN_LIMIT-1;
1409                 }
1410
1411                 /* update number of hashchains of current length */
1412                 chain_count[chain_length]++;
1413         }
1414
1415         /* display results */
1416         for (i=1;i<CHAIN_LIMIT-1;i++)
1417                 printf("       %2d %17d %18.2f%%\n",i,chain_count[i],(((float) chain_count[i]*i*100)/hashtable_utf->entries));
1418
1419         printf("     >=%2d %17d %18.2f%%\n",CHAIN_LIMIT-1,chain_count[CHAIN_LIMIT-1],((float) beyond_limit*100)/hashtable_utf->entries);
1420
1421
1422         printf("max. chainlength:%5d\n",max_chainlength);
1423
1424         /* avg. chainlength = sum of chainlengths / number of chains */
1425         printf("avg. chainlength:%5.2f\n",(float) sum_chainlength / (hashtable_utf->size-chain_count[0]));
1426 }
1427 #endif /* !defined(NDEBUG) */
1428
1429
1430 /*
1431  * These are local overrides for various environment variables in Emacs.
1432  * Please do not remove this and leave it at the end of the file, where
1433  * Emacs will automagically detect them.
1434  * ---------------------------------------------------------------------
1435  * Local variables:
1436  * mode: c
1437  * indent-tabs-mode: t
1438  * c-basic-offset: 4
1439  * tab-width: 4
1440  * End:
1441  * vim:noexpandtab:sw=4:ts=4:
1442  */