src/vm/utf8.c

   1 /* src/vm/utf8.c - utf8 string functions
   2
   3    Copyright (C) 1996-2005, 2006 R. Grafl, A. Krall, C. Kruegel,
   4    C. Oates, R. Obermaisser, M. Platter, M. Probst, S. Ring,
   5    E. Steiner, C. Thalinger, D. Thuernbeck, P. Tomsich, C. Ullrich,
   6    J. Wenninger, Institut f. Computersprachen - TU Wien
   7
   8    This file is part of CACAO.
   9
  10    This program is free software; you can redistribute it and/or
  11    modify it under the terms of the GNU General Public License as
  12    published by the Free Software Foundation; either version 2, or (at
  13    your option) any later version.
  14
  15    This program is distributed in the hope that it will be useful, but
  16    WITHOUT ANY WARRANTY; without even the implied warranty of
  17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18    General Public License for more details.
  19
  20    You should have received a copy of the GNU General Public License
  21    along with this program; if not, write to the Free Software
  22    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  23    02110-1301, USA.
  24
  25    Contact: cacao@cacaojvm.org
  26
  27    Authors: Reinhard Grafl
  28
  29    Changes: Mark Probst
  30             Andreas Krall
  31             Christian Thalinger
  32                         Edwin Steiner
  33
  34    $Id: utf8.c 4921 2006-05-15 14:24:36Z twisti $
  35
  36 */
  37
  38
  39 #include "config.h"
  40
  41 #include <string.h>
  42 #include <assert.h>
  43
  44 #include "vm/types.h"
  45
  46 #include "mm/memory.h"
  47
  48 #if defined(ENABLE_THREADS)
  49 # include "threads/native/threads.h"
  50 #endif
  51
  52 #include "vm/builtin.h"
  53 #include "vm/exceptions.h"
  54 #include "vm/hashtable.h"
  55 #include "vm/options.h"
  56 #include "vm/statistics.h"
  57 #include "vm/stringlocal.h"
  58 #include "vm/utf8.h"
  59
  60
  61 /* global variables ***********************************************************/
  62
  63 /* hashsize must be power of 2 */
  64
  65 #define HASHTABLE_UTF_SIZE    16384     /* initial size of utf-hash           */
  66
  67 hashtable *hashtable_utf;               /* hashtable for utf8-symbols         */
  68
  69
  70 /* utf-symbols for pointer comparison of frequently used strings **************/
  71
  72 utf *utf_java_lang_Object;
  73
  74 utf *utf_java_lang_Class;
  75 utf *utf_java_lang_ClassLoader;
  76 utf *utf_java_lang_Cloneable;
  77 utf *utf_java_lang_SecurityManager;
  78 utf *utf_java_lang_String;
  79 utf *utf_java_lang_System;
  80 utf *utf_java_lang_ThreadGroup;
  81 utf *utf_java_io_Serializable;
  82
  83 utf *utf_java_lang_Throwable;
  84 utf *utf_java_lang_VMThrowable;
  85 utf *utf_java_lang_Error;
  86 utf *utf_java_lang_NoClassDefFoundError;
  87 utf *utf_java_lang_LinkageError;
  88 utf *utf_java_lang_NoSuchMethodError;
  89 utf *utf_java_lang_OutOfMemoryError;
  90
  91 utf *utf_java_lang_Exception;
  92 utf *utf_java_lang_ClassNotFoundException;
  93 utf *utf_java_lang_IllegalArgumentException;
  94 utf *utf_java_lang_IllegalMonitorStateException;
  95
  96 utf *utf_java_lang_NullPointerException;
  97
  98 utf* utf_java_lang_Void;
  99 utf* utf_java_lang_Boolean;
 100 utf* utf_java_lang_Byte;
 101 utf* utf_java_lang_Character;
 102 utf* utf_java_lang_Short;
 103 utf* utf_java_lang_Integer;
 104 utf* utf_java_lang_Long;
 105 utf* utf_java_lang_Float;
 106 utf* utf_java_lang_Double;
 107
 108 utf *utf_java_lang_StackTraceElement;
 109 utf *utf_java_lang_reflect_Constructor;
 110 utf *utf_java_lang_reflect_Field;
 111 utf *utf_java_lang_reflect_Method;
 112 utf *utf_java_util_Vector;
 113
 114 utf *utf_InnerClasses;                  /* InnerClasses                       */
 115 utf *utf_ConstantValue;                 /* ConstantValue                      */
 116 utf *utf_Code;                          /* Code                               */
 117 utf *utf_Exceptions;                    /* Exceptions                         */
 118 utf *utf_LineNumberTable;               /* LineNumberTable                    */
 119 utf *utf_SourceFile;                    /* SourceFile                         */
 120
 121 utf *utf_init;                          /* <init>                             */
 122 utf *utf_clinit;                        /* <clinit>                           */
 123 utf *utf_clone;                         /* clone                              */
 124 utf *utf_finalize;                      /* finalize                           */
 125 utf *utf_run;                           /* run                                */
 126
 127 utf *utf_add;                           /* add                                */
 128 utf *utf_remove;                        /* remove                             */
 129 utf *utf_put;                           /* put                                */
 130 utf *utf_get;                           /* get                                */
 131 utf *utf_value;                         /* value                              */
 132
 133 utf *utf_fillInStackTrace;
 134 utf *utf_getSystemClassLoader;
 135 utf *utf_loadClass;
 136 utf *utf_printStackTrace;
 137
 138 utf *utf_Z;                             /* Z                                  */
 139 utf *utf_B;                             /* B                                  */
 140 utf *utf_C;                             /* C                                  */
 141 utf *utf_S;                             /* S                                  */
 142 utf *utf_I;                             /* I                                  */
 143 utf *utf_J;                             /* J                                  */
 144 utf *utf_F;                             /* F                                  */
 145 utf *utf_D;                             /* D                                  */
 146
 147 utf *utf_void__void;                    /* ()V                                */
 148 utf *utf_boolean__void;                 /* (Z)V                               */
 149 utf *utf_byte__void;                    /* (B)V                               */
 150 utf *utf_char__void;                    /* (C)V                               */
 151 utf *utf_short__void;                   /* (S)V                               */
 152 utf *utf_int__void;                     /* (I)V                               */
 153 utf *utf_long__void;                    /* (J)V                               */
 154 utf *utf_float__void;                   /* (F)V                               */
 155 utf *utf_double__void;                  /* (D)V                               */
 156
 157 utf *utf_void__java_lang_ClassLoader;   /* ()Ljava/lang/ClassLoader;          */
 158 utf *utf_void__java_lang_Object;        /* ()Ljava/lang/Object;               */
 159 utf *utf_void__java_lang_Throwable;     /* ()Ljava/lang/Throwable;            */
 160 utf *utf_java_lang_Object__java_lang_Object;
 161 utf *utf_java_lang_String__void;        /* (Ljava/lang/String;)V              */
 162 utf *utf_java_lang_String__java_lang_Class;
 163 utf *utf_java_lang_Throwable__void;     /* (Ljava/lang/Throwable;)V           */
 164
 165 utf *utf_not_named_yet;                 /* special name for unnamed classes   */
 166
 167 utf *array_packagename;
 168
 169
 170 /* utf_init ********************************************************************
 171
 172    Initializes the utf8 subsystem.
 173
 174 *******************************************************************************/
 175
 176 bool utf8_init(void)
 177 {
 178         /* create utf8 hashtable */
 179
 180         hashtable_utf = NEW(hashtable);
 181
 182         hashtable_create(hashtable_utf, HASHTABLE_UTF_SIZE);
 183
 184 #if defined(ENABLE_STATISTICS)
 185         if (opt_stat)
 186                 count_utf_len += sizeof(utf*) * hashtable_utf.size;
 187 #endif
 188
 189         /* create utf-symbols for pointer comparison of frequently used strings */
 190
 191         utf_java_lang_Object           = utf_new_char("java/lang/Object");
 192
 193         utf_java_lang_Class            = utf_new_char("java/lang/Class");
 194         utf_java_lang_ClassLoader      = utf_new_char("java/lang/ClassLoader");
 195         utf_java_lang_Cloneable        = utf_new_char("java/lang/Cloneable");
 196         utf_java_lang_SecurityManager  = utf_new_char("java/lang/SecurityManager");
 197         utf_java_lang_String           = utf_new_char("java/lang/String");
 198         utf_java_lang_System           = utf_new_char("java/lang/System");
 199         utf_java_lang_ThreadGroup      = utf_new_char("java/lang/ThreadGroup");
 200         utf_java_io_Serializable       = utf_new_char("java/io/Serializable");
 201
 202         utf_java_lang_Throwable        = utf_new_char(string_java_lang_Throwable);
 203         utf_java_lang_VMThrowable      = utf_new_char(string_java_lang_VMThrowable);
 204         utf_java_lang_Error            = utf_new_char(string_java_lang_Error);
 205
 206         utf_java_lang_NoClassDefFoundError =
 207                 utf_new_char(string_java_lang_NoClassDefFoundError);
 208
 209         utf_java_lang_LinkageError =
 210                 utf_new_char(string_java_lang_LinkageError);
 211
 212         utf_java_lang_NoSuchMethodError =
 213                 utf_new_char(string_java_lang_NoSuchMethodError);
 214
 215         utf_java_lang_OutOfMemoryError =
 216                 utf_new_char(string_java_lang_OutOfMemoryError);
 217
 218         utf_java_lang_Exception        = utf_new_char(string_java_lang_Exception);
 219
 220         utf_java_lang_ClassNotFoundException =
 221                 utf_new_char(string_java_lang_ClassNotFoundException);
 222
 223         utf_java_lang_IllegalArgumentException =
 224                 utf_new_char(string_java_lang_IllegalArgumentException);
 225
 226         utf_java_lang_IllegalMonitorStateException =
 227                 utf_new_char(string_java_lang_IllegalMonitorStateException);
 228
 229         utf_java_lang_NullPointerException =
 230                 utf_new_char(string_java_lang_NullPointerException);
 231
 232         utf_java_lang_Void             = utf_new_char("java/lang/Void");
 233         utf_java_lang_Boolean          = utf_new_char("java/lang/Boolean");
 234         utf_java_lang_Byte             = utf_new_char("java/lang/Byte");
 235         utf_java_lang_Character        = utf_new_char("java/lang/Character");
 236         utf_java_lang_Short            = utf_new_char("java/lang/Short");
 237         utf_java_lang_Integer          = utf_new_char("java/lang/Integer");
 238         utf_java_lang_Long             = utf_new_char("java/lang/Long");
 239         utf_java_lang_Float            = utf_new_char("java/lang/Float");
 240         utf_java_lang_Double           = utf_new_char("java/lang/Double");
 241
 242         utf_java_lang_StackTraceElement =
 243                 utf_new_char("java/lang/StackTraceElement");
 244
 245         utf_java_lang_reflect_Constructor =
 246                 utf_new_char("java/lang/reflect/Constructor");
 247
 248         utf_java_lang_reflect_Field    = utf_new_char("java/lang/reflect/Field");
 249         utf_java_lang_reflect_Method   = utf_new_char("java/lang/reflect/Method");
 250         utf_java_util_Vector           = utf_new_char("java/util/Vector");
 251
 252         utf_InnerClasses               = utf_new_char("InnerClasses");
 253         utf_ConstantValue              = utf_new_char("ConstantValue");
 254         utf_Code                       = utf_new_char("Code");
 255         utf_Exceptions                 = utf_new_char("Exceptions");
 256         utf_LineNumberTable            = utf_new_char("LineNumberTable");
 257         utf_SourceFile                 = utf_new_char("SourceFile");
 258
 259         utf_init                           = utf_new_char("<init>");
 260         utf_clinit                         = utf_new_char("<clinit>");
 261         utf_clone                      = utf_new_char("clone");
 262         utf_finalize                   = utf_new_char("finalize");
 263         utf_run                        = utf_new_char("run");
 264
 265         utf_add                        = utf_new_char("add");
 266         utf_remove                     = utf_new_char("remove");
 267         utf_put                        = utf_new_char("put");
 268         utf_get                        = utf_new_char("get");
 269         utf_value                      = utf_new_char("value");
 270
 271         utf_printStackTrace            = utf_new_char("printStackTrace");
 272         utf_fillInStackTrace           = utf_new_char("fillInStackTrace");
 273         utf_loadClass                  = utf_new_char("loadClass");
 274         utf_getSystemClassLoader       = utf_new_char("getSystemClassLoader");
 275
 276         utf_Z                          = utf_new_char("Z");
 277         utf_B                          = utf_new_char("B");
 278         utf_C                          = utf_new_char("C");
 279         utf_S                          = utf_new_char("S");
 280         utf_I                          = utf_new_char("I");
 281         utf_J                          = utf_new_char("J");
 282         utf_F                          = utf_new_char("F");
 283         utf_D                          = utf_new_char("D");
 284
 285         utf_void__void                 = utf_new_char("()V");
 286         utf_boolean__void              = utf_new_char("(Z)V");
 287         utf_byte__void                 = utf_new_char("(B)V");
 288         utf_char__void                 = utf_new_char("(C)V");
 289         utf_short__void                = utf_new_char("(S)V");
 290         utf_int__void                  = utf_new_char("(I)V");
 291         utf_long__void                 = utf_new_char("(J)V");
 292         utf_float__void                = utf_new_char("(F)V");
 293         utf_double__void               = utf_new_char("(D)V");
 294         utf_void__java_lang_Object     = utf_new_char("()Ljava/lang/Object;");
 295         utf_void__java_lang_Throwable  = utf_new_char("()Ljava/lang/Throwable;");
 296
 297         utf_void__java_lang_ClassLoader =
 298                 utf_new_char("()Ljava/lang/ClassLoader;");
 299
 300         utf_java_lang_Object__java_lang_Object =
 301                 utf_new_char("(Ljava/lang/Object;)Ljava/lang/Object;");
 302
 303         utf_java_lang_String__void     = utf_new_char("(Ljava/lang/String;)V");
 304
 305         utf_java_lang_String__java_lang_Class =
 306                 utf_new_char("(Ljava/lang/String;)Ljava/lang/Class;");
 307
 308         utf_java_lang_Throwable__void  = utf_new_char("(Ljava/lang/Throwable;)V");
 309
 310         utf_not_named_yet              = utf_new_char("\t<not_named_yet>");
 311
 312         array_packagename              = utf_new_char("\t<the array package>");
 313
 314         /* everything's ok */
 315
 316         return true;
 317 }
 318
 319
 320 /* utf_hashkey *****************************************************************
 321
 322    The hashkey is computed from the utf-text by using up to 8
 323    characters.  For utf-symbols longer than 15 characters 3 characters
 324    are taken from the beginning and the end, 2 characters are taken
 325    from the middle.
 326
 327 *******************************************************************************/
 328
 329 #define nbs(val) ((u4) *(++text) << val) /* get next byte, left shift by val  */
 330 #define fbs(val) ((u4) *(  text) << val) /* get first byte, left shift by val */
 331
 332 u4 utf_hashkey(const char *text, u4 length)
 333 {
 334         const char *start_pos = text;       /* pointer to utf text                */
 335         u4 a;
 336
 337         switch (length) {
 338         case 0: /* empty string */
 339                 return 0;
 340
 341         case 1: return fbs(0);
 342         case 2: return fbs(0) ^ nbs(3);
 343         case 3: return fbs(0) ^ nbs(3) ^ nbs(5);
 344         case 4: return fbs(0) ^ nbs(2) ^ nbs(4) ^ nbs(6);
 345         case 5: return fbs(0) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(6);
 346         case 6: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(5) ^ nbs(6);
 347         case 7: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(5) ^ nbs(6);
 348         case 8: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(5) ^ nbs(6) ^ nbs(7);
 349
 350         case 9:
 351                 a = fbs(0);
 352                 a ^= nbs(1);
 353                 a ^= nbs(2);
 354                 text++;
 355                 return a ^ nbs(4) ^ nbs(5) ^ nbs(6) ^ nbs(7) ^ nbs(8);
 356
 357         case 10:
 358                 a = fbs(0);
 359                 text++;
 360                 a ^= nbs(2);
 361                 a ^= nbs(3);
 362                 a ^= nbs(4);
 363                 text++;
 364                 return a ^ nbs(6) ^ nbs(7) ^ nbs(8) ^ nbs(9);
 365
 366         case 11:
 367                 a = fbs(0);
 368                 text++;
 369                 a ^= nbs(2);
 370                 a ^= nbs(3);
 371                 a ^= nbs(4);
 372                 text++;
 373                 return a ^ nbs(6) ^ nbs(7) ^ nbs(8) ^ nbs(9) ^ nbs(10);
 374
 375         case 12:
 376                 a = fbs(0);
 377                 text += 2;
 378                 a ^= nbs(2);
 379                 a ^= nbs(3);
 380                 text++;
 381                 a ^= nbs(5);
 382                 a ^= nbs(6);
 383                 a ^= nbs(7);
 384                 text++;
 385                 return a ^ nbs(9) ^ nbs(10);
 386
 387         case 13:
 388                 a = fbs(0);
 389                 a ^= nbs(1);
 390                 text++;
 391                 a ^= nbs(3);
 392                 a ^= nbs(4);
 393                 text += 2;
 394                 a ^= nbs(7);
 395                 a ^= nbs(8);
 396                 text += 2;
 397                 return a ^ nbs(9) ^ nbs(10);
 398
 399         case 14:
 400                 a = fbs(0);
 401                 text += 2;
 402                 a ^= nbs(3);
 403                 a ^= nbs(4);
 404                 text += 2;
 405                 a ^= nbs(7);
 406                 a ^= nbs(8);
 407                 text += 2;
 408                 return a ^ nbs(9) ^ nbs(10) ^ nbs(11);
 409
 410         case 15:
 411                 a = fbs(0);
 412                 text += 2;
 413                 a ^= nbs(3);
 414                 a ^= nbs(4);
 415                 text += 2;
 416                 a ^= nbs(7);
 417                 a ^= nbs(8);
 418                 text += 2;
 419                 return a ^ nbs(9) ^ nbs(10) ^ nbs(11);
 420
 421         default:  /* 3 characters from beginning */
 422                 a = fbs(0);
 423                 text += 2;
 424                 a ^= nbs(3);
 425                 a ^= nbs(4);
 426
 427                 /* 2 characters from middle */
 428                 text = start_pos + (length / 2);
 429                 a ^= fbs(5);
 430                 text += 2;
 431                 a ^= nbs(6);
 432
 433                 /* 3 characters from end */
 434                 text = start_pos + length - 4;
 435
 436                 a ^= fbs(7);
 437                 text++;
 438
 439                 return a ^ nbs(10) ^ nbs(11);
 440     }
 441 }
 442
 443 /* utf_full_hashkey ************************************************************
 444
 445    This function computes a hash value using all bytes in the string.
 446
 447    The algorithm is the "One-at-a-time" algorithm as published
 448    by Bob Jenkins on http://burtleburtle.net/bob/hash/doobs.html.
 449
 450 *******************************************************************************/
 451
 452 u4 utf_full_hashkey(const char *text, u4 length)
 453 {
 454         register const unsigned char *p = (const unsigned char *) text;
 455         register u4 hash;
 456         register u4 i;
 457
 458         hash = 0;
 459         for (i=length; i--;)
 460         {
 461             hash += *p++;
 462             hash += (hash << 10);
 463             hash ^= (hash >> 6);
 464         }
 465         hash += (hash << 3);
 466         hash ^= (hash >> 11);
 467         hash += (hash << 15);
 468
 469         return hash;
 470 }
 471
 472 /* unicode_hashkey *************************************************************
 473
 474    Compute the hashkey of a unicode string.
 475
 476 *******************************************************************************/
 477
 478 u4 unicode_hashkey(u2 *text, u2 len)
 479 {
 480         return utf_hashkey((char *) text, len);
 481 }
 482
 483
 484 /* utf_new *********************************************************************
 485
 486    Creates a new utf-symbol, the text of the symbol is passed as a
 487    u1-array. The function searches the utf-hashtable for a utf-symbol
 488    with this text. On success the element returned, otherwise a new
 489    hashtable element is created.
 490
 491    If the number of entries in the hashtable exceeds twice the size of
 492    the hashtable slots a reorganization of the hashtable is done and
 493    the utf symbols are copied to a new hashtable with doubled size.
 494
 495 *******************************************************************************/
 496
 497 utf *utf_new(const char *text, u2 length)
 498 {
 499         u4 key;                             /* hashkey computed from utf-text     */
 500         u4 slot;                            /* slot in hashtable                  */
 501         utf *u;                             /* hashtable element                  */
 502         u2 i;
 503
 504 #if defined(ENABLE_THREADS)
 505         builtin_monitorenter(hashtable_utf->header);
 506 #endif
 507
 508 #if defined(ENABLE_STATISTICS)
 509         if (opt_stat)
 510                 count_utf_new++;
 511 #endif
 512
 513         key  = utf_hashkey(text, length);
 514         slot = key & (hashtable_utf->size - 1);
 515         u    = hashtable_utf->ptr[slot];
 516
 517         /* search external hash chain for utf-symbol */
 518
 519         while (u) {
 520                 if (u->blength == length) {
 521                         /* compare text of hashtable elements */
 522
 523                         for (i = 0; i < length; i++)
 524                                 if (text[i] != u->text[i])
 525                                         goto nomatch;
 526
 527 #if defined(ENABLE_STATISTICS)
 528                         if (opt_stat)
 529                                 count_utf_new_found++;
 530 #endif
 531
 532                         /* symbol found in hashtable */
 533
 534 #if defined(ENABLE_THREADS)
 535                         builtin_monitorexit(hashtable_utf->header);
 536 #endif
 537
 538                         return u;
 539                 }
 540
 541         nomatch:
 542                 u = u->hashlink; /* next element in external chain */
 543         }
 544
 545 #if defined(ENABLE_STATISTICS)
 546         if (opt_stat)
 547                 count_utf_len += sizeof(utf) + length + 1;
 548 #endif
 549
 550         /* location in hashtable found, create new utf element */
 551         u = NEW(utf);
 552         u->blength  = length;               /* length in bytes of utfstring       */
 553         u->hashlink = hashtable_utf->ptr[slot]; /* link in external hashchain     */
 554         u->text     = mem_alloc(length + 1);/* allocate memory for utf-text       */
 555
 556         memcpy(u->text, text, length);      /* copy utf-text                      */
 557         u->text[length] = '\0';
 558
 559         hashtable_utf->ptr[slot] = u;       /* insert symbol into table           */
 560         hashtable_utf->entries++;           /* update number of entries           */
 561
 562         if (hashtable_utf->entries > (hashtable_utf->size * 2)) {
 563
 564         /* reorganization of hashtable, average length of the external
 565            chains is approx. 2 */
 566
 567                 hashtable *newhash;                              /* the new hashtable */
 568                 u4         i;
 569                 utf       *u;
 570                 utf       *nextu;
 571                 u4         slot;
 572
 573                 /* create new hashtable, double the size */
 574
 575                 newhash = hashtable_resize(hashtable_utf, hashtable_utf->size * 2);
 576
 577 #if defined(ENABLE_STATISTICS)
 578                 if (opt_stat)
 579                         count_utf_len += sizeof(utf*) * hashtable_utf->size;
 580 #endif
 581
 582                 /* transfer elements to new hashtable */
 583
 584                 for (i = 0; i < hashtable_utf->size; i++) {
 585                         u = hashtable_utf->ptr[i];
 586
 587                         while (u) {
 588                                 nextu = u->hashlink;
 589                                 slot  = utf_hashkey(u->text, u->blength) & (newhash->size - 1);
 590
 591                                 u->hashlink = (utf *) newhash->ptr[slot];
 592                                 newhash->ptr[slot] = u;
 593
 594                                 /* follow link in external hash chain */
 595
 596                                 u = nextu;
 597                         }
 598                 }
 599
 600                 /* dispose old table */
 601
 602                 hashtable_free(hashtable_utf);
 603
 604                 hashtable_utf = newhash;
 605         }
 606
 607 #if defined(ENABLE_THREADS)
 608         builtin_monitorexit(hashtable_utf->header);
 609 #endif
 610
 611         return u;
 612 }
 613
 614
 615 /* utf_new_u2 ******************************************************************
 616
 617    Make utf symbol from u2 array, if isclassname is true '.' is
 618    replaced by '/'.
 619
 620 *******************************************************************************/
 621
 622 utf *utf_new_u2(u2 *unicode_pos, u4 unicode_length, bool isclassname)
 623 {
 624         char *buffer;                   /* memory buffer for  unicode characters  */
 625         char *pos;                      /* pointer to current position in buffer  */
 626         u4 left;                        /* unicode characters left                */
 627         u4 buflength;                   /* utf length in bytes of the u2 array    */
 628         utf *result;                    /* resulting utf-string                   */
 629         int i;
 630
 631         /* determine utf length in bytes and allocate memory */
 632
 633         buflength = u2_utflength(unicode_pos, unicode_length);
 634         buffer    = MNEW(char, buflength);
 635
 636         left = buflength;
 637         pos  = buffer;
 638
 639         for (i = 0; i++ < unicode_length; unicode_pos++) {
 640                 /* next unicode character */
 641                 u2 c = *unicode_pos;
 642
 643                 if ((c != 0) && (c < 0x80)) {
 644                         /* 1 character */
 645                         left--;
 646                 if ((int) left < 0) break;
 647                         /* convert classname */
 648                         if (isclassname && c == '.')
 649                                 *pos++ = '/';
 650                         else
 651                                 *pos++ = (char) c;
 652
 653                 } else if (c < 0x800) {
 654                         /* 2 characters */
 655                 unsigned char high = c >> 6;
 656                 unsigned char low  = c & 0x3F;
 657                         left = left - 2;
 658                 if ((int) left < 0) break;
 659                 *pos++ = high | 0xC0;
 660                 *pos++ = low  | 0x80;
 661
 662                 } else {
 663                 /* 3 characters */
 664                 char low  = c & 0x3f;
 665                 char mid  = (c >> 6) & 0x3F;
 666                 char high = c >> 12;
 667                         left = left - 3;
 668                 if ((int) left < 0) break;
 669                 *pos++ = high | 0xE0;
 670                 *pos++ = mid  | 0x80;
 671                 *pos++ = low  | 0x80;
 672                 }
 673         }
 674
 675         /* insert utf-string into symbol-table */
 676         result = utf_new(buffer,buflength);
 677
 678         MFREE(buffer, char, buflength);
 679
 680         return result;
 681 }
 682
 683
 684 /* utf_new_char ****************************************************************
 685
 686    Creates a new utf symbol, the text for this symbol is passed as a
 687    c-string ( = char* ).
 688
 689 *******************************************************************************/
 690
 691 utf *utf_new_char(const char *text)
 692 {
 693         return utf_new(text, strlen(text));
 694 }
 695
 696
 697 /* utf_new_char_classname ******************************************************
 698
 699    Creates a new utf symbol, the text for this symbol is passed as a
 700    c-string ( = char* ) "." characters are going to be replaced by
 701    "/". Since the above function is used often, this is a separte
 702    function, instead of an if.
 703
 704 *******************************************************************************/
 705
 706 utf *utf_new_char_classname(const char *text)
 707 {
 708         if (strchr(text, '.')) {
 709                 char *txt = strdup(text);
 710                 char *end = txt + strlen(txt);
 711                 char *c;
 712                 utf *tmpRes;
 713
 714                 for (c = txt; c < end; c++)
 715                         if (*c == '.') *c = '/';
 716
 717                 tmpRes = utf_new(txt, strlen(txt));
 718                 FREE(txt, 0);
 719
 720                 return tmpRes;
 721
 722         } else
 723                 return utf_new(text, strlen(text));
 724 }
 725
 726
 727 /* utf_nextu2 ******************************************************************
 728
 729    Read the next unicode character from the utf string and increment
 730    the utf-string pointer accordingly.
 731
 732 *******************************************************************************/
 733
 734 u2 utf_nextu2(char **utf_ptr)
 735 {
 736     /* uncompressed unicode character */
 737     u2 unicode_char = 0;
 738     /* current position in utf text */
 739     unsigned char *utf = (unsigned char *) (*utf_ptr);
 740     /* bytes representing the unicode character */
 741     unsigned char ch1, ch2, ch3;
 742     /* number of bytes used to represent the unicode character */
 743     int len = 0;
 744
 745     switch ((ch1 = utf[0]) >> 4) {
 746         default: /* 1 byte */
 747                 (*utf_ptr)++;
 748                 return (u2) ch1;
 749         case 0xC:
 750         case 0xD: /* 2 bytes */
 751                 if (((ch2 = utf[1]) & 0xC0) == 0x80) {
 752                         unsigned char high = ch1 & 0x1F;
 753                         unsigned char low  = ch2 & 0x3F;
 754                         unicode_char = (high << 6) + low;
 755                         len = 2;
 756                 }
 757                 break;
 758
 759         case 0xE: /* 2 or 3 bytes */
 760                 if (((ch2 = utf[1]) & 0xC0) == 0x80) {
 761                         if (((ch3 = utf[2]) & 0xC0) == 0x80) {
 762                                 unsigned char low  = ch3 & 0x3f;
 763                                 unsigned char mid  = ch2 & 0x3f;
 764                                 unsigned char high = ch1 & 0x0f;
 765                                 unicode_char = (((high << 6) + mid) << 6) + low;
 766                                 len = 3;
 767                         } else
 768                                 len = 2;
 769                 }
 770                 break;
 771     }
 772
 773     /* update position in utf-text */
 774     *utf_ptr = (char *) (utf + len);
 775
 776     return unicode_char;
 777 }
 778
 779
 780 /* utf_bytes *******************************************************************
 781
 782    Determine number of bytes (aka. octets) in the utf string.
 783
 784    IN:
 785       u............utf string
 786
 787    OUT:
 788       The number of octets of this utf string.
 789           There is _no_ terminating zero included in this count.
 790
 791 *******************************************************************************/
 792
 793 u4 utf_bytes(utf *u)
 794 {
 795         return u->blength;
 796 }
 797
 798 /* utf_get_number_of_u2s_for_buffer ********************************************
 799
 800    Determine number of UTF-16 u2s in the given UTF-8 buffer
 801
 802    CAUTION: Use this function *only* when you want to convert an UTF-8 buffer
 803    to an array of u2s (UTF-16) and want to know how many of them you will get.
 804    All other uses of this function are probably wrong.
 805
 806    IN:
 807       buffer........points to first char in buffer
 808           blength.......number of _bytes_ in the buffer
 809
 810    OUT:
 811       the number of u2s needed to hold this string in UTF-16 encoding.
 812           There is _no_ terminating zero included in this count.
 813
 814    NOTE: Unlike utf_get_number_of_u2s, this function never throws an
 815    exception.
 816
 817 *******************************************************************************/
 818
 819 u4 utf_get_number_of_u2s_for_buffer(const char *buffer, u4 blength)
 820 {
 821         const char *endpos;                 /* points behind utf string           */
 822         const char *utf_ptr;                /* current position in utf text       */
 823         u4 len = 0;                         /* number of unicode characters       */
 824
 825         utf_ptr = buffer;
 826         endpos = utf_ptr + blength;
 827
 828         while (utf_ptr < endpos) {
 829                 len++;
 830                 /* next unicode character */
 831                 utf_nextu2((char **)&utf_ptr);
 832         }
 833
 834         assert(utf_ptr == endpos);
 835
 836         return len;
 837 }
 838
 839
 840 /* utf_get_number_of_u2s *******************************************************
 841
 842    Determine number of UTF-16 u2s in the utf string.
 843
 844    CAUTION: Use this function *only* when you want to convert a utf string
 845    to an array of u2s and want to know how many of them you will get.
 846    All other uses of this function are probably wrong.
 847
 848    IN:
 849       u............utf string
 850
 851    OUT:
 852       the number of u2s needed to hold this string in UTF-16 encoding.
 853           There is _no_ terminating zero included in this count.
 854           XXX 0 if a NullPointerException has been thrown (see below)
 855
 856 *******************************************************************************/
 857
 858 u4 utf_get_number_of_u2s(utf *u)
 859 {
 860         char *endpos;                       /* points behind utf string           */
 861         char *utf_ptr;                      /* current position in utf text       */
 862         u4 len = 0;                         /* number of unicode characters       */
 863
 864         /* XXX this is probably not checked by most callers! Review this after */
 865         /* the invalid uses of this function have been eliminated */
 866         if (!u) {
 867                 exceptions_throw_nullpointerexception();
 868                 return 0;
 869         }
 870
 871         endpos = UTF_END(u);
 872         utf_ptr = u->text;
 873
 874         while (utf_ptr < endpos) {
 875                 len++;
 876                 /* next unicode character */
 877                 utf_nextu2(&utf_ptr);
 878         }
 879
 880         if (utf_ptr != endpos)
 881                 /* string ended abruptly */
 882                 throw_cacao_exception_exit(string_java_lang_InternalError,
 883                                                                    "Illegal utf8 string");
 884
 885         return len;
 886 }
 887
 888
 889 /* u2_utflength ****************************************************************
 890
 891    Returns the utf length in bytes of a u2 array.
 892
 893 *******************************************************************************/
 894
 895 u4 u2_utflength(u2 *text, u4 u2_length)
 896 {
 897         u4 result_len = 0;                  /* utf length in bytes                */
 898         u2 ch;                              /* current unicode character          */
 899         u4 len;
 900
 901         for (len = 0; len < u2_length; len++) {
 902                 /* next unicode character */
 903                 ch = *text++;
 904
 905                 /* determine bytes required to store unicode character as utf */
 906                 if (ch && (ch < 0x80))
 907                         result_len++;
 908                 else if (ch < 0x800)
 909                         result_len += 2;
 910                 else
 911                         result_len += 3;
 912         }
 913
 914     return result_len;
 915 }
 916
 917
 918 /* utf_copy ********************************************************************
 919
 920    Copy the given utf string byte-for-byte to a buffer.
 921
 922    IN:
 923       buffer.......the buffer
 924           u............the utf string
 925
 926 *******************************************************************************/
 927
 928 void utf_copy(char *buffer, utf *u)
 929 {
 930         /* our utf strings are zero-terminated (done by utf_new) */
 931         MCOPY(buffer, u->text, char, u->blength + 1);
 932 }
 933
 934
 935 /* utf_cat *********************************************************************
 936
 937    Append the given utf string byte-for-byte to a buffer.
 938
 939    IN:
 940       buffer.......the buffer
 941           u............the utf string
 942
 943 *******************************************************************************/
 944
 945 void utf_cat(char *buffer, utf *u)
 946 {
 947         /* our utf strings are zero-terminated (done by utf_new) */
 948         MCOPY(buffer + strlen(buffer), u->text, char, u->blength + 1);
 949 }
 950
 951
 952 /* utf_copy_classname **********************************************************
 953
 954    Copy the given utf classname byte-for-byte to a buffer.
 955    '/' is replaced by '.'
 956
 957    IN:
 958       buffer.......the buffer
 959           u............the utf string
 960
 961 *******************************************************************************/
 962
 963 void utf_copy_classname(char *buffer, utf *u)
 964 {
 965         char *bufptr;
 966         char *srcptr;
 967         char *endptr;
 968         char ch;
 969
 970         bufptr = buffer;
 971         srcptr = u->text;
 972         endptr = UTF_END(u) + 1; /* utfs are zero-terminared by utf_new */
 973
 974         while (srcptr != endptr) {
 975                 ch = *srcptr++;
 976                 if (ch == '/')
 977                         ch = '.';
 978                 *bufptr++ = ch;
 979         }
 980 }
 981
 982
 983 /* utf_cat *********************************************************************
 984
 985    Append the given utf classname byte-for-byte to a buffer.
 986    '/' is replaced by '.'
 987
 988    IN:
 989       buffer.......the buffer
 990           u............the utf string
 991
 992 *******************************************************************************/
 993
 994 void utf_cat_classname(char *buffer, utf *u)
 995 {
 996         utf_copy_classname(buffer + strlen(buffer), u);
 997 }
 998
 999 /* utf_display_printable_ascii *************************************************
1000
1001    Write utf symbol to stdout (for debugging purposes).
1002    Non-printable and non-ASCII characters are printed as '?'.
1003
1004 *******************************************************************************/
1005
1006 void utf_display_printable_ascii(utf *u)
1007 {
1008         char *endpos;                       /* points behind utf string           */
1009         char *utf_ptr;                      /* current position in utf text       */
1010
1011         if (u == NULL) {
1012                 printf("NULL");
1013                 fflush(stdout);
1014                 return;
1015         }
1016
1017         endpos = UTF_END(u);
1018         utf_ptr = u->text;
1019
1020         while (utf_ptr < endpos) {
1021                 /* read next unicode character */
1022
1023                 u2 c = utf_nextu2(&utf_ptr);
1024
1025                 if ((c >= 32) && (c <= 127))
1026                         printf("%c", c);
1027                 else
1028                         printf("?");
1029         }
1030
1031         fflush(stdout);
1032 }
1033
1034
1035 /* utf_display_printable_ascii_classname ***************************************
1036
1037    Write utf symbol to stdout with `/' converted to `.' (for debugging
1038    purposes).
1039    Non-printable and non-ASCII characters are printed as '?'.
1040
1041 *******************************************************************************/
1042
1043 void utf_display_printable_ascii_classname(utf *u)
1044 {
1045         char *endpos;                       /* points behind utf string           */
1046         char *utf_ptr;                      /* current position in utf text       */
1047
1048         if (u == NULL) {
1049                 printf("NULL");
1050                 fflush(stdout);
1051                 return;
1052         }
1053
1054         endpos = UTF_END(u);
1055         utf_ptr = u->text;
1056
1057         while (utf_ptr < endpos) {
1058                 /* read next unicode character */
1059
1060                 u2 c = utf_nextu2(&utf_ptr);
1061
1062                 if (c == '/')
1063                         c = '.';
1064
1065                 if ((c >= 32) && (c <= 127))
1066                         printf("%c", c);
1067                 else
1068                         printf("?");
1069         }
1070
1071         fflush(stdout);
1072 }
1073
1074
1075 /* utf_sprint_convert_to_latin1 ************************************************
1076
1077    Write utf symbol into c-string (for debugging purposes).
1078    Characters are converted to 8-bit Latin-1, non-Latin-1 characters yield
1079    invalid results.
1080
1081 *******************************************************************************/
1082
1083 void utf_sprint_convert_to_latin1(char *buffer, utf *u)
1084 {
1085         char *endpos;                       /* points behind utf string           */
1086         char *utf_ptr;                      /* current position in utf text       */
1087         u2 pos = 0;                         /* position in c-string               */
1088
1089         if (!u) {
1090                 strcpy(buffer, "NULL");
1091                 return;
1092         }
1093
1094         endpos = UTF_END(u);
1095         utf_ptr = u->text;
1096
1097         while (utf_ptr < endpos)
1098                 /* copy next unicode character */
1099                 buffer[pos++] = utf_nextu2(&utf_ptr);
1100
1101         /* terminate string */
1102         buffer[pos] = '\0';
1103 }
1104
1105
1106 /* utf_sprint_convert_to_latin1_classname **************************************
1107
1108    Write utf symbol into c-string with `/' converted to `.' (for debugging
1109    purposes).
1110    Characters are converted to 8-bit Latin-1, non-Latin-1 characters yield
1111    invalid results.
1112
1113 *******************************************************************************/
1114
1115 void utf_sprint_convert_to_latin1_classname(char *buffer, utf *u)
1116 {
1117         char *endpos;                       /* points behind utf string           */
1118         char *utf_ptr;                      /* current position in utf text       */
1119         u2 pos = 0;                         /* position in c-string               */
1120
1121         if (!u) {
1122                 strcpy(buffer, "NULL");
1123                 return;
1124         }
1125
1126         endpos = UTF_END(u);
1127         utf_ptr = u->text;
1128
1129         while (utf_ptr < endpos) {
1130                 /* copy next unicode character */
1131                 u2 c = utf_nextu2(&utf_ptr);
1132                 if (c == '/') c = '.';
1133                 buffer[pos++] = c;
1134         }
1135
1136         /* terminate string */
1137         buffer[pos] = '\0';
1138 }
1139
1140
1141 /* utf_strcat_convert_to_latin1 ************************************************
1142
1143    Like libc strcat, but uses an utf8 string.
1144    Characters are converted to 8-bit Latin-1, non-Latin-1 characters yield
1145    invalid results.
1146
1147 *******************************************************************************/
1148
1149 void utf_strcat_convert_to_latin1(char *buffer, utf *u)
1150 {
1151         utf_sprint_convert_to_latin1(buffer + strlen(buffer), u);
1152 }
1153
1154
1155 /* utf_strcat_convert_to_latin1_classname **************************************
1156
1157    Like libc strcat, but uses an utf8 string.
1158    Characters are converted to 8-bit Latin-1, non-Latin-1 characters yield
1159    invalid results.
1160
1161 *******************************************************************************/
1162
1163 void utf_strcat_convert_to_latin1_classname(char *buffer, utf *u)
1164 {
1165         utf_sprint_convert_to_latin1_classname(buffer + strlen(buffer), u);
1166 }
1167
1168
1169 /* utf_fprint_printable_ascii **************************************************
1170
1171    Write utf symbol into file.
1172    Non-printable and non-ASCII characters are printed as '?'.
1173
1174 *******************************************************************************/
1175
1176 void utf_fprint_printable_ascii(FILE *file, utf *u)
1177 {
1178         char *endpos;                       /* points behind utf string           */
1179         char *utf_ptr;                      /* current position in utf text       */
1180
1181         if (!u)
1182                 return;
1183
1184         endpos = UTF_END(u);
1185         utf_ptr = u->text;
1186
1187         while (utf_ptr < endpos) {
1188                 /* read next unicode character */
1189                 u2 c = utf_nextu2(&utf_ptr);
1190
1191                 if (c >= 32 && c <= 127) fprintf(file, "%c", c);
1192                 else fprintf(file, "?");
1193         }
1194 }
1195
1196
1197 /* utf_fprint_printable_ascii_classname ****************************************
1198
1199    Write utf symbol into file with `/' converted to `.'.
1200    Non-printable and non-ASCII characters are printed as '?'.
1201
1202 *******************************************************************************/
1203
1204 void utf_fprint_printable_ascii_classname(FILE *file, utf *u)
1205 {
1206         char *endpos;                       /* points behind utf string           */
1207         char *utf_ptr;                      /* current position in utf text       */
1208
1209     if (!u)
1210                 return;
1211
1212         endpos = UTF_END(u);
1213         utf_ptr = u->text;
1214
1215         while (utf_ptr < endpos) {
1216                 /* read next unicode character */
1217                 u2 c = utf_nextu2(&utf_ptr);
1218                 if (c == '/') c = '.';
1219
1220                 if (c >= 32 && c <= 127) fprintf(file, "%c", c);
1221                 else fprintf(file, "?");
1222         }
1223 }
1224
1225
1226 /* is_valid_utf ****************************************************************
1227
1228    Return true if the given string is a valid UTF-8 string.
1229
1230    utf_ptr...points to first character
1231    end_pos...points after last character
1232
1233 *******************************************************************************/
1234
1235 /*  static unsigned long min_codepoint[6] = {0,1L<<7,1L<<11,1L<<16,1L<<21,1L<<26}; */
1236
1237 bool is_valid_utf(char *utf_ptr, char *end_pos)
1238 {
1239         int bytes;
1240         int len,i;
1241         char c;
1242         unsigned long v;
1243
1244         if (end_pos < utf_ptr) return false;
1245         bytes = end_pos - utf_ptr;
1246         while (bytes--) {
1247                 c = *utf_ptr++;
1248
1249                 if (!c) return false;                     /* 0x00 is not allowed */
1250                 if ((c & 0x80) == 0) continue;            /* ASCII */
1251
1252                 if      ((c & 0xe0) == 0xc0) len = 1;     /* 110x xxxx */
1253                 else if ((c & 0xf0) == 0xe0) len = 2;     /* 1110 xxxx */
1254                 else if ((c & 0xf8) == 0xf0) len = 3;     /* 1111 0xxx */
1255                 else if ((c & 0xfc) == 0xf8) len = 4;     /* 1111 10xx */
1256                 else if ((c & 0xfe) == 0xfc) len = 5;     /* 1111 110x */
1257                 else return false;                        /* invalid leading byte */
1258
1259                 if (len > 2) return false;                /* Java limitation */
1260
1261                 v = (unsigned long)c & (0x3f >> len);
1262
1263                 if ((bytes -= len) < 0) return false;     /* missing bytes */
1264
1265                 for (i = len; i--; ) {
1266                         c = *utf_ptr++;
1267                         if ((c & 0xc0) != 0x80)               /* 10xx xxxx */
1268                                 return false;
1269                         v = (v << 6) | (c & 0x3f);
1270                 }
1271
1272                 if (v == 0) {
1273                         if (len != 1) return false;           /* Java special */
1274
1275                 } else {
1276                         /* Sun Java seems to allow overlong UTF-8 encodings */
1277
1278                         /* if (v < min_codepoint[len]) */
1279                                 /* XXX throw exception? */
1280                 }
1281
1282                 /* surrogates in UTF-8 seem to be allowed in Java classfiles */
1283                 /* if (v >= 0xd800 && v <= 0xdfff) return false; */ /* surrogates */
1284
1285                 /* even these seem to be allowed */
1286                 /* if (v == 0xfffe || v == 0xffff) return false; */ /* invalid codepoints */
1287         }
1288
1289         return true;
1290 }
1291
1292
1293 /* is_valid_name ***************************************************************
1294
1295    Return true if the given string may be used as a class/field/method
1296    name. (Currently this only disallows empty strings and control
1297    characters.)
1298
1299    NOTE: The string is assumed to have passed is_valid_utf!
1300
1301    utf_ptr...points to first character
1302    end_pos...points after last character
1303
1304 *******************************************************************************/
1305
1306 bool is_valid_name(char *utf_ptr, char *end_pos)
1307 {
1308         if (end_pos <= utf_ptr) return false; /* disallow empty names */
1309
1310         while (utf_ptr < end_pos) {
1311                 unsigned char c = *utf_ptr++;
1312
1313                 if (c < 0x20) return false; /* disallow control characters */
1314                 if (c == 0xc0 && (unsigned char) *utf_ptr == 0x80)  /* disallow zero */
1315                         return false;
1316         }
1317
1318         return true;
1319 }
1320
1321 bool is_valid_name_utf(utf *u)
1322 {
1323         return is_valid_name(u->text, UTF_END(u));
1324 }
1325
1326
1327 /* utf_show ********************************************************************
1328
1329    Writes the utf symbols in the utfhash to stdout and displays the
1330    number of external hash chains grouped according to the chainlength
1331    (for debugging purposes).
1332
1333 *******************************************************************************/
1334
1335 #if !defined(NDEBUG)
1336 void utf_show(void)
1337 {
1338
1339 #define CHAIN_LIMIT 20               /* limit for seperated enumeration */
1340
1341         u4 chain_count[CHAIN_LIMIT]; /* numbers of chains */
1342         u4 max_chainlength = 0;      /* maximum length of the chains */
1343         u4 sum_chainlength = 0;      /* sum of the chainlengths */
1344         u4 beyond_limit = 0;         /* number of utf-symbols in chains with length>=CHAIN_LIMIT-1 */
1345         u4 i;
1346
1347         printf("UTF-HASH:\n");
1348
1349         /* show element of utf-hashtable */
1350
1351         for (i = 0; i < hashtable_utf->size; i++) {
1352                 utf *u = hashtable_utf->ptr[i];
1353
1354                 if (u) {
1355                         printf("SLOT %d: ", (int) i);
1356
1357                         while (u) {
1358                                 printf("'");
1359                                 utf_display_printable_ascii(u);
1360                                 printf("' ");
1361                                 u = u->hashlink;
1362                         }
1363                         printf("\n");
1364                 }
1365         }
1366
1367         printf("UTF-HASH: %d slots for %d entries\n",
1368                    (int) hashtable_utf->size, (int) hashtable_utf->entries );
1369
1370         if (hashtable_utf->entries == 0)
1371                 return;
1372
1373         printf("chains:\n  chainlength    number of chains    %% of utfstrings\n");
1374
1375         for (i=0;i<CHAIN_LIMIT;i++)
1376                 chain_count[i]=0;
1377
1378         /* count numbers of hashchains according to their length */
1379         for (i=0; i<hashtable_utf->size; i++) {
1380
1381                 utf *u = (utf*) hashtable_utf->ptr[i];
1382                 u4 chain_length = 0;
1383
1384                 /* determine chainlength */
1385                 while (u) {
1386                         u = u->hashlink;
1387                         chain_length++;
1388                 }
1389
1390                 /* update sum of all chainlengths */
1391                 sum_chainlength+=chain_length;
1392
1393                 /* determine the maximum length of the chains */
1394                 if (chain_length>max_chainlength)
1395                         max_chainlength = chain_length;
1396
1397                 /* update number of utf-symbols in chains with length>=CHAIN_LIMIT-1 */
1398                 if (chain_length>=CHAIN_LIMIT) {
1399                         beyond_limit+=chain_length;
1400                         chain_length=CHAIN_LIMIT-1;
1401                 }
1402
1403                 /* update number of hashchains of current length */
1404                 chain_count[chain_length]++;
1405         }
1406
1407         /* display results */
1408         for (i=1;i<CHAIN_LIMIT-1;i++)
1409                 printf("       %2d %17d %18.2f%%\n",i,chain_count[i],(((float) chain_count[i]*i*100)/hashtable_utf->entries));
1410
1411         printf("     >=%2d %17d %18.2f%%\n",CHAIN_LIMIT-1,chain_count[CHAIN_LIMIT-1],((float) beyond_limit*100)/hashtable_utf->entries);
1412
1413
1414         printf("max. chainlength:%5d\n",max_chainlength);
1415
1416         /* avg. chainlength = sum of chainlengths / number of chains */
1417         printf("avg. chainlength:%5.2f\n",(float) sum_chainlength / (hashtable_utf->size-chain_count[0]));
1418 }
1419 #endif /* !defined(NDEBUG) */
1420
1421
1422 /*
1423  * These are local overrides for various environment variables in Emacs.
1424  * Please do not remove this and leave it at the end of the file, where
1425  * Emacs will automagically detect them.
1426  * ---------------------------------------------------------------------
1427  * Local variables:
1428  * mode: c
1429  * indent-tabs-mode: t
1430  * c-basic-offset: 4
1431  * tab-width: 4
1432  * End:
1433  * vim:noexpandtab:sw=4:ts=4:
1434  */