3 Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003
4 R. Grafl, A. Krall, C. Kruegel, C. Oates, R. Obermaisser,
5 M. Probst, S. Ring, E. Steiner, C. Thalinger, D. Thuernbeck,
6 P. Tomsich, J. Wenninger
8 This file is part of CACAO.
10 This program is free software; you can redistribute it and/or
11 modify it under the terms of the GNU General Public License as
12 published by the Free Software Foundation; either version 2, or (at
13 your option) any later version.
15 This program is distributed in the hope that it will be useful, but
16 WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
25 Contact: cacao@complang.tuwien.ac.at
27 Authors: Reinhard Grafl
32 Contains support functions for:
33 - Reading of Java class files
36 - additional support functions
38 $Id: tables.c 562 2003-11-03 00:34:34Z twisti $
44 #include <sys/types.h>
51 #include "threads/thread.h"
52 #include "threads/locks.h"
53 #include "toolbox/loging.h"
54 #include "toolbox/memory.h"
57 bool runverbose = false;
60 int count_utf_len = 0; /* size of utf hash */
61 int count_utf_new = 0; /* calls of utf_new */
62 int count_utf_new_found = 0; /* calls of utf_new with fast return */
64 hashtable utf_hash; /* hashtable for utf8-symbols */
65 hashtable string_hash; /* hashtable for javastrings */
66 hashtable class_hash; /* hashtable for classes */
68 /******************************************************************************
69 *********************** hashtable functions **********************************
70 ******************************************************************************/
72 /* hashsize must be power of 2 */
74 #define UTF_HASHSTART 16384 /* initial size of utf-hash */
75 #define HASHSTART 2048 /* initial size of javastring and class-hash */
78 /******************** function: init_hashtable ******************************
80 Initializes a hashtable structure and allocates memory.
81 The parameter size specifies the initial size of the hashtable.
83 *****************************************************************************/
85 void init_hashtable(hashtable *hash, u4 size)
91 hash->ptr = MNEW(void*, size);
94 for (i = 0; i < size; i++) hash->ptr[i] = NULL;
97 /*********************** function: tables_init *****************************
99 creates hashtables for symboltables
100 (called once at startup)
102 *****************************************************************************/
106 init_hashtable(&utf_hash, UTF_HASHSTART); /* hashtable for utf8-symbols */
107 init_hashtable(&string_hash, HASHSTART); /* hashtable for javastrings */
108 init_hashtable(&class_hash, HASHSTART); /* hashtable for classes */
111 count_utf_len += sizeof(utf*) * utf_hash.size;
116 /********************** function: tables_close ******************************
118 free memory for hashtables
120 *****************************************************************************/
122 void tables_close (stringdeleter del)
128 /* dispose utf symbols */
129 for (i=0; i<utf_hash.size; i++) {
132 /* process elements in external hash chain */
133 utf *nextu = u->hashlink;
134 MFREE (u->text, u1, u->blength);
140 /* dispose javastrings */
141 for (i=0; i<string_hash.size; i++) {
142 s = string_hash.ptr[i];
144 /* process elements in external hash chain */
145 literalstring *nexts = s->hashlink;
147 FREE(s, literalstring);
152 /* dispose hashtable structures */
153 MFREE (utf_hash.ptr, void*, utf_hash.size);
154 MFREE (string_hash.ptr, void*, string_hash.size);
155 MFREE (class_hash.ptr, void*, class_hash.size);
158 /********************* function: utf_display *********************************
160 write utf symbol to stdout (debugging purposes)
162 ******************************************************************************/
164 void utf_display (utf *u)
166 char *endpos = utf_end(u); /* points behind utf string */
167 char *utf_ptr = u->text; /* current position in utf text */
169 while (utf_ptr<endpos) {
171 /* read next unicode character */
172 u2 c = utf_nextu2(&utf_ptr);
173 if (c>=32 && c<=127) printf ("%c",c);
180 /************************ function: utf_sprint *******************************
182 write utf symbol into c-string (debugging purposes)
184 ******************************************************************************/
186 void utf_sprint (char *buffer, utf *u)
188 char *endpos = utf_end(u); /* points behind utf string */
189 char *utf_ptr = u->text; /* current position in utf text */
190 u2 pos = 0; /* position in c-string */
192 while (utf_ptr<endpos)
193 /* copy next unicode character */
194 buffer[pos++] = utf_nextu2(&utf_ptr);
196 /* terminate string */
201 /********************* Funktion: utf_fprint **********************************
203 write utf symbol into file
205 ******************************************************************************/
207 void utf_fprint (FILE *file, utf *u)
209 char *endpos = utf_end(u); /* points behind utf string */
210 char *utf_ptr = u->text; /* current position in utf text */
212 while (utf_ptr<endpos) {
213 /* read next unicode character */
214 u2 c = utf_nextu2(&utf_ptr);
216 if (c>=32 && c<=127) fprintf (file,"%c",c);
217 else fprintf (file,"?");
222 /****************** internal function: utf_hashkey ***************************
224 The hashkey is computed from the utf-text by using up to 8 characters.
225 For utf-symbols longer than 15 characters 3 characters are taken from
226 the beginning and the end, 2 characters are taken from the middle.
228 ******************************************************************************/
230 #define nbs(val) ((u4) *(++text) << val) /* get next byte, left shift by val */
231 #define fbs(val) ((u4) *( text) << val) /* get first byte, left shift by val */
233 static u4 utf_hashkey (char *text, u4 length)
235 char *start_pos = text; /* pointer to utf text */
240 case 0: /* empty string */
243 case 1: return fbs(0);
244 case 2: return fbs(0) ^ nbs(3);
245 case 3: return fbs(0) ^ nbs(3) ^ nbs(5);
246 case 4: return fbs(0) ^ nbs(2) ^ nbs(4) ^ nbs(6);
247 case 5: return fbs(0) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(6);
248 case 6: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(5) ^ nbs(6);
249 case 7: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(5) ^ nbs(6);
250 case 8: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(5) ^ nbs(6) ^ nbs(7);
252 case 9: a = fbs(0) ^ nbs(1) ^ nbs(2);
254 return a ^ nbs(4) ^ nbs(5) ^ nbs(6) ^ nbs(7) ^ nbs(8);
258 a^= nbs(2) ^ nbs(3) ^ nbs(4);
260 return a ^ nbs(6) ^ nbs(7) ^ nbs(8) ^ nbs(9);
264 a^= nbs(2) ^ nbs(3) ^ nbs(4);
266 return a ^ nbs(6) ^ nbs(7) ^ nbs(8) ^ nbs(9) ^ nbs(10);
272 a^= nbs(5) ^ nbs(6) ^ nbs(7);
274 return a ^ nbs(9) ^ nbs(10);
276 case 13: a = fbs(0) ^ nbs(1);
282 return a ^ nbs(9) ^ nbs(10);
290 return a ^ nbs(9) ^ nbs(10) ^ nbs(11);
298 return a ^ nbs(9) ^ nbs(10) ^ nbs(11);
300 default: /* 3 characters from beginning */
305 /* 2 characters from middle */
306 text = start_pos + (length / 2);
311 /* 3 characters from end */
312 text = start_pos + length - 4;
317 return a ^ nbs(10) ^ nbs(11);
322 /*************************** function: utf_hashkey ***************************
324 compute the hashkey of a unicode string
326 ******************************************************************************/
328 u4 unicode_hashkey (u2 *text, u2 len)
330 return utf_hashkey((char*) text, len);
333 /************************ function: utf_new **********************************
335 Creates a new utf-symbol, the text of the symbol is passed as a
336 u1-array. The function searches the utf-hashtable for a utf-symbol
337 with this text. On success the element returned, otherwise a new
338 hashtable element is created.
340 If the number of entries in the hashtable exceeds twice the size of the
341 hashtable slots a reorganization of the hashtable is done and the utf
342 symbols are copied to a new hashtable with doubled size.
344 ******************************************************************************/
346 utf *utf_new (char *text, u2 length)
348 u4 key; /* hashkey computed from utf-text */
349 u4 slot; /* slot in hashtable */
350 utf *u; /* hashtable element */
357 key = utf_hashkey (text, length);
358 slot = key & (utf_hash.size-1);
359 u = utf_hash.ptr[slot];
361 /* search external hash chain for utf-symbol */
363 if (u->blength == length) {
365 /* compare text of hashtable elements */
366 for (i=0; i<length; i++)
367 if (text[i] != u->text[i]) goto nomatch;
370 count_utf_new_found++;
372 /* symbol found in hashtable */
376 u = u->hashlink; /* next element in external chain */
380 count_utf_len += sizeof(utf) + length;
383 /* location in hashtable found, create new utf element */
385 u->blength = length; /* length in bytes of utfstring */
386 u->hashlink = utf_hash.ptr[slot]; /* link in external hashchain */
387 u->text = mem_alloc(length); /* allocate memory for utf-text */
388 memcpy(u->text,text,length); /* copy utf-text */
389 utf_hash.ptr[slot] = u; /* insert symbol into table */
391 utf_hash.entries++; /* update number of entries */
393 if ( utf_hash.entries > (utf_hash.size*2)) {
395 /* reorganization of hashtable, average length of
396 the external chains is approx. 2 */
400 hashtable newhash; /* the new hashtable */
402 /* create new hashtable, double the size */
403 init_hashtable(&newhash, utf_hash.size*2);
404 newhash.entries=utf_hash.entries;
407 count_utf_len += sizeof(utf*) * utf_hash.size;
410 /* transfer elements to new hashtable */
411 for (i=0; i<utf_hash.size; i++) {
412 u = (utf*) utf_hash.ptr[i];
414 utf *nextu = u -> hashlink;
415 u4 slot = (utf_hashkey(u->text,u->blength)) & (newhash.size-1);
417 u->hashlink = (utf*) newhash.ptr[slot];
418 newhash.ptr[slot] = u;
420 /* follow link in external hash chain */
425 /* dispose old table */
426 MFREE (utf_hash.ptr, void*, utf_hash.size);
434 /********************* function: utf_new_char ********************************
436 creates a new utf symbol, the text for this symbol is passed
437 as a c-string ( = char* )
439 ******************************************************************************/
441 utf *utf_new_char (char *text)
443 return utf_new(text, strlen(text));
446 /************************** Funktion: utf_show ******************************
448 writes the utf symbols in the utfhash to stdout and
449 displays the number of external hash chains grouped
450 according to the chainlength
453 *****************************************************************************/
458 #define CHAIN_LIMIT 20 /* limit for seperated enumeration */
460 u4 chain_count[CHAIN_LIMIT]; /* numbers of chains */
461 u4 max_chainlength = 0; /* maximum length of the chains */
462 u4 sum_chainlength = 0; /* sum of the chainlengths */
463 u4 beyond_limit = 0; /* number of utf-symbols in chains with length>=CHAIN_LIMIT-1 */
466 printf ("UTF-HASH:\n");
468 /* show element of utf-hashtable */
469 for (i=0; i<utf_hash.size; i++) {
470 utf *u = utf_hash.ptr[i];
472 printf ("SLOT %d: ", (int) i);
484 printf ("UTF-HASH: %d slots for %d entries\n",
485 (int) utf_hash.size, (int) utf_hash.entries );
488 if (utf_hash.entries == 0)
491 printf("chains:\n chainlength number of chains %% of utfstrings\n");
493 for (i=0;i<CHAIN_LIMIT;i++)
496 /* count numbers of hashchains according to their length */
497 for (i=0; i<utf_hash.size; i++) {
499 utf *u = (utf*) utf_hash.ptr[i];
502 /* determine chainlength */
508 /* update sum of all chainlengths */
509 sum_chainlength+=chain_length;
511 /* determine the maximum length of the chains */
512 if (chain_length>max_chainlength)
513 max_chainlength = chain_length;
515 /* update number of utf-symbols in chains with length>=CHAIN_LIMIT-1 */
516 if (chain_length>=CHAIN_LIMIT) {
517 beyond_limit+=chain_length;
518 chain_length=CHAIN_LIMIT-1;
521 /* update number of hashchains of current length */
522 chain_count[chain_length]++;
525 /* display results */
526 for (i=1;i<CHAIN_LIMIT-1;i++)
527 printf(" %2d %17d %18.2f%%\n",i,chain_count[i],(((float) chain_count[i]*i*100)/utf_hash.entries));
529 printf(" >=%2d %17d %18.2f%%\n",CHAIN_LIMIT-1,chain_count[CHAIN_LIMIT-1],((float) beyond_limit*100)/utf_hash.entries);
532 printf("max. chainlength:%5d\n",max_chainlength);
534 /* avg. chainlength = sum of chainlengths / number of chains */
535 printf("avg. chainlength:%5.2f\n",(float) sum_chainlength / (utf_hash.size-chain_count[0]));
538 /******************************************************************************
539 *********************** Misc support functions ********************************
540 ******************************************************************************/
543 /******************** Function: desc_to_type **********************************
545 Determines the corresponding Java base data type for a given type
548 ******************************************************************************/
550 u2 desc_to_type(utf *descriptor)
552 char *utf_ptr = descriptor->text; /* current position in utf text */
554 if (descriptor->blength < 1) panic("Type-Descriptor is empty string");
556 switch (*utf_ptr++) {
561 case 'Z': return TYPE_INT;
562 case 'D': return TYPE_DOUBLE;
563 case 'F': return TYPE_FLOAT;
564 case 'J': return TYPE_LONG;
566 case '[': return TYPE_ADDRESS;
569 sprintf(logtext, "Invalid Type-Descriptor: ");
570 utf_sprint(logtext+strlen(logtext), descriptor);
577 /********************** Function: desc_typesize *******************************
579 Calculates the lenght in bytes needed for a data element of the type given
580 by its type descriptor.
582 ******************************************************************************/
584 u2 desc_typesize (utf *descriptor)
586 switch (desc_to_type(descriptor)) {
587 case TYPE_INT: return 4;
588 case TYPE_LONG: return 8;
589 case TYPE_FLOAT: return 4;
590 case TYPE_DOUBLE: return 8;
591 case TYPE_ADDRESS: return sizeof(voidptr);
597 /********************** function: utf_nextu2 *********************************
599 read the next unicode character from the utf string and
600 increment the utf-string pointer accordingly
602 ******************************************************************************/
604 u2 utf_nextu2(char **utf_ptr)
606 /* uncompressed unicode character */
608 /* current position in utf text */
609 unsigned char *utf = (unsigned char *) (*utf_ptr);
610 /* bytes representing the unicode character */
611 unsigned char ch1, ch2, ch3;
612 /* number of bytes used to represent the unicode character */
615 switch ((ch1 = utf[0]) >> 4) {
616 default: /* 1 byte */
620 case 0xD: /* 2 bytes */
621 if (((ch2 = utf[1]) & 0xC0) == 0x80) {
622 unsigned char high = ch1 & 0x1F;
623 unsigned char low = ch2 & 0x3F;
624 unicode_char = (high << 6) + low;
629 case 0xE: /* 2 or 3 bytes */
630 if (((ch2 = utf[1]) & 0xC0) == 0x80) {
631 if (((ch3 = utf[2]) & 0xC0) == 0x80) {
632 unsigned char low = ch3 & 0x3f;
633 unsigned char mid = ch2 & 0x3f;
634 unsigned char high = ch1 & 0x0f;
635 unicode_char = (((high << 6) + mid) << 6) + low;
643 /* update position in utf-text */
644 *utf_ptr = (char *) (utf + len);
649 /******************** Function: class_new **************************************
651 searches for the class with the specified name in the classes hashtable,
652 if there is no such class a new classinfo structure is created and inserted
653 into the list of classes to be loaded
655 *******************************************************************************/
657 classinfo *class_new(utf *u)
659 classinfo *c; /* hashtable element */
660 u4 key; /* hashkey computed from classname */
661 u4 slot; /* slot in hashtable */
664 key = utf_hashkey (u->text, u->blength);
665 slot = key & (class_hash.size-1);
666 c = class_hash.ptr[slot];
668 /* search external hash chain for the class */
670 if (c->name->blength == u->blength) {
671 for (i=0; i<u->blength; i++)
672 if (u->text[i] != c->name->text[i]) goto nomatch;
674 /* class found in hashtable */
679 c = c->hashlink; /* next element in external chain */
682 /* location in hashtable found, create new classinfo structure */
685 count_class_infos += sizeof(classinfo);
697 c->interfacescount = 0;
698 c->interfaces = NULL;
706 c->header.vftbl = NULL;
707 c->innerclasscount = 0;
708 c->innerclass = NULL;
710 c->initialized = false;
711 c->classvftbl = false;
713 /* prepare loading of the class */
714 list_addlast(&unloadedclasses, c);
716 /* insert class into the hashtable */
717 c->hashlink = class_hash.ptr[slot];
718 class_hash.ptr[slot] = c;
720 /* update number of hashtable-entries */
721 class_hash.entries++;
723 if (class_hash.entries > (class_hash.size*2)) {
725 /* reorganization of hashtable, average length of
726 the external chains is approx. 2 */
730 hashtable newhash; /* the new hashtable */
732 /* create new hashtable, double the size */
733 init_hashtable(&newhash, class_hash.size*2);
734 newhash.entries = class_hash.entries;
736 /* transfer elements to new hashtable */
737 for (i = 0; i < class_hash.size; i++) {
738 c = (classinfo*) class_hash.ptr[i];
740 classinfo *nextc = c->hashlink;
741 u4 slot = (utf_hashkey(c->name->text, c->name->blength)) & (newhash.size - 1);
743 c->hashlink = newhash.ptr[slot];
744 newhash.ptr[slot] = c;
750 /* dispose old table */
751 MFREE(class_hash.ptr, void*, class_hash.size);
752 class_hash = newhash;
758 /******************** Function: class_get **************************************
760 searches for the class with the specified name in the classes hashtable
761 if there is no such class NULL is returned
763 *******************************************************************************/
765 classinfo *class_get(utf *u)
767 classinfo *c; /* hashtable element */
768 u4 key; /* hashkey computed from classname */
769 u4 slot; /* slot in hashtable */
772 key = utf_hashkey (u->text, u->blength);
773 slot = key & (class_hash.size-1);
774 c = class_hash.ptr[slot];
776 /* search external hash-chain */
778 if (c->name->blength == u->blength) {
780 /* compare classnames */
781 for (i=0; i<u->blength; i++)
782 if (u->text[i] != c->name->text[i]) goto nomatch;
784 /* class found in hashtable */
792 /* class not found */
797 /************************** function: utf_strlen ******************************
799 determine number of unicode characters in the utf string
801 *******************************************************************************/
803 u4 utf_strlen(utf *u)
805 char *endpos = utf_end(u); /* points behind utf string */
806 char *utf_ptr = u->text; /* current position in utf text */
807 u4 len = 0; /* number of unicode characters */
809 while (utf_ptr<endpos) {
811 /* next unicode character */
812 utf_nextu2(&utf_ptr);
816 /* string ended abruptly */
817 panic("illegal utf string");
824 * These are local overrides for various environment variables in Emacs.
825 * Please do not remove this and leave it at the end of the file, where
826 * Emacs will automagically detect them.
827 * ---------------------------------------------------------------------
830 * indent-tabs-mode: t