1 /* -*- mode: c; tab-width: 4; c-basic-offset: 4 -*- */
2 /****************************** tables.c ***************************************
4 Copyright (c) 1997 A. Krall, R. Grafl, M. Gschwind, M. Probst
6 See file COPYRIGHT for information on usage and disclaimer of warranties
8 Contains support functions for:
9 - Reading of Java class files
12 - additional support functions
14 Authors: Reinhard Grafl EMAIL: cacao@complang.tuwien.ac.at
15 Changes: Mark Probst EMAIL: cacao@complang.tuwien.ac.at
16 Andreas Krall EMAIL: cacao@complang.tuwien.ac.at
18 Last Change: 1998/03/24
20 *******************************************************************************/
23 #include <sys/types.h>
31 #include "threads/thread.h" /* schani */
32 #include "threads/locks.h"
34 bool runverbose = false;
37 int count_utf_len = 0; /* size of utf hash */
38 int count_utf_new = 0; /* calls of utf_new */
39 int count_utf_new_found = 0; /* calls of utf_new with fast return */
41 hashtable utf_hash; /* hashtable for utf8-symbols */
42 hashtable string_hash; /* hashtable for javastrings */
43 hashtable class_hash; /* hashtable for classes */
45 /******************************************************************************
46 *********************** hashtable functions **********************************
47 ******************************************************************************/
49 /* hashsize must be power of 2 */
51 #define UTF_HASHSTART 16384 /* initial size of utf-hash */
52 #define HASHSTART 2048 /* initial size of javastring and class-hash */
55 /******************** function: init_hashtable ******************************
57 Initializes a hashtable structure and allocates memory.
58 The parameter size specifies the initial size of the hashtable.
60 *****************************************************************************/
62 void init_hashtable(hashtable *hash, u4 size)
68 hash->ptr = MNEW (void*, size);
71 for (i=0; i<size; i++) hash->ptr[i] = NULL;
74 /*********************** function: tables_init *****************************
76 creates hashtables for symboltables
77 (called once at startup)
79 *****************************************************************************/
83 init_hashtable(&utf_hash, UTF_HASHSTART); /* hashtable for utf8-symbols */
84 init_hashtable(&string_hash, HASHSTART); /* hashtable for javastrings */
85 init_hashtable(&class_hash, HASHSTART); /* hashtable for classes */
88 count_utf_len += sizeof(utf*) * utf_hash.size;
93 /********************** function: tables_close ******************************
95 free memory for hashtables
97 *****************************************************************************/
99 void tables_close (stringdeleter del)
105 /* dispose utf symbols */
106 for (i=0; i<utf_hash.size; i++) {
109 /* process elements in external hash chain */
110 utf *nextu = u->hashlink;
111 MFREE (u->text, u1, u->blength);
117 /* dispose javastrings */
118 for (i=0; i<string_hash.size; i++) {
119 s = string_hash.ptr[i];
121 /* process elements in external hash chain */
122 literalstring *nexts = s->hashlink;
124 FREE(s, literalstring);
129 /* dispose hashtable structures */
130 MFREE (utf_hash.ptr, void*, utf_hash.size);
131 MFREE (string_hash.ptr, void*, string_hash.size);
132 MFREE (class_hash.ptr, void*, class_hash.size);
135 /********************* function: utf_display *********************************
137 write utf symbol to stdout (debugging purposes)
139 ******************************************************************************/
141 void utf_display (utf *u)
143 char *endpos = utf_end(u); /* points behind utf string */
144 char *utf_ptr = u->text; /* current position in utf text */
146 while (utf_ptr<endpos) {
148 /* read next unicode character */
149 u2 c = utf_nextu2(&utf_ptr);
150 if (c>=32 && c<=127) printf ("%c",c);
157 /************************ function: utf_sprint *******************************
159 write utf symbol into c-string (debugging purposes)
161 ******************************************************************************/
163 void utf_sprint (char *buffer, utf *u)
165 char *endpos = utf_end(u); /* points behind utf string */
166 char *utf_ptr = u->text; /* current position in utf text */
167 u2 pos = 0; /* position in c-string */
169 while (utf_ptr<endpos)
170 /* copy next unicode character */
171 buffer[pos++] = utf_nextu2(&utf_ptr);
173 /* terminate string */
178 /********************* Funktion: utf_fprint **********************************
180 write utf symbol into file
182 ******************************************************************************/
184 void utf_fprint (FILE *file, utf *u)
186 char *endpos = utf_end(u); /* points behind utf string */
187 char *utf_ptr = u->text; /* current position in utf text */
189 while (utf_ptr<endpos) {
190 /* read next unicode character */
191 u2 c = utf_nextu2(&utf_ptr);
193 if (c>=32 && c<=127) fprintf (file,"%c",c);
194 else fprintf (file,"?");
199 /****************** internal function: utf_hashkey ***************************
201 The hashkey is computed from the utf-text by using up to 8 characters.
202 For utf-symbols longer than 15 characters 3 characters are taken from
203 the beginning and the end, 2 characters are taken from the middle.
205 ******************************************************************************/
207 #define nbs(val) ((u4) *(++text) << val) /* get next byte, left shift by val */
208 #define fbs(val) ((u4) *( text) << val) /* get first byte, left shift by val */
210 static u4 utf_hashkey (char *text, u4 length)
212 char *start_pos = text; /* pointer to utf text */
217 case 0: /* empty string */
220 case 1: return fbs(0);
221 case 2: return fbs(0) ^ nbs(3);
222 case 3: return fbs(0) ^ nbs(3) ^ nbs(5);
223 case 4: return fbs(0) ^ nbs(2) ^ nbs(4) ^ nbs(6);
224 case 5: return fbs(0) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(6);
225 case 6: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(5) ^ nbs(6);
226 case 7: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(5) ^ nbs(6);
227 case 8: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(5) ^ nbs(6) ^ nbs(7);
229 case 9: a = fbs(0) ^ nbs(1) ^ nbs(2);
231 return a ^ nbs(4) ^ nbs(5) ^ nbs(6) ^ nbs(7) ^ nbs(8);
235 a^= nbs(2) ^ nbs(3) ^ nbs(4);
237 return a ^ nbs(6) ^ nbs(7) ^ nbs(8) ^ nbs(9);
241 a^= nbs(2) ^ nbs(3) ^ nbs(4);
243 return a ^ nbs(6) ^ nbs(7) ^ nbs(8) ^ nbs(9) ^ nbs(10);
249 a^= nbs(5) ^ nbs(6) ^ nbs(7);
251 return a ^ nbs(9) ^ nbs(10);
253 case 13: a = fbs(0) ^ nbs(1);
259 return a ^ nbs(9) ^ nbs(10);
267 return a ^ nbs(9) ^ nbs(10) ^ nbs(11);
275 return a ^ nbs(9) ^ nbs(10) ^ nbs(11);
277 default: /* 3 characters from beginning */
282 /* 2 characters from middle */
283 text = start_pos + (length / 2);
288 /* 3 characters from end */
289 text = start_pos + length - 4;
294 return a ^ nbs(10) ^ nbs(11);
299 /*************************** function: utf_hashkey ***************************
301 compute the hashkey of a unicode string
303 ******************************************************************************/
305 u4 unicode_hashkey (u2 *text, u2 len)
307 return utf_hashkey((char*) text, len);
310 /************************ function: utf_new **********************************
312 Creates a new utf-symbol, the text of the symbol is passed as a
313 u1-array. The function searches the utf-hashtable for a utf-symbol
314 with this text. On success the element returned, otherwise a new
315 hashtable element is created.
317 If the number of entries in the hashtable exceeds twice the size of the
318 hashtable slots a reorganization of the hashtable is done and the utf
319 symbols are copied to a new hashtable with doubled size.
321 ******************************************************************************/
323 utf *utf_new (char *text, u2 length)
325 u4 key; /* hashkey computed from utf-text */
326 u4 slot; /* slot in hashtable */
327 utf *u; /* hashtable element */
334 key = utf_hashkey (text, length);
335 slot = key & (utf_hash.size-1);
336 u = utf_hash.ptr[slot];
338 /* search external hash chain for utf-symbol */
340 if (u->blength == length) {
342 /* compare text of hashtable elements */
343 for (i=0; i<length; i++)
344 if (text[i] != u->text[i]) goto nomatch;
347 count_utf_new_found++;
349 /* symbol found in hashtable */
353 u = u->hashlink; /* next element in external chain */
357 count_utf_len += sizeof(utf) + length;
360 /* location in hashtable found, create new utf element */
362 u->blength = length; /* length in bytes of utfstring */
363 u->hashlink = utf_hash.ptr[slot]; /* link in external hashchain */
364 u->text = mem_alloc(length); /* allocate memory for utf-text */
365 memcpy(u->text,text,length); /* copy utf-text */
366 utf_hash.ptr[slot] = u; /* insert symbol into table */
368 utf_hash.entries++; /* update number of entries */
370 if ( utf_hash.entries > (utf_hash.size*2)) {
372 /* reorganization of hashtable, average length of
373 the external chains is approx. 2 */
377 hashtable newhash; /* the new hashtable */
379 /* create new hashtable, double the size */
380 init_hashtable(&newhash, utf_hash.size*2);
381 newhash.entries=utf_hash.entries;
384 count_utf_len += sizeof(utf*) * utf_hash.size;
387 /* transfer elements to new hashtable */
388 for (i=0; i<utf_hash.size; i++) {
389 u = (utf*) utf_hash.ptr[i];
391 utf *nextu = u -> hashlink;
392 u4 slot = (utf_hashkey(u->text,u->blength)) & (newhash.size-1);
394 u->hashlink = (utf*) newhash.ptr[slot];
395 newhash.ptr[slot] = u;
397 /* follow link in external hash chain */
402 /* dispose old table */
403 MFREE (utf_hash.ptr, void*, utf_hash.size);
411 /********************* function: utf_new_char ********************************
413 creates a new utf symbol, the text for this symbol is passed
414 as a c-string ( = char* )
416 ******************************************************************************/
418 utf *utf_new_char (char *text)
420 return utf_new(text, strlen(text));
423 /************************** Funktion: utf_show ******************************
425 writes the utf symbols in the utfhash to stdout and
426 displays the number of external hash chains grouped
427 according to the chainlength
430 *****************************************************************************/
435 #define CHAIN_LIMIT 20 /* limit for seperated enumeration */
437 u4 chain_count[CHAIN_LIMIT]; /* numbers of chains */
438 u4 max_chainlength = 0; /* maximum length of the chains */
439 u4 sum_chainlength = 0; /* sum of the chainlengths */
440 u4 beyond_limit = 0; /* number of utf-symbols in chains with length>=CHAIN_LIMIT-1 */
443 printf ("UTF-HASH:\n");
445 /* show element of utf-hashtable */
446 for (i=0; i<utf_hash.size; i++) {
447 utf *u = utf_hash.ptr[i];
449 printf ("SLOT %d: ", (int) i);
461 printf ("UTF-HASH: %d slots for %d entries\n",
462 (int) utf_hash.size, (int) utf_hash.entries );
465 if (utf_hash.entries == 0)
468 printf("chains:\n chainlength number of chains %% of utfstrings\n");
470 for (i=0;i<CHAIN_LIMIT;i++)
473 /* count numbers of hashchains according to their length */
474 for (i=0; i<utf_hash.size; i++) {
476 utf *u = (utf*) utf_hash.ptr[i];
479 /* determine chainlength */
485 /* update sum of all chainlengths */
486 sum_chainlength+=chain_length;
488 /* determine the maximum length of the chains */
489 if (chain_length>max_chainlength)
490 max_chainlength = chain_length;
492 /* update number of utf-symbols in chains with length>=CHAIN_LIMIT-1 */
493 if (chain_length>=CHAIN_LIMIT) {
494 beyond_limit+=chain_length;
495 chain_length=CHAIN_LIMIT-1;
498 /* update number of hashchains of current length */
499 chain_count[chain_length]++;
502 /* display results */
503 for (i=1;i<CHAIN_LIMIT-1;i++)
504 printf(" %2d %17d %18.2f%%\n",i,chain_count[i],(((float) chain_count[i]*i*100)/utf_hash.entries));
506 printf(" >=%2d %17d %18.2f%%\n",CHAIN_LIMIT-1,chain_count[CHAIN_LIMIT-1],((float) beyond_limit*100)/utf_hash.entries);
509 printf("max. chainlength:%5d\n",max_chainlength);
511 /* avg. chainlength = sum of chainlengths / number of chains */
512 printf("avg. chainlength:%5.2f\n",(float) sum_chainlength / (utf_hash.size-chain_count[0]));
515 /******************************************************************************
516 *********************** Misc support functions ********************************
517 ******************************************************************************/
520 /******************** Function: desc_to_type **********************************
522 Determines the corresponding Java base data type for a given type
525 ******************************************************************************/
527 u2 desc_to_type (utf *descriptor)
529 char *utf_ptr = descriptor->text; /* current position in utf text */
531 if (descriptor->blength < 1) panic ("Type-Descriptor is empty string");
533 switch (*utf_ptr++) {
538 case 'Z': return TYPE_INT;
539 case 'D': return TYPE_DOUBLE;
540 case 'F': return TYPE_FLOAT;
541 case 'J': return TYPE_LONG;
543 case '[': return TYPE_ADDRESS;
546 sprintf (logtext, "Invalid Type-Descriptor: ");
547 utf_sprint (logtext+strlen(logtext), descriptor);
553 /********************** Function: desc_typesize *******************************
555 Calculates the lenght in bytes needed for a data element of the type given
556 by its type descriptor.
558 ******************************************************************************/
560 u2 desc_typesize (utf *descriptor)
562 switch (desc_to_type(descriptor)) {
563 case TYPE_INT: return 4;
564 case TYPE_LONG: return 8;
565 case TYPE_FLOAT: return 4;
566 case TYPE_DOUBLE: return 8;
567 case TYPE_ADDRESS: return sizeof(voidptr);
573 /********************** function: utf_nextu2 *********************************
575 read the next unicode character from the utf string and
576 increment the utf-string pointer accordingly
578 ******************************************************************************/
580 u2 utf_nextu2(char **utf_ptr)
582 /* uncompressed unicode character */
584 /* current position in utf text */
585 unsigned char *utf = (unsigned char *) (*utf_ptr);
586 /* bytes representing the unicode character */
587 unsigned char ch1, ch2, ch3;
588 /* number of bytes used to represent the unicode character */
591 switch ((ch1 = utf[0]) >> 4) {
592 default: /* 1 byte */
596 case 0xD: /* 2 bytes */
597 if (((ch2 = utf[1]) & 0xC0) == 0x80) {
598 unsigned char high = ch1 & 0x1F;
599 unsigned char low = ch2 & 0x3F;
600 unicode_char = (high << 6) + low;
605 case 0xE: /* 2 or 3 bytes */
606 if (((ch2 = utf[1]) & 0xC0) == 0x80) {
607 if (((ch3 = utf[2]) & 0xC0) == 0x80) {
608 unsigned char low = ch3 & 0x3f;
609 unsigned char mid = ch2 & 0x3f;
610 unsigned char high = ch1 & 0x0f;
611 unicode_char = (((high << 6) + mid) << 6) + low;
619 /* update position in utf-text */
620 *utf_ptr = (char *) (utf + len);
624 /******************** Function: class_new **************************************
626 searches for the class with the specified name in the classes hashtable,
627 if there is no such class a new classinfo structure is created and inserted
628 into the list of classes to be loaded
630 *******************************************************************************/
632 classinfo *class_new (utf *u)
634 classinfo *c; /* hashtable element */
635 u4 key; /* hashkey computed from classname */
636 u4 slot; /* slot in hashtable */
639 key = utf_hashkey (u->text, u->blength);
640 slot = key & (class_hash.size-1);
641 c = class_hash.ptr[slot];
643 /* search external hash chain for the class */
645 if (c->name->blength == u->blength) {
646 for (i=0; i<u->blength; i++)
647 if (u->text[i] != c->name->text[i]) goto nomatch;
649 /* class found in hashtable */
654 c = c->hashlink; /* next element in external chain */
657 /* location in hashtable found, create new classinfo structure */
660 count_class_infos += sizeof(classinfo);
672 c -> interfacescount = 0;
673 c -> interfaces = NULL;
674 c -> fieldscount = 0;
676 c -> methodscount = 0;
680 c -> instancesize = 0;
681 c -> header.vftbl = NULL;
682 c -> innerclasscount = 0;
683 c -> innerclass = NULL;
685 c -> initialized = false;
686 c -> classvftbl = false;
688 /* prepare loading of the class */
689 list_addlast (&unloadedclasses, c);
691 /* insert class into the hashtable */
692 c->hashlink = class_hash.ptr[slot];
693 class_hash.ptr[slot] = c;
695 /* update number of hashtable-entries */
696 class_hash.entries++;
698 if ( class_hash.entries > (class_hash.size*2)) {
700 /* reorganization of hashtable, average length of
701 the external chains is approx. 2 */
705 hashtable newhash; /* the new hashtable */
707 /* create new hashtable, double the size */
708 init_hashtable(&newhash, class_hash.size*2);
709 newhash.entries = class_hash.entries;
711 /* transfer elements to new hashtable */
712 for (i=0; i<class_hash.size; i++) {
713 c = (classinfo*) class_hash.ptr[i];
715 classinfo *nextc = c -> hashlink;
716 u4 slot = (utf_hashkey(c->name->text,c->name->blength)) & (newhash.size-1);
718 c->hashlink = newhash.ptr[slot];
719 newhash.ptr[slot] = c;
725 /* dispose old table */
726 MFREE (class_hash.ptr, void*, class_hash.size);
727 class_hash = newhash;
733 /******************** Function: class_get **************************************
735 searches for the class with the specified name in the classes hashtable
736 if there is no such class NULL is returned
738 *******************************************************************************/
740 classinfo *class_get (utf *u)
742 classinfo *c; /* hashtable element */
743 u4 key; /* hashkey computed from classname */
744 u4 slot; /* slot in hashtable */
747 key = utf_hashkey (u->text, u->blength);
748 slot = key & (class_hash.size-1);
749 c = class_hash.ptr[slot];
751 /* search external hash-chain */
753 if (c->name->blength == u->blength) {
755 /* compare classnames */
756 for (i=0; i<u->blength; i++)
757 if (u->text[i] != c->name->text[i]) goto nomatch;
759 /* class found in hashtable */
767 /* class not found */
772 /************************** function: utf_strlen ******************************
774 determine number of unicode characters in the utf string
776 *******************************************************************************/
778 u4 utf_strlen(utf *u)
780 char *endpos = utf_end(u); /* points behind utf string */
781 char *utf_ptr = u->text; /* current position in utf text */
782 u4 len = 0; /* number of unicode characters */
784 while (utf_ptr<endpos) {
786 /* next unicode character */
787 utf_nextu2(&utf_ptr);
791 /* string ended abruptly */
792 panic("illegal utf string");