1 /* -*- mode: c; tab-width: 4; c-basic-offset: 4 -*- */
2 /****************************** tables.c ***************************************
4 Copyright (c) 1997 A. Krall, R. Grafl, M. Gschwind, M. Probst
6 See file COPYRIGHT for information on usage and disclaimer of warranties
8 Enth"alt Supportfunktionen f"ur:
9 - Lesen von JavaClass-Files
12 - zus"atzliche Support-Funktionen
14 Authors: Reinhard Grafl EMAIL: cacao@complang.tuwien.ac.at
15 Changes: Mark Probst EMAIL: cacao@complang.tuwien.ac.at
16 Andreas Krall EMAIL: cacao@complang.tuwien.ac.at
18 Last Change: 1998/03/24
20 *******************************************************************************/
30 #include "threads/thread.h" /* schani */
31 #include "threads/locks.h"
33 bool runverbose = false;
36 int count_utf_len = 0; /* size of utf hash */
37 int count_utf_new = 0; /* calls of utf_new */
38 int count_utf_new_found = 0; /* calls of utf_new with fast return */
40 hashtable utf_hash; /* hashtable for utf8-symbols */
41 hashtable string_hash; /* hashtable for javastrings */
42 hashtable class_hash; /* hashtable for classes */
44 /******************************************************************************
45 *********************** hashtable functions **********************************
46 ******************************************************************************/
48 /* hashsize must be power of 2 */
50 #define UTF_HASHSTART 16384 /* initial size of utf-hash */
51 #define HASHSTART 2048 /* initial size of javastring and class-hash */
54 /******************** function: init_hashtable ******************************
56 Initializes a hashtable structure and allocates memory.
57 The parameter size specifies the initial size of the hashtable.
59 *****************************************************************************/
61 void init_hashtable(hashtable *hash, u4 size)
67 hash->ptr = MNEW (void*, size);
70 for (i=0; i<size; i++) hash->ptr[i] = NULL;
73 /*********************** function: tables_init *****************************
75 creates hashtables for symboltables
76 (called once at startup)
78 *****************************************************************************/
82 init_hashtable(&utf_hash, UTF_HASHSTART); /* hashtable for utf8-symbols */
83 init_hashtable(&string_hash, HASHSTART); /* hashtable for javastrings */
84 init_hashtable(&class_hash, HASHSTART); /* hashtable for classes */
87 count_utf_len += sizeof(utf*) * utf_hash.size;
92 /********************** function: tables_close ******************************
94 free memory for hashtables
96 *****************************************************************************/
98 void tables_close (stringdeleter del)
104 /* dispose utf symbols */
105 for (i=0; i<utf_hash.size; i++) {
108 /* process elements in external hash chain */
109 utf *nextu = u->hashlink;
110 MFREE (u->text, u1, u->blength);
116 /* dispose javastrings */
117 for (i=0; i<string_hash.size; i++) {
118 s = string_hash.ptr[i];
120 /* process elements in external hash chain */
121 literalstring *nexts = s->hashlink;
123 FREE(s, literalstring);
128 /* dispose hashtable structures */
129 MFREE (utf_hash.ptr, void*, utf_hash.size);
130 MFREE (string_hash.ptr, void*, string_hash.size);
131 MFREE (class_hash.ptr, void*, class_hash.size);
134 /********************* function: utf_display *********************************
136 write utf symbol to stdout (debugging purposes)
138 ******************************************************************************/
140 void utf_display (utf *u)
142 char *endpos = utf_end(u); /* points behind utf string */
143 char *utf_ptr = u->text; /* current position in utf text */
145 while (utf_ptr<endpos) {
147 /* read next unicode character */
148 u2 c = utf_nextu2(&utf_ptr);
149 if (c>=32 && c<=127) printf ("%c",c);
156 /************************ function: utf_sprint *******************************
158 write utf symbol into c-string (debugging purposes)
160 ******************************************************************************/
162 void utf_sprint (char *buffer, utf *u)
164 char *endpos = utf_end(u); /* points behind utf string */
165 char *utf_ptr = u->text; /* current position in utf text */
166 u2 pos = 0; /* position in c-string */
168 while (utf_ptr<endpos)
169 /* copy next unicode character */
170 buffer[pos++] = utf_nextu2(&utf_ptr);
172 /* terminate string */
177 /********************* Funktion: utf_fprint **********************************
179 write utf symbol into file
181 ******************************************************************************/
183 void utf_fprint (FILE *file, utf *u)
185 char *endpos = utf_end(u); /* points behind utf string */
186 char *utf_ptr = u->text; /* current position in utf text */
188 while (utf_ptr<endpos)
189 /* write next unicode character */
190 putc ( utf_nextu2(&utf_ptr), file );
194 /****************** internal function: utf_hashkey ***************************
196 The hashkey is computed from the utf-text by using up to 8 characters.
197 For utf-symbols longer than 15 characters 3 characters are taken from
198 the beginning and the end, 2 characters are taken from the middle.
200 ******************************************************************************/
202 #define nbs(val) ((u4) *(++text) << val) /* get next byte, left shift by val */
203 #define fbs(val) ((u4) *( text) << val) /* get first byte, left shift by val */
205 static u4 utf_hashkey (char *text, u4 length)
207 char *start_pos = text; /* pointer to utf text */
212 case 0: /* empty string */
215 case 1: return fbs(0);
216 case 2: return fbs(0) ^ nbs(3);
217 case 3: return fbs(0) ^ nbs(3) ^ nbs(5);
218 case 4: return fbs(0) ^ nbs(2) ^ nbs(4) ^ nbs(6);
219 case 5: return fbs(0) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(6);
220 case 6: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(5) ^ nbs(6);
221 case 7: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(5) ^ nbs(6);
222 case 8: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(5) ^ nbs(6) ^ nbs(7);
224 case 9: a = fbs(0) ^ nbs(1) ^ nbs(2);
226 return a ^ nbs(4) ^ nbs(5) ^ nbs(6) ^ nbs(7) ^ nbs(8);
230 a^= nbs(2) ^ nbs(3) ^ nbs(4);
232 return a ^ nbs(6) ^ nbs(7) ^ nbs(8) ^ nbs(9);
236 a^= nbs(2) ^ nbs(3) ^ nbs(4);
238 return a ^ nbs(6) ^ nbs(7) ^ nbs(8) ^ nbs(9) ^ nbs(10);
244 a^= nbs(5) ^ nbs(6) ^ nbs(7);
246 return a ^ nbs(9) ^ nbs(10);
248 case 13: a = fbs(0) ^ nbs(1);
254 return a ^ nbs(9) ^ nbs(10);
262 return a ^ nbs(9) ^ nbs(10) ^ nbs(11);
270 return a ^ nbs(9) ^ nbs(10) ^ nbs(11);
272 default: /* 3 characters from beginning */
277 /* 2 characters from middle */
278 text = start_pos + (length / 2);
283 /* 3 characters from end */
284 text = start_pos + length - 4;
289 return a ^ nbs(10) ^ nbs(11);
294 /*************************** function: utf_hashkey ***************************
296 compute the hashkey of a unicode string
298 ******************************************************************************/
300 u4 unicode_hashkey (u2 *text, u2 len)
302 utf_hashkey((char*) text, len);
305 /************************ function: utf_new **********************************
307 Creates a new utf-symbol, the text of the symbol is passed as a
308 u1-array. The function searches the utf-hashtable for a utf-symbol
309 with this text. On success the element returned, otherwise a new
310 hashtable element is created.
312 If the number of entries in the hashtable exceeds twice the size of the
313 hashtable slots a reorganization of the hashtable is done and the utf
314 symbols are copied to a new hashtable with doubled size.
316 ******************************************************************************/
318 utf *utf_new (char *text, u2 length)
320 u4 key; /* hashkey computed from utf-text */
321 u4 slot; /* slot in hashtable */
322 utf *u; /* hashtable element */
329 key = utf_hashkey (text, length);
330 slot = key & (utf_hash.size-1);
331 u = utf_hash.ptr[slot];
333 /* search external hash chain for utf-symbol */
335 if (u->blength == length) {
337 /* compare text of hashtable elements */
338 for (i=0; i<length; i++)
339 if (text[i] != u->text[i]) goto nomatch;
342 count_utf_new_found++;
344 /* symbol found in hashtable */
348 u = u->hashlink; /* next element in external chain */
352 count_utf_len += sizeof(utf) + length;
355 /* location in hashtable found, create new utf element */
357 u->blength = length; /* length in bytes of utfstring */
358 u->hashlink = utf_hash.ptr[slot]; /* link in external hashchain */
359 u->text = mem_alloc(length); /* allocate memory for utf-text */
360 memcpy(u->text,text,length); /* copy utf-text */
361 utf_hash.ptr[slot] = u; /* insert symbol into table */
363 utf_hash.entries++; /* update number of entries */
365 if ( utf_hash.entries > (utf_hash.size*2)) {
367 /* reorganization of hashtable, average length of
368 the external chains is approx. 2 */
372 hashtable newhash; /* the new hashtable */
374 /* create new hashtable, double the size */
375 init_hashtable(&newhash, utf_hash.size*2);
376 newhash.entries=utf_hash.entries;
379 count_utf_len += sizeof(utf*) * utf_hash.size;
382 /* transfer elements to new hashtable */
383 for (i=0; i<utf_hash.size; i++) {
384 u = (utf*) utf_hash.ptr[i];
386 utf *nextu = u -> hashlink;
387 u4 slot = (utf_hashkey(u->text,u->blength)) & (newhash.size-1);
389 u->hashlink = (utf*) newhash.ptr[slot];
390 newhash.ptr[slot] = u;
392 /* follow link in external hash chain */
397 /* dispose old table */
398 MFREE (utf_hash.ptr, void*, utf_hash.size);
406 /********************* function: utf_new_char ********************************
408 creates a new utf symbol, the text for this symbol is passed
409 as a c-string ( = char* )
411 ******************************************************************************/
413 utf *utf_new_char (char *text)
415 return utf_new(text, strlen(text));
418 /************************** Funktion: utf_show ******************************
420 writes the utf symbols in the utfhash to stdout and
421 displays the number of external hash chains grouped
422 according to the chainlength
425 *****************************************************************************/
430 #define CHAIN_LIMIT 20 /* limit for seperated enumeration */
432 u4 chain_count[CHAIN_LIMIT]; /* numbers of chains */
433 u4 max_chainlength = 0; /* maximum length of the chains */
434 u4 sum_chainlength = 0; /* sum of the chainlengths */
435 u4 beyond_limit = 0; /* number of utf-symbols in chains with length>=CHAIN_LIMIT-1 */
438 printf ("UTF-HASH:\n");
440 /* show element of utf-hashtable */
441 for (i=0; i<utf_hash.size; i++) {
442 utf *u = utf_hash.ptr[i];
444 printf ("SLOT %d: ", (int) i);
456 printf ("UTF-HASH: %d slots for %d entries\n",
457 (int) utf_hash.size, (int) utf_hash.entries );
460 if (utf_hash.entries == 0)
463 printf("chains:\n chainlength number of chains %% of utfstrings\n");
465 for (i=0;i<CHAIN_LIMIT;i++)
468 /* count numbers of hashchains according to their length */
469 for (i=0; i<utf_hash.size; i++) {
471 utf *u = (utf*) utf_hash.ptr[i];
474 /* determine chainlength */
480 /* update sum of all chainlengths */
481 sum_chainlength+=chain_length;
483 /* determine the maximum length of the chains */
484 if (chain_length>max_chainlength)
485 max_chainlength = chain_length;
487 /* update number of utf-symbols in chains with length>=CHAIN_LIMIT-1 */
488 if (chain_length>=CHAIN_LIMIT) {
489 beyond_limit+=chain_length;
490 chain_length=CHAIN_LIMIT-1;
493 /* update number of hashchains of current length */
494 chain_count[chain_length]++;
497 /* display results */
498 for (i=1;i<CHAIN_LIMIT-1;i++)
499 printf(" %2d %17d %18.2f%%\n",i,chain_count[i],(((float) chain_count[i]*i*100)/utf_hash.entries));
501 printf(" >=%2d %17d %18.2f%%\n",CHAIN_LIMIT-1,chain_count[CHAIN_LIMIT-1],((float) beyond_limit*100)/utf_hash.entries);
504 printf("max. chainlength:%5d\n",max_chainlength);
506 /* avg. chainlength = sum of chainlengths / number of chains */
507 printf("avg. chainlength:%5.2f\n",(float) sum_chainlength / (utf_hash.size-chain_count[0]));
510 /******************************************************************************
511 *********************** Diverse Support-Funktionen ****************************
512 ******************************************************************************/
515 /******************** Funktion: desc_to_type **********************************
517 Findet zu einem gegebenen Typdescriptor den entsprechenden
520 ******************************************************************************/
522 u2 desc_to_type (utf *descriptor)
524 char *utf_ptr = descriptor->text; /* current position in utf text */
526 if (descriptor->blength < 1) panic ("Type-Descriptor is empty string");
528 switch (*utf_ptr++) {
533 case 'Z': return TYPE_INT;
534 case 'D': return TYPE_DOUBLE;
535 case 'F': return TYPE_FLOAT;
536 case 'J': return TYPE_LONG;
538 case '[': return TYPE_ADDRESS;
541 sprintf (logtext, "Invalid Type-Descriptor: ");
542 utf_sprint (logtext+strlen(logtext), descriptor);
548 /********************** Funktion: desc_typesize *******************************
550 Berechnet die L"ange (in Byte) eines Datenelements gegebenen Typs,
551 der durch den Typdescriptor gegeben ist.
553 ******************************************************************************/
555 u2 desc_typesize (utf *descriptor)
557 switch (desc_to_type(descriptor)) {
558 case TYPE_INT: return 4;
559 case TYPE_LONG: return 8;
560 case TYPE_FLOAT: return 4;
561 case TYPE_DOUBLE: return 8;
562 case TYPE_ADDRESS: return sizeof(voidptr);
568 /********************** function: utf_nextu2 *********************************
570 read the next unicode character from the utf string and
571 increment the utf-string pointer accordingly
573 ******************************************************************************/
575 u2 utf_nextu2(char **utf_ptr)
577 /* uncompressed unicode character */
579 /* current position in utf text */
580 unsigned char *utf = (unsigned char *) (*utf_ptr);
581 /* bytes representing the unicode character */
582 unsigned char ch1, ch2, ch3;
583 /* number of bytes used to represent the unicode character */
586 switch ((ch1 = utf[0]) >> 4) {
587 default: /* 1 byte */
591 case 0xD: /* 2 bytes */
592 if (((ch2 = utf[1]) & 0xC0) == 0x80) {
593 unsigned char high = ch1 & 0x1F;
594 unsigned char low = ch2 & 0x3F;
595 unicode_char = (high << 6) + low;
600 case 0xE: /* 2 or 3 bytes */
601 if (((ch2 = utf[1]) & 0xC0) == 0x80) {
602 if (((ch3 = utf[2]) & 0xC0) == 0x80) {
603 unsigned char low = ch3 & 0x3f;
604 unsigned char mid = ch2 & 0x3f;
605 unsigned char high = ch1 & 0x0f;
606 unicode_char = (((high << 6) + mid) << 6) + low;
614 /* update position in utf-text */
615 *utf_ptr = (char *) (utf + len);
619 /******************** Funktion: class_new **************************************
621 searches for the class with the specified name in the classes hashtable,
622 if there is no such class a new classinfo structure is created and inserted
623 into the list of classes to be loaded
625 *******************************************************************************/
627 classinfo *class_new (utf *u)
629 classinfo *c; /* hashtable element */
630 u4 key; /* hashkey computed from classname */
631 u4 slot; /* slot in hashtable */
634 key = utf_hashkey (u->text, u->blength);
635 slot = key & (class_hash.size-1);
636 c = class_hash.ptr[slot];
638 /* search external hash chain for the class */
640 if (c->name->blength == u->blength) {
641 for (i=0; i<u->blength; i++)
642 if (u->text[i] != c->name->text[i]) goto nomatch;
644 /* class found in hashtable */
649 c = c->hashlink; /* next element in external chain */
652 /* location in hashtable found, create new classinfo structure */
655 count_class_infos += sizeof(classinfo);
667 c -> interfacescount = 0;
668 c -> interfaces = NULL;
669 c -> fieldscount = 0;
671 c -> methodscount = 0;
675 c -> instancesize = 0;
676 c -> header.vftbl = NULL;
677 c -> innerclasscount = 0;
678 c -> innerclass = NULL;
680 c -> initialized = false;
682 /* prepare loading of the class */
683 list_addlast (&unloadedclasses, c);
685 /* insert class into the hashtable */
686 c->hashlink = class_hash.ptr[slot];
687 class_hash.ptr[slot] = c;
689 /* update number of hashtable-entries */
690 class_hash.entries++;
692 if ( class_hash.entries > (class_hash.size*2)) {
694 /* reorganization of hashtable, average length of
695 the external chains is approx. 2 */
699 hashtable newhash; /* the new hashtable */
701 /* create new hashtable, double the size */
702 init_hashtable(&newhash, class_hash.size*2);
703 newhash.entries = class_hash.entries;
705 /* transfer elements to new hashtable */
706 for (i=0; i<class_hash.size; i++) {
707 c = (classinfo*) class_hash.ptr[i];
709 classinfo *nextc = c -> hashlink;
710 u4 slot = (utf_hashkey(c->name->text,c->name->blength)) & (newhash.size-1);
712 c->hashlink = newhash.ptr[slot];
713 newhash.ptr[slot] = c;
719 /* dispose old table */
720 MFREE (class_hash.ptr, void*, class_hash.size);
721 class_hash = newhash;
727 /******************** Funktion: class_get **************************************
729 searches for the class with the specified name in the classes hashtable
730 if there is no such class NULL is returned
732 *******************************************************************************/
734 classinfo *class_get (utf *u)
736 classinfo *c; /* hashtable element */
737 u4 key; /* hashkey computed from classname */
738 u4 slot; /* slot in hashtable */
741 key = utf_hashkey (u->text, u->blength);
742 slot = key & (class_hash.size-1);
743 c = class_hash.ptr[slot];
745 /* search external hash-chain */
747 if (c->name->blength == u->blength) {
749 /* compare classnames */
750 for (i=0; i<u->blength; i++)
751 if (u->text[i] != c->name->text[i]) goto nomatch;
753 /* class found in hashtable */
761 /* class not found */
766 /************************** function: utf_strlen ******************************
768 determine number of unicode characters in the utf string
770 *******************************************************************************/
772 u4 utf_strlen(utf *u)
774 char *endpos = utf_end(u); /* points behind utf string */
775 char *utf_ptr = u->text; /* current position in utf text */
776 u4 len = 0; /* number of unicode characters */
778 while (utf_ptr<endpos) {
780 /* next unicode character */
781 utf_nextu2(&utf_ptr);
785 /* string ended abruptly */
786 panic("illegal utf string");