3 Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003
4 R. Grafl, A. Krall, C. Kruegel, C. Oates, R. Obermaisser,
5 M. Probst, S. Ring, E. Steiner, C. Thalinger, D. Thuernbeck,
6 P. Tomsich, J. Wenninger
8 This file is part of CACAO.
10 This program is free software; you can redistribute it and/or
11 modify it under the terms of the GNU General Public License as
12 published by the Free Software Foundation; either version 2, or (at
13 your option) any later version.
15 This program is distributed in the hope that it will be useful, but
16 WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
25 Contact: cacao@complang.tuwien.ac.at
27 Authors: Reinhard Grafl
32 Contains support functions for:
33 - Reading of Java class files
36 - additional support functions
38 $Id: tables.c 700 2003-12-07 15:54:28Z edwin $
44 #include <sys/types.h>
51 #include "threads/thread.h"
52 #include "threads/locks.h"
53 #include "toolbox/loging.h"
54 #include "toolbox/memory.h"
58 int count_utf_len = 0; /* size of utf hash */
59 int count_utf_new = 0; /* calls of utf_new */
60 int count_utf_new_found = 0; /* calls of utf_new with fast return */
62 hashtable utf_hash; /* hashtable for utf8-symbols */
63 hashtable string_hash; /* hashtable for javastrings */
64 hashtable class_hash; /* hashtable for classes */
66 /******************************************************************************
67 *********************** hashtable functions **********************************
68 ******************************************************************************/
70 /* hashsize must be power of 2 */
72 #define UTF_HASHSTART 16384 /* initial size of utf-hash */
73 #define HASHSTART 2048 /* initial size of javastring and class-hash */
76 /******************** function: init_hashtable ******************************
78 Initializes a hashtable structure and allocates memory.
79 The parameter size specifies the initial size of the hashtable.
81 *****************************************************************************/
83 void init_hashtable(hashtable *hash, u4 size)
89 hash->ptr = MNEW(void*, size);
92 for (i = 0; i < size; i++) hash->ptr[i] = NULL;
95 /*********************** function: tables_init *****************************
97 creates hashtables for symboltables
98 (called once at startup)
100 *****************************************************************************/
104 init_hashtable(&utf_hash, UTF_HASHSTART); /* hashtable for utf8-symbols */
105 init_hashtable(&string_hash, HASHSTART); /* hashtable for javastrings */
106 init_hashtable(&class_hash, HASHSTART); /* hashtable for classes */
109 count_utf_len += sizeof(utf*) * utf_hash.size;
114 /********************** function: tables_close ******************************
116 free memory for hashtables
118 *****************************************************************************/
120 void tables_close (stringdeleter del)
126 /* dispose utf symbols */
127 for (i=0; i<utf_hash.size; i++) {
130 /* process elements in external hash chain */
131 utf *nextu = u->hashlink;
132 MFREE (u->text, u1, u->blength);
138 /* dispose javastrings */
139 for (i=0; i<string_hash.size; i++) {
140 s = string_hash.ptr[i];
142 /* process elements in external hash chain */
143 literalstring *nexts = s->hashlink;
145 FREE(s, literalstring);
150 /* dispose hashtable structures */
151 MFREE (utf_hash.ptr, void*, utf_hash.size);
152 MFREE (string_hash.ptr, void*, string_hash.size);
153 MFREE (class_hash.ptr, void*, class_hash.size);
156 /********************* function: utf_display *********************************
158 write utf symbol to stdout (debugging purposes)
160 ******************************************************************************/
162 void utf_display (utf *u)
164 char *endpos = utf_end(u); /* points behind utf string */
165 char *utf_ptr = u->text; /* current position in utf text */
167 while (utf_ptr<endpos) {
169 /* read next unicode character */
170 u2 c = utf_nextu2(&utf_ptr);
171 if (c>=32 && c<=127) printf ("%c",c);
178 /************************* function: log_utf *********************************
182 ******************************************************************************/
186 char buf[MAXLOGTEXT];
191 /********************** function: log_plain_utf ******************************
193 log utf symbol (without printing "LOG: " and newline)
195 ******************************************************************************/
197 void log_plain_utf(utf *u)
199 char buf[MAXLOGTEXT];
204 /************************ function: utf_sprint *******************************
206 write utf symbol into c-string (debugging purposes)
208 ******************************************************************************/
210 void utf_sprint (char *buffer, utf *u)
212 char *endpos = utf_end(u); /* points behind utf string */
213 char *utf_ptr = u->text; /* current position in utf text */
214 u2 pos = 0; /* position in c-string */
216 while (utf_ptr<endpos)
217 /* copy next unicode character */
218 buffer[pos++] = utf_nextu2(&utf_ptr);
220 /* terminate string */
225 /********************* Funktion: utf_fprint **********************************
227 write utf symbol into file
229 ******************************************************************************/
231 void utf_fprint (FILE *file, utf *u)
233 char *endpos = utf_end(u); /* points behind utf string */
234 char *utf_ptr = u->text; /* current position in utf text */
236 while (utf_ptr<endpos) {
237 /* read next unicode character */
238 u2 c = utf_nextu2(&utf_ptr);
240 if (c>=32 && c<=127) fprintf (file,"%c",c);
241 else fprintf (file,"?");
246 /****************** internal function: utf_hashkey ***************************
248 The hashkey is computed from the utf-text by using up to 8 characters.
249 For utf-symbols longer than 15 characters 3 characters are taken from
250 the beginning and the end, 2 characters are taken from the middle.
252 ******************************************************************************/
254 #define nbs(val) ((u4) *(++text) << val) /* get next byte, left shift by val */
255 #define fbs(val) ((u4) *( text) << val) /* get first byte, left shift by val */
257 static u4 utf_hashkey (char *text, u4 length)
259 char *start_pos = text; /* pointer to utf text */
264 case 0: /* empty string */
267 case 1: return fbs(0);
268 case 2: return fbs(0) ^ nbs(3);
269 case 3: return fbs(0) ^ nbs(3) ^ nbs(5);
270 case 4: return fbs(0) ^ nbs(2) ^ nbs(4) ^ nbs(6);
271 case 5: return fbs(0) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(6);
272 case 6: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(5) ^ nbs(6);
273 case 7: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(5) ^ nbs(6);
274 case 8: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(5) ^ nbs(6) ^ nbs(7);
276 case 9: a = fbs(0) ^ nbs(1) ^ nbs(2);
278 return a ^ nbs(4) ^ nbs(5) ^ nbs(6) ^ nbs(7) ^ nbs(8);
282 a^= nbs(2) ^ nbs(3) ^ nbs(4);
284 return a ^ nbs(6) ^ nbs(7) ^ nbs(8) ^ nbs(9);
288 a^= nbs(2) ^ nbs(3) ^ nbs(4);
290 return a ^ nbs(6) ^ nbs(7) ^ nbs(8) ^ nbs(9) ^ nbs(10);
296 a^= nbs(5) ^ nbs(6) ^ nbs(7);
298 return a ^ nbs(9) ^ nbs(10);
300 case 13: a = fbs(0) ^ nbs(1);
306 return a ^ nbs(9) ^ nbs(10);
314 return a ^ nbs(9) ^ nbs(10) ^ nbs(11);
322 return a ^ nbs(9) ^ nbs(10) ^ nbs(11);
324 default: /* 3 characters from beginning */
329 /* 2 characters from middle */
330 text = start_pos + (length / 2);
335 /* 3 characters from end */
336 text = start_pos + length - 4;
341 return a ^ nbs(10) ^ nbs(11);
346 /*************************** function: utf_hashkey ***************************
348 compute the hashkey of a unicode string
350 ******************************************************************************/
352 u4 unicode_hashkey (u2 *text, u2 len)
354 return utf_hashkey((char*) text, len);
357 /************************ function: utf_new **********************************
359 Creates a new utf-symbol, the text of the symbol is passed as a
360 u1-array. The function searches the utf-hashtable for a utf-symbol
361 with this text. On success the element returned, otherwise a new
362 hashtable element is created.
364 If the number of entries in the hashtable exceeds twice the size of the
365 hashtable slots a reorganization of the hashtable is done and the utf
366 symbols are copied to a new hashtable with doubled size.
368 ******************************************************************************/
370 utf *utf_new (char *text, u2 length)
372 u4 key; /* hashkey computed from utf-text */
373 u4 slot; /* slot in hashtable */
374 utf *u; /* hashtable element */
377 /* log_text("utf_new entered");*/
382 key = utf_hashkey (text, length);
383 slot = key & (utf_hash.size-1);
384 u = utf_hash.ptr[slot];
386 /* search external hash chain for utf-symbol */
388 if (u->blength == length) {
390 /* compare text of hashtable elements */
391 for (i=0; i<length; i++)
392 if (text[i] != u->text[i]) goto nomatch;
395 count_utf_new_found++;
397 /* log_text("symbol found in hash table");*/
398 /* symbol found in hashtable */
409 u = u->hashlink; /* next element in external chain */
413 count_utf_len += sizeof(utf) + length;
416 /* location in hashtable found, create new utf element */
418 u->blength = length; /* length in bytes of utfstring */
419 u->hashlink = utf_hash.ptr[slot]; /* link in external hashchain */
420 u->text = mem_alloc(length/*JOWENN*/+1); /* allocate memory for utf-text */
421 memcpy(u->text,text,length); /* copy utf-text */
422 u->text[length]='\0';/*JOWENN*/
423 utf_hash.ptr[slot] = u; /* insert symbol into table */
425 utf_hash.entries++; /* update number of entries */
427 if ( utf_hash.entries > (utf_hash.size*2)) {
429 /* reorganization of hashtable, average length of
430 the external chains is approx. 2 */
434 hashtable newhash; /* the new hashtable */
436 /* create new hashtable, double the size */
437 init_hashtable(&newhash, utf_hash.size*2);
438 newhash.entries=utf_hash.entries;
441 count_utf_len += sizeof(utf*) * utf_hash.size;
444 /* transfer elements to new hashtable */
445 for (i=0; i<utf_hash.size; i++) {
446 u = (utf*) utf_hash.ptr[i];
448 utf *nextu = u -> hashlink;
449 u4 slot = (utf_hashkey(u->text,u->blength)) & (newhash.size-1);
451 u->hashlink = (utf*) newhash.ptr[slot];
452 newhash.ptr[slot] = u;
454 /* follow link in external hash chain */
459 /* dispose old table */
460 MFREE (utf_hash.ptr, void*, utf_hash.size);
468 /********************* function: utf_new_char ********************************
470 creates a new utf symbol, the text for this symbol is passed
471 as a c-string ( = char* )
473 ******************************************************************************/
475 utf *utf_new_char (char *text)
477 return utf_new(text, strlen(text));
481 /********************* function: utf_new_char ********************************
483 creates a new utf symbol, the text for this symbol is passed
484 as a c-string ( = char* )
485 "." characters are going to be replaced by "/". since the above function is
486 used often, this is a separte function, instead of an if
488 ******************************************************************************/
490 utf *utf_new_char_classname (char *text)
492 if (strchr(text,'.')) {
493 char *txt=strdup(text);
494 char *end=txt+strlen(txt);
497 for (c=txt;c<end;c++)
499 tmpRes=utf_new(txt,strlen(txt));
504 return utf_new(text, strlen(text));
507 /************************** Funktion: utf_show ******************************
509 writes the utf symbols in the utfhash to stdout and
510 displays the number of external hash chains grouped
511 according to the chainlength
514 *****************************************************************************/
519 #define CHAIN_LIMIT 20 /* limit for seperated enumeration */
521 u4 chain_count[CHAIN_LIMIT]; /* numbers of chains */
522 u4 max_chainlength = 0; /* maximum length of the chains */
523 u4 sum_chainlength = 0; /* sum of the chainlengths */
524 u4 beyond_limit = 0; /* number of utf-symbols in chains with length>=CHAIN_LIMIT-1 */
527 printf ("UTF-HASH:\n");
529 /* show element of utf-hashtable */
530 for (i=0; i<utf_hash.size; i++) {
531 utf *u = utf_hash.ptr[i];
533 printf ("SLOT %d: ", (int) i);
545 printf ("UTF-HASH: %d slots for %d entries\n",
546 (int) utf_hash.size, (int) utf_hash.entries );
549 if (utf_hash.entries == 0)
552 printf("chains:\n chainlength number of chains %% of utfstrings\n");
554 for (i=0;i<CHAIN_LIMIT;i++)
557 /* count numbers of hashchains according to their length */
558 for (i=0; i<utf_hash.size; i++) {
560 utf *u = (utf*) utf_hash.ptr[i];
563 /* determine chainlength */
569 /* update sum of all chainlengths */
570 sum_chainlength+=chain_length;
572 /* determine the maximum length of the chains */
573 if (chain_length>max_chainlength)
574 max_chainlength = chain_length;
576 /* update number of utf-symbols in chains with length>=CHAIN_LIMIT-1 */
577 if (chain_length>=CHAIN_LIMIT) {
578 beyond_limit+=chain_length;
579 chain_length=CHAIN_LIMIT-1;
582 /* update number of hashchains of current length */
583 chain_count[chain_length]++;
586 /* display results */
587 for (i=1;i<CHAIN_LIMIT-1;i++)
588 printf(" %2d %17d %18.2f%%\n",i,chain_count[i],(((float) chain_count[i]*i*100)/utf_hash.entries));
590 printf(" >=%2d %17d %18.2f%%\n",CHAIN_LIMIT-1,chain_count[CHAIN_LIMIT-1],((float) beyond_limit*100)/utf_hash.entries);
593 printf("max. chainlength:%5d\n",max_chainlength);
595 /* avg. chainlength = sum of chainlengths / number of chains */
596 printf("avg. chainlength:%5.2f\n",(float) sum_chainlength / (utf_hash.size-chain_count[0]));
599 /******************************************************************************
600 *********************** Misc support functions ********************************
601 ******************************************************************************/
604 /******************** Function: desc_to_type **********************************
606 Determines the corresponding Java base data type for a given type
609 ******************************************************************************/
611 u2 desc_to_type(utf *descriptor)
613 char *utf_ptr = descriptor->text; /* current position in utf text */
614 char logtext[MAXLOGTEXT];
616 if (descriptor->blength < 1) panic("Type-Descriptor is empty string");
618 switch (*utf_ptr++) {
623 case 'Z': return TYPE_INT;
624 case 'D': return TYPE_DOUBLE;
625 case 'F': return TYPE_FLOAT;
626 case 'J': return TYPE_LONG;
628 case '[': return TYPE_ADDRESS;
631 sprintf(logtext, "Invalid Type-Descriptor: ");
632 utf_sprint(logtext+strlen(logtext), descriptor);
639 /********************** Function: desc_typesize *******************************
641 Calculates the lenght in bytes needed for a data element of the type given
642 by its type descriptor.
644 ******************************************************************************/
646 u2 desc_typesize (utf *descriptor)
648 switch (desc_to_type(descriptor)) {
649 case TYPE_INT: return 4;
650 case TYPE_LONG: return 8;
651 case TYPE_FLOAT: return 4;
652 case TYPE_DOUBLE: return 8;
653 case TYPE_ADDRESS: return sizeof(voidptr);
659 /********************** function: utf_nextu2 *********************************
661 read the next unicode character from the utf string and
662 increment the utf-string pointer accordingly
664 ******************************************************************************/
666 u2 utf_nextu2(char **utf_ptr)
668 /* uncompressed unicode character */
670 /* current position in utf text */
671 unsigned char *utf = (unsigned char *) (*utf_ptr);
672 /* bytes representing the unicode character */
673 unsigned char ch1, ch2, ch3;
674 /* number of bytes used to represent the unicode character */
677 switch ((ch1 = utf[0]) >> 4) {
678 default: /* 1 byte */
682 case 0xD: /* 2 bytes */
683 if (((ch2 = utf[1]) & 0xC0) == 0x80) {
684 unsigned char high = ch1 & 0x1F;
685 unsigned char low = ch2 & 0x3F;
686 unicode_char = (high << 6) + low;
691 case 0xE: /* 2 or 3 bytes */
692 if (((ch2 = utf[1]) & 0xC0) == 0x80) {
693 if (((ch3 = utf[2]) & 0xC0) == 0x80) {
694 unsigned char low = ch3 & 0x3f;
695 unsigned char mid = ch2 & 0x3f;
696 unsigned char high = ch1 & 0x0f;
697 unicode_char = (((high << 6) + mid) << 6) + low;
705 /* update position in utf-text */
706 *utf_ptr = (char *) (utf + len);
711 /******************** Function: class_new **************************************
713 searches for the class with the specified name in the classes hashtable,
714 if there is no such class a new classinfo structure is created and inserted
715 into the list of classes to be loaded
717 *******************************************************************************/
719 classinfo *class_new(utf *u)
721 classinfo *c; /* hashtable element */
722 u4 key; /* hashkey computed from classname */
723 u4 slot; /* slot in hashtable */
726 key = utf_hashkey (u->text, u->blength);
727 slot = key & (class_hash.size-1);
728 c = class_hash.ptr[slot];
730 /* search external hash chain for the class */
732 if (c->name->blength == u->blength) {
733 for (i=0; i<u->blength; i++)
734 if (u->text[i] != c->name->text[i]) goto nomatch;
736 /* class found in hashtable */
741 c = c->hashlink; /* next element in external chain */
744 /* location in hashtable found, create new classinfo structure */
747 count_class_infos += sizeof(classinfo);
750 c = GCNEW (classinfo,1); /*JOWENN: NEW*/
760 c -> interfacescount = 0;
761 c -> interfaces = NULL;
762 c -> fieldscount = 0;
764 c -> methodscount = 0;
769 c -> instancesize = 0;
770 c -> header.vftbl = NULL;
771 c -> innerclasscount = 0;
772 c -> innerclass = NULL;
774 c -> initialized = false;
775 c -> classvftbl = false;
779 /* prepare loading of the class */
780 list_addlast(&unloadedclasses, c);
782 /* insert class into the hashtable */
783 c->hashlink = class_hash.ptr[slot];
784 class_hash.ptr[slot] = c;
786 /* update number of hashtable-entries */
787 class_hash.entries++;
789 if (class_hash.entries > (class_hash.size*2)) {
791 /* reorganization of hashtable, average length of
792 the external chains is approx. 2 */
796 hashtable newhash; /* the new hashtable */
798 /* create new hashtable, double the size */
799 init_hashtable(&newhash, class_hash.size*2);
800 newhash.entries = class_hash.entries;
802 /* transfer elements to new hashtable */
803 for (i = 0; i < class_hash.size; i++) {
804 c = (classinfo*) class_hash.ptr[i];
806 classinfo *nextc = c->hashlink;
807 u4 slot = (utf_hashkey(c->name->text, c->name->blength)) & (newhash.size - 1);
809 c->hashlink = newhash.ptr[slot];
810 newhash.ptr[slot] = c;
816 /* dispose old table */
817 MFREE(class_hash.ptr, void*, class_hash.size);
818 class_hash = newhash;
821 /* Array classes need further initialization. */
822 if (u->text[0] == '[')
828 /******************** Function: class_get **************************************
830 searches for the class with the specified name in the classes hashtable
831 if there is no such class NULL is returned
833 *******************************************************************************/
835 classinfo *class_get(utf *u)
837 classinfo *c; /* hashtable element */
838 u4 key; /* hashkey computed from classname */
839 u4 slot; /* slot in hashtable */
842 key = utf_hashkey (u->text, u->blength);
843 slot = key & (class_hash.size-1);
844 c = class_hash.ptr[slot];
847 log_text("class_get: looking for class:");
849 /* search external hash-chain */
851 if (c->name->blength == u->blength) {
853 /* compare classnames */
854 for (i=0; i<u->blength; i++)
855 if (u->text[i] != c->name->text[i]) goto nomatch;
857 log_text("class_get: class found");
860 utf_display(c->name); */
861 /* class found in hashtable */
869 /* class not found */
873 /***************** Function: class_array_of ***********************************
875 Returns an array class with the given component class.
876 The array class is dynamically created if neccessary.
878 *******************************************************************************/
880 classinfo *class_array_of(classinfo *component)
885 /* Assemble the array class name */
886 namelen = component->name->blength;
888 if (component->name->text[0] == '[') {
889 /* the component is itself an array */
890 namebuf = DMNEW(char,namelen+1);
892 memcpy(namebuf+1,component->name->text,namelen);
896 /* the component is a non-array class */
897 namebuf = DMNEW(char,namelen+3);
900 memcpy(namebuf+2,component->name->text,namelen);
901 namebuf[2+namelen] = ';';
905 return class_new( utf_new(namebuf,namelen) );
908 /*************** Function: class_multiarray_of ********************************
910 Returns an array class with the given dimension and element class.
911 The array class is dynamically created if neccessary.
913 *******************************************************************************/
915 classinfo *class_multiarray_of(int dim,classinfo *element)
921 panic("Invalid array dimension requested");
923 /* Assemble the array class name */
924 namelen = element->name->blength;
926 if (element->name->text[0] == '[') {
927 /* the element is itself an array */
928 namebuf = DMNEW(char,namelen+dim);
929 memcpy(namebuf+dim,element->name->text,namelen);
933 /* the element is a non-array class */
934 namebuf = DMNEW(char,namelen+2+dim);
936 memcpy(namebuf+dim+1,element->name->text,namelen);
938 namebuf[namelen-1] = ';';
940 memset(namebuf,'[',dim);
942 return class_new( utf_new(namebuf,namelen) );
945 /************************** function: utf_strlen ******************************
947 determine number of unicode characters in the utf string
949 *******************************************************************************/
951 u4 utf_strlen(utf *u)
953 char *endpos = utf_end(u); /* points behind utf string */
954 char *utf_ptr = u->text; /* current position in utf text */
955 u4 len = 0; /* number of unicode characters */
957 while (utf_ptr<endpos) {
959 /* next unicode character */
960 utf_nextu2(&utf_ptr);
964 /* string ended abruptly */
965 panic("illegal utf string");
972 * These are local overrides for various environment variables in Emacs.
973 * Please do not remove this and leave it at the end of the file, where
974 * Emacs will automagically detect them.
975 * ---------------------------------------------------------------------
978 * indent-tabs-mode: t