0d5907521888bf5d10724ecc01535201f531fad2
[cacao.git] / src / vm / tables.c
1 /* tables.c - 
2
3    Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003
4    R. Grafl, A. Krall, C. Kruegel, C. Oates, R. Obermaisser,
5    M. Probst, S. Ring, E. Steiner, C. Thalinger, D. Thuernbeck,
6    P. Tomsich, J. Wenninger
7
8    This file is part of CACAO.
9
10    This program is free software; you can redistribute it and/or
11    modify it under the terms of the GNU General Public License as
12    published by the Free Software Foundation; either version 2, or (at
13    your option) any later version.
14
15    This program is distributed in the hope that it will be useful, but
16    WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18    General Public License for more details.
19
20    You should have received a copy of the GNU General Public License
21    along with this program; if not, write to the Free Software
22    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
23    02111-1307, USA.
24
25    Contact: cacao@complang.tuwien.ac.at
26
27    Authors: Reinhard Grafl
28
29    Changes: Mark Probst
30             Andreas Krall
31
32    Contains support functions for:
33        - Reading of Java class files
34        - Unicode symbols
35        - the heap
36        - additional support functions
37
38    $Id: tables.c 1185 2004-06-19 12:23:13Z twisti $
39
40 */
41
42 #include "global.h"
43
44 #include <string.h>
45 #include <stdlib.h>
46 #include <assert.h>
47 #include <sys/types.h>
48 #include <sys/mman.h>
49 #include <unistd.h>
50 #include "types.h"
51 #include "main.h"
52 #include "tables.h"
53 #include "loader.h"
54 #include "asmpart.h"
55 #include "threads/thread.h"
56 #include "threads/locks.h"
57 #include "toolbox/logging.h"
58 #include "toolbox/memory.h"
59
60
61 /* statistics */
62 int count_utf_len = 0;         /* size of utf hash                  */
63 int count_utf_new = 0;         /* calls of utf_new                  */
64 int count_utf_new_found  = 0;  /* calls of utf_new with fast return */
65
66 hashtable utf_hash;     /* hashtable for utf8-symbols */
67 hashtable string_hash;  /* hashtable for javastrings  */
68 hashtable class_hash;   /* hashtable for classes      */
69
70 list unlinkedclasses;   /* this is only used for eager class loading          */
71
72
73 /******************************************************************************
74  *********************** hashtable functions **********************************
75  ******************************************************************************/
76
77 /* hashsize must be power of 2 */
78
79 #define UTF_HASHSTART   16384   /* initial size of utf-hash */    
80 #define HASHSTART        2048   /* initial size of javastring and class-hash */
81
82
83 /******************** function: init_hashtable ******************************
84
85     Initializes a hashtable structure and allocates memory.
86     The parameter size specifies the initial size of the hashtable.
87         
88 *****************************************************************************/
89
90 void init_hashtable(hashtable *hash, u4 size)
91 {
92         u4 i;
93
94         hash->entries = 0;
95         hash->size    = size;
96         hash->ptr     = MNEW(void*, size);
97
98         /* clear table */
99         for (i = 0; i < size; i++) hash->ptr[i] = NULL;
100 }
101
102
103 /*********************** function: tables_init  *****************************
104
105     creates hashtables for symboltables 
106         (called once at startup)                         
107         
108 *****************************************************************************/
109
110 void tables_init()
111 {
112         init_hashtable(&utf_hash,    UTF_HASHSTART);  /* hashtable for utf8-symbols */
113         init_hashtable(&string_hash, HASHSTART);      /* hashtable for javastrings */
114         init_hashtable(&class_hash,  HASHSTART);      /* hashtable for classes */ 
115
116 /*      if (opt_eager) */
117 /*              list_init(&unlinkedclasses, OFFSET(classinfo, listnode)); */
118
119 #if defined(STATISTICS)
120         if (opt_stat)
121                 count_utf_len += sizeof(utf*) * utf_hash.size;
122 #endif
123 }
124
125
126 /********************** function: tables_close ******************************
127
128         free memory for hashtables                    
129         
130 *****************************************************************************/
131
132 void tables_close()
133 {
134         utf *u = NULL;
135         literalstring *s;
136         u4 i;
137         
138         /* dispose utf symbols */
139         for (i = 0; i < utf_hash.size; i++) {
140                 u = utf_hash.ptr[i];
141                 while (u) {
142                         /* process elements in external hash chain */
143                         utf *nextu = u->hashlink;
144                         MFREE(u->text, u1, u->blength);
145                         FREE(u, utf);
146                         u = nextu;
147                 }       
148         }
149
150         /* dispose javastrings */
151         for (i = 0; i < string_hash.size; i++) {
152                 s = string_hash.ptr[i];
153                 while (u) {
154                         /* process elements in external hash chain */
155                         literalstring *nexts = s->hashlink;
156                         literalstring_free(s->string);
157                         FREE(s, literalstring);
158                         s = nexts;
159                 }       
160         }
161
162         /* dispose hashtable structures */
163         MFREE(utf_hash.ptr,    void*, utf_hash.size);
164         MFREE(string_hash.ptr, void*, string_hash.size);
165         MFREE(class_hash.ptr,  void*, class_hash.size);
166 }
167
168
169 /********************* function: utf_display *********************************
170
171         write utf symbol to stdout (debugging purposes)
172
173 ******************************************************************************/
174
175 void utf_display(utf *u)
176 {
177     char *endpos  = utf_end(u);  /* points behind utf string       */
178     char *utf_ptr = u->text;     /* current position in utf text   */
179
180         if (!u)
181                 return;
182
183     while (utf_ptr < endpos) {
184                 /* read next unicode character */                
185                 u2 c = utf_nextu2(&utf_ptr);
186                 if (c >= 32 && c <= 127) printf("%c", c);
187                 else printf("?");
188         }
189
190         fflush(stdout);
191 }
192
193
194 /********************* function: utf_display *********************************
195
196         write utf symbol to stdout (debugging purposes)
197
198 ******************************************************************************/
199
200 void utf_display_classname(utf *u)
201 {
202     char *endpos  = utf_end(u);  /* points behind utf string       */
203     char *utf_ptr = u->text;     /* current position in utf text   */
204
205         if (!u)
206                 return;
207
208     while (utf_ptr < endpos) {
209                 /* read next unicode character */                
210                 u2 c = utf_nextu2(&utf_ptr);
211                 if (c == '/') c = '.';
212                 if (c >= 32 && c <= 127) printf("%c", c);
213                 else printf("?");
214         }
215
216         fflush(stdout);
217 }
218
219
220 /************************* function: log_utf *********************************
221
222         log utf symbol
223
224 ******************************************************************************/
225
226 void log_utf(utf *u)
227 {
228         char buf[MAXLOGTEXT];
229         utf_sprint(buf, u);
230         dolog("%s", buf);
231 }
232
233
234 /********************** function: log_plain_utf ******************************
235
236         log utf symbol (without printing "LOG: " and newline)
237
238 ******************************************************************************/
239
240 void log_plain_utf(utf *u)
241 {
242         char buf[MAXLOGTEXT];
243         utf_sprint(buf, u);
244         dolog_plain("%s", buf);
245 }
246
247
248 /************************ function: utf_sprint *******************************
249         
250     write utf symbol into c-string (debugging purposes)                                          
251
252 ******************************************************************************/
253
254 void utf_sprint(char *buffer, utf *u)
255 {
256     char *endpos  = utf_end(u);  /* points behind utf string       */
257     char *utf_ptr = u->text;     /* current position in utf text   */ 
258     u2 pos = 0;                  /* position in c-string           */
259
260     while (utf_ptr < endpos) 
261                 /* copy next unicode character */       
262                 buffer[pos++] = utf_nextu2(&utf_ptr);
263
264     /* terminate string */
265     buffer[pos] = '\0';
266 }
267
268
269 /************************ function: utf_sprint_classname *********************
270         
271     write utf symbol into c-string (debugging purposes)
272
273 ******************************************************************************/ 
274
275 void utf_sprint_classname(char *buffer, utf *u)
276 {
277     char *endpos  = utf_end(u);  /* points behind utf string       */
278     char *utf_ptr = u->text;     /* current position in utf text   */ 
279     u2 pos = 0;                  /* position in c-string           */
280
281     while (utf_ptr < endpos) {
282                 /* copy next unicode character */       
283                 u2 c = utf_nextu2(&utf_ptr);
284                 if (c == '/') c = '.';
285                 buffer[pos++] = c;
286         }
287
288     /* terminate string */
289     buffer[pos] = '\0';
290 }
291
292
293 /********************* Funktion: utf_fprint **********************************
294         
295     write utf symbol into file          
296
297 ******************************************************************************/
298
299 void utf_fprint(FILE *file, utf *u)
300 {
301     char *endpos  = utf_end(u);  /* points behind utf string       */
302     char *utf_ptr = u->text;     /* current position in utf text   */ 
303
304     if (!u)
305                 return;
306
307     while (utf_ptr < endpos) { 
308                 /* read next unicode character */                
309                 u2 c = utf_nextu2(&utf_ptr);                            
310
311                 if (c >= 32 && c <= 127) fprintf(file, "%c", c);
312                 else fprintf(file, "?");
313         }
314 }
315
316
317 /********************* Funktion: utf_fprint **********************************
318         
319     write utf symbol into file          
320
321 ******************************************************************************/
322
323 void utf_fprint_classname(FILE *file, utf *u)
324 {
325     char *endpos  = utf_end(u);  /* points behind utf string       */
326     char *utf_ptr = u->text;     /* current position in utf text   */ 
327
328     if (!u)
329                 return;
330
331     while (utf_ptr < endpos) { 
332                 /* read next unicode character */                
333                 u2 c = utf_nextu2(&utf_ptr);                            
334                 if (c == '/') c = '.';
335
336                 if (c >= 32 && c <= 127) fprintf(file, "%c", c);
337                 else fprintf(file, "?");
338         }
339 }
340
341
342 /****************** internal function: utf_hashkey ***************************
343
344         The hashkey is computed from the utf-text by using up to 8 characters.
345         For utf-symbols longer than 15 characters 3 characters are taken from
346         the beginning and the end, 2 characters are taken from the middle.
347
348 ******************************************************************************/ 
349
350 #define nbs(val) ((u4) *(++text) << val) /* get next byte, left shift by val  */
351 #define fbs(val) ((u4) *(  text) << val) /* get first byte, left shift by val */
352
353 static u4 utf_hashkey(char *text, u4 length)
354 {
355         char *start_pos = text; /* pointer to utf text */
356         u4 a;
357
358         switch (length) {               
359                 
360         case 0: /* empty string */
361                 return 0;
362
363         case 1: return fbs(0);
364         case 2: return fbs(0) ^ nbs(3);
365         case 3: return fbs(0) ^ nbs(3) ^ nbs(5);
366         case 4: return fbs(0) ^ nbs(2) ^ nbs(4) ^ nbs(6);
367         case 5: return fbs(0) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(6);
368         case 6: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(5) ^ nbs(6);
369         case 7: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(5) ^ nbs(6);
370         case 8: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(5) ^ nbs(6) ^ nbs(7);
371
372         case 9:
373                 a = fbs(0);
374                 a ^= nbs(1);
375                 a ^= nbs(2);
376                 text++;
377                 return a ^ nbs(4) ^ nbs(5) ^ nbs(6) ^ nbs(7) ^ nbs(8);
378
379         case 10:
380                 a = fbs(0);
381                 text++;
382                 a ^= nbs(2);
383                 a ^= nbs(3);
384                 a ^= nbs(4);
385                 text++;
386                 return a ^ nbs(6) ^ nbs(7) ^ nbs(8) ^ nbs(9);
387
388         case 11:
389                 a = fbs(0);
390                 text++;
391                 a ^= nbs(2);
392                 a ^= nbs(3);
393                 a ^= nbs(4);
394                 text++;
395                 return a ^ nbs(6) ^ nbs(7) ^ nbs(8) ^ nbs(9) ^ nbs(10);
396
397         case 12:
398                 a = fbs(0);
399                 text += 2;
400                 a ^= nbs(2);
401                 a ^= nbs(3);
402                 text++;
403                 a ^= nbs(5);
404                 a ^= nbs(6);
405                 a ^= nbs(7);
406                 text++;
407                 return a ^ nbs(9) ^ nbs(10);
408
409         case 13:
410                 a = fbs(0);
411                 a ^= nbs(1);
412                 text++;
413                 a ^= nbs(3);
414                 a ^= nbs(4);
415                 text += 2;      
416                 a ^= nbs(7);
417                 a ^= nbs(8);
418                 text += 2;
419                 return a ^ nbs(9) ^ nbs(10);
420
421         case 14:
422                 a = fbs(0);
423                 text += 2;      
424                 a ^= nbs(3);
425                 a ^= nbs(4);
426                 text += 2;      
427                 a ^= nbs(7);
428                 a ^= nbs(8);
429                 text += 2;
430                 return a ^ nbs(9) ^ nbs(10) ^ nbs(11);
431
432         case 15:
433                 a = fbs(0);
434                 text += 2;      
435                 a ^= nbs(3);
436                 a ^= nbs(4);
437                 text += 2;      
438                 a ^= nbs(7);
439                 a ^= nbs(8);
440                 text += 2;
441                 return a ^ nbs(9) ^ nbs(10) ^ nbs(11);
442
443         default:  /* 3 characters from beginning */
444                 a = fbs(0);
445                 text += 2;
446                 a ^= nbs(3);
447                 a ^= nbs(4);
448
449                 /* 2 characters from middle */
450                 text = start_pos + (length / 2);
451                 a ^= fbs(5);
452                 text += 2;
453                 a ^= nbs(6);    
454
455                 /* 3 characters from end */
456                 text = start_pos + length - 4;
457
458                 a ^= fbs(7);
459                 text++;
460
461                 return a ^ nbs(10) ^ nbs(11);
462     }
463 }
464
465
466 /*************************** function: utf_hashkey ***************************
467
468     compute the hashkey of a unicode string
469
470 ******************************************************************************/ 
471
472 u4 unicode_hashkey(u2 *text, u2 len)
473 {
474         return utf_hashkey((char*) text, len);
475 }
476
477
478 /************************ function: utf_new **********************************
479
480         Creates a new utf-symbol, the text of the symbol is passed as a 
481         u1-array. The function searches the utf-hashtable for a utf-symbol 
482         with this text. On success the element returned, otherwise a new 
483         hashtable element is created.
484
485         If the number of entries in the hashtable exceeds twice the size of the
486         hashtable slots a reorganization of the hashtable is done and the utf 
487         symbols are copied to a new hashtable with doubled size.
488
489 ******************************************************************************/
490
491 utf *utf_new_intern(char *text, u2 length)
492 {
493         u4 key;            /* hashkey computed from utf-text */
494         u4 slot;           /* slot in hashtable */
495         utf *u;            /* hashtable element */
496         u2 i;
497
498 #ifdef STATISTICS
499         if (opt_stat)
500                 count_utf_new++;
501 #endif
502
503         key  = utf_hashkey(text, length);
504         slot = key & (utf_hash.size-1);
505         u    = utf_hash.ptr[slot];
506
507         /* search external hash chain for utf-symbol */
508         while (u) {
509                 if (u->blength == length) {
510
511                         /* compare text of hashtable elements */
512                         for (i = 0; i < length; i++)
513                                 if (text[i] != u->text[i]) goto nomatch;
514                         
515 #ifdef STATISTICS
516                         if (opt_stat)
517                                 count_utf_new_found++;
518 #endif
519 /*                      log_text("symbol found in hash table");*/
520                         /* symbol found in hashtable */
521 /*                                      utf_display(u);
522                                         {
523                                                 utf blup;
524                                                 blup.blength=length;
525                                                 blup.text=text;
526                                                 utf_display(&blup);
527                                         }*/
528                         return u;
529                 }
530         nomatch:
531                 u = u->hashlink; /* next element in external chain */
532         }
533
534 #ifdef STATISTICS
535         if (opt_stat)
536                 count_utf_len += sizeof(utf) + length;
537 #endif
538
539         /* location in hashtable found, create new utf element */
540         u = NEW(utf);
541         u->blength  = length;               /* length in bytes of utfstring       */
542         u->hashlink = utf_hash.ptr[slot];   /* link in external hashchain         */
543         u->text     = mem_alloc(length + 1);/* allocate memory for utf-text       */
544         memcpy(u->text, text, length);      /* copy utf-text                      */
545         u->text[length] = '\0';
546         utf_hash.ptr[slot] = u;             /* insert symbol into table           */
547
548         utf_hash.entries++;                 /* update number of entries           */
549
550         if (utf_hash.entries > (utf_hash.size * 2)) {
551
552         /* reorganization of hashtable, average length of 
553            the external chains is approx. 2                */  
554
555                 u4 i;
556                 utf *u;
557                 hashtable newhash; /* the new hashtable */
558
559                 /* create new hashtable, double the size */
560                 init_hashtable(&newhash, utf_hash.size * 2);
561                 newhash.entries = utf_hash.entries;
562
563 #ifdef STATISTICS
564                 if (opt_stat)
565                         count_utf_len += sizeof(utf*) * utf_hash.size;
566 #endif
567
568                 /* transfer elements to new hashtable */
569                 for (i = 0; i < utf_hash.size; i++) {
570                         u = (utf *) utf_hash.ptr[i];
571                         while (u) {
572                                 utf *nextu = u->hashlink;
573                                 u4 slot = utf_hashkey(u->text, u->blength) & (newhash.size - 1);
574                                                 
575                                 u->hashlink = (utf *) newhash.ptr[slot];
576                                 newhash.ptr[slot] = u;
577
578                                 /* follow link in external hash chain */
579                                 u = nextu;
580                         }
581                 }
582         
583                 /* dispose old table */
584                 MFREE(utf_hash.ptr, void*, utf_hash.size);
585                 utf_hash = newhash;
586         }
587
588         return u;
589 }
590
591
592 utf *utf_new(char *text, u2 length)
593 {
594     utf *r;
595
596 #if defined(USE_THREADS) && defined(NATIVE_THREADS)
597     tables_lock();
598 #endif
599
600     r = utf_new_intern(text, length);
601
602 #if defined(USE_THREADS) && defined(NATIVE_THREADS)
603     tables_unlock();
604 #endif
605
606     return r;
607 }
608
609
610 /********************* function: utf_new_char ********************************
611
612     creates a new utf symbol, the text for this symbol is passed
613     as a c-string ( = char* )
614
615 ******************************************************************************/
616
617 utf *utf_new_char(char *text)
618 {
619         return utf_new(text, strlen(text));
620 }
621
622
623 /********************* function: utf_new_char ********************************
624
625     creates a new utf symbol, the text for this symbol is passed
626     as a c-string ( = char* )
627     "." characters are going to be replaced by "/". since the above function is
628     used often, this is a separte function, instead of an if
629
630 ******************************************************************************/
631
632 utf *utf_new_char_classname(char *text)
633 {
634         if (strchr(text, '.')) {
635                 char *txt = strdup(text);
636                 char *end = txt + strlen(txt);
637                 char *c;
638                 utf *tmpRes;
639                 for (c = txt; c < end; c++)
640                         if (*c == '.') *c = '/';
641                 tmpRes = utf_new(txt, strlen(txt));
642                 free(txt);
643                 return tmpRes;
644
645         } else
646                 return utf_new(text, strlen(text));
647 }
648
649
650 /************************** Funktion: utf_show ******************************
651
652     writes the utf symbols in the utfhash to stdout and
653     displays the number of external hash chains grouped 
654     according to the chainlength
655     (debugging purposes)
656
657 *****************************************************************************/
658
659 void utf_show()
660 {
661
662 #define CHAIN_LIMIT 20               /* limit for seperated enumeration */
663
664         u4 chain_count[CHAIN_LIMIT]; /* numbers of chains */
665         u4 max_chainlength = 0;      /* maximum length of the chains */
666         u4 sum_chainlength = 0;      /* sum of the chainlengths */
667         u4 beyond_limit = 0;         /* number of utf-symbols in chains with length>=CHAIN_LIMIT-1 */
668         u4 i;
669
670         printf ("UTF-HASH:\n");
671
672         /* show element of utf-hashtable */
673         for (i=0; i<utf_hash.size; i++) {
674                 utf *u = utf_hash.ptr[i];
675                 if (u) {
676                         printf ("SLOT %d: ", (int) i);
677                         while (u) {
678                                 printf ("'");
679                                 utf_display (u);
680                                 printf ("' ");
681                                 u = u->hashlink;
682                         }       
683                         printf ("\n");
684                 }
685                 
686         }
687
688         printf ("UTF-HASH: %d slots for %d entries\n", 
689                         (int) utf_hash.size, (int) utf_hash.entries );
690
691
692         if (utf_hash.entries == 0)
693                 return;
694
695         printf("chains:\n  chainlength    number of chains    %% of utfstrings\n");
696
697         for (i=0;i<CHAIN_LIMIT;i++)
698                 chain_count[i]=0;
699
700         /* count numbers of hashchains according to their length */
701         for (i=0; i<utf_hash.size; i++) {
702                   
703                 utf *u = (utf*) utf_hash.ptr[i];
704                 u4 chain_length = 0;
705
706                 /* determine chainlength */
707                 while (u) {
708                         u = u->hashlink;
709                         chain_length++;
710                 }
711
712                 /* update sum of all chainlengths */
713                 sum_chainlength+=chain_length;
714
715                 /* determine the maximum length of the chains */
716                 if (chain_length>max_chainlength)
717                         max_chainlength = chain_length;
718
719                 /* update number of utf-symbols in chains with length>=CHAIN_LIMIT-1 */
720                 if (chain_length>=CHAIN_LIMIT) {
721                         beyond_limit+=chain_length;
722                         chain_length=CHAIN_LIMIT-1;
723                 }
724
725                 /* update number of hashchains of current length */
726                 chain_count[chain_length]++;
727         }
728
729         /* display results */  
730         for (i=1;i<CHAIN_LIMIT-1;i++) 
731                 printf("       %2d %17d %18.2f%%\n",i,chain_count[i],(((float) chain_count[i]*i*100)/utf_hash.entries));
732           
733         printf("     >=%2d %17d %18.2f%%\n",CHAIN_LIMIT-1,chain_count[CHAIN_LIMIT-1],((float) beyond_limit*100)/utf_hash.entries);
734
735
736         printf("max. chainlength:%5d\n",max_chainlength);
737
738         /* avg. chainlength = sum of chainlengths / number of chains */
739         printf("avg. chainlength:%5.2f\n",(float) sum_chainlength / (utf_hash.size-chain_count[0]));
740 }
741
742 /******************************************************************************
743 *********************** Misc support functions ********************************
744 ******************************************************************************/
745
746
747 /******************** Function: desc_to_type **********************************
748    
749         Determines the corresponding Java base data type for a given type
750         descriptor.
751         
752 ******************************************************************************/
753
754 u2 desc_to_type(utf *descriptor)
755 {
756         char *utf_ptr = descriptor->text;  /* current position in utf text */
757         char logtext[MAXLOGTEXT];
758
759         if (descriptor->blength < 1) panic("Type-Descriptor is empty string");
760         
761         switch (*utf_ptr++) {
762         case 'B': 
763         case 'C':
764         case 'I':
765         case 'S':  
766         case 'Z':  return TYPE_INT;
767         case 'D':  return TYPE_DOUBLE;
768         case 'F':  return TYPE_FLOAT;
769         case 'J':  return TYPE_LONG;
770         case 'L':
771         case '[':  return TYPE_ADDRESS;
772         }
773                         
774         sprintf(logtext, "Invalid Type-Descriptor: ");
775         utf_sprint(logtext+strlen(logtext), descriptor);
776         error("%s",logtext);
777
778         return 0;
779 }
780
781
782 /********************** Function: desc_typesize *******************************
783
784         Calculates the lenght in bytes needed for a data element of the type given
785         by its type descriptor.
786         
787 ******************************************************************************/
788
789 u2 desc_typesize(utf *descriptor)
790 {
791         switch (desc_to_type(descriptor)) {
792         case TYPE_INT:     return 4;
793         case TYPE_LONG:    return 8;
794         case TYPE_FLOAT:   return 4;
795         case TYPE_DOUBLE:  return 8;
796         case TYPE_ADDRESS: return sizeof(voidptr);
797         default:           return 0;
798         }
799 }
800
801
802 /********************** function: utf_nextu2 *********************************
803
804     read the next unicode character from the utf string and
805     increment the utf-string pointer accordingly
806
807 ******************************************************************************/
808
809 u2 utf_nextu2(char **utf_ptr) 
810 {
811     /* uncompressed unicode character */
812     u2 unicode_char = 0;
813     /* current position in utf text */  
814     unsigned char *utf = (unsigned char *) (*utf_ptr);
815     /* bytes representing the unicode character */
816     unsigned char ch1, ch2, ch3;
817     /* number of bytes used to represent the unicode character */
818     int len = 0;
819         
820     switch ((ch1 = utf[0]) >> 4) {
821         default: /* 1 byte */
822                 (*utf_ptr)++;
823                 return (u2) ch1;
824         case 0xC: 
825         case 0xD: /* 2 bytes */
826                 if (((ch2 = utf[1]) & 0xC0) == 0x80) {
827                         unsigned char high = ch1 & 0x1F;
828                         unsigned char low  = ch2 & 0x3F;
829                         unicode_char = (high << 6) + low;
830                         len = 2;
831                 }
832                 break;
833
834         case 0xE: /* 2 or 3 bytes */
835                 if (((ch2 = utf[1]) & 0xC0) == 0x80) {
836                         if (((ch3 = utf[2]) & 0xC0) == 0x80) {
837                                 unsigned char low  = ch3 & 0x3f;
838                                 unsigned char mid  = ch2 & 0x3f;
839                                 unsigned char high = ch1 & 0x0f;
840                                 unicode_char = (((high << 6) + mid) << 6) + low;
841                                 len = 3;
842                         } else
843                                 len = 2;                                           
844                 }
845                 break;
846     }
847
848     /* update position in utf-text */
849     *utf_ptr = (char *) (utf + len);
850     return unicode_char;
851 }
852
853
854 /********************* function: is_valid_utf ********************************
855
856     return true if the given string is a valid UTF-8 string
857
858     utf_ptr...points to first character
859     end_pos...points after last character
860
861 ******************************************************************************/
862
863 static unsigned long min_codepoint[6] = {0,1L<<7,1L<<11,1L<<16,1L<<21,1L<<26};
864
865 bool
866 is_valid_utf(char *utf_ptr,char *end_pos)
867 {
868         int bytes;
869         int len,i;
870         char c;
871         unsigned long v;
872
873         if (end_pos < utf_ptr) return false;
874         bytes = end_pos - utf_ptr;
875         while (bytes--) {
876                 c = *utf_ptr++;
877                 /*dolog("%c %02x",c,c);*/
878                 if (!c) return false;                     /* 0x00 is not allowed */
879                 if ((c & 0x80) == 0) continue;            /* ASCII */
880
881                 if      ((c & 0xe0) == 0xc0) len = 1;     /* 110x xxxx */
882                 else if ((c & 0xf0) == 0xe0) len = 2;     /* 1110 xxxx */
883                 else if ((c & 0xf8) == 0xf0) len = 3;     /* 1111 0xxx */
884                 else if ((c & 0xfc) == 0xf8) len = 4;     /* 1111 10xx */
885                 else if ((c & 0xfe) == 0xfc) len = 5;     /* 1111 110x */
886                 else return false;                        /* invalid leading byte */
887
888                 if (len > 2) return false;                /* Java limitation */
889
890                 v = (unsigned long)c & (0x3f >> len);
891                 
892                 if ((bytes -= len) < 0) return false;     /* missing bytes */
893
894                 for (i = len; i--; ) {
895                         c = *utf_ptr++;
896                         /*dolog("    %c %02x",c,c);*/
897                         if ((c & 0xc0) != 0x80)               /* 10xx xxxx */
898                                 return false;
899                         v = (v<<6) | (c & 0x3f);
900                 }
901
902                 /*              dolog("v=%d",v);*/
903
904                 if (v == 0) {
905                         if (len != 1) return false;           /* Java special */
906                 }
907                 else {
908                         /* Sun Java seems to allow overlong UTF-8 encodings */
909                         
910                         if (v < min_codepoint[len]) { /* overlong UTF-8 */
911                                 if (!opt_liberalutf)
912                                         fprintf(stderr,"WARNING: Overlong UTF-8 sequence found.\n");
913                                 /* XXX change this to panic? */
914                         }
915                 }
916
917                 /* surrogates in UTF-8 seem to be allowed in Java classfiles */
918                 /* if (v >= 0xd800 && v <= 0xdfff) return false; */ /* surrogates */
919
920                 /* even these seem to be allowed */
921                 /* if (v == 0xfffe || v == 0xffff) return false; */ /* invalid codepoints */
922         }
923
924         return true;
925 }
926  
/* is_valid_name ***************************************************************

   Returns true if the given string may be used as a class/field/method
   name. Currently this only rejects empty strings, control characters
   (bytes below 0x20) and the two-byte encoding of zero (0xc0 0x80).

   NOTE: The string is assumed to have passed is_valid_utf!

   utf_ptr...points to first character
   end_pos...points after last character

*******************************************************************************/

bool is_valid_name(char *utf_ptr, char *end_pos)
{
	/* disallow empty names */
	if (end_pos <= utf_ptr)
		return false;

	while (utf_ptr < end_pos) {
		unsigned char c = (unsigned char) *utf_ptr++;

		/* disallow control characters */
		if (c < 0x20)
			return false;

		/* Disallow an encoded zero. The following byte is only inspected
		   while it still belongs to the string, so a 0xc0 in the last
		   position never causes a read behind end_pos. */
		if (c == 0xc0 && utf_ptr < end_pos && (unsigned char) *utf_ptr == 0x80)
			return false;
	}

	return true;
}
952
953 bool
954 is_valid_name_utf(utf *u)
955 {
956         return is_valid_name(u->text,utf_end(u));
957 }
958
959 /******************** Function: class_new **************************************
960
961     searches for the class with the specified name in the classes hashtable,
962     if there is no such class a new classinfo structure is created and inserted
963     into the list of classes to be loaded
964
965 *******************************************************************************/
966
967 classinfo *class_new_intern(utf *classname)
968 {
969         classinfo *c;     /* hashtable element */
970         u4 key;           /* hashkey computed from classname */
971         u4 slot;          /* slot in hashtable */
972         u2 i;
973
974         key  = utf_hashkey(classname->text, classname->blength);
975         slot = key & (class_hash.size - 1);
976         c    = class_hash.ptr[slot];
977
978         /* search external hash chain for the class */
979         while (c) {
980                 if (c->name->blength == classname->blength) {
981                         for (i = 0; i < classname->blength; i++)
982                                 if (classname->text[i] != c->name->text[i]) goto nomatch;
983                                                 
984                         /* class found in hashtable */
985                         return c;
986                 }
987                         
988         nomatch:
989                 c = c->hashlink; /* next element in external chain */
990         }
991
992         /* location in hashtable found, create new classinfo structure */
993
994 #if defined(STATISTICS)
995         if (opt_stat)
996                 count_class_infos += sizeof(classinfo);
997 #endif
998
999         if (initverbose) {
1000                 char logtext[MAXLOGTEXT];
1001                 sprintf(logtext, "Creating class: ");
1002                 utf_sprint_classname(logtext + strlen(logtext), classname);
1003                 log_text(logtext);
1004         }
1005
1006         c = GCNEW(classinfo, 1); /*JOWENN: NEW*/
1007         /*c=NEW(classinfo);*/
1008         c->vmClass = 0;
1009         c->flags = 0;
1010         c->name = classname;
1011         c->packagename = NULL;
1012         c->cpcount = 0;
1013         c->cptags = NULL;
1014         c->cpinfos = NULL;
1015         c->super = NULL;
1016         c->sub = NULL;
1017         c->nextsub = NULL;
1018         c->interfacescount = 0;
1019         c->interfaces = NULL;
1020         c->fieldscount = 0;
1021         c->fields = NULL;
1022         c->methodscount = 0;
1023         c->methods = NULL;
1024         c->linked = false;
1025         c->loaded = false;
1026         c->index = 0;
1027         c->instancesize = 0;
1028         c->header.vftbl = NULL;
1029         c->innerclasscount = 0;
1030         c->innerclass = NULL;
1031         c->vftbl = NULL;
1032         c->initialized = false;
1033         c->initializing = false;
1034         c->classvftbl = false;
1035     c->classUsed = 0;
1036     c->impldBy = NULL;
1037         c->classloader = NULL;
1038         c->sourcefile = NULL;
1039         
1040         /* insert class into the hashtable */
1041         c->hashlink = class_hash.ptr[slot];
1042         class_hash.ptr[slot] = c;
1043
1044         /* update number of hashtable-entries */
1045         class_hash.entries++;
1046
1047         if (class_hash.entries > (class_hash.size * 2)) {
1048
1049                 /* reorganization of hashtable, average length of 
1050                    the external chains is approx. 2                */  
1051
1052                 u4 i;
1053                 classinfo *c;
1054                 hashtable newhash;  /* the new hashtable */
1055
1056                 /* create new hashtable, double the size */
1057                 init_hashtable(&newhash, class_hash.size * 2);
1058                 newhash.entries = class_hash.entries;
1059
1060                 /* transfer elements to new hashtable */
1061                 for (i = 0; i < class_hash.size; i++) {
1062                         c = (classinfo *) class_hash.ptr[i];
1063                         while (c) {
1064                                 classinfo *nextc = c->hashlink;
1065                                 u4 slot = (utf_hashkey(c->name->text, c->name->blength)) & (newhash.size - 1);
1066                                                 
1067                                 c->hashlink = newhash.ptr[slot];
1068                                 newhash.ptr[slot] = c;
1069
1070                                 c = nextc;
1071                         }
1072                 }
1073         
1074                 /* dispose old table */ 
1075                 MFREE(class_hash.ptr, void*, class_hash.size);
1076                 class_hash = newhash;
1077         }
1078
1079     /* Array classes need further initialization. */
1080     if (c->name->text[0] == '[') {
1081                 /* Array classes are not loaded from classfiles. */
1082                 c->loaded = true;
1083         class_new_array(c);
1084                 c->packagename = array_packagename;
1085
1086         } else {
1087                 /* Find the package name */
1088                 /* Classes in the unnamed package keep packagename == NULL. */
1089                 char *p = utf_end(c->name) - 1;
1090                 char *start = c->name->text;
1091                 for (;p > start; --p) {
1092                         if (*p == '.') {
1093                                 c->packagename = utf_new(start, p - start);
1094                                 break;
1095                         }
1096                 }
1097         }
1098
1099         return c;
1100 }
1101
1102
1103 classinfo *class_new(utf *classname)
1104 {
1105     classinfo *c;
1106
1107 #if defined(USE_THREADS) && defined(NATIVE_THREADS)
1108     tables_lock();
1109 #endif
1110
1111     c = class_new_intern(classname);
1112
1113         /* we support eager class loading and linking on demand */
1114
1115         if (opt_eager) {
1116                 classinfo *tc;
1117
1118                 list_init(&unlinkedclasses, OFFSET(classinfo, listnode));
1119
1120                 if (!c->loaded) {
1121                         if (!class_load(c)) {
1122 #if defined(USE_THREADS) && defined(NATIVE_THREADS)
1123                                 tables_unlock();
1124 #endif
1125                                 return NULL;
1126                         }
1127                 }
1128
1129                 /* link all referenced classes */
1130
1131                 while ((tc = list_first(&unlinkedclasses))) {
1132         printf("tc=%p next=%p prev=%p ", tc, tc->listnode.next, tc->listnode.prev);
1133         utf_display(tc->name);
1134         printf("\n");
1135         fflush(stdout);
1136
1137                         /* skip super class */
1138                         if (tc != c) {
1139                                 if (!class_link(tc)) {
1140 #if defined(USE_THREADS) && defined(NATIVE_THREADS)
1141                                         tables_unlock();
1142 #endif
1143                                         return NULL;
1144                                 }
1145                         }
1146
1147                         list_remove(&unlinkedclasses, tc);
1148                 }
1149
1150                 if (!c->linked) {
1151                         if (!class_link(c)) {
1152 #if defined(USE_THREADS) && defined(NATIVE_THREADS)
1153                                 tables_unlock();
1154 #endif
1155                                 return NULL;
1156                         }
1157                 }
1158         }
1159
1160 #if defined(USE_THREADS) && defined(NATIVE_THREADS)
1161     tables_unlock();
1162 #endif
1163
1164     return c;
1165 }
1166
1167
1168 /******************** Function: class_get **************************************
1169
1170     searches for the class with the specified name in the classes hashtable
1171     if there is no such class NULL is returned
1172
1173 *******************************************************************************/
1174
1175 classinfo *class_get(utf *classname)
1176 {
1177         classinfo *c;  /* hashtable element */ 
1178         u4 key;        /* hashkey computed from classname */   
1179         u4 slot;       /* slot in hashtable */
1180         u2 i;  
1181
1182         key  = utf_hashkey(classname->text, classname->blength);
1183         slot = key & (class_hash.size-1);
1184         c    = class_hash.ptr[slot];
1185
1186         /* search external hash-chain */
1187         while (c) {
1188                 if (c->name->blength == classname->blength) {
1189                         /* compare classnames */
1190                         for (i = 0; i < classname->blength; i++) 
1191                                 if (classname->text[i] != c->name->text[i])
1192                                         goto nomatch;
1193
1194                         /* class found in hashtable */                          
1195                         return c;
1196                 }
1197                         
1198         nomatch:
1199                 c = c->hashlink;
1200         }
1201
1202         /* class not found */
1203         return NULL;
1204 }
1205
1206
1207 /* class_remove ****************************************************************
1208
1209    removes the class entry wth the specified name in the classes hashtable,
1210    furthermore the class' resources are freed
1211    if there is no such class false is returned
1212
1213 *******************************************************************************/
1214
1215 bool class_remove(classinfo *c)
1216 {
1217         classinfo *tc;  /* hashtable element */
1218         classinfo *pc;
1219         u4 key;         /* hashkey computed from classname */   
1220         u4 slot;        /* slot in hashtable */
1221         u2 i;  
1222
1223         key  = utf_hashkey(c->name->text, c->name->blength);
1224         slot = key & (class_hash.size - 1);
1225         tc   = class_hash.ptr[slot];
1226         pc   = NULL;
1227
1228         /* search external hash-chain */
1229         while (tc) {
1230                 if (tc->name->blength == c->name->blength) {
1231                         
1232                         /* compare classnames */
1233                         for (i = 0; i < c->name->blength; i++)
1234                                 if (tc->name->text[i] != c->name->text[i])
1235                                         goto nomatch;
1236
1237                         /* class found in hashtable */
1238                         if (!pc) {
1239                                 class_hash.ptr[slot] = tc->hashlink;
1240
1241                         } else {
1242                                 pc->hashlink = tc->hashlink;
1243                         }
1244
1245                         class_free(tc);
1246
1247                         return true;
1248                 }
1249                         
1250         nomatch:
1251                 pc = tc;
1252                 tc = tc->hashlink;
1253         }
1254
1255         /* class not found */
1256         return false;
1257 }
1258
1259
1260 /***************** Function: class_array_of ***********************************
1261
1262     Returns an array class with the given component class.
1263     The array class is dynamically created if neccessary.
1264
1265 *******************************************************************************/
1266
1267 classinfo *class_array_of(classinfo *component)
1268 {
1269     int namelen;
1270     char *namebuf;
1271         classinfo *c;
1272
1273     /* Assemble the array class name */
1274     namelen = component->name->blength;
1275     
1276     if (component->name->text[0] == '[') {
1277         /* the component is itself an array */
1278         namebuf = DMNEW(char, namelen + 1);
1279         namebuf[0] = '[';
1280         memcpy(namebuf + 1, component->name->text, namelen);
1281         namelen++;
1282
1283     } else {
1284         /* the component is a non-array class */
1285         namebuf = DMNEW(char, namelen + 3);
1286         namebuf[0] = '[';
1287         namebuf[1] = 'L';
1288         memcpy(namebuf + 2, component->name->text, namelen);
1289         namebuf[2 + namelen] = ';';
1290         namelen += 3;
1291     }
1292
1293         /* load this class ;-) and link it */
1294         c = class_new(utf_new(namebuf, namelen));
1295         c->loaded = 1;
1296         class_link(c);
1297
1298     return c;
1299 }
1300
1301 /*************** Function: class_multiarray_of ********************************
1302
1303     Returns an array class with the given dimension and element class.
1304     The array class is dynamically created if neccessary.
1305
1306 *******************************************************************************/
1307
1308 classinfo *class_multiarray_of(int dim, classinfo *element)
1309 {
1310     int namelen;
1311     char *namebuf;
1312
1313         if (dim < 1)
1314                 panic("Invalid array dimension requested");
1315
1316     /* Assemble the array class name */
1317     namelen = element->name->blength;
1318     
1319     if (element->name->text[0] == '[') {
1320         /* the element is itself an array */
1321         namebuf = DMNEW(char, namelen + dim);
1322         memcpy(namebuf + dim, element->name->text, namelen);
1323         namelen += dim;
1324     }
1325     else {
1326         /* the element is a non-array class */
1327         namebuf = DMNEW(char, namelen + 2 + dim);
1328         namebuf[dim] = 'L';
1329         memcpy(namebuf + dim + 1, element->name->text, namelen);
1330         namelen += (2 + dim);
1331         namebuf[namelen - 1] = ';';
1332     }
1333         memset(namebuf, '[', dim);
1334
1335     return class_new(utf_new(namebuf, namelen));
1336 }
1337
1338 /************************** function: utf_strlen ******************************
1339
1340     determine number of unicode characters in the utf string
1341
1342 *******************************************************************************/
1343
1344 u4 utf_strlen(utf *u) 
1345 {
1346     char *endpos  = utf_end(u);  /* points behind utf string       */
1347     char *utf_ptr = u->text;     /* current position in utf text   */
1348     u4 len = 0;                  /* number of unicode characters   */
1349
1350     while (utf_ptr < endpos) {
1351                 len++;
1352                 /* next unicode character */
1353                 utf_nextu2(&utf_ptr);
1354     }
1355
1356     if (utf_ptr != endpos)
1357         /* string ended abruptly */
1358                 panic("illegal utf string"); 
1359
1360     return len;
1361 }
1362
1363
1364 /*
1365  * These are local overrides for various environment variables in Emacs.
1366  * Please do not remove this and leave it at the end of the file, where
1367  * Emacs will automagically detect them.
1368  * ---------------------------------------------------------------------
1369  * Local variables:
1370  * mode: c
1371  * indent-tabs-mode: t
1372  * c-basic-offset: 4
1373  * tab-width: 4
1374  * End:
1375  */