9a61952107d042db815ceca849cef1228166254f
[cacao.git] / src / vm / tables.c
1 /* vm/tables.c - 
2
3    Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003
4    R. Grafl, A. Krall, C. Kruegel, C. Oates, R. Obermaisser,
5    M. Probst, S. Ring, E. Steiner, C. Thalinger, D. Thuernbeck,
6    P. Tomsich, J. Wenninger
7
8    This file is part of CACAO.
9
10    This program is free software; you can redistribute it and/or
11    modify it under the terms of the GNU General Public License as
12    published by the Free Software Foundation; either version 2, or (at
13    your option) any later version.
14
15    This program is distributed in the hope that it will be useful, but
16    WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18    General Public License for more details.
19
20    You should have received a copy of the GNU General Public License
21    along with this program; if not, write to the Free Software
22    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
23    02111-1307, USA.
24
25    Contact: cacao@complang.tuwien.ac.at
26
27    Authors: Reinhard Grafl
28
29    Changes: Mark Probst
30             Andreas Krall
31
32    Contains support functions for:
33        - Reading of Java class files
34        - Unicode symbols
35        - the heap
36        - additional support functions
37
38    $Id: tables.c 1621 2004-11-30 13:06:55Z twisti $
39
40 */
41
42 #include <string.h>
43 #include <stdlib.h>
44 #include <assert.h>
45 #include <sys/types.h>
46 #include <sys/mman.h>
47 #include <unistd.h>
48
49 #include "types.h"
50 #include "mm/memory.h"
51 #include "native/native.h"
52 #include "toolbox/logging.h"
53 #include "vm/builtin.h"
54 #include "vm/exceptions.h"
55 #include "vm/global.h"
56 #include "vm/loader.h"
57 #include "vm/options.h"
58 #include "vm/statistics.h"
59 #include "vm/tables.h"
60
61
62 hashtable utf_hash;     /* hashtable for utf8-symbols */
63 hashtable string_hash;  /* hashtable for javastrings  */
64 hashtable class_hash;   /* hashtable for classes      */
65
66 list unlinkedclasses;   /* this is only used for eager class loading          */
67
68
69 /******************************************************************************
70  *********************** hashtable functions **********************************
71  ******************************************************************************/
72
/* The number of slots must be a power of 2, because a slot is selected by
   masking the hashkey with (size - 1). */

#define UTF_HASHSTART   (1 << 14)       /* initial size of utf-hash           */
#define HASHSTART       (1 << 11)       /* initial size of javastring and
                                           class-hash                         */
77
78
79 /******************** function: init_hashtable ******************************
80
81     Initializes a hashtable structure and allocates memory.
82     The parameter size specifies the initial size of the hashtable.
83         
84 *****************************************************************************/
85
86 void init_hashtable(hashtable *hash, u4 size)
87 {
88         u4 i;
89
90         hash->entries = 0;
91         hash->size    = size;
92         hash->ptr     = MNEW(void*, size);
93
94         /* clear table */
95         for (i = 0; i < size; i++) hash->ptr[i] = NULL;
96 }
97
98
99 /*********************** function: tables_init  *****************************
100
101     creates hashtables for symboltables 
102         (called once at startup)                         
103         
104 *****************************************************************************/
105
106 void tables_init()
107 {
108         init_hashtable(&utf_hash,    UTF_HASHSTART);  /* hashtable for utf8-symbols */
109         init_hashtable(&string_hash, HASHSTART);      /* hashtable for javastrings */
110         init_hashtable(&class_hash,  HASHSTART);      /* hashtable for classes */ 
111
112 /*      if (opt_eager) */
113 /*              list_init(&unlinkedclasses, OFFSET(classinfo, listnode)); */
114
115 #if defined(STATISTICS)
116         if (opt_stat)
117                 count_utf_len += sizeof(utf*) * utf_hash.size;
118 #endif
119 }
120
121
122 /********************** function: tables_close ******************************
123
124         free memory for hashtables                    
125         
126 *****************************************************************************/
127
128 void tables_close()
129 {
130         utf *u = NULL;
131         literalstring *s;
132         u4 i;
133         
134         /* dispose utf symbols */
135         for (i = 0; i < utf_hash.size; i++) {
136                 u = utf_hash.ptr[i];
137                 while (u) {
138                         /* process elements in external hash chain */
139                         utf *nextu = u->hashlink;
140                         MFREE(u->text, u1, u->blength);
141                         FREE(u, utf);
142                         u = nextu;
143                 }       
144         }
145
146         /* dispose javastrings */
147         for (i = 0; i < string_hash.size; i++) {
148                 s = string_hash.ptr[i];
149                 while (u) {
150                         /* process elements in external hash chain */
151                         literalstring *nexts = s->hashlink;
152                         literalstring_free(s->string);
153                         FREE(s, literalstring);
154                         s = nexts;
155                 }       
156         }
157
158         /* dispose hashtable structures */
159         MFREE(utf_hash.ptr,    void*, utf_hash.size);
160         MFREE(string_hash.ptr, void*, string_hash.size);
161         MFREE(class_hash.ptr,  void*, class_hash.size);
162 }
163
164
165 /********************* function: utf_display *********************************
166
167         write utf symbol to stdout (debugging purposes)
168
169 *******************************************************************************/
170
171 void utf_display(utf *u)
172 {
173     char *endpos;                       /* points behind utf string           */
174     char *utf_ptr;                      /* current position in utf text       */
175
176         if (!u) {
177                 printf("NULL");
178                 fflush(stdout);
179                 return;
180         }
181
182     endpos = utf_end(u);
183     utf_ptr = u->text;
184
185     while (utf_ptr < endpos) {
186                 /* read next unicode character */                
187                 u2 c = utf_nextu2(&utf_ptr);
188                 if (c >= 32 && c <= 127) printf("%c", c);
189                 else printf("?");
190         }
191
192         fflush(stdout);
193 }
194
195
196 /* utf_display_classname *******************************************************
197
198    write utf symbol to stdout (debugging purposes)
199
200 *******************************************************************************/
201
202 void utf_display_classname(utf *u)
203 {
204     char *endpos;                       /* points behind utf string           */
205     char *utf_ptr;                      /* current position in utf text       */
206
207         if (!u) {
208                 printf("NULL");
209                 fflush(stdout);
210                 return;
211         }
212
213     endpos = utf_end(u);
214     utf_ptr = u->text;
215
216     while (utf_ptr < endpos) {
217                 /* read next unicode character */                
218                 u2 c = utf_nextu2(&utf_ptr);
219                 if (c == '/') c = '.';
220                 if (c >= 32 && c <= 127) printf("%c", c);
221                 else printf("?");
222         }
223
224         fflush(stdout);
225 }
226
227
228 /************************* function: log_utf *********************************
229
230         log utf symbol
231
232 ******************************************************************************/
233
234 void log_utf(utf *u)
235 {
236         char buf[MAXLOGTEXT];
237         utf_sprint(buf, u);
238         dolog("%s", buf);
239 }
240
241
242 /********************** function: log_plain_utf ******************************
243
244         log utf symbol (without printing "LOG: " and newline)
245
246 ******************************************************************************/
247
248 void log_plain_utf(utf *u)
249 {
250         char buf[MAXLOGTEXT];
251         utf_sprint(buf, u);
252         dolog_plain("%s", buf);
253 }
254
255
256 /* utf_sprint ******************************************************************
257         
258    write utf symbol into c-string (debugging purposes)
259
260 *******************************************************************************/
261
262 void utf_sprint(char *buffer, utf *u)
263 {
264     char *endpos;                       /* points behind utf string           */
265     char *utf_ptr;                      /* current position in utf text       */
266     u2 pos = 0;                         /* position in c-string               */
267
268         if (!u) {
269                 memcpy(buffer, "NULL", 5);      /* 4 chars + terminating \0           */
270                 return;
271         }
272
273     endpos = utf_end(u);
274     utf_ptr = u->text;
275
276     while (utf_ptr < endpos) 
277                 /* copy next unicode character */       
278                 buffer[pos++] = utf_nextu2(&utf_ptr);
279
280     /* terminate string */
281     buffer[pos] = '\0';
282 }
283
284
285 /* utf_sprint_classname ********************************************************
286         
287    write utf symbol into c-string (debugging purposes)
288
289 *******************************************************************************/
290
291 void utf_sprint_classname(char *buffer, utf *u)
292 {
293     char *endpos;                       /* points behind utf string           */
294     char *utf_ptr;                      /* current position in utf text       */
295     u2 pos = 0;                         /* position in c-string               */
296
297         if (!u) {
298                 memcpy(buffer, "NULL", 5);      /* 4 chars + terminating \0           */
299                 return;
300         }
301
302     endpos = utf_end(u);
303     utf_ptr = u->text;
304
305     while (utf_ptr < endpos) {
306                 /* copy next unicode character */       
307                 u2 c = utf_nextu2(&utf_ptr);
308                 if (c == '/') c = '.';
309                 buffer[pos++] = c;
310         }
311
312     /* terminate string */
313     buffer[pos] = '\0';
314 }
315
316
317 /********************* Funktion: utf_fprint **********************************
318         
319     write utf symbol into file          
320
321 ******************************************************************************/
322
323 void utf_fprint(FILE *file, utf *u)
324 {
325     char *endpos  = utf_end(u);  /* points behind utf string       */
326     char *utf_ptr = u->text;     /* current position in utf text   */ 
327
328     if (!u)
329                 return;
330
331     while (utf_ptr < endpos) { 
332                 /* read next unicode character */                
333                 u2 c = utf_nextu2(&utf_ptr);                            
334
335                 if (c >= 32 && c <= 127) fprintf(file, "%c", c);
336                 else fprintf(file, "?");
337         }
338 }
339
340
341 /********************* Funktion: utf_fprint **********************************
342         
343     write utf symbol into file          
344
345 ******************************************************************************/
346
347 void utf_fprint_classname(FILE *file, utf *u)
348 {
349     char *endpos  = utf_end(u);  /* points behind utf string       */
350     char *utf_ptr = u->text;     /* current position in utf text   */ 
351
352     if (!u)
353                 return;
354
355     while (utf_ptr < endpos) { 
356                 /* read next unicode character */                
357                 u2 c = utf_nextu2(&utf_ptr);                            
358                 if (c == '/') c = '.';
359
360                 if (c >= 32 && c <= 127) fprintf(file, "%c", c);
361                 else fprintf(file, "?");
362         }
363 }
364
365
366 /****************** internal function: utf_hashkey ***************************
367
368         The hashkey is computed from the utf-text by using up to 8 characters.
369         For utf-symbols longer than 15 characters 3 characters are taken from
370         the beginning and the end, 2 characters are taken from the middle.
371
372 ******************************************************************************/ 
373
374 #define nbs(val) ((u4) *(++text) << val) /* get next byte, left shift by val  */
375 #define fbs(val) ((u4) *(  text) << val) /* get first byte, left shift by val */
376
377 static u4 utf_hashkey(char *text, u4 length)
378 {
379         char *start_pos = text; /* pointer to utf text */
380         u4 a;
381
382         switch (length) {               
383                 
384         case 0: /* empty string */
385                 return 0;
386
387         case 1: return fbs(0);
388         case 2: return fbs(0) ^ nbs(3);
389         case 3: return fbs(0) ^ nbs(3) ^ nbs(5);
390         case 4: return fbs(0) ^ nbs(2) ^ nbs(4) ^ nbs(6);
391         case 5: return fbs(0) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(6);
392         case 6: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(5) ^ nbs(6);
393         case 7: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(5) ^ nbs(6);
394         case 8: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(5) ^ nbs(6) ^ nbs(7);
395
396         case 9:
397                 a = fbs(0);
398                 a ^= nbs(1);
399                 a ^= nbs(2);
400                 text++;
401                 return a ^ nbs(4) ^ nbs(5) ^ nbs(6) ^ nbs(7) ^ nbs(8);
402
403         case 10:
404                 a = fbs(0);
405                 text++;
406                 a ^= nbs(2);
407                 a ^= nbs(3);
408                 a ^= nbs(4);
409                 text++;
410                 return a ^ nbs(6) ^ nbs(7) ^ nbs(8) ^ nbs(9);
411
412         case 11:
413                 a = fbs(0);
414                 text++;
415                 a ^= nbs(2);
416                 a ^= nbs(3);
417                 a ^= nbs(4);
418                 text++;
419                 return a ^ nbs(6) ^ nbs(7) ^ nbs(8) ^ nbs(9) ^ nbs(10);
420
421         case 12:
422                 a = fbs(0);
423                 text += 2;
424                 a ^= nbs(2);
425                 a ^= nbs(3);
426                 text++;
427                 a ^= nbs(5);
428                 a ^= nbs(6);
429                 a ^= nbs(7);
430                 text++;
431                 return a ^ nbs(9) ^ nbs(10);
432
433         case 13:
434                 a = fbs(0);
435                 a ^= nbs(1);
436                 text++;
437                 a ^= nbs(3);
438                 a ^= nbs(4);
439                 text += 2;      
440                 a ^= nbs(7);
441                 a ^= nbs(8);
442                 text += 2;
443                 return a ^ nbs(9) ^ nbs(10);
444
445         case 14:
446                 a = fbs(0);
447                 text += 2;      
448                 a ^= nbs(3);
449                 a ^= nbs(4);
450                 text += 2;      
451                 a ^= nbs(7);
452                 a ^= nbs(8);
453                 text += 2;
454                 return a ^ nbs(9) ^ nbs(10) ^ nbs(11);
455
456         case 15:
457                 a = fbs(0);
458                 text += 2;      
459                 a ^= nbs(3);
460                 a ^= nbs(4);
461                 text += 2;      
462                 a ^= nbs(7);
463                 a ^= nbs(8);
464                 text += 2;
465                 return a ^ nbs(9) ^ nbs(10) ^ nbs(11);
466
467         default:  /* 3 characters from beginning */
468                 a = fbs(0);
469                 text += 2;
470                 a ^= nbs(3);
471                 a ^= nbs(4);
472
473                 /* 2 characters from middle */
474                 text = start_pos + (length / 2);
475                 a ^= fbs(5);
476                 text += 2;
477                 a ^= nbs(6);    
478
479                 /* 3 characters from end */
480                 text = start_pos + length - 4;
481
482                 a ^= fbs(7);
483                 text++;
484
485                 return a ^ nbs(10) ^ nbs(11);
486     }
487 }
488
489
490 /*************************** function: utf_hashkey ***************************
491
492     compute the hashkey of a unicode string
493
494 ******************************************************************************/ 
495
496 u4 unicode_hashkey(u2 *text, u2 len)
497 {
498         return utf_hashkey((char*) text, len);
499 }
500
501
502 /************************ function: utf_new **********************************
503
504         Creates a new utf-symbol, the text of the symbol is passed as a 
505         u1-array. The function searches the utf-hashtable for a utf-symbol 
506         with this text. On success the element returned, otherwise a new 
507         hashtable element is created.
508
509         If the number of entries in the hashtable exceeds twice the size of the
510         hashtable slots a reorganization of the hashtable is done and the utf 
511         symbols are copied to a new hashtable with doubled size.
512
513 ******************************************************************************/
514
515 utf *utf_new_intern(char *text, u2 length)
516 {
517         u4 key;            /* hashkey computed from utf-text */
518         u4 slot;           /* slot in hashtable */
519         utf *u;            /* hashtable element */
520         u2 i;
521
522 #ifdef STATISTICS
523         if (opt_stat)
524                 count_utf_new++;
525 #endif
526
527         key  = utf_hashkey(text, length);
528         slot = key & (utf_hash.size-1);
529         u    = utf_hash.ptr[slot];
530
531         /* search external hash chain for utf-symbol */
532         while (u) {
533                 if (u->blength == length) {
534
535                         /* compare text of hashtable elements */
536                         for (i = 0; i < length; i++)
537                                 if (text[i] != u->text[i]) goto nomatch;
538                         
539 #ifdef STATISTICS
540                         if (opt_stat)
541                                 count_utf_new_found++;
542 #endif
543 /*                      log_text("symbol found in hash table");*/
544                         /* symbol found in hashtable */
545 /*                                      utf_display(u);
546                                         {
547                                                 utf blup;
548                                                 blup.blength=length;
549                                                 blup.text=text;
550                                                 utf_display(&blup);
551                                         }*/
552                         return u;
553                 }
554         nomatch:
555                 u = u->hashlink; /* next element in external chain */
556         }
557
558 #ifdef STATISTICS
559         if (opt_stat)
560                 count_utf_len += sizeof(utf) + length;
561 #endif
562
563         /* location in hashtable found, create new utf element */
564         u = NEW(utf);
565         u->blength  = length;               /* length in bytes of utfstring       */
566         u->hashlink = utf_hash.ptr[slot];   /* link in external hashchain         */
567         u->text     = mem_alloc(length + 1);/* allocate memory for utf-text       */
568         memcpy(u->text, text, length);      /* copy utf-text                      */
569         u->text[length] = '\0';
570         utf_hash.ptr[slot] = u;             /* insert symbol into table           */
571
572         utf_hash.entries++;                 /* update number of entries           */
573
574         if (utf_hash.entries > (utf_hash.size * 2)) {
575
576         /* reorganization of hashtable, average length of 
577            the external chains is approx. 2                */  
578
579                 u4 i;
580                 utf *u;
581                 hashtable newhash; /* the new hashtable */
582
583                 /* create new hashtable, double the size */
584                 init_hashtable(&newhash, utf_hash.size * 2);
585                 newhash.entries = utf_hash.entries;
586
587 #ifdef STATISTICS
588                 if (opt_stat)
589                         count_utf_len += sizeof(utf*) * utf_hash.size;
590 #endif
591
592                 /* transfer elements to new hashtable */
593                 for (i = 0; i < utf_hash.size; i++) {
594                         u = (utf *) utf_hash.ptr[i];
595                         while (u) {
596                                 utf *nextu = u->hashlink;
597                                 u4 slot = utf_hashkey(u->text, u->blength) & (newhash.size - 1);
598                                                 
599                                 u->hashlink = (utf *) newhash.ptr[slot];
600                                 newhash.ptr[slot] = u;
601
602                                 /* follow link in external hash chain */
603                                 u = nextu;
604                         }
605                 }
606         
607                 /* dispose old table */
608                 MFREE(utf_hash.ptr, void*, utf_hash.size);
609                 utf_hash = newhash;
610         }
611
612         return u;
613 }
614
615
616 utf *utf_new(char *text, u2 length)
617 {
618     utf *r;
619
620 #if defined(USE_THREADS) && defined(NATIVE_THREADS)
621     tables_lock();
622 #endif
623
624     r = utf_new_intern(text, length);
625
626 #if defined(USE_THREADS) && defined(NATIVE_THREADS)
627     tables_unlock();
628 #endif
629
630     return r;
631 }
632
633
634 /********************* function: utf_new_char ********************************
635
636     creates a new utf symbol, the text for this symbol is passed
637     as a c-string ( = char* )
638
639 ******************************************************************************/
640
641 utf *utf_new_char(char *text)
642 {
643         return utf_new(text, strlen(text));
644 }
645
646
647 /********************* function: utf_new_char ********************************
648
649     creates a new utf symbol, the text for this symbol is passed
650     as a c-string ( = char* )
651     "." characters are going to be replaced by "/". since the above function is
652     used often, this is a separte function, instead of an if
653
654 ******************************************************************************/
655
656 utf *utf_new_char_classname(char *text)
657 {
658         if (strchr(text, '.')) {
659                 char *txt = strdup(text);
660                 char *end = txt + strlen(txt);
661                 char *c;
662                 utf *tmpRes;
663                 for (c = txt; c < end; c++)
664                         if (*c == '.') *c = '/';
665                 tmpRes = utf_new(txt, strlen(txt));
666                 free(txt);
667                 return tmpRes;
668
669         } else
670                 return utf_new(text, strlen(text));
671 }
672
673
674 /************************** Funktion: utf_show ******************************
675
676     writes the utf symbols in the utfhash to stdout and
677     displays the number of external hash chains grouped 
678     according to the chainlength
679     (debugging purposes)
680
681 *****************************************************************************/
682
683 void utf_show()
684 {
685
686 #define CHAIN_LIMIT 20               /* limit for seperated enumeration */
687
688         u4 chain_count[CHAIN_LIMIT]; /* numbers of chains */
689         u4 max_chainlength = 0;      /* maximum length of the chains */
690         u4 sum_chainlength = 0;      /* sum of the chainlengths */
691         u4 beyond_limit = 0;         /* number of utf-symbols in chains with length>=CHAIN_LIMIT-1 */
692         u4 i;
693
694         printf ("UTF-HASH:\n");
695
696         /* show element of utf-hashtable */
697         for (i=0; i<utf_hash.size; i++) {
698                 utf *u = utf_hash.ptr[i];
699                 if (u) {
700                         printf ("SLOT %d: ", (int) i);
701                         while (u) {
702                                 printf ("'");
703                                 utf_display (u);
704                                 printf ("' ");
705                                 u = u->hashlink;
706                         }       
707                         printf ("\n");
708                 }
709                 
710         }
711
712         printf ("UTF-HASH: %d slots for %d entries\n", 
713                         (int) utf_hash.size, (int) utf_hash.entries );
714
715
716         if (utf_hash.entries == 0)
717                 return;
718
719         printf("chains:\n  chainlength    number of chains    %% of utfstrings\n");
720
721         for (i=0;i<CHAIN_LIMIT;i++)
722                 chain_count[i]=0;
723
724         /* count numbers of hashchains according to their length */
725         for (i=0; i<utf_hash.size; i++) {
726                   
727                 utf *u = (utf*) utf_hash.ptr[i];
728                 u4 chain_length = 0;
729
730                 /* determine chainlength */
731                 while (u) {
732                         u = u->hashlink;
733                         chain_length++;
734                 }
735
736                 /* update sum of all chainlengths */
737                 sum_chainlength+=chain_length;
738
739                 /* determine the maximum length of the chains */
740                 if (chain_length>max_chainlength)
741                         max_chainlength = chain_length;
742
743                 /* update number of utf-symbols in chains with length>=CHAIN_LIMIT-1 */
744                 if (chain_length>=CHAIN_LIMIT) {
745                         beyond_limit+=chain_length;
746                         chain_length=CHAIN_LIMIT-1;
747                 }
748
749                 /* update number of hashchains of current length */
750                 chain_count[chain_length]++;
751         }
752
753         /* display results */  
754         for (i=1;i<CHAIN_LIMIT-1;i++) 
755                 printf("       %2d %17d %18.2f%%\n",i,chain_count[i],(((float) chain_count[i]*i*100)/utf_hash.entries));
756           
757         printf("     >=%2d %17d %18.2f%%\n",CHAIN_LIMIT-1,chain_count[CHAIN_LIMIT-1],((float) beyond_limit*100)/utf_hash.entries);
758
759
760         printf("max. chainlength:%5d\n",max_chainlength);
761
762         /* avg. chainlength = sum of chainlengths / number of chains */
763         printf("avg. chainlength:%5.2f\n",(float) sum_chainlength / (utf_hash.size-chain_count[0]));
764 }
765
766 /******************************************************************************
767 *********************** Misc support functions ********************************
768 ******************************************************************************/
769
770
771 /******************** Function: desc_to_type **********************************
772    
773         Determines the corresponding Java base data type for a given type
774         descriptor.
775         
776 ******************************************************************************/
777
778 u2 desc_to_type(utf *descriptor)
779 {
780         char *utf_ptr = descriptor->text;  /* current position in utf text */
781         char logtext[MAXLOGTEXT];
782
783         if (descriptor->blength < 1) panic("Type-Descriptor is empty string");
784         
785         switch (*utf_ptr++) {
786         case 'B': 
787         case 'C':
788         case 'I':
789         case 'S':  
790         case 'Z':  return TYPE_INT;
791         case 'D':  return TYPE_DOUBLE;
792         case 'F':  return TYPE_FLOAT;
793         case 'J':  return TYPE_LONG;
794         case 'L':
795         case '[':  return TYPE_ADDRESS;
796         }
797                         
798         sprintf(logtext, "Invalid Type-Descriptor: ");
799         utf_sprint(logtext+strlen(logtext), descriptor);
800         error("%s",logtext);
801
802         return 0;
803 }
804
805
806 /********************** Function: desc_typesize *******************************
807
808         Calculates the lenght in bytes needed for a data element of the type given
809         by its type descriptor.
810         
811 ******************************************************************************/
812
813 u2 desc_typesize(utf *descriptor)
814 {
815         switch (desc_to_type(descriptor)) {
816         case TYPE_INT:     return 4;
817         case TYPE_LONG:    return 8;
818         case TYPE_FLOAT:   return 4;
819         case TYPE_DOUBLE:  return 8;
820         case TYPE_ADDRESS: return sizeof(voidptr);
821         default:           return 0;
822         }
823 }
824
825
826 /********************** function: utf_nextu2 *********************************
827
828     read the next unicode character from the utf string and
829     increment the utf-string pointer accordingly
830
831 ******************************************************************************/
832
833 u2 utf_nextu2(char **utf_ptr) 
834 {
835     /* uncompressed unicode character */
836     u2 unicode_char = 0;
837     /* current position in utf text */  
838     unsigned char *utf = (unsigned char *) (*utf_ptr);
839     /* bytes representing the unicode character */
840     unsigned char ch1, ch2, ch3;
841     /* number of bytes used to represent the unicode character */
842     int len = 0;
843         
844     switch ((ch1 = utf[0]) >> 4) {
845         default: /* 1 byte */
846                 (*utf_ptr)++;
847                 return (u2) ch1;
848         case 0xC: 
849         case 0xD: /* 2 bytes */
850                 if (((ch2 = utf[1]) & 0xC0) == 0x80) {
851                         unsigned char high = ch1 & 0x1F;
852                         unsigned char low  = ch2 & 0x3F;
853                         unicode_char = (high << 6) + low;
854                         len = 2;
855                 }
856                 break;
857
858         case 0xE: /* 2 or 3 bytes */
859                 if (((ch2 = utf[1]) & 0xC0) == 0x80) {
860                         if (((ch3 = utf[2]) & 0xC0) == 0x80) {
861                                 unsigned char low  = ch3 & 0x3f;
862                                 unsigned char mid  = ch2 & 0x3f;
863                                 unsigned char high = ch1 & 0x0f;
864                                 unicode_char = (((high << 6) + mid) << 6) + low;
865                                 len = 3;
866                         } else
867                                 len = 2;                                           
868                 }
869                 break;
870     }
871
872     /* update position in utf-text */
873     *utf_ptr = (char *) (utf + len);
874     return unicode_char;
875 }
876
877
878 /********************* function: is_valid_utf ********************************
879
880     return true if the given string is a valid UTF-8 string
881
882     utf_ptr...points to first character
883     end_pos...points after last character
884
885 ******************************************************************************/
886
/* smallest code point that is not overlong when encoded with the given
   number of continuation bytes (index = number of continuation bytes) */
static unsigned long min_codepoint[6] = {
	0,
	1L << 7,
	1L << 11,
	1L << 16,
	1L << 21,
	1L << 26
};
888
889 bool
890 is_valid_utf(char *utf_ptr,char *end_pos)
891 {
892         int bytes;
893         int len,i;
894         char c;
895         unsigned long v;
896
897         if (end_pos < utf_ptr) return false;
898         bytes = end_pos - utf_ptr;
899         while (bytes--) {
900                 c = *utf_ptr++;
901                 /*dolog("%c %02x",c,c);*/
902                 if (!c) return false;                     /* 0x00 is not allowed */
903                 if ((c & 0x80) == 0) continue;            /* ASCII */
904
905                 if      ((c & 0xe0) == 0xc0) len = 1;     /* 110x xxxx */
906                 else if ((c & 0xf0) == 0xe0) len = 2;     /* 1110 xxxx */
907                 else if ((c & 0xf8) == 0xf0) len = 3;     /* 1111 0xxx */
908                 else if ((c & 0xfc) == 0xf8) len = 4;     /* 1111 10xx */
909                 else if ((c & 0xfe) == 0xfc) len = 5;     /* 1111 110x */
910                 else return false;                        /* invalid leading byte */
911
912                 if (len > 2) return false;                /* Java limitation */
913
914                 v = (unsigned long)c & (0x3f >> len);
915                 
916                 if ((bytes -= len) < 0) return false;     /* missing bytes */
917
918                 for (i = len; i--; ) {
919                         c = *utf_ptr++;
920                         /*dolog("    %c %02x",c,c);*/
921                         if ((c & 0xc0) != 0x80)               /* 10xx xxxx */
922                                 return false;
923                         v = (v<<6) | (c & 0x3f);
924                 }
925
926                 /*              dolog("v=%d",v);*/
927
928                 if (v == 0) {
929                         if (len != 1) return false;           /* Java special */
930                 }
931                 else {
932                         /* Sun Java seems to allow overlong UTF-8 encodings */
933                         
934                         if (v < min_codepoint[len]) { /* overlong UTF-8 */
935                                 if (!opt_liberalutf)
936                                         fprintf(stderr,"WARNING: Overlong UTF-8 sequence found.\n");
937                                 /* XXX change this to panic? */
938                         }
939                 }
940
941                 /* surrogates in UTF-8 seem to be allowed in Java classfiles */
942                 /* if (v >= 0xd800 && v <= 0xdfff) return false; */ /* surrogates */
943
944                 /* even these seem to be allowed */
945                 /* if (v == 0xfffe || v == 0xffff) return false; */ /* invalid codepoints */
946         }
947
948         return true;
949 }
950  
/* is_valid_name ***************************************************************

   Returns true if the given string may be used as a class/field/method
   name. Currently this only disallows empty strings, bytes below 0x20
   (control characters) and the two-byte sequence 0xc0 0x80 (the encoded
   zero character).

   NOTE: The string is assumed to have passed is_valid_utf!

   Only bytes inside [utf_ptr, end_pos) are examined; the second byte of
   a possible 0xc0 0x80 pair is looked at only if it lies before end_pos.

   utf_ptr...points to first character
   end_pos...points after last character

*******************************************************************************/

bool
is_valid_name(char *utf_ptr,char *end_pos)
{
	if (end_pos <= utf_ptr)
		return false;                       /* disallow empty names */

	while (utf_ptr < end_pos) {
		unsigned char c = (unsigned char) *utf_ptr++;

		if (c < 0x20)
			return false;                   /* disallow control characters */

		/* disallow zero; utf_ptr already points at the following byte,
		   which must not be read when 0xc0 was the last byte */
		if (c == 0xc0 && utf_ptr < end_pos &&
			(unsigned char) *utf_ptr == 0x80)
			return false;
	}

	return true;
}
976
977 bool
978 is_valid_name_utf(utf *u)
979 {
980         return is_valid_name(u->text,utf_end(u));
981 }
982
983 /******************** Function: class_new **************************************
984
985     searches for the class with the specified name in the classes hashtable,
986     if there is no such class a new classinfo structure is created and inserted
987     into the list of classes to be loaded
988
989 *******************************************************************************/
990
991 classinfo *class_new_intern(utf *classname)
992 {
993         classinfo *c;     /* hashtable element */
994         u4 key;           /* hashkey computed from classname */
995         u4 slot;          /* slot in hashtable */
996         u2 i;
997
998         key  = utf_hashkey(classname->text, classname->blength);
999         slot = key & (class_hash.size - 1);
1000         c    = class_hash.ptr[slot];
1001
1002         /* search external hash chain for the class */
1003         while (c) {
1004                 if (c->name->blength == classname->blength) {
1005                         for (i = 0; i < classname->blength; i++)
1006                                 if (classname->text[i] != c->name->text[i]) goto nomatch;
1007                                                 
1008                         /* class found in hashtable */
1009                         return c;
1010                 }
1011                         
1012         nomatch:
1013                 c = c->hashlink; /* next element in external chain */
1014         }
1015
1016         /* location in hashtable found, create new classinfo structure */
1017
1018 #if defined(STATISTICS)
1019         if (opt_stat)
1020                 count_class_infos += sizeof(classinfo);
1021 #endif
1022
1023         if (initverbose) {
1024                 char logtext[MAXLOGTEXT];
1025                 sprintf(logtext, "Creating class: ");
1026                 utf_sprint_classname(logtext + strlen(logtext), classname);
1027                 log_text(logtext);
1028         }
1029
1030         c = GCNEW(classinfo, 1); /*JOWENN: NEW*/
1031         /*c=NEW(classinfo);*/
1032         c->vmClass = 0;
1033         c->flags = 0;
1034         c->name = classname;
1035         c->packagename = NULL;
1036         c->cpcount = 0;
1037         c->cptags = NULL;
1038         c->cpinfos = NULL;
1039         c->super = NULL;
1040         c->sub = NULL;
1041         c->nextsub = NULL;
1042         c->interfacescount = 0;
1043         c->interfaces = NULL;
1044         c->fieldscount = 0;
1045         c->fields = NULL;
1046         c->methodscount = 0;
1047         c->methods = NULL;
1048         c->linked = false;
1049         c->loaded = false;
1050         c->index = 0;
1051         c->instancesize = 0;
1052         c->header.vftbl = NULL;
1053         c->innerclasscount = 0;
1054         c->innerclass = NULL;
1055         c->vftbl = NULL;
1056         c->initialized = false;
1057         c->initializing = false;
1058         c->classvftbl = false;
1059     c->classUsed = 0;
1060     c->impldBy = NULL;
1061         c->classloader = NULL;
1062         c->sourcefile = NULL;
1063         
1064         /* insert class into the hashtable */
1065         c->hashlink = class_hash.ptr[slot];
1066         class_hash.ptr[slot] = c;
1067
1068         /* update number of hashtable-entries */
1069         class_hash.entries++;
1070
1071         if (class_hash.entries > (class_hash.size * 2)) {
1072
1073                 /* reorganization of hashtable, average length of 
1074                    the external chains is approx. 2                */  
1075
1076                 u4 i;
1077                 classinfo *c;
1078                 hashtable newhash;  /* the new hashtable */
1079
1080                 /* create new hashtable, double the size */
1081                 init_hashtable(&newhash, class_hash.size * 2);
1082                 newhash.entries = class_hash.entries;
1083
1084                 /* transfer elements to new hashtable */
1085                 for (i = 0; i < class_hash.size; i++) {
1086                         c = (classinfo *) class_hash.ptr[i];
1087                         while (c) {
1088                                 classinfo *nextc = c->hashlink;
1089                                 u4 slot = (utf_hashkey(c->name->text, c->name->blength)) & (newhash.size - 1);
1090                                                 
1091                                 c->hashlink = newhash.ptr[slot];
1092                                 newhash.ptr[slot] = c;
1093
1094                                 c = nextc;
1095                         }
1096                 }
1097         
1098                 /* dispose old table */ 
1099                 MFREE(class_hash.ptr, void*, class_hash.size);
1100                 class_hash = newhash;
1101         }
1102
1103     /* Array classes need further initialization. */
1104     if (c->name->text[0] == '[') {
1105                 /* Array classes are not loaded from classfiles. */
1106                 c->loaded = true;
1107         class_new_array(c);
1108                 c->packagename = array_packagename;
1109
1110         } else {
1111                 /* Find the package name */
1112                 /* Classes in the unnamed package keep packagename == NULL. */
1113                 char *p = utf_end(c->name) - 1;
1114                 char *start = c->name->text;
1115                 for (;p > start; --p) {
1116                         if (*p == '.') {
1117                                 c->packagename = utf_new(start, p - start);
1118                                 break;
1119                         }
1120                 }
1121         }
1122 #if defined(USE_THREADS) && defined(NATIVE_THREADS)
1123         initObjectLock(&c->header);
1124 #endif
1125
1126         return c;
1127 }
1128
1129
1130 classinfo *class_new(utf *classname)
1131 {
1132     classinfo *c;
1133
1134 #if defined(USE_THREADS) && defined(NATIVE_THREADS)
1135     tables_lock();
1136 #endif
1137
1138     c = class_new_intern(classname);
1139
1140         /* we support eager class loading and linking on demand */
1141
1142         if (opt_eager) {
1143                 classinfo *tc;
1144                 classinfo *tmp;
1145
1146                 list_init(&unlinkedclasses, OFFSET(classinfo, listnode));
1147
1148                 if (!c->loaded) {
1149                         if (!class_load(c)) {
1150 #if defined(USE_THREADS) && defined(NATIVE_THREADS)
1151                                 tables_unlock();
1152 #endif
1153                                 return c;
1154                         }
1155                 }
1156
1157                 /* link all referenced classes */
1158
1159                 tc = list_first(&unlinkedclasses);
1160
1161                 while (tc) {
1162                         /* skip the current loaded/linked class */
1163                         if (tc != c) {
1164                                 if (!class_link(tc)) {
1165 #if defined(USE_THREADS) && defined(NATIVE_THREADS)
1166                                         tables_unlock();
1167 #endif
1168                                         return c;
1169                                 }
1170                         }
1171
1172                         /* we need a tmp variable here, because list_remove sets prev and
1173                            next to NULL */
1174                         tmp = list_next(&unlinkedclasses, tc);
1175                         list_remove(&unlinkedclasses, tc);
1176                         tc = tmp;
1177                 }
1178
1179                 if (!c->linked) {
1180                         if (!class_link(c)) {
1181 #if defined(USE_THREADS) && defined(NATIVE_THREADS)
1182                                 tables_unlock();
1183 #endif
1184                                 return c;
1185                         }
1186                 }
1187         }
1188
1189 #if defined(USE_THREADS) && defined(NATIVE_THREADS)
1190     tables_unlock();
1191 #endif
1192
1193     return c;
1194 }
1195
1196
1197 /******************** Function: class_get **************************************
1198
1199     searches for the class with the specified name in the classes hashtable
1200     if there is no such class NULL is returned
1201
1202 *******************************************************************************/
1203
1204 classinfo *class_get(utf *classname)
1205 {
1206         classinfo *c;  /* hashtable element */ 
1207         u4 key;        /* hashkey computed from classname */   
1208         u4 slot;       /* slot in hashtable */
1209         u2 i;  
1210
1211         key  = utf_hashkey(classname->text, classname->blength);
1212         slot = key & (class_hash.size-1);
1213         c    = class_hash.ptr[slot];
1214
1215         /* search external hash-chain */
1216         while (c) {
1217                 if (c->name->blength == classname->blength) {
1218                         /* compare classnames */
1219                         for (i = 0; i < classname->blength; i++) 
1220                                 if (classname->text[i] != c->name->text[i])
1221                                         goto nomatch;
1222
1223                         /* class found in hashtable */                          
1224                         return c;
1225                 }
1226                         
1227         nomatch:
1228                 c = c->hashlink;
1229         }
1230
1231         /* class not found */
1232         return NULL;
1233 }
1234
1235
1236 /* class_remove ****************************************************************
1237
1238    removes the class entry wth the specified name in the classes hashtable,
1239    furthermore the class' resources are freed
1240    if there is no such class false is returned
1241
1242 *******************************************************************************/
1243
1244 bool class_remove(classinfo *c)
1245 {
1246         classinfo *tc;  /* hashtable element */
1247         classinfo *pc;
1248         u4 key;         /* hashkey computed from classname */   
1249         u4 slot;        /* slot in hashtable */
1250         u2 i;  
1251
1252         key  = utf_hashkey(c->name->text, c->name->blength);
1253         slot = key & (class_hash.size - 1);
1254         tc   = class_hash.ptr[slot];
1255         pc   = NULL;
1256
1257         /* search external hash-chain */
1258         while (tc) {
1259                 if (tc->name->blength == c->name->blength) {
1260                         
1261                         /* compare classnames */
1262                         for (i = 0; i < c->name->blength; i++)
1263                                 if (tc->name->text[i] != c->name->text[i])
1264                                         goto nomatch;
1265
1266                         /* class found in hashtable */
1267                         if (!pc) {
1268                                 class_hash.ptr[slot] = tc->hashlink;
1269
1270                         } else {
1271                                 pc->hashlink = tc->hashlink;
1272                         }
1273
1274                         class_free(tc);
1275
1276                         return true;
1277                 }
1278                         
1279         nomatch:
1280                 pc = tc;
1281                 tc = tc->hashlink;
1282         }
1283
1284         /* class not found */
1285         return false;
1286 }
1287
1288
1289 /***************** Function: class_array_of ***********************************
1290
1291     Returns an array class with the given component class.
1292     The array class is dynamically created if neccessary.
1293
1294 *******************************************************************************/
1295
1296 classinfo *class_array_of(classinfo *component)
1297 {
1298     int namelen;
1299     char *namebuf;
1300         classinfo *c;
1301
1302     /* Assemble the array class name */
1303     namelen = component->name->blength;
1304     
1305     if (component->name->text[0] == '[') {
1306         /* the component is itself an array */
1307         namebuf = DMNEW(char, namelen + 1);
1308         namebuf[0] = '[';
1309         memcpy(namebuf + 1, component->name->text, namelen);
1310         namelen++;
1311
1312     } else {
1313         /* the component is a non-array class */
1314         namebuf = DMNEW(char, namelen + 3);
1315         namebuf[0] = '[';
1316         namebuf[1] = 'L';
1317         memcpy(namebuf + 2, component->name->text, namelen);
1318         namebuf[2 + namelen] = ';';
1319         namelen += 3;
1320     }
1321
1322         /* load this class ;-) and link it */
1323         c = class_new(utf_new(namebuf, namelen));
1324         c->loaded = 1;
1325         class_link(c);
1326
1327     return c;
1328 }
1329
1330 /*************** Function: class_multiarray_of ********************************
1331
1332     Returns an array class with the given dimension and element class.
1333     The array class is dynamically created if neccessary.
1334
1335 *******************************************************************************/
1336
1337 classinfo *class_multiarray_of(int dim, classinfo *element)
1338 {
1339     int namelen;
1340     char *namebuf;
1341
1342         if (dim < 1)
1343                 panic("Invalid array dimension requested");
1344
1345     /* Assemble the array class name */
1346     namelen = element->name->blength;
1347     
1348     if (element->name->text[0] == '[') {
1349         /* the element is itself an array */
1350         namebuf = DMNEW(char, namelen + dim);
1351         memcpy(namebuf + dim, element->name->text, namelen);
1352         namelen += dim;
1353     }
1354     else {
1355         /* the element is a non-array class */
1356         namebuf = DMNEW(char, namelen + 2 + dim);
1357         namebuf[dim] = 'L';
1358         memcpy(namebuf + dim + 1, element->name->text, namelen);
1359         namelen += (2 + dim);
1360         namebuf[namelen - 1] = ';';
1361     }
1362         memset(namebuf, '[', dim);
1363
1364     return class_new(utf_new(namebuf, namelen));
1365 }
1366
1367 /************************** function: utf_strlen ******************************
1368
1369     determine number of unicode characters in the utf string
1370
1371 *******************************************************************************/
1372
1373 u4 utf_strlen(utf *u) 
1374 {
1375     char *endpos;                   /* points behind utf string       */
1376     char *utf_ptr;                  /* current position in utf text   */
1377     u4 len = 0;                     /* number of unicode characters   */
1378
1379         if (!u) {
1380                 *exceptionptr = new_nullpointerexception();
1381                 return 0;
1382         }
1383
1384         endpos = utf_end(u);
1385         utf_ptr = u->text;
1386
1387     while (utf_ptr < endpos) {
1388                 len++;
1389                 /* next unicode character */
1390                 utf_nextu2(&utf_ptr);
1391     }
1392
1393     if (utf_ptr != endpos)
1394         /* string ended abruptly */
1395                 panic("illegal utf string"); 
1396
1397     return len;
1398 }
1399
1400
1401 /*
1402  * These are local overrides for various environment variables in Emacs.
1403  * Please do not remove this and leave it at the end of the file, where
1404  * Emacs will automagically detect them.
1405  * ---------------------------------------------------------------------
1406  * Local variables:
1407  * mode: c
1408  * indent-tabs-mode: t
1409  * c-basic-offset: 4
1410  * tab-width: 4
1411  * End:
1412  */