0bbde3683e9fcd6268eeaef3baae39e6f99ac6bb
[cacao.git] / src / vm / tables.c
1 /* vm/tables.c - 
2
3    Copyright (C) 1996-2005 R. Grafl, A. Krall, C. Kruegel, C. Oates,
4    R. Obermaisser, M. Platter, M. Probst, S. Ring, E. Steiner,
5    C. Thalinger, D. Thuernbeck, P. Tomsich, C. Ullrich, J. Wenninger,
6    Institut f. Computersprachen - TU Wien
7
8    This file is part of CACAO.
9
10    This program is free software; you can redistribute it and/or
11    modify it under the terms of the GNU General Public License as
12    published by the Free Software Foundation; either version 2, or (at
13    your option) any later version.
14
15    This program is distributed in the hope that it will be useful, but
16    WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18    General Public License for more details.
19
20    You should have received a copy of the GNU General Public License
21    along with this program; if not, write to the Free Software
22    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
23    02111-1307, USA.
24
25    Contact: cacao@complang.tuwien.ac.at
26
27    Authors: Reinhard Grafl
28
29    Changes: Mark Probst
30             Andreas Krall
31
32    Contains support functions for:
33        - Reading of Java class files
34        - Unicode symbols
35        - the heap
36        - additional support functions
37
38    $Id: tables.c 1843 2005-01-04 11:21:02Z twisti $
39
40 */
41
42 #include <string.h>
43 #include <stdlib.h>
44 #include <assert.h>
45 #include <sys/types.h>
46 #include <sys/mman.h>
47 #include <unistd.h>
48
49 #include "types.h"
50 #include "mm/memory.h"
51 #include "native/native.h"
52 #include "toolbox/logging.h"
53 #include "vm/builtin.h"
54 #include "vm/exceptions.h"
55 #include "vm/global.h"
56 #include "vm/loader.h"
57 #include "vm/options.h"
58 #include "vm/statistics.h"
59 #include "vm/tables.h"
60
61
62 hashtable utf_hash;     /* hashtable for utf8-symbols */
63 hashtable string_hash;  /* hashtable for javastrings  */
64 hashtable class_hash;   /* hashtable for classes      */
65
66 list unlinkedclasses;   /* this is only used for eager class loading          */
67
68
69 /******************************************************************************
70  *********************** hashtable functions **********************************
71  ******************************************************************************/
72
/* Initial number of slots of the hashtables. A slot is selected with
   `key & (size - 1)', therefore every size must be a power of 2. */

#define UTF_HASHSTART   (1 << 14)   /* 16384 slots: initial size of utf-hash  */
#define HASHSTART       (1 << 11)   /*  2048 slots: initial size of the
                                       javastring- and class-hash            */
77
78
79 /******************** function: init_hashtable ******************************
80
81     Initializes a hashtable structure and allocates memory.
82     The parameter size specifies the initial size of the hashtable.
83         
84 *****************************************************************************/
85
86 void init_hashtable(hashtable *hash, u4 size)
87 {
88         u4 i;
89
90         hash->entries = 0;
91         hash->size    = size;
92         hash->ptr     = MNEW(void*, size);
93
94         /* clear table */
95         for (i = 0; i < size; i++) hash->ptr[i] = NULL;
96 }
97
98
99 /*********************** function: tables_init  *****************************
100
101     creates hashtables for symboltables 
102         (called once at startup)                         
103         
104 *****************************************************************************/
105
106 void tables_init()
107 {
108         init_hashtable(&utf_hash,    UTF_HASHSTART);  /* hashtable for utf8-symbols */
109         init_hashtable(&string_hash, HASHSTART);      /* hashtable for javastrings */
110         init_hashtable(&class_hash,  HASHSTART);      /* hashtable for classes */ 
111
112 /*      if (opt_eager) */
113 /*              list_init(&unlinkedclasses, OFFSET(classinfo, listnode)); */
114
115 #if defined(STATISTICS)
116         if (opt_stat)
117                 count_utf_len += sizeof(utf*) * utf_hash.size;
118 #endif
119 }
120
121
122 /********************** function: tables_close ******************************
123
124         free memory for hashtables                    
125         
126 *****************************************************************************/
127
128 void tables_close()
129 {
130         utf *u = NULL;
131         literalstring *s;
132         u4 i;
133         
134         /* dispose utf symbols */
135         for (i = 0; i < utf_hash.size; i++) {
136                 u = utf_hash.ptr[i];
137                 while (u) {
138                         /* process elements in external hash chain */
139                         utf *nextu = u->hashlink;
140                         MFREE(u->text, u1, u->blength);
141                         FREE(u, utf);
142                         u = nextu;
143                 }       
144         }
145
146         /* dispose javastrings */
147         for (i = 0; i < string_hash.size; i++) {
148                 s = string_hash.ptr[i];
149                 while (u) {
150                         /* process elements in external hash chain */
151                         literalstring *nexts = s->hashlink;
152                         literalstring_free(s->string);
153                         FREE(s, literalstring);
154                         s = nexts;
155                 }       
156         }
157
158         /* dispose hashtable structures */
159         MFREE(utf_hash.ptr,    void*, utf_hash.size);
160         MFREE(string_hash.ptr, void*, string_hash.size);
161         MFREE(class_hash.ptr,  void*, class_hash.size);
162 }
163
164
165 /********************* function: utf_display *********************************
166
167         write utf symbol to stdout (debugging purposes)
168
169 *******************************************************************************/
170
171 void utf_display(utf *u)
172 {
173     char *endpos;                       /* points behind utf string           */
174     char *utf_ptr;                      /* current position in utf text       */
175
176         if (!u) {
177                 printf("NULL");
178                 fflush(stdout);
179                 return;
180         }
181
182     endpos = utf_end(u);
183     utf_ptr = u->text;
184
185     while (utf_ptr < endpos) {
186                 /* read next unicode character */                
187                 u2 c = utf_nextu2(&utf_ptr);
188                 if (c >= 32 && c <= 127) printf("%c", c);
189                 else printf("?");
190         }
191
192         fflush(stdout);
193 }
194
195
196 /* utf_display_classname *******************************************************
197
198    write utf symbol to stdout (debugging purposes)
199
200 *******************************************************************************/
201
202 void utf_display_classname(utf *u)
203 {
204     char *endpos;                       /* points behind utf string           */
205     char *utf_ptr;                      /* current position in utf text       */
206
207         if (!u) {
208                 printf("NULL");
209                 fflush(stdout);
210                 return;
211         }
212
213     endpos = utf_end(u);
214     utf_ptr = u->text;
215
216     while (utf_ptr < endpos) {
217                 /* read next unicode character */                
218                 u2 c = utf_nextu2(&utf_ptr);
219                 if (c == '/') c = '.';
220                 if (c >= 32 && c <= 127) printf("%c", c);
221                 else printf("?");
222         }
223
224         fflush(stdout);
225 }
226
227
228 /************************* function: log_utf *********************************
229
230         log utf symbol
231
232 ******************************************************************************/
233
234 void log_utf(utf *u)
235 {
236         char buf[MAXLOGTEXT];
237         utf_sprint(buf, u);
238         dolog("%s", buf);
239 }
240
241
242 /********************** function: log_plain_utf ******************************
243
244         log utf symbol (without printing "LOG: " and newline)
245
246 ******************************************************************************/
247
248 void log_plain_utf(utf *u)
249 {
250         char buf[MAXLOGTEXT];
251         utf_sprint(buf, u);
252         dolog_plain("%s", buf);
253 }
254
255
256 /* utf_sprint ******************************************************************
257         
258    write utf symbol into c-string (debugging purposes)
259
260 *******************************************************************************/
261
262 void utf_sprint(char *buffer, utf *u)
263 {
264     char *endpos;                       /* points behind utf string           */
265     char *utf_ptr;                      /* current position in utf text       */
266     u2 pos = 0;                         /* position in c-string               */
267
268         if (!u) {
269                 memcpy(buffer, "NULL", 5);      /* 4 chars + terminating \0           */
270                 return;
271         }
272
273     endpos = utf_end(u);
274     utf_ptr = u->text;
275
276     while (utf_ptr < endpos) 
277                 /* copy next unicode character */       
278                 buffer[pos++] = utf_nextu2(&utf_ptr);
279
280     /* terminate string */
281     buffer[pos] = '\0';
282 }
283
284
285 /* utf_sprint_classname ********************************************************
286         
287    write utf symbol into c-string (debugging purposes)
288
289 *******************************************************************************/
290
291 void utf_sprint_classname(char *buffer, utf *u)
292 {
293     char *endpos;                       /* points behind utf string           */
294     char *utf_ptr;                      /* current position in utf text       */
295     u2 pos = 0;                         /* position in c-string               */
296
297         if (!u) {
298                 memcpy(buffer, "NULL", 5);      /* 4 chars + terminating \0           */
299                 return;
300         }
301
302     endpos = utf_end(u);
303     utf_ptr = u->text;
304
305     while (utf_ptr < endpos) {
306                 /* copy next unicode character */       
307                 u2 c = utf_nextu2(&utf_ptr);
308                 if (c == '/') c = '.';
309                 buffer[pos++] = c;
310         }
311
312     /* terminate string */
313     buffer[pos] = '\0';
314 }
315
316
317 /********************* Funktion: utf_fprint **********************************
318         
319     write utf symbol into file          
320
321 ******************************************************************************/
322
323 void utf_fprint(FILE *file, utf *u)
324 {
325     char *endpos  = utf_end(u);  /* points behind utf string       */
326     char *utf_ptr = u->text;     /* current position in utf text   */ 
327
328     if (!u)
329                 return;
330
331     while (utf_ptr < endpos) { 
332                 /* read next unicode character */                
333                 u2 c = utf_nextu2(&utf_ptr);                            
334
335                 if (c >= 32 && c <= 127) fprintf(file, "%c", c);
336                 else fprintf(file, "?");
337         }
338 }
339
340
341 /********************* Funktion: utf_fprint **********************************
342         
343     write utf symbol into file          
344
345 ******************************************************************************/
346
347 void utf_fprint_classname(FILE *file, utf *u)
348 {
349     char *endpos  = utf_end(u);  /* points behind utf string       */
350     char *utf_ptr = u->text;     /* current position in utf text   */ 
351
352     if (!u)
353                 return;
354
355     while (utf_ptr < endpos) { 
356                 /* read next unicode character */                
357                 u2 c = utf_nextu2(&utf_ptr);                            
358                 if (c == '/') c = '.';
359
360                 if (c >= 32 && c <= 127) fprintf(file, "%c", c);
361                 else fprintf(file, "?");
362         }
363 }
364
365
366 /****************** internal function: utf_hashkey *****************************
367
368         The hashkey is computed from the utf-text by using up to 8 characters.
369         For utf-symbols longer than 15 characters 3 characters are taken from
370         the beginning and the end, 2 characters are taken from the middle.
371
372 *******************************************************************************/
373
374 #define nbs(val) ((u4) *(++text) << val) /* get next byte, left shift by val  */
375 #define fbs(val) ((u4) *(  text) << val) /* get first byte, left shift by val */
376
377 static u4 utf_hashkey(const char *text, u4 length)
378 {
379         const char *start_pos = text;       /* pointer to utf text                */
380         u4 a;
381
382         switch (length) {               
383                 
384         case 0: /* empty string */
385                 return 0;
386
387         case 1: return fbs(0);
388         case 2: return fbs(0) ^ nbs(3);
389         case 3: return fbs(0) ^ nbs(3) ^ nbs(5);
390         case 4: return fbs(0) ^ nbs(2) ^ nbs(4) ^ nbs(6);
391         case 5: return fbs(0) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(6);
392         case 6: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(5) ^ nbs(6);
393         case 7: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(5) ^ nbs(6);
394         case 8: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(5) ^ nbs(6) ^ nbs(7);
395
396         case 9:
397                 a = fbs(0);
398                 a ^= nbs(1);
399                 a ^= nbs(2);
400                 text++;
401                 return a ^ nbs(4) ^ nbs(5) ^ nbs(6) ^ nbs(7) ^ nbs(8);
402
403         case 10:
404                 a = fbs(0);
405                 text++;
406                 a ^= nbs(2);
407                 a ^= nbs(3);
408                 a ^= nbs(4);
409                 text++;
410                 return a ^ nbs(6) ^ nbs(7) ^ nbs(8) ^ nbs(9);
411
412         case 11:
413                 a = fbs(0);
414                 text++;
415                 a ^= nbs(2);
416                 a ^= nbs(3);
417                 a ^= nbs(4);
418                 text++;
419                 return a ^ nbs(6) ^ nbs(7) ^ nbs(8) ^ nbs(9) ^ nbs(10);
420
421         case 12:
422                 a = fbs(0);
423                 text += 2;
424                 a ^= nbs(2);
425                 a ^= nbs(3);
426                 text++;
427                 a ^= nbs(5);
428                 a ^= nbs(6);
429                 a ^= nbs(7);
430                 text++;
431                 return a ^ nbs(9) ^ nbs(10);
432
433         case 13:
434                 a = fbs(0);
435                 a ^= nbs(1);
436                 text++;
437                 a ^= nbs(3);
438                 a ^= nbs(4);
439                 text += 2;      
440                 a ^= nbs(7);
441                 a ^= nbs(8);
442                 text += 2;
443                 return a ^ nbs(9) ^ nbs(10);
444
445         case 14:
446                 a = fbs(0);
447                 text += 2;      
448                 a ^= nbs(3);
449                 a ^= nbs(4);
450                 text += 2;      
451                 a ^= nbs(7);
452                 a ^= nbs(8);
453                 text += 2;
454                 return a ^ nbs(9) ^ nbs(10) ^ nbs(11);
455
456         case 15:
457                 a = fbs(0);
458                 text += 2;      
459                 a ^= nbs(3);
460                 a ^= nbs(4);
461                 text += 2;      
462                 a ^= nbs(7);
463                 a ^= nbs(8);
464                 text += 2;
465                 return a ^ nbs(9) ^ nbs(10) ^ nbs(11);
466
467         default:  /* 3 characters from beginning */
468                 a = fbs(0);
469                 text += 2;
470                 a ^= nbs(3);
471                 a ^= nbs(4);
472
473                 /* 2 characters from middle */
474                 text = start_pos + (length / 2);
475                 a ^= fbs(5);
476                 text += 2;
477                 a ^= nbs(6);    
478
479                 /* 3 characters from end */
480                 text = start_pos + length - 4;
481
482                 a ^= fbs(7);
483                 text++;
484
485                 return a ^ nbs(10) ^ nbs(11);
486     }
487 }
488
489
490 /*************************** function: utf_hashkey ***************************
491
492     compute the hashkey of a unicode string
493
494 ******************************************************************************/ 
495
496 u4 unicode_hashkey(u2 *text, u2 len)
497 {
498         return utf_hashkey((char*) text, len);
499 }
500
501
502 /************************ function: utf_new **********************************
503
504         Creates a new utf-symbol, the text of the symbol is passed as a 
505         u1-array. The function searches the utf-hashtable for a utf-symbol 
506         with this text. On success the element returned, otherwise a new 
507         hashtable element is created.
508
509         If the number of entries in the hashtable exceeds twice the size of the
510         hashtable slots a reorganization of the hashtable is done and the utf 
511         symbols are copied to a new hashtable with doubled size.
512
513 ******************************************************************************/
514
515 utf *utf_new_intern(const char *text, u2 length);
516
517 utf *utf_new(const char *text, u2 length)
518 {
519     utf *r;
520
521 #if defined(USE_THREADS) && defined(NATIVE_THREADS)
522     tables_lock();
523 #endif
524
525     r = utf_new_intern(text, length);
526
527 #if defined(USE_THREADS) && defined(NATIVE_THREADS)
528     tables_unlock();
529 #endif
530
531     return r;
532 }
533
534
535 utf *utf_new_intern(const char *text, u2 length)
536 {
537         u4 key;            /* hashkey computed from utf-text */
538         u4 slot;           /* slot in hashtable */
539         utf *u;            /* hashtable element */
540         u2 i;
541
542 #ifdef STATISTICS
543         if (opt_stat)
544                 count_utf_new++;
545 #endif
546
547         key  = utf_hashkey(text, length);
548         slot = key & (utf_hash.size-1);
549         u    = utf_hash.ptr[slot];
550
551         /* search external hash chain for utf-symbol */
552         while (u) {
553                 if (u->blength == length) {
554
555                         /* compare text of hashtable elements */
556                         for (i = 0; i < length; i++)
557                                 if (text[i] != u->text[i]) goto nomatch;
558                         
559 #ifdef STATISTICS
560                         if (opt_stat)
561                                 count_utf_new_found++;
562 #endif
563 /*                      log_text("symbol found in hash table");*/
564                         /* symbol found in hashtable */
565 /*                                      utf_display(u);
566                                         {
567                                                 utf blup;
568                                                 blup.blength=length;
569                                                 blup.text=text;
570                                                 utf_display(&blup);
571                                         }*/
572                         return u;
573                 }
574         nomatch:
575                 u = u->hashlink; /* next element in external chain */
576         }
577
578 #ifdef STATISTICS
579         if (opt_stat)
580                 count_utf_len += sizeof(utf) + length;
581 #endif
582
583         /* location in hashtable found, create new utf element */
584         u = NEW(utf);
585         u->blength  = length;               /* length in bytes of utfstring       */
586         u->hashlink = utf_hash.ptr[slot];   /* link in external hashchain         */
587         u->text     = mem_alloc(length + 1);/* allocate memory for utf-text       */
588         memcpy(u->text, text, length);      /* copy utf-text                      */
589         u->text[length] = '\0';
590         utf_hash.ptr[slot] = u;             /* insert symbol into table           */
591
592         utf_hash.entries++;                 /* update number of entries           */
593
594         if (utf_hash.entries > (utf_hash.size * 2)) {
595
596         /* reorganization of hashtable, average length of 
597            the external chains is approx. 2                */  
598
599                 u4 i;
600                 utf *u;
601                 hashtable newhash; /* the new hashtable */
602
603                 /* create new hashtable, double the size */
604                 init_hashtable(&newhash, utf_hash.size * 2);
605                 newhash.entries = utf_hash.entries;
606
607 #ifdef STATISTICS
608                 if (opt_stat)
609                         count_utf_len += sizeof(utf*) * utf_hash.size;
610 #endif
611
612                 /* transfer elements to new hashtable */
613                 for (i = 0; i < utf_hash.size; i++) {
614                         u = (utf *) utf_hash.ptr[i];
615                         while (u) {
616                                 utf *nextu = u->hashlink;
617                                 u4 slot = utf_hashkey(u->text, u->blength) & (newhash.size - 1);
618                                                 
619                                 u->hashlink = (utf *) newhash.ptr[slot];
620                                 newhash.ptr[slot] = u;
621
622                                 /* follow link in external hash chain */
623                                 u = nextu;
624                         }
625                 }
626         
627                 /* dispose old table */
628                 MFREE(utf_hash.ptr, void*, utf_hash.size);
629                 utf_hash = newhash;
630         }
631
632         return u;
633 }
634
635
636 /********************* function: utf_new_char ********************************
637
638     creates a new utf symbol, the text for this symbol is passed
639     as a c-string ( = char* )
640
641 ******************************************************************************/
642
643 utf *utf_new_char(const char *text)
644 {
645         return utf_new(text, strlen(text));
646 }
647
648
649 /********************* function: utf_new_char ********************************
650
651     creates a new utf symbol, the text for this symbol is passed
652     as a c-string ( = char* )
653     "." characters are going to be replaced by "/". since the above function is
654     used often, this is a separte function, instead of an if
655
656 ******************************************************************************/
657
658 utf *utf_new_char_classname(const char *text)
659 {
660         if (strchr(text, '.')) {
661                 char *txt = strdup(text);
662                 char *end = txt + strlen(txt);
663                 char *c;
664                 utf *tmpRes;
665                 for (c = txt; c < end; c++)
666                         if (*c == '.') *c = '/';
667                 tmpRes = utf_new(txt, strlen(txt));
668                 free(txt);
669                 return tmpRes;
670
671         } else
672                 return utf_new(text, strlen(text));
673 }
674
675
676 /************************** Funktion: utf_show ******************************
677
678     writes the utf symbols in the utfhash to stdout and
679     displays the number of external hash chains grouped 
680     according to the chainlength
681     (debugging purposes)
682
683 *****************************************************************************/
684
685 void utf_show(void)
686 {
687
688 #define CHAIN_LIMIT 20               /* limit for seperated enumeration */
689
690         u4 chain_count[CHAIN_LIMIT]; /* numbers of chains */
691         u4 max_chainlength = 0;      /* maximum length of the chains */
692         u4 sum_chainlength = 0;      /* sum of the chainlengths */
693         u4 beyond_limit = 0;         /* number of utf-symbols in chains with length>=CHAIN_LIMIT-1 */
694         u4 i;
695
696         printf ("UTF-HASH:\n");
697
698         /* show element of utf-hashtable */
699         for (i=0; i<utf_hash.size; i++) {
700                 utf *u = utf_hash.ptr[i];
701                 if (u) {
702                         printf ("SLOT %d: ", (int) i);
703                         while (u) {
704                                 printf ("'");
705                                 utf_display (u);
706                                 printf ("' ");
707                                 u = u->hashlink;
708                         }       
709                         printf ("\n");
710                 }
711                 
712         }
713
714         printf ("UTF-HASH: %d slots for %d entries\n", 
715                         (int) utf_hash.size, (int) utf_hash.entries );
716
717
718         if (utf_hash.entries == 0)
719                 return;
720
721         printf("chains:\n  chainlength    number of chains    %% of utfstrings\n");
722
723         for (i=0;i<CHAIN_LIMIT;i++)
724                 chain_count[i]=0;
725
726         /* count numbers of hashchains according to their length */
727         for (i=0; i<utf_hash.size; i++) {
728                   
729                 utf *u = (utf*) utf_hash.ptr[i];
730                 u4 chain_length = 0;
731
732                 /* determine chainlength */
733                 while (u) {
734                         u = u->hashlink;
735                         chain_length++;
736                 }
737
738                 /* update sum of all chainlengths */
739                 sum_chainlength+=chain_length;
740
741                 /* determine the maximum length of the chains */
742                 if (chain_length>max_chainlength)
743                         max_chainlength = chain_length;
744
745                 /* update number of utf-symbols in chains with length>=CHAIN_LIMIT-1 */
746                 if (chain_length>=CHAIN_LIMIT) {
747                         beyond_limit+=chain_length;
748                         chain_length=CHAIN_LIMIT-1;
749                 }
750
751                 /* update number of hashchains of current length */
752                 chain_count[chain_length]++;
753         }
754
755         /* display results */  
756         for (i=1;i<CHAIN_LIMIT-1;i++) 
757                 printf("       %2d %17d %18.2f%%\n",i,chain_count[i],(((float) chain_count[i]*i*100)/utf_hash.entries));
758           
759         printf("     >=%2d %17d %18.2f%%\n",CHAIN_LIMIT-1,chain_count[CHAIN_LIMIT-1],((float) beyond_limit*100)/utf_hash.entries);
760
761
762         printf("max. chainlength:%5d\n",max_chainlength);
763
764         /* avg. chainlength = sum of chainlengths / number of chains */
765         printf("avg. chainlength:%5.2f\n",(float) sum_chainlength / (utf_hash.size-chain_count[0]));
766 }
767
768 /******************************************************************************
769 *********************** Misc support functions ********************************
770 ******************************************************************************/
771
772
773 /******************** Function: desc_to_type **********************************
774    
775         Determines the corresponding Java base data type for a given type
776         descriptor.
777         
778 ******************************************************************************/
779
780 u2 desc_to_type(utf *descriptor)
781 {
782         char *utf_ptr = descriptor->text;  /* current position in utf text */
783         char logtext[MAXLOGTEXT];
784
785         if (descriptor->blength < 1) panic("Type-Descriptor is empty string");
786         
787         switch (*utf_ptr++) {
788         case 'B': 
789         case 'C':
790         case 'I':
791         case 'S':  
792         case 'Z':  return TYPE_INT;
793         case 'D':  return TYPE_DOUBLE;
794         case 'F':  return TYPE_FLOAT;
795         case 'J':  return TYPE_LONG;
796         case 'L':
797         case '[':  return TYPE_ADDRESS;
798         }
799                         
800         sprintf(logtext, "Invalid Type-Descriptor: ");
801         utf_sprint(logtext+strlen(logtext), descriptor);
802         error("%s",logtext);
803
804         return 0;
805 }
806
807
808 /********************** Function: desc_typesize *******************************
809
810         Calculates the lenght in bytes needed for a data element of the type given
811         by its type descriptor.
812         
813 ******************************************************************************/
814
815 u2 desc_typesize(utf *descriptor)
816 {
817         switch (desc_to_type(descriptor)) {
818         case TYPE_INT:     return 4;
819         case TYPE_LONG:    return 8;
820         case TYPE_FLOAT:   return 4;
821         case TYPE_DOUBLE:  return 8;
822         case TYPE_ADDRESS: return sizeof(voidptr);
823         default:           return 0;
824         }
825 }
826
827
828 /********************** function: utf_nextu2 *********************************
829
830     read the next unicode character from the utf string and
831     increment the utf-string pointer accordingly
832
833 ******************************************************************************/
834
835 u2 utf_nextu2(char **utf_ptr) 
836 {
837     /* uncompressed unicode character */
838     u2 unicode_char = 0;
839     /* current position in utf text */  
840     unsigned char *utf = (unsigned char *) (*utf_ptr);
841     /* bytes representing the unicode character */
842     unsigned char ch1, ch2, ch3;
843     /* number of bytes used to represent the unicode character */
844     int len = 0;
845         
846     switch ((ch1 = utf[0]) >> 4) {
847         default: /* 1 byte */
848                 (*utf_ptr)++;
849                 return (u2) ch1;
850         case 0xC: 
851         case 0xD: /* 2 bytes */
852                 if (((ch2 = utf[1]) & 0xC0) == 0x80) {
853                         unsigned char high = ch1 & 0x1F;
854                         unsigned char low  = ch2 & 0x3F;
855                         unicode_char = (high << 6) + low;
856                         len = 2;
857                 }
858                 break;
859
860         case 0xE: /* 2 or 3 bytes */
861                 if (((ch2 = utf[1]) & 0xC0) == 0x80) {
862                         if (((ch3 = utf[2]) & 0xC0) == 0x80) {
863                                 unsigned char low  = ch3 & 0x3f;
864                                 unsigned char mid  = ch2 & 0x3f;
865                                 unsigned char high = ch1 & 0x0f;
866                                 unicode_char = (((high << 6) + mid) << 6) + low;
867                                 len = 3;
868                         } else
869                                 len = 2;                                           
870                 }
871                 break;
872     }
873
874     /* update position in utf-text */
875     *utf_ptr = (char *) (utf + len);
876     return unicode_char;
877 }
878
879
880 /********************* function: is_valid_utf ********************************
881
882     return true if the given string is a valid UTF-8 string
883
884     utf_ptr...points to first character
885     end_pos...points after last character
886
887 ******************************************************************************/
888
/* Smallest code point which needs the given number of continuation bytes;
   the index is the number of bytes following the leading byte. */

static unsigned long min_codepoint[6] = {
	0,                                  /* no continuation byte               */
	1L << 7,                            /* 1 continuation byte                */
	1L << 11,                           /* 2 continuation bytes               */
	1L << 16,                           /* 3 continuation bytes               */
	1L << 21,                           /* 4 continuation bytes               */
	1L << 26                            /* 5 continuation bytes               */
};
890
891 bool
892 is_valid_utf(char *utf_ptr,char *end_pos)
893 {
894         int bytes;
895         int len,i;
896         char c;
897         unsigned long v;
898
899         if (end_pos < utf_ptr) return false;
900         bytes = end_pos - utf_ptr;
901         while (bytes--) {
902                 c = *utf_ptr++;
903                 /*dolog("%c %02x",c,c);*/
904                 if (!c) return false;                     /* 0x00 is not allowed */
905                 if ((c & 0x80) == 0) continue;            /* ASCII */
906
907                 if      ((c & 0xe0) == 0xc0) len = 1;     /* 110x xxxx */
908                 else if ((c & 0xf0) == 0xe0) len = 2;     /* 1110 xxxx */
909                 else if ((c & 0xf8) == 0xf0) len = 3;     /* 1111 0xxx */
910                 else if ((c & 0xfc) == 0xf8) len = 4;     /* 1111 10xx */
911                 else if ((c & 0xfe) == 0xfc) len = 5;     /* 1111 110x */
912                 else return false;                        /* invalid leading byte */
913
914                 if (len > 2) return false;                /* Java limitation */
915
916                 v = (unsigned long)c & (0x3f >> len);
917                 
918                 if ((bytes -= len) < 0) return false;     /* missing bytes */
919
920                 for (i = len; i--; ) {
921                         c = *utf_ptr++;
922                         /*dolog("    %c %02x",c,c);*/
923                         if ((c & 0xc0) != 0x80)               /* 10xx xxxx */
924                                 return false;
925                         v = (v<<6) | (c & 0x3f);
926                 }
927
928                 /*              dolog("v=%d",v);*/
929
930                 if (v == 0) {
931                         if (len != 1) return false;           /* Java special */
932                 }
933                 else {
934                         /* Sun Java seems to allow overlong UTF-8 encodings */
935                         
936                         if (v < min_codepoint[len]) { /* overlong UTF-8 */
937                                 if (!opt_liberalutf)
938                                         fprintf(stderr,"WARNING: Overlong UTF-8 sequence found.\n");
939                                 /* XXX change this to panic? */
940                         }
941                 }
942
943                 /* surrogates in UTF-8 seem to be allowed in Java classfiles */
944                 /* if (v >= 0xd800 && v <= 0xdfff) return false; */ /* surrogates */
945
946                 /* even these seem to be allowed */
947                 /* if (v == 0xfffe || v == 0xffff) return false; */ /* invalid codepoints */
948         }
949
950         return true;
951 }
952  
/********************* function: is_valid_name *******************************

    return true if the given string may be used as a class/field/method name.
    (Currently this only disallows empty strings, control characters and the
    two-byte encoding of zero, 0xC0 0x80.)

    NOTE: The string is assumed to have passed is_valid_utf!

    utf_ptr...points to first character
    end_pos...points after last character

    No byte at or behind end_pos is ever read, even if the string ends in a
    lone 0xC0 byte.

******************************************************************************/

bool
is_valid_name(char *utf_ptr,char *end_pos)
{
	if (end_pos <= utf_ptr) return false; /* disallow empty names */

	while (utf_ptr < end_pos) {
		unsigned char c = *utf_ptr++;

		if (c < 0x20) return false; /* disallow control characters */

		/* disallow zero; utf_ptr already points at the following byte, */
		/* so only look at it when it still belongs to the string       */
		if (c == 0xc0 && utf_ptr < end_pos && (unsigned char) *utf_ptr == 0x80)
			return false;
	}
	return true;
}
978
979 bool
980 is_valid_name_utf(utf *u)
981 {
982         return is_valid_name(u->text,utf_end(u));
983 }
984
985 /******************** Function: class_new **************************************
986
987     searches for the class with the specified name in the classes hashtable,
988     if there is no such class a new classinfo structure is created and inserted
989     into the list of classes to be loaded
990
991 *******************************************************************************/
992
993 classinfo *class_new_intern(utf *classname)
994 {
995         classinfo *c;     /* hashtable element */
996         u4 key;           /* hashkey computed from classname */
997         u4 slot;          /* slot in hashtable */
998         u2 i;
999
1000         key  = utf_hashkey(classname->text, classname->blength);
1001         slot = key & (class_hash.size - 1);
1002         c    = class_hash.ptr[slot];
1003
1004         /* search external hash chain for the class */
1005         while (c) {
1006                 if (c->name->blength == classname->blength) {
1007                         for (i = 0; i < classname->blength; i++)
1008                                 if (classname->text[i] != c->name->text[i]) goto nomatch;
1009                                                 
1010                         /* class found in hashtable */
1011                         return c;
1012                 }
1013                         
1014         nomatch:
1015                 c = c->hashlink; /* next element in external chain */
1016         }
1017
1018         /* location in hashtable found, create new classinfo structure */
1019
1020 #if defined(STATISTICS)
1021         if (opt_stat)
1022                 count_class_infos += sizeof(classinfo);
1023 #endif
1024
1025         if (initverbose) {
1026                 char logtext[MAXLOGTEXT];
1027                 sprintf(logtext, "Creating class: ");
1028                 utf_sprint_classname(logtext + strlen(logtext), classname);
1029                 log_text(logtext);
1030         }
1031
1032         c = GCNEW(classinfo, 1); /*JOWENN: NEW*/
1033         /*c=NEW(classinfo);*/
1034         c->vmClass = 0;
1035         c->flags = 0;
1036         c->name = classname;
1037         c->packagename = NULL;
1038         c->cpcount = 0;
1039         c->cptags = NULL;
1040         c->cpinfos = NULL;
1041         c->super = NULL;
1042         c->sub = NULL;
1043         c->nextsub = NULL;
1044         c->interfacescount = 0;
1045         c->interfaces = NULL;
1046         c->fieldscount = 0;
1047         c->fields = NULL;
1048         c->methodscount = 0;
1049         c->methods = NULL;
1050         c->linked = false;
1051         c->loaded = false;
1052         c->index = 0;
1053         c->instancesize = 0;
1054         c->header.vftbl = NULL;
1055         c->innerclasscount = 0;
1056         c->innerclass = NULL;
1057         c->vftbl = NULL;
1058         c->initialized = false;
1059         c->initializing = false;
1060         c->classvftbl = false;
1061     c->classUsed = 0;
1062     c->impldBy = NULL;
1063         c->classloader = NULL;
1064         c->sourcefile = NULL;
1065         
1066         /* insert class into the hashtable */
1067         c->hashlink = class_hash.ptr[slot];
1068         class_hash.ptr[slot] = c;
1069
1070         /* update number of hashtable-entries */
1071         class_hash.entries++;
1072
1073         if (class_hash.entries > (class_hash.size * 2)) {
1074
1075                 /* reorganization of hashtable, average length of 
1076                    the external chains is approx. 2                */  
1077
1078                 u4 i;
1079                 classinfo *c;
1080                 hashtable newhash;  /* the new hashtable */
1081
1082                 /* create new hashtable, double the size */
1083                 init_hashtable(&newhash, class_hash.size * 2);
1084                 newhash.entries = class_hash.entries;
1085
1086                 /* transfer elements to new hashtable */
1087                 for (i = 0; i < class_hash.size; i++) {
1088                         c = (classinfo *) class_hash.ptr[i];
1089                         while (c) {
1090                                 classinfo *nextc = c->hashlink;
1091                                 u4 slot = (utf_hashkey(c->name->text, c->name->blength)) & (newhash.size - 1);
1092                                                 
1093                                 c->hashlink = newhash.ptr[slot];
1094                                 newhash.ptr[slot] = c;
1095
1096                                 c = nextc;
1097                         }
1098                 }
1099         
1100                 /* dispose old table */ 
1101                 MFREE(class_hash.ptr, void*, class_hash.size);
1102                 class_hash = newhash;
1103         }
1104
1105     /* Array classes need further initialization. */
1106     if (c->name->text[0] == '[') {
1107                 /* Array classes are not loaded from classfiles. */
1108                 c->loaded = true;
1109         class_new_array(c);
1110                 c->packagename = array_packagename;
1111
1112         } else {
1113                 /* Find the package name */
1114                 /* Classes in the unnamed package keep packagename == NULL. */
1115                 char *p = utf_end(c->name) - 1;
1116                 char *start = c->name->text;
1117                 for (;p > start; --p) {
1118                         if (*p == '/') {
1119                                 c->packagename = utf_new (start, p - start);
1120                                 break;
1121                         }
1122                 }
1123         }
1124 #if defined(USE_THREADS) && defined(NATIVE_THREADS)
1125         initObjectLock(&c->header);
1126 #endif
1127
1128         return c;
1129 }
1130
1131
1132 classinfo *class_new(utf *classname)
1133 {
1134     classinfo *c;
1135
1136 #if defined(USE_THREADS) && defined(NATIVE_THREADS)
1137     tables_lock();
1138 #endif
1139
1140     c = class_new_intern(classname);
1141
1142         /* we support eager class loading and linking on demand */
1143
1144         if (opt_eager) {
1145                 classinfo *tc;
1146                 classinfo *tmp;
1147
1148                 list_init(&unlinkedclasses, OFFSET(classinfo, listnode));
1149
1150                 if (!c->loaded) {
1151                         if (!class_load(c)) {
1152 #if defined(USE_THREADS) && defined(NATIVE_THREADS)
1153                                 tables_unlock();
1154 #endif
1155                                 return c;
1156                         }
1157                 }
1158
1159                 /* link all referenced classes */
1160
1161                 tc = list_first(&unlinkedclasses);
1162
1163                 while (tc) {
1164                         /* skip the current loaded/linked class */
1165                         if (tc != c) {
1166                                 if (!class_link(tc)) {
1167 #if defined(USE_THREADS) && defined(NATIVE_THREADS)
1168                                         tables_unlock();
1169 #endif
1170                                         return c;
1171                                 }
1172                         }
1173
1174                         /* we need a tmp variable here, because list_remove sets prev and
1175                            next to NULL */
1176                         tmp = list_next(&unlinkedclasses, tc);
1177                         list_remove(&unlinkedclasses, tc);
1178                         tc = tmp;
1179                 }
1180
1181                 if (!c->linked) {
1182                         if (!class_link(c)) {
1183 #if defined(USE_THREADS) && defined(NATIVE_THREADS)
1184                                 tables_unlock();
1185 #endif
1186                                 return c;
1187                         }
1188                 }
1189         }
1190
1191 #if defined(USE_THREADS) && defined(NATIVE_THREADS)
1192     tables_unlock();
1193 #endif
1194
1195     return c;
1196 }
1197
1198
1199 /******************** Function: class_get **************************************
1200
1201     searches for the class with the specified name in the classes hashtable
1202     if there is no such class NULL is returned
1203
1204 *******************************************************************************/
1205
1206 classinfo *class_get(utf *classname)
1207 {
1208         classinfo *c;  /* hashtable element */ 
1209         u4 key;        /* hashkey computed from classname */   
1210         u4 slot;       /* slot in hashtable */
1211         u2 i;  
1212
1213         key  = utf_hashkey(classname->text, classname->blength);
1214         slot = key & (class_hash.size-1);
1215         c    = class_hash.ptr[slot];
1216
1217         /* search external hash-chain */
1218         while (c) {
1219                 if (c->name->blength == classname->blength) {
1220                         /* compare classnames */
1221                         for (i = 0; i < classname->blength; i++) 
1222                                 if (classname->text[i] != c->name->text[i])
1223                                         goto nomatch;
1224
1225                         /* class found in hashtable */                          
1226                         return c;
1227                 }
1228                         
1229         nomatch:
1230                 c = c->hashlink;
1231         }
1232
1233         /* class not found */
1234         return NULL;
1235 }
1236
1237
1238 /* class_remove ****************************************************************
1239
1240    removes the class entry wth the specified name in the classes hashtable,
1241    furthermore the class' resources are freed
1242    if there is no such class false is returned
1243
1244 *******************************************************************************/
1245
1246 bool class_remove(classinfo *c)
1247 {
1248         classinfo *tc;  /* hashtable element */
1249         classinfo *pc;
1250         u4 key;         /* hashkey computed from classname */   
1251         u4 slot;        /* slot in hashtable */
1252         u2 i;  
1253
1254         key  = utf_hashkey(c->name->text, c->name->blength);
1255         slot = key & (class_hash.size - 1);
1256         tc   = class_hash.ptr[slot];
1257         pc   = NULL;
1258
1259         /* search external hash-chain */
1260         while (tc) {
1261                 if (tc->name->blength == c->name->blength) {
1262                         
1263                         /* compare classnames */
1264                         for (i = 0; i < c->name->blength; i++)
1265                                 if (tc->name->text[i] != c->name->text[i])
1266                                         goto nomatch;
1267
1268                         /* class found in hashtable */
1269                         if (!pc) {
1270                                 class_hash.ptr[slot] = tc->hashlink;
1271
1272                         } else {
1273                                 pc->hashlink = tc->hashlink;
1274                         }
1275
1276                         class_free(tc);
1277
1278                         return true;
1279                 }
1280                         
1281         nomatch:
1282                 pc = tc;
1283                 tc = tc->hashlink;
1284         }
1285
1286         /* class not found */
1287         return false;
1288 }
1289
1290
1291 /***************** Function: class_array_of ***********************************
1292
1293     Returns an array class with the given component class.
1294     The array class is dynamically created if neccessary.
1295
1296 *******************************************************************************/
1297
1298 classinfo *class_array_of(classinfo *component)
1299 {
1300     int namelen;
1301     char *namebuf;
1302         classinfo *c;
1303
1304     /* Assemble the array class name */
1305     namelen = component->name->blength;
1306     
1307     if (component->name->text[0] == '[') {
1308         /* the component is itself an array */
1309         namebuf = DMNEW(char, namelen + 1);
1310         namebuf[0] = '[';
1311         memcpy(namebuf + 1, component->name->text, namelen);
1312         namelen++;
1313
1314     } else {
1315         /* the component is a non-array class */
1316         namebuf = DMNEW(char, namelen + 3);
1317         namebuf[0] = '[';
1318         namebuf[1] = 'L';
1319         memcpy(namebuf + 2, component->name->text, namelen);
1320         namebuf[2 + namelen] = ';';
1321         namelen += 3;
1322     }
1323
1324         /* load this class ;-) and link it */
1325         c = class_new(utf_new(namebuf, namelen));
1326         c->loaded = 1;
1327         class_link(c);
1328
1329     return c;
1330 }
1331
1332 /*************** Function: class_multiarray_of ********************************
1333
1334     Returns an array class with the given dimension and element class.
1335     The array class is dynamically created if neccessary.
1336
1337 *******************************************************************************/
1338
1339 classinfo *class_multiarray_of(int dim, classinfo *element)
1340 {
1341     int namelen;
1342     char *namebuf;
1343
1344         if (dim < 1)
1345                 panic("Invalid array dimension requested");
1346
1347     /* Assemble the array class name */
1348     namelen = element->name->blength;
1349     
1350     if (element->name->text[0] == '[') {
1351         /* the element is itself an array */
1352         namebuf = DMNEW(char, namelen + dim);
1353         memcpy(namebuf + dim, element->name->text, namelen);
1354         namelen += dim;
1355     }
1356     else {
1357         /* the element is a non-array class */
1358         namebuf = DMNEW(char, namelen + 2 + dim);
1359         namebuf[dim] = 'L';
1360         memcpy(namebuf + dim + 1, element->name->text, namelen);
1361         namelen += (2 + dim);
1362         namebuf[namelen - 1] = ';';
1363     }
1364         memset(namebuf, '[', dim);
1365
1366     return class_new(utf_new(namebuf, namelen));
1367 }
1368
1369 /************************** function: utf_strlen ******************************
1370
1371     determine number of unicode characters in the utf string
1372
1373 *******************************************************************************/
1374
1375 u4 utf_strlen(utf *u) 
1376 {
1377     char *endpos;                   /* points behind utf string       */
1378     char *utf_ptr;                  /* current position in utf text   */
1379     u4 len = 0;                     /* number of unicode characters   */
1380
1381         if (!u) {
1382                 *exceptionptr = new_nullpointerexception();
1383                 return 0;
1384         }
1385
1386         endpos = utf_end(u);
1387         utf_ptr = u->text;
1388
1389     while (utf_ptr < endpos) {
1390                 len++;
1391                 /* next unicode character */
1392                 utf_nextu2(&utf_ptr);
1393     }
1394
1395     if (utf_ptr != endpos)
1396         /* string ended abruptly */
1397                 panic("illegal utf string"); 
1398
1399     return len;
1400 }
1401
1402
1403 /*
1404  * These are local overrides for various environment variables in Emacs.
1405  * Please do not remove this and leave it at the end of the file, where
1406  * Emacs will automagically detect them.
1407  * ---------------------------------------------------------------------
1408  * Local variables:
1409  * mode: c
1410  * indent-tabs-mode: t
1411  * c-basic-offset: 4
1412  * tab-width: 4
1413  * End:
1414  */