Handle null pointers in utf_display, utf_display_classname, utf_sprint and
[cacao.git] / src / vm / tables.c
1 /* tables.c - 
2
3    Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003
4    R. Grafl, A. Krall, C. Kruegel, C. Oates, R. Obermaisser,
5    M. Probst, S. Ring, E. Steiner, C. Thalinger, D. Thuernbeck,
6    P. Tomsich, J. Wenninger
7
8    This file is part of CACAO.
9
10    This program is free software; you can redistribute it and/or
11    modify it under the terms of the GNU General Public License as
12    published by the Free Software Foundation; either version 2, or (at
13    your option) any later version.
14
15    This program is distributed in the hope that it will be useful, but
16    WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18    General Public License for more details.
19
20    You should have received a copy of the GNU General Public License
21    along with this program; if not, write to the Free Software
22    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
23    02111-1307, USA.
24
25    Contact: cacao@complang.tuwien.ac.at
26
27    Authors: Reinhard Grafl
28
29    Changes: Mark Probst
30             Andreas Krall
31
32    Contains support functions for:
33        - Reading of Java class files
34        - Unicode symbols
35        - the heap
36        - additional support functions
37
38    $Id: tables.c 1482 2004-11-11 14:39:13Z twisti $
39
40 */
41
42 #include "global.h"
43
44 #include <string.h>
45 #include <stdlib.h>
46 #include <assert.h>
47 #include <sys/types.h>
48 #include <sys/mman.h>
49 #include <unistd.h>
50 #include "builtin.h"
51 #include "exceptions.h"
52 #include "types.h"
53 #include "native.h"
54 #include "options.h"
55 #include "tables.h"
56 #include "loader.h"
57 #include "asmpart.h"
58 #include "statistics.h"
59 #include "threads/thread.h"
60 #include "threads/locks.h"
61 #include "toolbox/logging.h"
62 #include "toolbox/memory.h"
63
64
65 hashtable utf_hash;     /* hashtable for utf8-symbols */
66 hashtable string_hash;  /* hashtable for javastrings  */
67 hashtable class_hash;   /* hashtable for classes      */
68
69 list unlinkedclasses;   /* this is only used for eager class loading          */
70
71
72 /******************************************************************************
73  *********************** hashtable functions **********************************
74  ******************************************************************************/
75
76 /* hashsize must be power of 2 */
77
78 #define UTF_HASHSTART   16384   /* initial size of utf-hash */    
79 #define HASHSTART        2048   /* initial size of javastring and class-hash */
80
81
82 /******************** function: init_hashtable ******************************
83
84     Initializes a hashtable structure and allocates memory.
85     The parameter size specifies the initial size of the hashtable.
86         
87 *****************************************************************************/
88
89 void init_hashtable(hashtable *hash, u4 size)
90 {
91         u4 i;
92
93         hash->entries = 0;
94         hash->size    = size;
95         hash->ptr     = MNEW(void*, size);
96
97         /* clear table */
98         for (i = 0; i < size; i++) hash->ptr[i] = NULL;
99 }
100
101
102 /*********************** function: tables_init  *****************************
103
104     creates hashtables for symboltables 
105         (called once at startup)                         
106         
107 *****************************************************************************/
108
109 void tables_init()
110 {
111         init_hashtable(&utf_hash,    UTF_HASHSTART);  /* hashtable for utf8-symbols */
112         init_hashtable(&string_hash, HASHSTART);      /* hashtable for javastrings */
113         init_hashtable(&class_hash,  HASHSTART);      /* hashtable for classes */ 
114
115 /*      if (opt_eager) */
116 /*              list_init(&unlinkedclasses, OFFSET(classinfo, listnode)); */
117
118 #if defined(STATISTICS)
119         if (opt_stat)
120                 count_utf_len += sizeof(utf*) * utf_hash.size;
121 #endif
122 }
123
124
125 /********************** function: tables_close ******************************
126
127         free memory for hashtables                    
128         
129 *****************************************************************************/
130
131 void tables_close()
132 {
133         utf *u = NULL;
134         literalstring *s;
135         u4 i;
136         
137         /* dispose utf symbols */
138         for (i = 0; i < utf_hash.size; i++) {
139                 u = utf_hash.ptr[i];
140                 while (u) {
141                         /* process elements in external hash chain */
142                         utf *nextu = u->hashlink;
143                         MFREE(u->text, u1, u->blength);
144                         FREE(u, utf);
145                         u = nextu;
146                 }       
147         }
148
149         /* dispose javastrings */
150         for (i = 0; i < string_hash.size; i++) {
151                 s = string_hash.ptr[i];
152                 while (u) {
153                         /* process elements in external hash chain */
154                         literalstring *nexts = s->hashlink;
155                         literalstring_free(s->string);
156                         FREE(s, literalstring);
157                         s = nexts;
158                 }       
159         }
160
161         /* dispose hashtable structures */
162         MFREE(utf_hash.ptr,    void*, utf_hash.size);
163         MFREE(string_hash.ptr, void*, string_hash.size);
164         MFREE(class_hash.ptr,  void*, class_hash.size);
165 }
166
167
168 /********************* function: utf_display *********************************
169
170         write utf symbol to stdout (debugging purposes)
171
172 *******************************************************************************/
173
174 void utf_display(utf *u)
175 {
176     char *endpos;                       /* points behind utf string           */
177     char *utf_ptr;                      /* current position in utf text       */
178
179         if (!u) {
180                 printf("NULL");
181                 fflush(stdout);
182                 return;
183         }
184
185     endpos = utf_end(u);
186     utf_ptr = u->text;
187
188     while (utf_ptr < endpos) {
189                 /* read next unicode character */                
190                 u2 c = utf_nextu2(&utf_ptr);
191                 if (c >= 32 && c <= 127) printf("%c", c);
192                 else printf("?");
193         }
194
195         fflush(stdout);
196 }
197
198
199 /* utf_display_classname *******************************************************
200
201    write utf symbol to stdout (debugging purposes)
202
203 *******************************************************************************/
204
205 void utf_display_classname(utf *u)
206 {
207     char *endpos;                       /* points behind utf string           */
208     char *utf_ptr;                      /* current position in utf text       */
209
210         if (!u) {
211                 printf("NULL");
212                 fflush(stdout);
213                 return;
214         }
215
216     endpos = utf_end(u);
217     utf_ptr = u->text;
218
219     while (utf_ptr < endpos) {
220                 /* read next unicode character */                
221                 u2 c = utf_nextu2(&utf_ptr);
222                 if (c == '/') c = '.';
223                 if (c >= 32 && c <= 127) printf("%c", c);
224                 else printf("?");
225         }
226
227         fflush(stdout);
228 }
229
230
231 /************************* function: log_utf *********************************
232
233         log utf symbol
234
235 ******************************************************************************/
236
237 void log_utf(utf *u)
238 {
239         char buf[MAXLOGTEXT];
240         utf_sprint(buf, u);
241         dolog("%s", buf);
242 }
243
244
245 /********************** function: log_plain_utf ******************************
246
247         log utf symbol (without printing "LOG: " and newline)
248
249 ******************************************************************************/
250
251 void log_plain_utf(utf *u)
252 {
253         char buf[MAXLOGTEXT];
254         utf_sprint(buf, u);
255         dolog_plain("%s", buf);
256 }
257
258
259 /* utf_sprint ******************************************************************
260         
261    write utf symbol into c-string (debugging purposes)
262
263 *******************************************************************************/
264
265 void utf_sprint(char *buffer, utf *u)
266 {
267     char *endpos;                       /* points behind utf string           */
268     char *utf_ptr;                      /* current position in utf text       */
269     u2 pos = 0;                         /* position in c-string               */
270
271         if (!u) {
272                 memcpy(buffer, "NULL", 5);      /* 4 chars + terminating \0           */
273                 return;
274         }
275
276     endpos = utf_end(u);
277     utf_ptr = u->text;
278
279     while (utf_ptr < endpos) 
280                 /* copy next unicode character */       
281                 buffer[pos++] = utf_nextu2(&utf_ptr);
282
283     /* terminate string */
284     buffer[pos] = '\0';
285 }
286
287
288 /* utf_sprint_classname ********************************************************
289         
290    write utf symbol into c-string (debugging purposes)
291
292 *******************************************************************************/
293
294 void utf_sprint_classname(char *buffer, utf *u)
295 {
296     char *endpos;                       /* points behind utf string           */
297     char *utf_ptr;                      /* current position in utf text       */
298     u2 pos = 0;                         /* position in c-string               */
299
300         if (!u) {
301                 memcpy(buffer, "NULL", 5);      /* 4 chars + terminating \0           */
302                 return;
303         }
304
305     endpos = utf_end(u);
306     utf_ptr = u->text;
307
308     while (utf_ptr < endpos) {
309                 /* copy next unicode character */       
310                 u2 c = utf_nextu2(&utf_ptr);
311                 if (c == '/') c = '.';
312                 buffer[pos++] = c;
313         }
314
315     /* terminate string */
316     buffer[pos] = '\0';
317 }
318
319
320 /********************* Funktion: utf_fprint **********************************
321         
322     write utf symbol into file          
323
324 ******************************************************************************/
325
326 void utf_fprint(FILE *file, utf *u)
327 {
328     char *endpos  = utf_end(u);  /* points behind utf string       */
329     char *utf_ptr = u->text;     /* current position in utf text   */ 
330
331     if (!u)
332                 return;
333
334     while (utf_ptr < endpos) { 
335                 /* read next unicode character */                
336                 u2 c = utf_nextu2(&utf_ptr);                            
337
338                 if (c >= 32 && c <= 127) fprintf(file, "%c", c);
339                 else fprintf(file, "?");
340         }
341 }
342
343
344 /********************* Funktion: utf_fprint **********************************
345         
346     write utf symbol into file          
347
348 ******************************************************************************/
349
350 void utf_fprint_classname(FILE *file, utf *u)
351 {
352     char *endpos  = utf_end(u);  /* points behind utf string       */
353     char *utf_ptr = u->text;     /* current position in utf text   */ 
354
355     if (!u)
356                 return;
357
358     while (utf_ptr < endpos) { 
359                 /* read next unicode character */                
360                 u2 c = utf_nextu2(&utf_ptr);                            
361                 if (c == '/') c = '.';
362
363                 if (c >= 32 && c <= 127) fprintf(file, "%c", c);
364                 else fprintf(file, "?");
365         }
366 }
367
368
369 /****************** internal function: utf_hashkey ***************************
370
371         The hashkey is computed from the utf-text by using up to 8 characters.
372         For utf-symbols longer than 15 characters 3 characters are taken from
373         the beginning and the end, 2 characters are taken from the middle.
374
375 ******************************************************************************/ 
376
377 #define nbs(val) ((u4) *(++text) << val) /* get next byte, left shift by val  */
378 #define fbs(val) ((u4) *(  text) << val) /* get first byte, left shift by val */
379
380 static u4 utf_hashkey(char *text, u4 length)
381 {
382         char *start_pos = text; /* pointer to utf text */
383         u4 a;
384
385         switch (length) {               
386                 
387         case 0: /* empty string */
388                 return 0;
389
390         case 1: return fbs(0);
391         case 2: return fbs(0) ^ nbs(3);
392         case 3: return fbs(0) ^ nbs(3) ^ nbs(5);
393         case 4: return fbs(0) ^ nbs(2) ^ nbs(4) ^ nbs(6);
394         case 5: return fbs(0) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(6);
395         case 6: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(5) ^ nbs(6);
396         case 7: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(5) ^ nbs(6);
397         case 8: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(5) ^ nbs(6) ^ nbs(7);
398
399         case 9:
400                 a = fbs(0);
401                 a ^= nbs(1);
402                 a ^= nbs(2);
403                 text++;
404                 return a ^ nbs(4) ^ nbs(5) ^ nbs(6) ^ nbs(7) ^ nbs(8);
405
406         case 10:
407                 a = fbs(0);
408                 text++;
409                 a ^= nbs(2);
410                 a ^= nbs(3);
411                 a ^= nbs(4);
412                 text++;
413                 return a ^ nbs(6) ^ nbs(7) ^ nbs(8) ^ nbs(9);
414
415         case 11:
416                 a = fbs(0);
417                 text++;
418                 a ^= nbs(2);
419                 a ^= nbs(3);
420                 a ^= nbs(4);
421                 text++;
422                 return a ^ nbs(6) ^ nbs(7) ^ nbs(8) ^ nbs(9) ^ nbs(10);
423
424         case 12:
425                 a = fbs(0);
426                 text += 2;
427                 a ^= nbs(2);
428                 a ^= nbs(3);
429                 text++;
430                 a ^= nbs(5);
431                 a ^= nbs(6);
432                 a ^= nbs(7);
433                 text++;
434                 return a ^ nbs(9) ^ nbs(10);
435
436         case 13:
437                 a = fbs(0);
438                 a ^= nbs(1);
439                 text++;
440                 a ^= nbs(3);
441                 a ^= nbs(4);
442                 text += 2;      
443                 a ^= nbs(7);
444                 a ^= nbs(8);
445                 text += 2;
446                 return a ^ nbs(9) ^ nbs(10);
447
448         case 14:
449                 a = fbs(0);
450                 text += 2;      
451                 a ^= nbs(3);
452                 a ^= nbs(4);
453                 text += 2;      
454                 a ^= nbs(7);
455                 a ^= nbs(8);
456                 text += 2;
457                 return a ^ nbs(9) ^ nbs(10) ^ nbs(11);
458
459         case 15:
460                 a = fbs(0);
461                 text += 2;      
462                 a ^= nbs(3);
463                 a ^= nbs(4);
464                 text += 2;      
465                 a ^= nbs(7);
466                 a ^= nbs(8);
467                 text += 2;
468                 return a ^ nbs(9) ^ nbs(10) ^ nbs(11);
469
470         default:  /* 3 characters from beginning */
471                 a = fbs(0);
472                 text += 2;
473                 a ^= nbs(3);
474                 a ^= nbs(4);
475
476                 /* 2 characters from middle */
477                 text = start_pos + (length / 2);
478                 a ^= fbs(5);
479                 text += 2;
480                 a ^= nbs(6);    
481
482                 /* 3 characters from end */
483                 text = start_pos + length - 4;
484
485                 a ^= fbs(7);
486                 text++;
487
488                 return a ^ nbs(10) ^ nbs(11);
489     }
490 }
491
492
493 /*************************** function: utf_hashkey ***************************
494
495     compute the hashkey of a unicode string
496
497 ******************************************************************************/ 
498
499 u4 unicode_hashkey(u2 *text, u2 len)
500 {
501         return utf_hashkey((char*) text, len);
502 }
503
504
505 /************************ function: utf_new **********************************
506
507         Creates a new utf-symbol, the text of the symbol is passed as a 
508         u1-array. The function searches the utf-hashtable for a utf-symbol 
509         with this text. On success the element returned, otherwise a new 
510         hashtable element is created.
511
512         If the number of entries in the hashtable exceeds twice the size of the
513         hashtable slots a reorganization of the hashtable is done and the utf 
514         symbols are copied to a new hashtable with doubled size.
515
516 ******************************************************************************/
517
518 utf *utf_new_intern(char *text, u2 length)
519 {
520         u4 key;            /* hashkey computed from utf-text */
521         u4 slot;           /* slot in hashtable */
522         utf *u;            /* hashtable element */
523         u2 i;
524
525 #ifdef STATISTICS
526         if (opt_stat)
527                 count_utf_new++;
528 #endif
529
530         key  = utf_hashkey(text, length);
531         slot = key & (utf_hash.size-1);
532         u    = utf_hash.ptr[slot];
533
534         /* search external hash chain for utf-symbol */
535         while (u) {
536                 if (u->blength == length) {
537
538                         /* compare text of hashtable elements */
539                         for (i = 0; i < length; i++)
540                                 if (text[i] != u->text[i]) goto nomatch;
541                         
542 #ifdef STATISTICS
543                         if (opt_stat)
544                                 count_utf_new_found++;
545 #endif
546 /*                      log_text("symbol found in hash table");*/
547                         /* symbol found in hashtable */
548 /*                                      utf_display(u);
549                                         {
550                                                 utf blup;
551                                                 blup.blength=length;
552                                                 blup.text=text;
553                                                 utf_display(&blup);
554                                         }*/
555                         return u;
556                 }
557         nomatch:
558                 u = u->hashlink; /* next element in external chain */
559         }
560
561 #ifdef STATISTICS
562         if (opt_stat)
563                 count_utf_len += sizeof(utf) + length;
564 #endif
565
566         /* location in hashtable found, create new utf element */
567         u = NEW(utf);
568         u->blength  = length;               /* length in bytes of utfstring       */
569         u->hashlink = utf_hash.ptr[slot];   /* link in external hashchain         */
570         u->text     = mem_alloc(length + 1);/* allocate memory for utf-text       */
571         memcpy(u->text, text, length);      /* copy utf-text                      */
572         u->text[length] = '\0';
573         utf_hash.ptr[slot] = u;             /* insert symbol into table           */
574
575         utf_hash.entries++;                 /* update number of entries           */
576
577         if (utf_hash.entries > (utf_hash.size * 2)) {
578
579         /* reorganization of hashtable, average length of 
580            the external chains is approx. 2                */  
581
582                 u4 i;
583                 utf *u;
584                 hashtable newhash; /* the new hashtable */
585
586                 /* create new hashtable, double the size */
587                 init_hashtable(&newhash, utf_hash.size * 2);
588                 newhash.entries = utf_hash.entries;
589
590 #ifdef STATISTICS
591                 if (opt_stat)
592                         count_utf_len += sizeof(utf*) * utf_hash.size;
593 #endif
594
595                 /* transfer elements to new hashtable */
596                 for (i = 0; i < utf_hash.size; i++) {
597                         u = (utf *) utf_hash.ptr[i];
598                         while (u) {
599                                 utf *nextu = u->hashlink;
600                                 u4 slot = utf_hashkey(u->text, u->blength) & (newhash.size - 1);
601                                                 
602                                 u->hashlink = (utf *) newhash.ptr[slot];
603                                 newhash.ptr[slot] = u;
604
605                                 /* follow link in external hash chain */
606                                 u = nextu;
607                         }
608                 }
609         
610                 /* dispose old table */
611                 MFREE(utf_hash.ptr, void*, utf_hash.size);
612                 utf_hash = newhash;
613         }
614
615         return u;
616 }
617
618
619 utf *utf_new(char *text, u2 length)
620 {
621     utf *r;
622
623 #if defined(USE_THREADS) && defined(NATIVE_THREADS)
624     tables_lock();
625 #endif
626
627     r = utf_new_intern(text, length);
628
629 #if defined(USE_THREADS) && defined(NATIVE_THREADS)
630     tables_unlock();
631 #endif
632
633     return r;
634 }
635
636
637 /********************* function: utf_new_char ********************************
638
639     creates a new utf symbol, the text for this symbol is passed
640     as a c-string ( = char* )
641
642 ******************************************************************************/
643
644 utf *utf_new_char(char *text)
645 {
646         return utf_new(text, strlen(text));
647 }
648
649
650 /********************* function: utf_new_char ********************************
651
652     creates a new utf symbol, the text for this symbol is passed
653     as a c-string ( = char* )
654     "." characters are going to be replaced by "/". since the above function is
655     used often, this is a separte function, instead of an if
656
657 ******************************************************************************/
658
659 utf *utf_new_char_classname(char *text)
660 {
661         if (strchr(text, '.')) {
662                 char *txt = strdup(text);
663                 char *end = txt + strlen(txt);
664                 char *c;
665                 utf *tmpRes;
666                 for (c = txt; c < end; c++)
667                         if (*c == '.') *c = '/';
668                 tmpRes = utf_new(txt, strlen(txt));
669                 free(txt);
670                 return tmpRes;
671
672         } else
673                 return utf_new(text, strlen(text));
674 }
675
676
677 /************************** Funktion: utf_show ******************************
678
679     writes the utf symbols in the utfhash to stdout and
680     displays the number of external hash chains grouped 
681     according to the chainlength
682     (debugging purposes)
683
684 *****************************************************************************/
685
686 void utf_show()
687 {
688
689 #define CHAIN_LIMIT 20               /* limit for seperated enumeration */
690
691         u4 chain_count[CHAIN_LIMIT]; /* numbers of chains */
692         u4 max_chainlength = 0;      /* maximum length of the chains */
693         u4 sum_chainlength = 0;      /* sum of the chainlengths */
694         u4 beyond_limit = 0;         /* number of utf-symbols in chains with length>=CHAIN_LIMIT-1 */
695         u4 i;
696
697         printf ("UTF-HASH:\n");
698
699         /* show element of utf-hashtable */
700         for (i=0; i<utf_hash.size; i++) {
701                 utf *u = utf_hash.ptr[i];
702                 if (u) {
703                         printf ("SLOT %d: ", (int) i);
704                         while (u) {
705                                 printf ("'");
706                                 utf_display (u);
707                                 printf ("' ");
708                                 u = u->hashlink;
709                         }       
710                         printf ("\n");
711                 }
712                 
713         }
714
715         printf ("UTF-HASH: %d slots for %d entries\n", 
716                         (int) utf_hash.size, (int) utf_hash.entries );
717
718
719         if (utf_hash.entries == 0)
720                 return;
721
722         printf("chains:\n  chainlength    number of chains    %% of utfstrings\n");
723
724         for (i=0;i<CHAIN_LIMIT;i++)
725                 chain_count[i]=0;
726
727         /* count numbers of hashchains according to their length */
728         for (i=0; i<utf_hash.size; i++) {
729                   
730                 utf *u = (utf*) utf_hash.ptr[i];
731                 u4 chain_length = 0;
732
733                 /* determine chainlength */
734                 while (u) {
735                         u = u->hashlink;
736                         chain_length++;
737                 }
738
739                 /* update sum of all chainlengths */
740                 sum_chainlength+=chain_length;
741
742                 /* determine the maximum length of the chains */
743                 if (chain_length>max_chainlength)
744                         max_chainlength = chain_length;
745
746                 /* update number of utf-symbols in chains with length>=CHAIN_LIMIT-1 */
747                 if (chain_length>=CHAIN_LIMIT) {
748                         beyond_limit+=chain_length;
749                         chain_length=CHAIN_LIMIT-1;
750                 }
751
752                 /* update number of hashchains of current length */
753                 chain_count[chain_length]++;
754         }
755
756         /* display results */  
757         for (i=1;i<CHAIN_LIMIT-1;i++) 
758                 printf("       %2d %17d %18.2f%%\n",i,chain_count[i],(((float) chain_count[i]*i*100)/utf_hash.entries));
759           
760         printf("     >=%2d %17d %18.2f%%\n",CHAIN_LIMIT-1,chain_count[CHAIN_LIMIT-1],((float) beyond_limit*100)/utf_hash.entries);
761
762
763         printf("max. chainlength:%5d\n",max_chainlength);
764
765         /* avg. chainlength = sum of chainlengths / number of chains */
766         printf("avg. chainlength:%5.2f\n",(float) sum_chainlength / (utf_hash.size-chain_count[0]));
767 }
768
769 /******************************************************************************
770 *********************** Misc support functions ********************************
771 ******************************************************************************/
772
773
774 /******************** Function: desc_to_type **********************************
775    
776         Determines the corresponding Java base data type for a given type
777         descriptor.
778         
779 ******************************************************************************/
780
781 u2 desc_to_type(utf *descriptor)
782 {
783         char *utf_ptr = descriptor->text;  /* current position in utf text */
784         char logtext[MAXLOGTEXT];
785
786         if (descriptor->blength < 1) panic("Type-Descriptor is empty string");
787         
788         switch (*utf_ptr++) {
789         case 'B': 
790         case 'C':
791         case 'I':
792         case 'S':  
793         case 'Z':  return TYPE_INT;
794         case 'D':  return TYPE_DOUBLE;
795         case 'F':  return TYPE_FLOAT;
796         case 'J':  return TYPE_LONG;
797         case 'L':
798         case '[':  return TYPE_ADDRESS;
799         }
800                         
801         sprintf(logtext, "Invalid Type-Descriptor: ");
802         utf_sprint(logtext+strlen(logtext), descriptor);
803         error("%s",logtext);
804
805         return 0;
806 }
807
808
809 /********************** Function: desc_typesize *******************************
810
811         Calculates the lenght in bytes needed for a data element of the type given
812         by its type descriptor.
813         
814 ******************************************************************************/
815
816 u2 desc_typesize(utf *descriptor)
817 {
818         switch (desc_to_type(descriptor)) {
819         case TYPE_INT:     return 4;
820         case TYPE_LONG:    return 8;
821         case TYPE_FLOAT:   return 4;
822         case TYPE_DOUBLE:  return 8;
823         case TYPE_ADDRESS: return sizeof(voidptr);
824         default:           return 0;
825         }
826 }
827
828
829 /********************** function: utf_nextu2 *********************************
830
831     read the next unicode character from the utf string and
832     increment the utf-string pointer accordingly
833
834 ******************************************************************************/
835
836 u2 utf_nextu2(char **utf_ptr) 
837 {
838     /* uncompressed unicode character */
839     u2 unicode_char = 0;
840     /* current position in utf text */  
841     unsigned char *utf = (unsigned char *) (*utf_ptr);
842     /* bytes representing the unicode character */
843     unsigned char ch1, ch2, ch3;
844     /* number of bytes used to represent the unicode character */
845     int len = 0;
846         
847     switch ((ch1 = utf[0]) >> 4) {
848         default: /* 1 byte */
849                 (*utf_ptr)++;
850                 return (u2) ch1;
851         case 0xC: 
852         case 0xD: /* 2 bytes */
853                 if (((ch2 = utf[1]) & 0xC0) == 0x80) {
854                         unsigned char high = ch1 & 0x1F;
855                         unsigned char low  = ch2 & 0x3F;
856                         unicode_char = (high << 6) + low;
857                         len = 2;
858                 }
859                 break;
860
861         case 0xE: /* 2 or 3 bytes */
862                 if (((ch2 = utf[1]) & 0xC0) == 0x80) {
863                         if (((ch3 = utf[2]) & 0xC0) == 0x80) {
864                                 unsigned char low  = ch3 & 0x3f;
865                                 unsigned char mid  = ch2 & 0x3f;
866                                 unsigned char high = ch1 & 0x0f;
867                                 unicode_char = (((high << 6) + mid) << 6) + low;
868                                 len = 3;
869                         } else
870                                 len = 2;                                           
871                 }
872                 break;
873     }
874
875     /* update position in utf-text */
876     *utf_ptr = (char *) (utf + len);
877     return unicode_char;
878 }
879
880
881 /********************* function: is_valid_utf ********************************
882
883     return true if the given string is a valid UTF-8 string
884
885     utf_ptr...points to first character
886     end_pos...points after last character
887
888 ******************************************************************************/
889
890 static unsigned long min_codepoint[6] = {0,1L<<7,1L<<11,1L<<16,1L<<21,1L<<26};
891
892 bool
893 is_valid_utf(char *utf_ptr,char *end_pos)
894 {
895         int bytes;
896         int len,i;
897         char c;
898         unsigned long v;
899
900         if (end_pos < utf_ptr) return false;
901         bytes = end_pos - utf_ptr;
902         while (bytes--) {
903                 c = *utf_ptr++;
904                 /*dolog("%c %02x",c,c);*/
905                 if (!c) return false;                     /* 0x00 is not allowed */
906                 if ((c & 0x80) == 0) continue;            /* ASCII */
907
908                 if      ((c & 0xe0) == 0xc0) len = 1;     /* 110x xxxx */
909                 else if ((c & 0xf0) == 0xe0) len = 2;     /* 1110 xxxx */
910                 else if ((c & 0xf8) == 0xf0) len = 3;     /* 1111 0xxx */
911                 else if ((c & 0xfc) == 0xf8) len = 4;     /* 1111 10xx */
912                 else if ((c & 0xfe) == 0xfc) len = 5;     /* 1111 110x */
913                 else return false;                        /* invalid leading byte */
914
915                 if (len > 2) return false;                /* Java limitation */
916
917                 v = (unsigned long)c & (0x3f >> len);
918                 
919                 if ((bytes -= len) < 0) return false;     /* missing bytes */
920
921                 for (i = len; i--; ) {
922                         c = *utf_ptr++;
923                         /*dolog("    %c %02x",c,c);*/
924                         if ((c & 0xc0) != 0x80)               /* 10xx xxxx */
925                                 return false;
926                         v = (v<<6) | (c & 0x3f);
927                 }
928
929                 /*              dolog("v=%d",v);*/
930
931                 if (v == 0) {
932                         if (len != 1) return false;           /* Java special */
933                 }
934                 else {
935                         /* Sun Java seems to allow overlong UTF-8 encodings */
936                         
937                         if (v < min_codepoint[len]) { /* overlong UTF-8 */
938                                 if (!opt_liberalutf)
939                                         fprintf(stderr,"WARNING: Overlong UTF-8 sequence found.\n");
940                                 /* XXX change this to panic? */
941                         }
942                 }
943
944                 /* surrogates in UTF-8 seem to be allowed in Java classfiles */
945                 /* if (v >= 0xd800 && v <= 0xdfff) return false; */ /* surrogates */
946
947                 /* even these seem to be allowed */
948                 /* if (v == 0xfffe || v == 0xffff) return false; */ /* invalid codepoints */
949         }
950
951         return true;
952 }
953  
/********************* function: is_valid_name *******************************

    return true if the given string may be used as a class/field/method name.
    (Currently this only disallows empty strings, control characters and
    the two-byte encoding of zero, 0xC0 0x80.)

    NOTE: The string is assumed to have passed is_valid_utf!

    utf_ptr...points to first character
    end_pos...points after last character

    Only bytes inside [utf_ptr, end_pos) are examined; a 0xC0 that is the
    very last byte of the range is not treated as an encoded zero.

******************************************************************************/

bool
is_valid_name(char *utf_ptr,char *end_pos)
{
    if (end_pos <= utf_ptr)
        return false;                         /* disallow empty names */

    while (utf_ptr < end_pos) {
        unsigned char c = (unsigned char) *utf_ptr++;

        if (c < 0x20)
            return false;                     /* disallow control characters */

        /* disallow zero (0xC0 0x80); the second byte is only inspected
           if it still belongs to the name, so nothing at or behind
           end_pos is ever read */
        if (c == 0xc0 && utf_ptr < end_pos && (unsigned char) *utf_ptr == 0x80)
            return false;
    }

    return true;
}
979
980 bool
981 is_valid_name_utf(utf *u)
982 {
983         return is_valid_name(u->text,utf_end(u));
984 }
985
986 /******************** Function: class_new **************************************
987
988     searches for the class with the specified name in the classes hashtable,
989     if there is no such class a new classinfo structure is created and inserted
990     into the list of classes to be loaded
991
992 *******************************************************************************/
993
994 classinfo *class_new_intern(utf *classname)
995 {
996         classinfo *c;     /* hashtable element */
997         u4 key;           /* hashkey computed from classname */
998         u4 slot;          /* slot in hashtable */
999         u2 i;
1000
1001         key  = utf_hashkey(classname->text, classname->blength);
1002         slot = key & (class_hash.size - 1);
1003         c    = class_hash.ptr[slot];
1004
1005         /* search external hash chain for the class */
1006         while (c) {
1007                 if (c->name->blength == classname->blength) {
1008                         for (i = 0; i < classname->blength; i++)
1009                                 if (classname->text[i] != c->name->text[i]) goto nomatch;
1010                                                 
1011                         /* class found in hashtable */
1012                         return c;
1013                 }
1014                         
1015         nomatch:
1016                 c = c->hashlink; /* next element in external chain */
1017         }
1018
1019         /* location in hashtable found, create new classinfo structure */
1020
1021 #if defined(STATISTICS)
1022         if (opt_stat)
1023                 count_class_infos += sizeof(classinfo);
1024 #endif
1025
1026         if (initverbose) {
1027                 char logtext[MAXLOGTEXT];
1028                 sprintf(logtext, "Creating class: ");
1029                 utf_sprint_classname(logtext + strlen(logtext), classname);
1030                 log_text(logtext);
1031         }
1032
1033         c = GCNEW(classinfo, 1); /*JOWENN: NEW*/
1034         /*c=NEW(classinfo);*/
1035         c->vmClass = 0;
1036         c->flags = 0;
1037         c->name = classname;
1038         c->packagename = NULL;
1039         c->cpcount = 0;
1040         c->cptags = NULL;
1041         c->cpinfos = NULL;
1042         c->super = NULL;
1043         c->sub = NULL;
1044         c->nextsub = NULL;
1045         c->interfacescount = 0;
1046         c->interfaces = NULL;
1047         c->fieldscount = 0;
1048         c->fields = NULL;
1049         c->methodscount = 0;
1050         c->methods = NULL;
1051         c->linked = false;
1052         c->loaded = false;
1053         c->index = 0;
1054         c->instancesize = 0;
1055         c->header.vftbl = NULL;
1056         c->innerclasscount = 0;
1057         c->innerclass = NULL;
1058         c->vftbl = NULL;
1059         c->initialized = false;
1060         c->initializing = false;
1061         c->classvftbl = false;
1062     c->classUsed = 0;
1063     c->impldBy = NULL;
1064         c->classloader = NULL;
1065         c->sourcefile = NULL;
1066         
1067         /* insert class into the hashtable */
1068         c->hashlink = class_hash.ptr[slot];
1069         class_hash.ptr[slot] = c;
1070
1071         /* update number of hashtable-entries */
1072         class_hash.entries++;
1073
1074         if (class_hash.entries > (class_hash.size * 2)) {
1075
1076                 /* reorganization of hashtable, average length of 
1077                    the external chains is approx. 2                */  
1078
1079                 u4 i;
1080                 classinfo *c;
1081                 hashtable newhash;  /* the new hashtable */
1082
1083                 /* create new hashtable, double the size */
1084                 init_hashtable(&newhash, class_hash.size * 2);
1085                 newhash.entries = class_hash.entries;
1086
1087                 /* transfer elements to new hashtable */
1088                 for (i = 0; i < class_hash.size; i++) {
1089                         c = (classinfo *) class_hash.ptr[i];
1090                         while (c) {
1091                                 classinfo *nextc = c->hashlink;
1092                                 u4 slot = (utf_hashkey(c->name->text, c->name->blength)) & (newhash.size - 1);
1093                                                 
1094                                 c->hashlink = newhash.ptr[slot];
1095                                 newhash.ptr[slot] = c;
1096
1097                                 c = nextc;
1098                         }
1099                 }
1100         
1101                 /* dispose old table */ 
1102                 MFREE(class_hash.ptr, void*, class_hash.size);
1103                 class_hash = newhash;
1104         }
1105
1106     /* Array classes need further initialization. */
1107     if (c->name->text[0] == '[') {
1108                 /* Array classes are not loaded from classfiles. */
1109                 c->loaded = true;
1110         class_new_array(c);
1111                 c->packagename = array_packagename;
1112
1113         } else {
1114                 /* Find the package name */
1115                 /* Classes in the unnamed package keep packagename == NULL. */
1116                 char *p = utf_end(c->name) - 1;
1117                 char *start = c->name->text;
1118                 for (;p > start; --p) {
1119                         if (*p == '.') {
1120                                 c->packagename = utf_new(start, p - start);
1121                                 break;
1122                         }
1123                 }
1124         }
1125 #if defined(USE_THREADS) && defined(NATIVE_THREADS)
1126         initObjectLock(&c->header);
1127 #endif
1128
1129         return c;
1130 }
1131
1132
1133 classinfo *class_new(utf *classname)
1134 {
1135     classinfo *c;
1136
1137 #if defined(USE_THREADS) && defined(NATIVE_THREADS)
1138     tables_lock();
1139 #endif
1140
1141     c = class_new_intern(classname);
1142
1143         /* we support eager class loading and linking on demand */
1144
1145         if (opt_eager) {
1146                 classinfo *tc;
1147                 classinfo *tmp;
1148
1149                 list_init(&unlinkedclasses, OFFSET(classinfo, listnode));
1150
1151                 if (!c->loaded) {
1152                         if (!class_load(c)) {
1153 #if defined(USE_THREADS) && defined(NATIVE_THREADS)
1154                                 tables_unlock();
1155 #endif
1156                                 return c;
1157                         }
1158                 }
1159
1160                 /* link all referenced classes */
1161
1162                 tc = list_first(&unlinkedclasses);
1163
1164                 while (tc) {
1165                         /* skip the current loaded/linked class */
1166                         if (tc != c) {
1167                                 if (!class_link(tc)) {
1168 #if defined(USE_THREADS) && defined(NATIVE_THREADS)
1169                                         tables_unlock();
1170 #endif
1171                                         return c;
1172                                 }
1173                         }
1174
1175                         /* we need a tmp variable here, because list_remove sets prev and
1176                            next to NULL */
1177                         tmp = list_next(&unlinkedclasses, tc);
1178                         list_remove(&unlinkedclasses, tc);
1179                         tc = tmp;
1180                 }
1181
1182                 if (!c->linked) {
1183                         if (!class_link(c)) {
1184 #if defined(USE_THREADS) && defined(NATIVE_THREADS)
1185                                 tables_unlock();
1186 #endif
1187                                 return c;
1188                         }
1189                 }
1190         }
1191
1192 #if defined(USE_THREADS) && defined(NATIVE_THREADS)
1193     tables_unlock();
1194 #endif
1195
1196     return c;
1197 }
1198
1199
1200 /******************** Function: class_get **************************************
1201
1202     searches for the class with the specified name in the classes hashtable
1203     if there is no such class NULL is returned
1204
1205 *******************************************************************************/
1206
1207 classinfo *class_get(utf *classname)
1208 {
1209         classinfo *c;  /* hashtable element */ 
1210         u4 key;        /* hashkey computed from classname */   
1211         u4 slot;       /* slot in hashtable */
1212         u2 i;  
1213
1214         key  = utf_hashkey(classname->text, classname->blength);
1215         slot = key & (class_hash.size-1);
1216         c    = class_hash.ptr[slot];
1217
1218         /* search external hash-chain */
1219         while (c) {
1220                 if (c->name->blength == classname->blength) {
1221                         /* compare classnames */
1222                         for (i = 0; i < classname->blength; i++) 
1223                                 if (classname->text[i] != c->name->text[i])
1224                                         goto nomatch;
1225
1226                         /* class found in hashtable */                          
1227                         return c;
1228                 }
1229                         
1230         nomatch:
1231                 c = c->hashlink;
1232         }
1233
1234         /* class not found */
1235         return NULL;
1236 }
1237
1238
1239 /* class_remove ****************************************************************
1240
1241    removes the class entry wth the specified name in the classes hashtable,
1242    furthermore the class' resources are freed
1243    if there is no such class false is returned
1244
1245 *******************************************************************************/
1246
1247 bool class_remove(classinfo *c)
1248 {
1249         classinfo *tc;  /* hashtable element */
1250         classinfo *pc;
1251         u4 key;         /* hashkey computed from classname */   
1252         u4 slot;        /* slot in hashtable */
1253         u2 i;  
1254
1255         key  = utf_hashkey(c->name->text, c->name->blength);
1256         slot = key & (class_hash.size - 1);
1257         tc   = class_hash.ptr[slot];
1258         pc   = NULL;
1259
1260         /* search external hash-chain */
1261         while (tc) {
1262                 if (tc->name->blength == c->name->blength) {
1263                         
1264                         /* compare classnames */
1265                         for (i = 0; i < c->name->blength; i++)
1266                                 if (tc->name->text[i] != c->name->text[i])
1267                                         goto nomatch;
1268
1269                         /* class found in hashtable */
1270                         if (!pc) {
1271                                 class_hash.ptr[slot] = tc->hashlink;
1272
1273                         } else {
1274                                 pc->hashlink = tc->hashlink;
1275                         }
1276
1277                         class_free(tc);
1278
1279                         return true;
1280                 }
1281                         
1282         nomatch:
1283                 pc = tc;
1284                 tc = tc->hashlink;
1285         }
1286
1287         /* class not found */
1288         return false;
1289 }
1290
1291
1292 /***************** Function: class_array_of ***********************************
1293
1294     Returns an array class with the given component class.
1295     The array class is dynamically created if neccessary.
1296
1297 *******************************************************************************/
1298
1299 classinfo *class_array_of(classinfo *component)
1300 {
1301     int namelen;
1302     char *namebuf;
1303         classinfo *c;
1304
1305     /* Assemble the array class name */
1306     namelen = component->name->blength;
1307     
1308     if (component->name->text[0] == '[') {
1309         /* the component is itself an array */
1310         namebuf = DMNEW(char, namelen + 1);
1311         namebuf[0] = '[';
1312         memcpy(namebuf + 1, component->name->text, namelen);
1313         namelen++;
1314
1315     } else {
1316         /* the component is a non-array class */
1317         namebuf = DMNEW(char, namelen + 3);
1318         namebuf[0] = '[';
1319         namebuf[1] = 'L';
1320         memcpy(namebuf + 2, component->name->text, namelen);
1321         namebuf[2 + namelen] = ';';
1322         namelen += 3;
1323     }
1324
1325         /* load this class ;-) and link it */
1326         c = class_new(utf_new(namebuf, namelen));
1327         c->loaded = 1;
1328         class_link(c);
1329
1330     return c;
1331 }
1332
1333 /*************** Function: class_multiarray_of ********************************
1334
1335     Returns an array class with the given dimension and element class.
1336     The array class is dynamically created if neccessary.
1337
1338 *******************************************************************************/
1339
1340 classinfo *class_multiarray_of(int dim, classinfo *element)
1341 {
1342     int namelen;
1343     char *namebuf;
1344
1345         if (dim < 1)
1346                 panic("Invalid array dimension requested");
1347
1348     /* Assemble the array class name */
1349     namelen = element->name->blength;
1350     
1351     if (element->name->text[0] == '[') {
1352         /* the element is itself an array */
1353         namebuf = DMNEW(char, namelen + dim);
1354         memcpy(namebuf + dim, element->name->text, namelen);
1355         namelen += dim;
1356     }
1357     else {
1358         /* the element is a non-array class */
1359         namebuf = DMNEW(char, namelen + 2 + dim);
1360         namebuf[dim] = 'L';
1361         memcpy(namebuf + dim + 1, element->name->text, namelen);
1362         namelen += (2 + dim);
1363         namebuf[namelen - 1] = ';';
1364     }
1365         memset(namebuf, '[', dim);
1366
1367     return class_new(utf_new(namebuf, namelen));
1368 }
1369
1370 /************************** function: utf_strlen ******************************
1371
1372     determine number of unicode characters in the utf string
1373
1374 *******************************************************************************/
1375
1376 u4 utf_strlen(utf *u) 
1377 {
1378     char *endpos;                   /* points behind utf string       */
1379     char *utf_ptr;                  /* current position in utf text   */
1380     u4 len = 0;                     /* number of unicode characters   */
1381
1382         if (!u) {
1383                 *exceptionptr = new_nullpointerexception();
1384                 return 0;
1385         }
1386
1387         endpos = utf_end(u);
1388         utf_ptr = u->text;
1389
1390     while (utf_ptr < endpos) {
1391                 len++;
1392                 /* next unicode character */
1393                 utf_nextu2(&utf_ptr);
1394     }
1395
1396     if (utf_ptr != endpos)
1397         /* string ended abruptly */
1398                 panic("illegal utf string"); 
1399
1400     return len;
1401 }
1402
1403
1404 /*
1405  * These are local overrides for various environment variables in Emacs.
1406  * Please do not remove this and leave it at the end of the file, where
1407  * Emacs will automagically detect them.
1408  * ---------------------------------------------------------------------
1409  * Local variables:
1410  * mode: c
1411  * indent-tabs-mode: t
1412  * c-basic-offset: 4
1413  * tab-width: 4
1414  * End:
1415  */