Save.
[cacao.git] / tables.c
1 /* tables.c - 
2
3    Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003
4    R. Grafl, A. Krall, C. Kruegel, C. Oates, R. Obermaisser,
5    M. Probst, S. Ring, E. Steiner, C. Thalinger, D. Thuernbeck,
6    P. Tomsich, J. Wenninger
7
8    This file is part of CACAO.
9
10    This program is free software; you can redistribute it and/or
11    modify it under the terms of the GNU General Public License as
12    published by the Free Software Foundation; either version 2, or (at
13    your option) any later version.
14
15    This program is distributed in the hope that it will be useful, but
16    WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18    General Public License for more details.
19
20    You should have received a copy of the GNU General Public License
21    along with this program; if not, write to the Free Software
22    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
23    02111-1307, USA.
24
25    Contact: cacao@complang.tuwien.ac.at
26
27    Authors: Reinhard Grafl
28
29    Changes: Mark Probst
30             Andreas Krall
31
32    Contains support functions for:
33        - Reading of Java class files
34        - Unicode symbols
35        - the heap
36        - additional support functions
37
38    $Id: tables.c 1372 2004-08-01 21:56:10Z stefan $
39
40 */
41
42 #include "global.h"
43
44 #include <string.h>
45 #include <stdlib.h>
46 #include <assert.h>
47 #include <sys/types.h>
48 #include <sys/mman.h>
49 #include <unistd.h>
50 #include "types.h"
51 #include "options.h"
52 #include "tables.h"
53 #include "loader.h"
54 #include "asmpart.h"
55 #include "statistics.h"
56 #include "threads/thread.h"
57 #include "threads/locks.h"
58 #include "toolbox/logging.h"
59 #include "toolbox/memory.h"
60
61
62 hashtable utf_hash;     /* chained hashtable of interned utf8-symbols (filled by utf_new_intern) */
63 hashtable string_hash;  /* hashtable for javastrings; its chains hold literalstring elements */
64 hashtable class_hash;   /* hashtable for classes      */
65
66 list unlinkedclasses;   /* this is only used for eager class loading; its list_init call in tables_init is currently commented out */
67
68
69 /******************************************************************************
70  *********************** hashtable functions **********************************
71  ******************************************************************************/
72
73 /* hashsize must be a power of 2: utf_new_intern selects a slot with key & (size - 1) */
74
75 #define UTF_HASHSTART   16384   /* initial number of slots of the utf-hash */    
76 #define HASHSTART        2048   /* initial number of slots of the javastring-hash and the class-hash */
77
78
79 /******************** function: init_hashtable ******************************
80
81     Initializes a hashtable structure and allocates memory.
82     The parameter size specifies the initial size of the hashtable.
83         
84 *****************************************************************************/
85
86 void init_hashtable(hashtable *hash, u4 size)
87 {
88         u4 i;
89
90         hash->entries = 0;
91         hash->size    = size;
92         hash->ptr     = MNEW(void*, size);
93
94         /* clear table */
95         for (i = 0; i < size; i++) hash->ptr[i] = NULL;
96 }
97
98
99 /*********************** function: tables_init  *****************************
100
101     creates hashtables for symboltables 
102         (called once at startup)                         
103         
104 *****************************************************************************/
105
106 void tables_init()
107 {
108         init_hashtable(&utf_hash,    UTF_HASHSTART);  /* hashtable for utf8-symbols */
109         init_hashtable(&string_hash, HASHSTART);      /* hashtable for javastrings */
110         init_hashtable(&class_hash,  HASHSTART);      /* hashtable for classes */ 
111
112 /*      if (opt_eager) */
113 /*              list_init(&unlinkedclasses, OFFSET(classinfo, listnode)); */
114
115 #if defined(STATISTICS)
116         if (opt_stat)
117                 count_utf_len += sizeof(utf*) * utf_hash.size;
118 #endif
119 }
120
121
122 /********************** function: tables_close ******************************
123
124         free memory for hashtables                    
125         
126 *****************************************************************************/
127
128 void tables_close()
129 {
130         utf *u = NULL;
131         literalstring *s;
132         u4 i;
133         
134         /* dispose utf symbols */
135         for (i = 0; i < utf_hash.size; i++) {
136                 u = utf_hash.ptr[i];
137                 while (u) {
138                         /* process elements in external hash chain */
139                         utf *nextu = u->hashlink;
140                         MFREE(u->text, u1, u->blength);
141                         FREE(u, utf);
142                         u = nextu;
143                 }       
144         }
145
146         /* dispose javastrings */
147         for (i = 0; i < string_hash.size; i++) {
148                 s = string_hash.ptr[i];
149                 while (u) {
150                         /* process elements in external hash chain */
151                         literalstring *nexts = s->hashlink;
152                         literalstring_free(s->string);
153                         FREE(s, literalstring);
154                         s = nexts;
155                 }       
156         }
157
158         /* dispose hashtable structures */
159         MFREE(utf_hash.ptr,    void*, utf_hash.size);
160         MFREE(string_hash.ptr, void*, string_hash.size);
161         MFREE(class_hash.ptr,  void*, class_hash.size);
162 }
163
164
165 /********************* function: utf_display *********************************
166
167         write utf symbol to stdout (debugging purposes)
168
169 ******************************************************************************/
170
171 void utf_display(utf *u)
172 {
173     char *endpos  = utf_end(u);  /* points behind utf string       */
174     char *utf_ptr = u->text;     /* current position in utf text   */
175
176         if (!u)
177                 return;
178
179     while (utf_ptr < endpos) {
180                 /* read next unicode character */                
181                 u2 c = utf_nextu2(&utf_ptr);
182                 if (c >= 32 && c <= 127) printf("%c", c);
183                 else printf("?");
184         }
185
186         fflush(stdout);
187 }
188
189
190 /********************* function: utf_display *********************************
191
192         write utf symbol to stdout (debugging purposes)
193
194 ******************************************************************************/
195
196 void utf_display_classname(utf *u)
197 {
198     char *endpos  = utf_end(u);  /* points behind utf string       */
199     char *utf_ptr = u->text;     /* current position in utf text   */
200
201         if (!u)
202                 return;
203
204     while (utf_ptr < endpos) {
205                 /* read next unicode character */                
206                 u2 c = utf_nextu2(&utf_ptr);
207                 if (c == '/') c = '.';
208                 if (c >= 32 && c <= 127) printf("%c", c);
209                 else printf("?");
210         }
211
212         fflush(stdout);
213 }
214
215
216 /************************* function: log_utf *********************************
217
218         log utf symbol
219
220 ******************************************************************************/
221
222 void log_utf(utf *u)
223 {
224         char buf[MAXLOGTEXT];
225         utf_sprint(buf, u);
226         dolog("%s", buf);
227 }
228
229
230 /********************** function: log_plain_utf ******************************
231
232         log utf symbol (without printing "LOG: " and newline)
233
234 ******************************************************************************/
235
236 void log_plain_utf(utf *u)
237 {
238         char buf[MAXLOGTEXT];
239         utf_sprint(buf, u);
240         dolog_plain("%s", buf);
241 }
242
243
244 /************************ function: utf_sprint *******************************
245         
246     write utf symbol into c-string (debugging purposes)                                          
247
248 ******************************************************************************/
249
250 void utf_sprint(char *buffer, utf *u)
251 {
252     char *endpos  = utf_end(u);  /* points behind utf string       */
253     char *utf_ptr = u->text;     /* current position in utf text   */ 
254     u2 pos = 0;                  /* position in c-string           */
255
256     while (utf_ptr < endpos) 
257                 /* copy next unicode character */       
258                 buffer[pos++] = utf_nextu2(&utf_ptr);
259
260     /* terminate string */
261     buffer[pos] = '\0';
262 }
263
264
265 /************************ function: utf_sprint_classname *********************
266         
267     write utf symbol into c-string (debugging purposes)
268
269 ******************************************************************************/ 
270
271 void utf_sprint_classname(char *buffer, utf *u)
272 {
273     char *endpos  = utf_end(u);  /* points behind utf string       */
274     char *utf_ptr = u->text;     /* current position in utf text   */ 
275     u2 pos = 0;                  /* position in c-string           */
276
277     while (utf_ptr < endpos) {
278                 /* copy next unicode character */       
279                 u2 c = utf_nextu2(&utf_ptr);
280                 if (c == '/') c = '.';
281                 buffer[pos++] = c;
282         }
283
284     /* terminate string */
285     buffer[pos] = '\0';
286 }
287
288
289 /********************* Funktion: utf_fprint **********************************
290         
291     write utf symbol into file          
292
293 ******************************************************************************/
294
295 void utf_fprint(FILE *file, utf *u)
296 {
297     char *endpos  = utf_end(u);  /* points behind utf string       */
298     char *utf_ptr = u->text;     /* current position in utf text   */ 
299
300     if (!u)
301                 return;
302
303     while (utf_ptr < endpos) { 
304                 /* read next unicode character */                
305                 u2 c = utf_nextu2(&utf_ptr);                            
306
307                 if (c >= 32 && c <= 127) fprintf(file, "%c", c);
308                 else fprintf(file, "?");
309         }
310 }
311
312
313 /********************* Funktion: utf_fprint **********************************
314         
315     write utf symbol into file          
316
317 ******************************************************************************/
318
319 void utf_fprint_classname(FILE *file, utf *u)
320 {
321     char *endpos  = utf_end(u);  /* points behind utf string       */
322     char *utf_ptr = u->text;     /* current position in utf text   */ 
323
324     if (!u)
325                 return;
326
327     while (utf_ptr < endpos) { 
328                 /* read next unicode character */                
329                 u2 c = utf_nextu2(&utf_ptr);                            
330                 if (c == '/') c = '.';
331
332                 if (c >= 32 && c <= 127) fprintf(file, "%c", c);
333                 else fprintf(file, "?");
334         }
335 }
336
337
338 /****************** internal function: utf_hashkey ***************************
339
340         The hashkey is computed from the utf-text by using up to 8 characters.
341         For utf-symbols longer than 15 characters 3 characters are taken from
342         the beginning and the end, 2 characters are taken from the middle.
343
344 ******************************************************************************/ 
345
346 #define nbs(val) ((u4) *(++text) << val) /* get next byte, left shift by val  */
347 #define fbs(val) ((u4) *(  text) << val) /* get first byte, left shift by val */
348
349 static u4 utf_hashkey(char *text, u4 length)
350 {
351         char *start_pos = text; /* pointer to utf text */
352         u4 a;
353
354         switch (length) {               
355                 
356         case 0: /* empty string */
357                 return 0;
358
359         case 1: return fbs(0);
360         case 2: return fbs(0) ^ nbs(3);
361         case 3: return fbs(0) ^ nbs(3) ^ nbs(5);
362         case 4: return fbs(0) ^ nbs(2) ^ nbs(4) ^ nbs(6);
363         case 5: return fbs(0) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(6);
364         case 6: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(5) ^ nbs(6);
365         case 7: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(5) ^ nbs(6);
366         case 8: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(5) ^ nbs(6) ^ nbs(7);
367
368         case 9:
369                 a = fbs(0);
370                 a ^= nbs(1);
371                 a ^= nbs(2);
372                 text++;
373                 return a ^ nbs(4) ^ nbs(5) ^ nbs(6) ^ nbs(7) ^ nbs(8);
374
375         case 10:
376                 a = fbs(0);
377                 text++;
378                 a ^= nbs(2);
379                 a ^= nbs(3);
380                 a ^= nbs(4);
381                 text++;
382                 return a ^ nbs(6) ^ nbs(7) ^ nbs(8) ^ nbs(9);
383
384         case 11:
385                 a = fbs(0);
386                 text++;
387                 a ^= nbs(2);
388                 a ^= nbs(3);
389                 a ^= nbs(4);
390                 text++;
391                 return a ^ nbs(6) ^ nbs(7) ^ nbs(8) ^ nbs(9) ^ nbs(10);
392
393         case 12:
394                 a = fbs(0);
395                 text += 2;
396                 a ^= nbs(2);
397                 a ^= nbs(3);
398                 text++;
399                 a ^= nbs(5);
400                 a ^= nbs(6);
401                 a ^= nbs(7);
402                 text++;
403                 return a ^ nbs(9) ^ nbs(10);
404
405         case 13:
406                 a = fbs(0);
407                 a ^= nbs(1);
408                 text++;
409                 a ^= nbs(3);
410                 a ^= nbs(4);
411                 text += 2;      
412                 a ^= nbs(7);
413                 a ^= nbs(8);
414                 text += 2;
415                 return a ^ nbs(9) ^ nbs(10);
416
417         case 14:
418                 a = fbs(0);
419                 text += 2;      
420                 a ^= nbs(3);
421                 a ^= nbs(4);
422                 text += 2;      
423                 a ^= nbs(7);
424                 a ^= nbs(8);
425                 text += 2;
426                 return a ^ nbs(9) ^ nbs(10) ^ nbs(11);
427
428         case 15:
429                 a = fbs(0);
430                 text += 2;      
431                 a ^= nbs(3);
432                 a ^= nbs(4);
433                 text += 2;      
434                 a ^= nbs(7);
435                 a ^= nbs(8);
436                 text += 2;
437                 return a ^ nbs(9) ^ nbs(10) ^ nbs(11);
438
439         default:  /* 3 characters from beginning */
440                 a = fbs(0);
441                 text += 2;
442                 a ^= nbs(3);
443                 a ^= nbs(4);
444
445                 /* 2 characters from middle */
446                 text = start_pos + (length / 2);
447                 a ^= fbs(5);
448                 text += 2;
449                 a ^= nbs(6);    
450
451                 /* 3 characters from end */
452                 text = start_pos + length - 4;
453
454                 a ^= fbs(7);
455                 text++;
456
457                 return a ^ nbs(10) ^ nbs(11);
458     }
459 }
460
461
462 /*************************** function: utf_hashkey ***************************
463
464     compute the hashkey of a unicode string
465
466 ******************************************************************************/ 
467
468 u4 unicode_hashkey(u2 *text, u2 len)
469 {
470         return utf_hashkey((char*) text, len);
471 }
472
473
474 /************************ function: utf_new **********************************
475
476         Creates a new utf-symbol, the text of the symbol is passed as a 
477         u1-array. The function searches the utf-hashtable for a utf-symbol 
478         with this text. On success the element returned, otherwise a new 
479         hashtable element is created.
480
481         If the number of entries in the hashtable exceeds twice the size of the
482         hashtable slots a reorganization of the hashtable is done and the utf 
483         symbols are copied to a new hashtable with doubled size.
484
485 ******************************************************************************/
486
487 utf *utf_new_intern(char *text, u2 length)
488 {
489         u4 key;            /* hashkey computed from utf-text */
490         u4 slot;           /* slot in hashtable */
491         utf *u;            /* hashtable element */
492         u2 i;
493
494 #ifdef STATISTICS
495         if (opt_stat)
496                 count_utf_new++;
497 #endif
498
499         key  = utf_hashkey(text, length);
500         slot = key & (utf_hash.size-1);
501         u    = utf_hash.ptr[slot];
502
503         /* search external hash chain for utf-symbol */
504         while (u) {
505                 if (u->blength == length) {
506
507                         /* compare text of hashtable elements */
508                         for (i = 0; i < length; i++)
509                                 if (text[i] != u->text[i]) goto nomatch;
510                         
511 #ifdef STATISTICS
512                         if (opt_stat)
513                                 count_utf_new_found++;
514 #endif
515 /*                      log_text("symbol found in hash table");*/
516                         /* symbol found in hashtable */
517 /*                                      utf_display(u);
518                                         {
519                                                 utf blup;
520                                                 blup.blength=length;
521                                                 blup.text=text;
522                                                 utf_display(&blup);
523                                         }*/
524                         return u;
525                 }
526         nomatch:
527                 u = u->hashlink; /* next element in external chain */
528         }
529
530 #ifdef STATISTICS
531         if (opt_stat)
532                 count_utf_len += sizeof(utf) + length;
533 #endif
534
535         /* location in hashtable found, create new utf element */
536         u = NEW(utf);
537         u->blength  = length;               /* length in bytes of utfstring       */
538         u->hashlink = utf_hash.ptr[slot];   /* link in external hashchain         */
539         u->text     = mem_alloc(length + 1);/* allocate memory for utf-text       */
540         memcpy(u->text, text, length);      /* copy utf-text                      */
541         u->text[length] = '\0';
542         utf_hash.ptr[slot] = u;             /* insert symbol into table           */
543
544         utf_hash.entries++;                 /* update number of entries           */
545
546         if (utf_hash.entries > (utf_hash.size * 2)) {
547
548         /* reorganization of hashtable, average length of 
549            the external chains is approx. 2                */  
550
551                 u4 i;
552                 utf *u;
553                 hashtable newhash; /* the new hashtable */
554
555                 /* create new hashtable, double the size */
556                 init_hashtable(&newhash, utf_hash.size * 2);
557                 newhash.entries = utf_hash.entries;
558
559 #ifdef STATISTICS
560                 if (opt_stat)
561                         count_utf_len += sizeof(utf*) * utf_hash.size;
562 #endif
563
564                 /* transfer elements to new hashtable */
565                 for (i = 0; i < utf_hash.size; i++) {
566                         u = (utf *) utf_hash.ptr[i];
567                         while (u) {
568                                 utf *nextu = u->hashlink;
569                                 u4 slot = utf_hashkey(u->text, u->blength) & (newhash.size - 1);
570                                                 
571                                 u->hashlink = (utf *) newhash.ptr[slot];
572                                 newhash.ptr[slot] = u;
573
574                                 /* follow link in external hash chain */
575                                 u = nextu;
576                         }
577                 }
578         
579                 /* dispose old table */
580                 MFREE(utf_hash.ptr, void*, utf_hash.size);
581                 utf_hash = newhash;
582         }
583
584         return u;
585 }
586
587
588 utf *utf_new(char *text, u2 length)
589 {
590     utf *r;
591
592 #if defined(USE_THREADS) && defined(NATIVE_THREADS)
593     tables_lock();
594 #endif
595
596     r = utf_new_intern(text, length);
597
598 #if defined(USE_THREADS) && defined(NATIVE_THREADS)
599     tables_unlock();
600 #endif
601
602     return r;
603 }
604
605
606 /********************* function: utf_new_char ********************************
607
608     creates a new utf symbol, the text for this symbol is passed
609     as a c-string ( = char* )
610
611 ******************************************************************************/
612
613 utf *utf_new_char(char *text)
614 {
615         return utf_new(text, strlen(text));
616 }
617
618
619 /********************* function: utf_new_char ********************************
620
621     creates a new utf symbol, the text for this symbol is passed
622     as a c-string ( = char* )
623     "." characters are going to be replaced by "/". since the above function is
624     used often, this is a separte function, instead of an if
625
626 ******************************************************************************/
627
628 utf *utf_new_char_classname(char *text)
629 {
630         if (strchr(text, '.')) {
631                 char *txt = strdup(text);
632                 char *end = txt + strlen(txt);
633                 char *c;
634                 utf *tmpRes;
635                 for (c = txt; c < end; c++)
636                         if (*c == '.') *c = '/';
637                 tmpRes = utf_new(txt, strlen(txt));
638                 free(txt);
639                 return tmpRes;
640
641         } else
642                 return utf_new(text, strlen(text));
643 }
644
645
646 /************************** Funktion: utf_show ******************************
647
648     writes the utf symbols in the utfhash to stdout and
649     displays the number of external hash chains grouped 
650     according to the chainlength
651     (debugging purposes)
652
653 *****************************************************************************/
654
655 void utf_show()
656 {
657
658 #define CHAIN_LIMIT 20               /* limit for seperated enumeration */
659
660         u4 chain_count[CHAIN_LIMIT]; /* numbers of chains */
661         u4 max_chainlength = 0;      /* maximum length of the chains */
662         u4 sum_chainlength = 0;      /* sum of the chainlengths */
663         u4 beyond_limit = 0;         /* number of utf-symbols in chains with length>=CHAIN_LIMIT-1 */
664         u4 i;
665
666         printf ("UTF-HASH:\n");
667
668         /* show element of utf-hashtable */
669         for (i=0; i<utf_hash.size; i++) {
670                 utf *u = utf_hash.ptr[i];
671                 if (u) {
672                         printf ("SLOT %d: ", (int) i);
673                         while (u) {
674                                 printf ("'");
675                                 utf_display (u);
676                                 printf ("' ");
677                                 u = u->hashlink;
678                         }       
679                         printf ("\n");
680                 }
681                 
682         }
683
684         printf ("UTF-HASH: %d slots for %d entries\n", 
685                         (int) utf_hash.size, (int) utf_hash.entries );
686
687
688         if (utf_hash.entries == 0)
689                 return;
690
691         printf("chains:\n  chainlength    number of chains    %% of utfstrings\n");
692
693         for (i=0;i<CHAIN_LIMIT;i++)
694                 chain_count[i]=0;
695
696         /* count numbers of hashchains according to their length */
697         for (i=0; i<utf_hash.size; i++) {
698                   
699                 utf *u = (utf*) utf_hash.ptr[i];
700                 u4 chain_length = 0;
701
702                 /* determine chainlength */
703                 while (u) {
704                         u = u->hashlink;
705                         chain_length++;
706                 }
707
708                 /* update sum of all chainlengths */
709                 sum_chainlength+=chain_length;
710
711                 /* determine the maximum length of the chains */
712                 if (chain_length>max_chainlength)
713                         max_chainlength = chain_length;
714
715                 /* update number of utf-symbols in chains with length>=CHAIN_LIMIT-1 */
716                 if (chain_length>=CHAIN_LIMIT) {
717                         beyond_limit+=chain_length;
718                         chain_length=CHAIN_LIMIT-1;
719                 }
720
721                 /* update number of hashchains of current length */
722                 chain_count[chain_length]++;
723         }
724
725         /* display results */  
726         for (i=1;i<CHAIN_LIMIT-1;i++) 
727                 printf("       %2d %17d %18.2f%%\n",i,chain_count[i],(((float) chain_count[i]*i*100)/utf_hash.entries));
728           
729         printf("     >=%2d %17d %18.2f%%\n",CHAIN_LIMIT-1,chain_count[CHAIN_LIMIT-1],((float) beyond_limit*100)/utf_hash.entries);
730
731
732         printf("max. chainlength:%5d\n",max_chainlength);
733
734         /* avg. chainlength = sum of chainlengths / number of chains */
735         printf("avg. chainlength:%5.2f\n",(float) sum_chainlength / (utf_hash.size-chain_count[0]));
736 }
737
738 /******************************************************************************
739 *********************** Misc support functions ********************************
740 ******************************************************************************/
741
742
743 /******************** Function: desc_to_type **********************************
744    
745         Determines the corresponding Java base data type for a given type
746         descriptor.
747         
748 ******************************************************************************/
749
750 u2 desc_to_type(utf *descriptor)
751 {
752         char *utf_ptr = descriptor->text;  /* current position in utf text */
753         char logtext[MAXLOGTEXT];
754
755         if (descriptor->blength < 1) panic("Type-Descriptor is empty string");
756         
757         switch (*utf_ptr++) {
758         case 'B': 
759         case 'C':
760         case 'I':
761         case 'S':  
762         case 'Z':  return TYPE_INT;
763         case 'D':  return TYPE_DOUBLE;
764         case 'F':  return TYPE_FLOAT;
765         case 'J':  return TYPE_LONG;
766         case 'L':
767         case '[':  return TYPE_ADDRESS;
768         }
769                         
770         sprintf(logtext, "Invalid Type-Descriptor: ");
771         utf_sprint(logtext+strlen(logtext), descriptor);
772         error("%s",logtext);
773
774         return 0;
775 }
776
777
778 /********************** Function: desc_typesize *******************************
779
780         Calculates the lenght in bytes needed for a data element of the type given
781         by its type descriptor.
782         
783 ******************************************************************************/
784
785 u2 desc_typesize(utf *descriptor)
786 {
787         switch (desc_to_type(descriptor)) {
788         case TYPE_INT:     return 4;
789         case TYPE_LONG:    return 8;
790         case TYPE_FLOAT:   return 4;
791         case TYPE_DOUBLE:  return 8;
792         case TYPE_ADDRESS: return sizeof(voidptr);
793         default:           return 0;
794         }
795 }
796
797
798 /********************** function: utf_nextu2 *********************************
799
800     read the next unicode character from the utf string and
801     increment the utf-string pointer accordingly
802
803 ******************************************************************************/
804
805 u2 utf_nextu2(char **utf_ptr) 
806 {
807     /* uncompressed unicode character */
808     u2 unicode_char = 0;
809     /* current position in utf text */  
810     unsigned char *utf = (unsigned char *) (*utf_ptr);
811     /* bytes representing the unicode character */
812     unsigned char ch1, ch2, ch3;
813     /* number of bytes used to represent the unicode character */
814     int len = 0;
815         
816     switch ((ch1 = utf[0]) >> 4) {
817         default: /* 1 byte */
818                 (*utf_ptr)++;
819                 return (u2) ch1;
820         case 0xC: 
821         case 0xD: /* 2 bytes */
822                 if (((ch2 = utf[1]) & 0xC0) == 0x80) {
823                         unsigned char high = ch1 & 0x1F;
824                         unsigned char low  = ch2 & 0x3F;
825                         unicode_char = (high << 6) + low;
826                         len = 2;
827                 }
828                 break;
829
830         case 0xE: /* 2 or 3 bytes */
831                 if (((ch2 = utf[1]) & 0xC0) == 0x80) {
832                         if (((ch3 = utf[2]) & 0xC0) == 0x80) {
833                                 unsigned char low  = ch3 & 0x3f;
834                                 unsigned char mid  = ch2 & 0x3f;
835                                 unsigned char high = ch1 & 0x0f;
836                                 unicode_char = (((high << 6) + mid) << 6) + low;
837                                 len = 3;
838                         } else
839                                 len = 2;                                           
840                 }
841                 break;
842     }
843
844     /* update position in utf-text */
845     *utf_ptr = (char *) (utf + len);
846     return unicode_char;
847 }
848
849
850 /********************* function: is_valid_utf ********************************
851
852     return true if the given string is a valid UTF-8 string
853
854     utf_ptr...points to first character
855     end_pos...points after last character
856
857 ******************************************************************************/
858
859 static unsigned long min_codepoint[6] = {0,1L<<7,1L<<11,1L<<16,1L<<21,1L<<26};
860
861 bool
862 is_valid_utf(char *utf_ptr,char *end_pos)
863 {
864         int bytes;
865         int len,i;
866         char c;
867         unsigned long v;
868
869         if (end_pos < utf_ptr) return false;
870         bytes = end_pos - utf_ptr;
871         while (bytes--) {
872                 c = *utf_ptr++;
873                 /*dolog("%c %02x",c,c);*/
874                 if (!c) return false;                     /* 0x00 is not allowed */
875                 if ((c & 0x80) == 0) continue;            /* ASCII */
876
877                 if      ((c & 0xe0) == 0xc0) len = 1;     /* 110x xxxx */
878                 else if ((c & 0xf0) == 0xe0) len = 2;     /* 1110 xxxx */
879                 else if ((c & 0xf8) == 0xf0) len = 3;     /* 1111 0xxx */
880                 else if ((c & 0xfc) == 0xf8) len = 4;     /* 1111 10xx */
881                 else if ((c & 0xfe) == 0xfc) len = 5;     /* 1111 110x */
882                 else return false;                        /* invalid leading byte */
883
884                 if (len > 2) return false;                /* Java limitation */
885
886                 v = (unsigned long)c & (0x3f >> len);
887                 
888                 if ((bytes -= len) < 0) return false;     /* missing bytes */
889
890                 for (i = len; i--; ) {
891                         c = *utf_ptr++;
892                         /*dolog("    %c %02x",c,c);*/
893                         if ((c & 0xc0) != 0x80)               /* 10xx xxxx */
894                                 return false;
895                         v = (v<<6) | (c & 0x3f);
896                 }
897
898                 /*              dolog("v=%d",v);*/
899
900                 if (v == 0) {
901                         if (len != 1) return false;           /* Java special */
902                 }
903                 else {
904                         /* Sun Java seems to allow overlong UTF-8 encodings */
905                         
906                         if (v < min_codepoint[len]) { /* overlong UTF-8 */
907                                 if (!opt_liberalutf)
908                                         fprintf(stderr,"WARNING: Overlong UTF-8 sequence found.\n");
909                                 /* XXX change this to panic? */
910                         }
911                 }
912
913                 /* surrogates in UTF-8 seem to be allowed in Java classfiles */
914                 /* if (v >= 0xd800 && v <= 0xdfff) return false; */ /* surrogates */
915
916                 /* even these seem to be allowed */
917                 /* if (v == 0xfffe || v == 0xffff) return false; */ /* invalid codepoints */
918         }
919
920         return true;
921 }
922  
/* is_valid_name ***************************************************************

   Returns true if the given string may be used as a class/field/method
   name. Currently this only disallows:
     - empty strings,
     - control characters (bytes below 0x20),
     - the two-byte encoding of zero (0xc0 0x80).

   NOTE: The string is assumed to have passed is_valid_utf!

   utf_ptr...points to first character
   end_pos...points after last character

*******************************************************************************/

bool
is_valid_name(char *utf_ptr,char *end_pos)
{
	unsigned char b;

	if (end_pos <= utf_ptr)
		return false;                         /* disallow empty names */

	while (utf_ptr < end_pos) {
		b = (unsigned char) *utf_ptr++;

		if (b < 0x20)
			return false;                     /* disallow control characters */

		/* Only look at the following byte if it still belongs to the    */
		/* string, so that a trailing 0xc0 never causes a read at or     */
		/* beyond end_pos.                                               */

		if (b == 0xc0 && utf_ptr < end_pos && (unsigned char) *utf_ptr == 0x80)
			return false;                     /* disallow zero */
	}

	return true;
}
948
949 bool
950 is_valid_name_utf(utf *u)
951 {
952         return is_valid_name(u->text,utf_end(u));
953 }
954
955 /******************** Function: class_new **************************************
956
957     searches for the class with the specified name in the classes hashtable,
958     if there is no such class a new classinfo structure is created and inserted
959     into the list of classes to be loaded
960
961 *******************************************************************************/
962
963 classinfo *class_new_intern(utf *classname)
964 {
965         classinfo *c;     /* hashtable element */
966         u4 key;           /* hashkey computed from classname */
967         u4 slot;          /* slot in hashtable */
968         u2 i;
969
970         key  = utf_hashkey(classname->text, classname->blength);
971         slot = key & (class_hash.size - 1);
972         c    = class_hash.ptr[slot];
973
974         /* search external hash chain for the class */
975         while (c) {
976                 if (c->name->blength == classname->blength) {
977                         for (i = 0; i < classname->blength; i++)
978                                 if (classname->text[i] != c->name->text[i]) goto nomatch;
979                                                 
980                         /* class found in hashtable */
981                         return c;
982                 }
983                         
984         nomatch:
985                 c = c->hashlink; /* next element in external chain */
986         }
987
988         /* location in hashtable found, create new classinfo structure */
989
990 #if defined(STATISTICS)
991         if (opt_stat)
992                 count_class_infos += sizeof(classinfo);
993 #endif
994
995         if (initverbose) {
996                 char logtext[MAXLOGTEXT];
997                 sprintf(logtext, "Creating class: ");
998                 utf_sprint_classname(logtext + strlen(logtext), classname);
999                 log_text(logtext);
1000         }
1001
1002         c = GCNEW(classinfo, 1); /*JOWENN: NEW*/
1003         /*c=NEW(classinfo);*/
1004         c->vmClass = 0;
1005         c->flags = 0;
1006         c->name = classname;
1007         c->packagename = NULL;
1008         c->cpcount = 0;
1009         c->cptags = NULL;
1010         c->cpinfos = NULL;
1011         c->super = NULL;
1012         c->sub = NULL;
1013         c->nextsub = NULL;
1014         c->interfacescount = 0;
1015         c->interfaces = NULL;
1016         c->fieldscount = 0;
1017         c->fields = NULL;
1018         c->methodscount = 0;
1019         c->methods = NULL;
1020         c->linked = false;
1021         c->loaded = false;
1022         c->index = 0;
1023         c->instancesize = 0;
1024         c->header.vftbl = NULL;
1025         c->innerclasscount = 0;
1026         c->innerclass = NULL;
1027         c->vftbl = NULL;
1028         c->initialized = false;
1029         c->initializing = false;
1030         c->classvftbl = false;
1031     c->classUsed = 0;
1032     c->impldBy = NULL;
1033         c->classloader = NULL;
1034         c->sourcefile = NULL;
1035         
1036         /* insert class into the hashtable */
1037         c->hashlink = class_hash.ptr[slot];
1038         class_hash.ptr[slot] = c;
1039
1040         /* update number of hashtable-entries */
1041         class_hash.entries++;
1042
1043         if (class_hash.entries > (class_hash.size * 2)) {
1044
1045                 /* reorganization of hashtable, average length of 
1046                    the external chains is approx. 2                */  
1047
1048                 u4 i;
1049                 classinfo *c;
1050                 hashtable newhash;  /* the new hashtable */
1051
1052                 /* create new hashtable, double the size */
1053                 init_hashtable(&newhash, class_hash.size * 2);
1054                 newhash.entries = class_hash.entries;
1055
1056                 /* transfer elements to new hashtable */
1057                 for (i = 0; i < class_hash.size; i++) {
1058                         c = (classinfo *) class_hash.ptr[i];
1059                         while (c) {
1060                                 classinfo *nextc = c->hashlink;
1061                                 u4 slot = (utf_hashkey(c->name->text, c->name->blength)) & (newhash.size - 1);
1062                                                 
1063                                 c->hashlink = newhash.ptr[slot];
1064                                 newhash.ptr[slot] = c;
1065
1066                                 c = nextc;
1067                         }
1068                 }
1069         
1070                 /* dispose old table */ 
1071                 MFREE(class_hash.ptr, void*, class_hash.size);
1072                 class_hash = newhash;
1073         }
1074
1075     /* Array classes need further initialization. */
1076     if (c->name->text[0] == '[') {
1077                 /* Array classes are not loaded from classfiles. */
1078                 c->loaded = true;
1079         class_new_array(c);
1080                 c->packagename = array_packagename;
1081
1082         } else {
1083                 /* Find the package name */
1084                 /* Classes in the unnamed package keep packagename == NULL. */
1085                 char *p = utf_end(c->name) - 1;
1086                 char *start = c->name->text;
1087                 for (;p > start; --p) {
1088                         if (*p == '.') {
1089                                 c->packagename = utf_new(start, p - start);
1090                                 break;
1091                         }
1092                 }
1093         }
1094 #if defined(USE_THREADS) && defined(NATIVE_THREADS)
1095         initObjectLock(&c->header);
1096 #endif
1097
1098         return c;
1099 }
1100
1101
1102 classinfo *class_new(utf *classname)
1103 {
1104     classinfo *c;
1105
1106 #if defined(USE_THREADS) && defined(NATIVE_THREADS)
1107     tables_lock();
1108 #endif
1109
1110     c = class_new_intern(classname);
1111
1112         /* we support eager class loading and linking on demand */
1113
1114         if (opt_eager) {
1115                 classinfo *tc;
1116                 classinfo *tmp;
1117
1118                 list_init(&unlinkedclasses, OFFSET(classinfo, listnode));
1119
1120                 if (!c->loaded) {
1121                         if (!class_load(c)) {
1122 #if defined(USE_THREADS) && defined(NATIVE_THREADS)
1123                                 tables_unlock();
1124 #endif
1125                                 return c;
1126                         }
1127                 }
1128
1129                 /* link all referenced classes */
1130
1131                 tc = list_first(&unlinkedclasses);
1132
1133                 while (tc) {
1134                         /* skip the current loaded/linked class */
1135                         if (tc != c) {
1136                                 if (!class_link(tc)) {
1137 #if defined(USE_THREADS) && defined(NATIVE_THREADS)
1138                                         tables_unlock();
1139 #endif
1140                                         return c;
1141                                 }
1142                         }
1143
1144                         /* we need a tmp variable here, because list_remove sets prev and
1145                            next to NULL */
1146                         tmp = list_next(&unlinkedclasses, tc);
1147                         list_remove(&unlinkedclasses, tc);
1148                         tc = tmp;
1149                 }
1150
1151                 if (!c->linked) {
1152                         if (!class_link(c)) {
1153 #if defined(USE_THREADS) && defined(NATIVE_THREADS)
1154                                 tables_unlock();
1155 #endif
1156                                 return c;
1157                         }
1158                 }
1159         }
1160
1161 #if defined(USE_THREADS) && defined(NATIVE_THREADS)
1162     tables_unlock();
1163 #endif
1164
1165     return c;
1166 }
1167
1168
1169 /******************** Function: class_get **************************************
1170
1171     searches for the class with the specified name in the classes hashtable
1172     if there is no such class NULL is returned
1173
1174 *******************************************************************************/
1175
1176 classinfo *class_get(utf *classname)
1177 {
1178         classinfo *c;  /* hashtable element */ 
1179         u4 key;        /* hashkey computed from classname */   
1180         u4 slot;       /* slot in hashtable */
1181         u2 i;  
1182
1183         key  = utf_hashkey(classname->text, classname->blength);
1184         slot = key & (class_hash.size-1);
1185         c    = class_hash.ptr[slot];
1186
1187         /* search external hash-chain */
1188         while (c) {
1189                 if (c->name->blength == classname->blength) {
1190                         /* compare classnames */
1191                         for (i = 0; i < classname->blength; i++) 
1192                                 if (classname->text[i] != c->name->text[i])
1193                                         goto nomatch;
1194
1195                         /* class found in hashtable */                          
1196                         return c;
1197                 }
1198                         
1199         nomatch:
1200                 c = c->hashlink;
1201         }
1202
1203         /* class not found */
1204         return NULL;
1205 }
1206
1207
1208 /* class_remove ****************************************************************
1209
1210    removes the class entry wth the specified name in the classes hashtable,
1211    furthermore the class' resources are freed
1212    if there is no such class false is returned
1213
1214 *******************************************************************************/
1215
1216 bool class_remove(classinfo *c)
1217 {
1218         classinfo *tc;  /* hashtable element */
1219         classinfo *pc;
1220         u4 key;         /* hashkey computed from classname */   
1221         u4 slot;        /* slot in hashtable */
1222         u2 i;  
1223
1224         key  = utf_hashkey(c->name->text, c->name->blength);
1225         slot = key & (class_hash.size - 1);
1226         tc   = class_hash.ptr[slot];
1227         pc   = NULL;
1228
1229         /* search external hash-chain */
1230         while (tc) {
1231                 if (tc->name->blength == c->name->blength) {
1232                         
1233                         /* compare classnames */
1234                         for (i = 0; i < c->name->blength; i++)
1235                                 if (tc->name->text[i] != c->name->text[i])
1236                                         goto nomatch;
1237
1238                         /* class found in hashtable */
1239                         if (!pc) {
1240                                 class_hash.ptr[slot] = tc->hashlink;
1241
1242                         } else {
1243                                 pc->hashlink = tc->hashlink;
1244                         }
1245
1246                         class_free(tc);
1247
1248                         return true;
1249                 }
1250                         
1251         nomatch:
1252                 pc = tc;
1253                 tc = tc->hashlink;
1254         }
1255
1256         /* class not found */
1257         return false;
1258 }
1259
1260
1261 /***************** Function: class_array_of ***********************************
1262
1263     Returns an array class with the given component class.
1264     The array class is dynamically created if neccessary.
1265
1266 *******************************************************************************/
1267
1268 classinfo *class_array_of(classinfo *component)
1269 {
1270     int namelen;
1271     char *namebuf;
1272         classinfo *c;
1273
1274     /* Assemble the array class name */
1275     namelen = component->name->blength;
1276     
1277     if (component->name->text[0] == '[') {
1278         /* the component is itself an array */
1279         namebuf = DMNEW(char, namelen + 1);
1280         namebuf[0] = '[';
1281         memcpy(namebuf + 1, component->name->text, namelen);
1282         namelen++;
1283
1284     } else {
1285         /* the component is a non-array class */
1286         namebuf = DMNEW(char, namelen + 3);
1287         namebuf[0] = '[';
1288         namebuf[1] = 'L';
1289         memcpy(namebuf + 2, component->name->text, namelen);
1290         namebuf[2 + namelen] = ';';
1291         namelen += 3;
1292     }
1293
1294         /* load this class ;-) and link it */
1295         c = class_new(utf_new(namebuf, namelen));
1296         c->loaded = 1;
1297         class_link(c);
1298
1299     return c;
1300 }
1301
1302 /*************** Function: class_multiarray_of ********************************
1303
1304     Returns an array class with the given dimension and element class.
1305     The array class is dynamically created if neccessary.
1306
1307 *******************************************************************************/
1308
1309 classinfo *class_multiarray_of(int dim, classinfo *element)
1310 {
1311     int namelen;
1312     char *namebuf;
1313
1314         if (dim < 1)
1315                 panic("Invalid array dimension requested");
1316
1317     /* Assemble the array class name */
1318     namelen = element->name->blength;
1319     
1320     if (element->name->text[0] == '[') {
1321         /* the element is itself an array */
1322         namebuf = DMNEW(char, namelen + dim);
1323         memcpy(namebuf + dim, element->name->text, namelen);
1324         namelen += dim;
1325     }
1326     else {
1327         /* the element is a non-array class */
1328         namebuf = DMNEW(char, namelen + 2 + dim);
1329         namebuf[dim] = 'L';
1330         memcpy(namebuf + dim + 1, element->name->text, namelen);
1331         namelen += (2 + dim);
1332         namebuf[namelen - 1] = ';';
1333     }
1334         memset(namebuf, '[', dim);
1335
1336     return class_new(utf_new(namebuf, namelen));
1337 }
1338
1339 /************************** function: utf_strlen ******************************
1340
1341     determine number of unicode characters in the utf string
1342
1343 *******************************************************************************/
1344
1345 u4 utf_strlen(utf *u) 
1346 {
1347     char *endpos  = utf_end(u);  /* points behind utf string       */
1348     char *utf_ptr = u->text;     /* current position in utf text   */
1349     u4 len = 0;                  /* number of unicode characters   */
1350
1351     while (utf_ptr < endpos) {
1352                 len++;
1353                 /* next unicode character */
1354                 utf_nextu2(&utf_ptr);
1355     }
1356
1357     if (utf_ptr != endpos)
1358         /* string ended abruptly */
1359                 panic("illegal utf string"); 
1360
1361     return len;
1362 }
1363
1364
1365 /*
1366  * These are local overrides for various environment variables in Emacs.
1367  * Please do not remove this and leave it at the end of the file, where
1368  * Emacs will automagically detect them.
1369  * ---------------------------------------------------------------------
1370  * Local variables:
1371  * mode: c
1372  * indent-tabs-mode: t
1373  * c-basic-offset: 4
1374  * tab-width: 4
1375  * End:
1376  */