Throw nullpointer exception in utf_strlen if NULL is passed.
[cacao.git] / tables.c
1 /* tables.c - 
2
3    Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003
4    R. Grafl, A. Krall, C. Kruegel, C. Oates, R. Obermaisser,
5    M. Probst, S. Ring, E. Steiner, C. Thalinger, D. Thuernbeck,
6    P. Tomsich, J. Wenninger
7
8    This file is part of CACAO.
9
10    This program is free software; you can redistribute it and/or
11    modify it under the terms of the GNU General Public License as
12    published by the Free Software Foundation; either version 2, or (at
13    your option) any later version.
14
15    This program is distributed in the hope that it will be useful, but
16    WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18    General Public License for more details.
19
20    You should have received a copy of the GNU General Public License
21    along with this program; if not, write to the Free Software
22    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
23    02111-1307, USA.
24
25    Contact: cacao@complang.tuwien.ac.at
26
27    Authors: Reinhard Grafl
28
29    Changes: Mark Probst
30             Andreas Krall
31
32    Contains support functions for:
33        - Reading of Java class files
34        - Unicode symbols
35        - the heap
36        - additional support functions
37
38    $Id: tables.c 1445 2004-11-05 13:55:33Z twisti $
39
40 */
41
42 #include "global.h"
43
44 #include <string.h>
45 #include <stdlib.h>
46 #include <assert.h>
47 #include <sys/types.h>
48 #include <sys/mman.h>
49 #include <unistd.h>
50 #include "builtin.h"
51 #include "exceptions.h"
52 #include "types.h"
53 #include "native.h"
54 #include "options.h"
55 #include "tables.h"
56 #include "loader.h"
57 #include "asmpart.h"
58 #include "statistics.h"
59 #include "threads/thread.h"
60 #include "threads/locks.h"
61 #include "toolbox/logging.h"
62 #include "toolbox/memory.h"
63
64
65 hashtable utf_hash;     /* hashtable for utf8-symbols */
66 hashtable string_hash;  /* hashtable for javastrings  */
67 hashtable class_hash;   /* hashtable for classes      */
68
69 list unlinkedclasses;   /* this is only used for eager class loading          */
70
71
72 /******************************************************************************
73  *********************** hashtable functions **********************************
74  ******************************************************************************/
75
/* Initial slot counts. Each value must be a power of 2 because slots are
   selected with `key & (size - 1)`. */

#define UTF_HASHSTART   16384   /* initial size of the utf hashtable             */
#define HASHSTART        2048   /* initial size of javastring and class tables   */
80
81
82 /******************** function: init_hashtable ******************************
83
84     Initializes a hashtable structure and allocates memory.
85     The parameter size specifies the initial size of the hashtable.
86         
87 *****************************************************************************/
88
89 void init_hashtable(hashtable *hash, u4 size)
90 {
91         u4 i;
92
93         hash->entries = 0;
94         hash->size    = size;
95         hash->ptr     = MNEW(void*, size);
96
97         /* clear table */
98         for (i = 0; i < size; i++) hash->ptr[i] = NULL;
99 }
100
101
102 /*********************** function: tables_init  *****************************
103
104     creates hashtables for symboltables 
105         (called once at startup)                         
106         
107 *****************************************************************************/
108
109 void tables_init()
110 {
111         init_hashtable(&utf_hash,    UTF_HASHSTART);  /* hashtable for utf8-symbols */
112         init_hashtable(&string_hash, HASHSTART);      /* hashtable for javastrings */
113         init_hashtable(&class_hash,  HASHSTART);      /* hashtable for classes */ 
114
115 /*      if (opt_eager) */
116 /*              list_init(&unlinkedclasses, OFFSET(classinfo, listnode)); */
117
118 #if defined(STATISTICS)
119         if (opt_stat)
120                 count_utf_len += sizeof(utf*) * utf_hash.size;
121 #endif
122 }
123
124
125 /********************** function: tables_close ******************************
126
127         free memory for hashtables                    
128         
129 *****************************************************************************/
130
131 void tables_close()
132 {
133         utf *u = NULL;
134         literalstring *s;
135         u4 i;
136         
137         /* dispose utf symbols */
138         for (i = 0; i < utf_hash.size; i++) {
139                 u = utf_hash.ptr[i];
140                 while (u) {
141                         /* process elements in external hash chain */
142                         utf *nextu = u->hashlink;
143                         MFREE(u->text, u1, u->blength);
144                         FREE(u, utf);
145                         u = nextu;
146                 }       
147         }
148
149         /* dispose javastrings */
150         for (i = 0; i < string_hash.size; i++) {
151                 s = string_hash.ptr[i];
152                 while (u) {
153                         /* process elements in external hash chain */
154                         literalstring *nexts = s->hashlink;
155                         literalstring_free(s->string);
156                         FREE(s, literalstring);
157                         s = nexts;
158                 }       
159         }
160
161         /* dispose hashtable structures */
162         MFREE(utf_hash.ptr,    void*, utf_hash.size);
163         MFREE(string_hash.ptr, void*, string_hash.size);
164         MFREE(class_hash.ptr,  void*, class_hash.size);
165 }
166
167
168 /********************* function: utf_display *********************************
169
170         write utf symbol to stdout (debugging purposes)
171
172 ******************************************************************************/
173
174 void utf_display(utf *u)
175 {
176     char *endpos  = utf_end(u);  /* points behind utf string       */
177     char *utf_ptr = u->text;     /* current position in utf text   */
178
179         if (!u)
180                 return;
181
182     while (utf_ptr < endpos) {
183                 /* read next unicode character */                
184                 u2 c = utf_nextu2(&utf_ptr);
185                 if (c >= 32 && c <= 127) printf("%c", c);
186                 else printf("?");
187         }
188
189         fflush(stdout);
190 }
191
192
193 /********************* function: utf_display *********************************
194
195         write utf symbol to stdout (debugging purposes)
196
197 ******************************************************************************/
198
199 void utf_display_classname(utf *u)
200 {
201     char *endpos  = utf_end(u);  /* points behind utf string       */
202     char *utf_ptr = u->text;     /* current position in utf text   */
203
204         if (!u)
205                 return;
206
207     while (utf_ptr < endpos) {
208                 /* read next unicode character */                
209                 u2 c = utf_nextu2(&utf_ptr);
210                 if (c == '/') c = '.';
211                 if (c >= 32 && c <= 127) printf("%c", c);
212                 else printf("?");
213         }
214
215         fflush(stdout);
216 }
217
218
219 /************************* function: log_utf *********************************
220
221         log utf symbol
222
223 ******************************************************************************/
224
225 void log_utf(utf *u)
226 {
227         char buf[MAXLOGTEXT];
228         utf_sprint(buf, u);
229         dolog("%s", buf);
230 }
231
232
233 /********************** function: log_plain_utf ******************************
234
235         log utf symbol (without printing "LOG: " and newline)
236
237 ******************************************************************************/
238
239 void log_plain_utf(utf *u)
240 {
241         char buf[MAXLOGTEXT];
242         utf_sprint(buf, u);
243         dolog_plain("%s", buf);
244 }
245
246
247 /************************ function: utf_sprint *******************************
248         
249     write utf symbol into c-string (debugging purposes)                                          
250
251 ******************************************************************************/
252
253 void utf_sprint(char *buffer, utf *u)
254 {
255     char *endpos  = utf_end(u);  /* points behind utf string       */
256     char *utf_ptr = u->text;     /* current position in utf text   */ 
257     u2 pos = 0;                  /* position in c-string           */
258
259     while (utf_ptr < endpos) 
260                 /* copy next unicode character */       
261                 buffer[pos++] = utf_nextu2(&utf_ptr);
262
263     /* terminate string */
264     buffer[pos] = '\0';
265 }
266
267
268 /************************ function: utf_sprint_classname *********************
269         
270     write utf symbol into c-string (debugging purposes)
271
272 ******************************************************************************/ 
273
274 void utf_sprint_classname(char *buffer, utf *u)
275 {
276     char *endpos  = utf_end(u);  /* points behind utf string       */
277     char *utf_ptr = u->text;     /* current position in utf text   */ 
278     u2 pos = 0;                  /* position in c-string           */
279
280     while (utf_ptr < endpos) {
281                 /* copy next unicode character */       
282                 u2 c = utf_nextu2(&utf_ptr);
283                 if (c == '/') c = '.';
284                 buffer[pos++] = c;
285         }
286
287     /* terminate string */
288     buffer[pos] = '\0';
289 }
290
291
292 /********************* Funktion: utf_fprint **********************************
293         
294     write utf symbol into file          
295
296 ******************************************************************************/
297
298 void utf_fprint(FILE *file, utf *u)
299 {
300     char *endpos  = utf_end(u);  /* points behind utf string       */
301     char *utf_ptr = u->text;     /* current position in utf text   */ 
302
303     if (!u)
304                 return;
305
306     while (utf_ptr < endpos) { 
307                 /* read next unicode character */                
308                 u2 c = utf_nextu2(&utf_ptr);                            
309
310                 if (c >= 32 && c <= 127) fprintf(file, "%c", c);
311                 else fprintf(file, "?");
312         }
313 }
314
315
316 /********************* Funktion: utf_fprint **********************************
317         
318     write utf symbol into file          
319
320 ******************************************************************************/
321
322 void utf_fprint_classname(FILE *file, utf *u)
323 {
324     char *endpos  = utf_end(u);  /* points behind utf string       */
325     char *utf_ptr = u->text;     /* current position in utf text   */ 
326
327     if (!u)
328                 return;
329
330     while (utf_ptr < endpos) { 
331                 /* read next unicode character */                
332                 u2 c = utf_nextu2(&utf_ptr);                            
333                 if (c == '/') c = '.';
334
335                 if (c >= 32 && c <= 127) fprintf(file, "%c", c);
336                 else fprintf(file, "?");
337         }
338 }
339
340
341 /****************** internal function: utf_hashkey ***************************
342
343         The hashkey is computed from the utf-text by using up to 8 characters.
344         For utf-symbols longer than 15 characters 3 characters are taken from
345         the beginning and the end, 2 characters are taken from the middle.
346
347 ******************************************************************************/ 
348
349 #define nbs(val) ((u4) *(++text) << val) /* get next byte, left shift by val  */
350 #define fbs(val) ((u4) *(  text) << val) /* get first byte, left shift by val */
351
352 static u4 utf_hashkey(char *text, u4 length)
353 {
354         char *start_pos = text; /* pointer to utf text */
355         u4 a;
356
357         switch (length) {               
358                 
359         case 0: /* empty string */
360                 return 0;
361
362         case 1: return fbs(0);
363         case 2: return fbs(0) ^ nbs(3);
364         case 3: return fbs(0) ^ nbs(3) ^ nbs(5);
365         case 4: return fbs(0) ^ nbs(2) ^ nbs(4) ^ nbs(6);
366         case 5: return fbs(0) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(6);
367         case 6: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(5) ^ nbs(6);
368         case 7: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(5) ^ nbs(6);
369         case 8: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(5) ^ nbs(6) ^ nbs(7);
370
371         case 9:
372                 a = fbs(0);
373                 a ^= nbs(1);
374                 a ^= nbs(2);
375                 text++;
376                 return a ^ nbs(4) ^ nbs(5) ^ nbs(6) ^ nbs(7) ^ nbs(8);
377
378         case 10:
379                 a = fbs(0);
380                 text++;
381                 a ^= nbs(2);
382                 a ^= nbs(3);
383                 a ^= nbs(4);
384                 text++;
385                 return a ^ nbs(6) ^ nbs(7) ^ nbs(8) ^ nbs(9);
386
387         case 11:
388                 a = fbs(0);
389                 text++;
390                 a ^= nbs(2);
391                 a ^= nbs(3);
392                 a ^= nbs(4);
393                 text++;
394                 return a ^ nbs(6) ^ nbs(7) ^ nbs(8) ^ nbs(9) ^ nbs(10);
395
396         case 12:
397                 a = fbs(0);
398                 text += 2;
399                 a ^= nbs(2);
400                 a ^= nbs(3);
401                 text++;
402                 a ^= nbs(5);
403                 a ^= nbs(6);
404                 a ^= nbs(7);
405                 text++;
406                 return a ^ nbs(9) ^ nbs(10);
407
408         case 13:
409                 a = fbs(0);
410                 a ^= nbs(1);
411                 text++;
412                 a ^= nbs(3);
413                 a ^= nbs(4);
414                 text += 2;      
415                 a ^= nbs(7);
416                 a ^= nbs(8);
417                 text += 2;
418                 return a ^ nbs(9) ^ nbs(10);
419
420         case 14:
421                 a = fbs(0);
422                 text += 2;      
423                 a ^= nbs(3);
424                 a ^= nbs(4);
425                 text += 2;      
426                 a ^= nbs(7);
427                 a ^= nbs(8);
428                 text += 2;
429                 return a ^ nbs(9) ^ nbs(10) ^ nbs(11);
430
431         case 15:
432                 a = fbs(0);
433                 text += 2;      
434                 a ^= nbs(3);
435                 a ^= nbs(4);
436                 text += 2;      
437                 a ^= nbs(7);
438                 a ^= nbs(8);
439                 text += 2;
440                 return a ^ nbs(9) ^ nbs(10) ^ nbs(11);
441
442         default:  /* 3 characters from beginning */
443                 a = fbs(0);
444                 text += 2;
445                 a ^= nbs(3);
446                 a ^= nbs(4);
447
448                 /* 2 characters from middle */
449                 text = start_pos + (length / 2);
450                 a ^= fbs(5);
451                 text += 2;
452                 a ^= nbs(6);    
453
454                 /* 3 characters from end */
455                 text = start_pos + length - 4;
456
457                 a ^= fbs(7);
458                 text++;
459
460                 return a ^ nbs(10) ^ nbs(11);
461     }
462 }
463
464
465 /*************************** function: utf_hashkey ***************************
466
467     compute the hashkey of a unicode string
468
469 ******************************************************************************/ 
470
471 u4 unicode_hashkey(u2 *text, u2 len)
472 {
473         return utf_hashkey((char*) text, len);
474 }
475
476
477 /************************ function: utf_new **********************************
478
479         Creates a new utf-symbol, the text of the symbol is passed as a 
480         u1-array. The function searches the utf-hashtable for a utf-symbol 
481         with this text. On success the element returned, otherwise a new 
482         hashtable element is created.
483
484         If the number of entries in the hashtable exceeds twice the size of the
485         hashtable slots a reorganization of the hashtable is done and the utf 
486         symbols are copied to a new hashtable with doubled size.
487
488 ******************************************************************************/
489
490 utf *utf_new_intern(char *text, u2 length)
491 {
492         u4 key;            /* hashkey computed from utf-text */
493         u4 slot;           /* slot in hashtable */
494         utf *u;            /* hashtable element */
495         u2 i;
496
497 #ifdef STATISTICS
498         if (opt_stat)
499                 count_utf_new++;
500 #endif
501
502         key  = utf_hashkey(text, length);
503         slot = key & (utf_hash.size-1);
504         u    = utf_hash.ptr[slot];
505
506         /* search external hash chain for utf-symbol */
507         while (u) {
508                 if (u->blength == length) {
509
510                         /* compare text of hashtable elements */
511                         for (i = 0; i < length; i++)
512                                 if (text[i] != u->text[i]) goto nomatch;
513                         
514 #ifdef STATISTICS
515                         if (opt_stat)
516                                 count_utf_new_found++;
517 #endif
518 /*                      log_text("symbol found in hash table");*/
519                         /* symbol found in hashtable */
520 /*                                      utf_display(u);
521                                         {
522                                                 utf blup;
523                                                 blup.blength=length;
524                                                 blup.text=text;
525                                                 utf_display(&blup);
526                                         }*/
527                         return u;
528                 }
529         nomatch:
530                 u = u->hashlink; /* next element in external chain */
531         }
532
533 #ifdef STATISTICS
534         if (opt_stat)
535                 count_utf_len += sizeof(utf) + length;
536 #endif
537
538         /* location in hashtable found, create new utf element */
539         u = NEW(utf);
540         u->blength  = length;               /* length in bytes of utfstring       */
541         u->hashlink = utf_hash.ptr[slot];   /* link in external hashchain         */
542         u->text     = mem_alloc(length + 1);/* allocate memory for utf-text       */
543         memcpy(u->text, text, length);      /* copy utf-text                      */
544         u->text[length] = '\0';
545         utf_hash.ptr[slot] = u;             /* insert symbol into table           */
546
547         utf_hash.entries++;                 /* update number of entries           */
548
549         if (utf_hash.entries > (utf_hash.size * 2)) {
550
551         /* reorganization of hashtable, average length of 
552            the external chains is approx. 2                */  
553
554                 u4 i;
555                 utf *u;
556                 hashtable newhash; /* the new hashtable */
557
558                 /* create new hashtable, double the size */
559                 init_hashtable(&newhash, utf_hash.size * 2);
560                 newhash.entries = utf_hash.entries;
561
562 #ifdef STATISTICS
563                 if (opt_stat)
564                         count_utf_len += sizeof(utf*) * utf_hash.size;
565 #endif
566
567                 /* transfer elements to new hashtable */
568                 for (i = 0; i < utf_hash.size; i++) {
569                         u = (utf *) utf_hash.ptr[i];
570                         while (u) {
571                                 utf *nextu = u->hashlink;
572                                 u4 slot = utf_hashkey(u->text, u->blength) & (newhash.size - 1);
573                                                 
574                                 u->hashlink = (utf *) newhash.ptr[slot];
575                                 newhash.ptr[slot] = u;
576
577                                 /* follow link in external hash chain */
578                                 u = nextu;
579                         }
580                 }
581         
582                 /* dispose old table */
583                 MFREE(utf_hash.ptr, void*, utf_hash.size);
584                 utf_hash = newhash;
585         }
586
587         return u;
588 }
589
590
591 utf *utf_new(char *text, u2 length)
592 {
593     utf *r;
594
595 #if defined(USE_THREADS) && defined(NATIVE_THREADS)
596     tables_lock();
597 #endif
598
599     r = utf_new_intern(text, length);
600
601 #if defined(USE_THREADS) && defined(NATIVE_THREADS)
602     tables_unlock();
603 #endif
604
605     return r;
606 }
607
608
609 /********************* function: utf_new_char ********************************
610
611     creates a new utf symbol, the text for this symbol is passed
612     as a c-string ( = char* )
613
614 ******************************************************************************/
615
616 utf *utf_new_char(char *text)
617 {
618         return utf_new(text, strlen(text));
619 }
620
621
622 /********************* function: utf_new_char ********************************
623
624     creates a new utf symbol, the text for this symbol is passed
625     as a c-string ( = char* )
626     "." characters are going to be replaced by "/". since the above function is
627     used often, this is a separte function, instead of an if
628
629 ******************************************************************************/
630
631 utf *utf_new_char_classname(char *text)
632 {
633         if (strchr(text, '.')) {
634                 char *txt = strdup(text);
635                 char *end = txt + strlen(txt);
636                 char *c;
637                 utf *tmpRes;
638                 for (c = txt; c < end; c++)
639                         if (*c == '.') *c = '/';
640                 tmpRes = utf_new(txt, strlen(txt));
641                 free(txt);
642                 return tmpRes;
643
644         } else
645                 return utf_new(text, strlen(text));
646 }
647
648
649 /************************** Funktion: utf_show ******************************
650
651     writes the utf symbols in the utfhash to stdout and
652     displays the number of external hash chains grouped 
653     according to the chainlength
654     (debugging purposes)
655
656 *****************************************************************************/
657
658 void utf_show()
659 {
660
661 #define CHAIN_LIMIT 20               /* limit for seperated enumeration */
662
663         u4 chain_count[CHAIN_LIMIT]; /* numbers of chains */
664         u4 max_chainlength = 0;      /* maximum length of the chains */
665         u4 sum_chainlength = 0;      /* sum of the chainlengths */
666         u4 beyond_limit = 0;         /* number of utf-symbols in chains with length>=CHAIN_LIMIT-1 */
667         u4 i;
668
669         printf ("UTF-HASH:\n");
670
671         /* show element of utf-hashtable */
672         for (i=0; i<utf_hash.size; i++) {
673                 utf *u = utf_hash.ptr[i];
674                 if (u) {
675                         printf ("SLOT %d: ", (int) i);
676                         while (u) {
677                                 printf ("'");
678                                 utf_display (u);
679                                 printf ("' ");
680                                 u = u->hashlink;
681                         }       
682                         printf ("\n");
683                 }
684                 
685         }
686
687         printf ("UTF-HASH: %d slots for %d entries\n", 
688                         (int) utf_hash.size, (int) utf_hash.entries );
689
690
691         if (utf_hash.entries == 0)
692                 return;
693
694         printf("chains:\n  chainlength    number of chains    %% of utfstrings\n");
695
696         for (i=0;i<CHAIN_LIMIT;i++)
697                 chain_count[i]=0;
698
699         /* count numbers of hashchains according to their length */
700         for (i=0; i<utf_hash.size; i++) {
701                   
702                 utf *u = (utf*) utf_hash.ptr[i];
703                 u4 chain_length = 0;
704
705                 /* determine chainlength */
706                 while (u) {
707                         u = u->hashlink;
708                         chain_length++;
709                 }
710
711                 /* update sum of all chainlengths */
712                 sum_chainlength+=chain_length;
713
714                 /* determine the maximum length of the chains */
715                 if (chain_length>max_chainlength)
716                         max_chainlength = chain_length;
717
718                 /* update number of utf-symbols in chains with length>=CHAIN_LIMIT-1 */
719                 if (chain_length>=CHAIN_LIMIT) {
720                         beyond_limit+=chain_length;
721                         chain_length=CHAIN_LIMIT-1;
722                 }
723
724                 /* update number of hashchains of current length */
725                 chain_count[chain_length]++;
726         }
727
728         /* display results */  
729         for (i=1;i<CHAIN_LIMIT-1;i++) 
730                 printf("       %2d %17d %18.2f%%\n",i,chain_count[i],(((float) chain_count[i]*i*100)/utf_hash.entries));
731           
732         printf("     >=%2d %17d %18.2f%%\n",CHAIN_LIMIT-1,chain_count[CHAIN_LIMIT-1],((float) beyond_limit*100)/utf_hash.entries);
733
734
735         printf("max. chainlength:%5d\n",max_chainlength);
736
737         /* avg. chainlength = sum of chainlengths / number of chains */
738         printf("avg. chainlength:%5.2f\n",(float) sum_chainlength / (utf_hash.size-chain_count[0]));
739 }
740
741 /******************************************************************************
742 *********************** Misc support functions ********************************
743 ******************************************************************************/
744
745
746 /******************** Function: desc_to_type **********************************
747    
748         Determines the corresponding Java base data type for a given type
749         descriptor.
750         
751 ******************************************************************************/
752
753 u2 desc_to_type(utf *descriptor)
754 {
755         char *utf_ptr = descriptor->text;  /* current position in utf text */
756         char logtext[MAXLOGTEXT];
757
758         if (descriptor->blength < 1) panic("Type-Descriptor is empty string");
759         
760         switch (*utf_ptr++) {
761         case 'B': 
762         case 'C':
763         case 'I':
764         case 'S':  
765         case 'Z':  return TYPE_INT;
766         case 'D':  return TYPE_DOUBLE;
767         case 'F':  return TYPE_FLOAT;
768         case 'J':  return TYPE_LONG;
769         case 'L':
770         case '[':  return TYPE_ADDRESS;
771         }
772                         
773         sprintf(logtext, "Invalid Type-Descriptor: ");
774         utf_sprint(logtext+strlen(logtext), descriptor);
775         error("%s",logtext);
776
777         return 0;
778 }
779
780
781 /********************** Function: desc_typesize *******************************
782
783         Calculates the lenght in bytes needed for a data element of the type given
784         by its type descriptor.
785         
786 ******************************************************************************/
787
788 u2 desc_typesize(utf *descriptor)
789 {
790         switch (desc_to_type(descriptor)) {
791         case TYPE_INT:     return 4;
792         case TYPE_LONG:    return 8;
793         case TYPE_FLOAT:   return 4;
794         case TYPE_DOUBLE:  return 8;
795         case TYPE_ADDRESS: return sizeof(voidptr);
796         default:           return 0;
797         }
798 }
799
800
801 /********************** function: utf_nextu2 *********************************
802
803     read the next unicode character from the utf string and
804     increment the utf-string pointer accordingly
805
806 ******************************************************************************/
807
808 u2 utf_nextu2(char **utf_ptr) 
809 {
810     /* uncompressed unicode character */
811     u2 unicode_char = 0;
812     /* current position in utf text */  
813     unsigned char *utf = (unsigned char *) (*utf_ptr);
814     /* bytes representing the unicode character */
815     unsigned char ch1, ch2, ch3;
816     /* number of bytes used to represent the unicode character */
817     int len = 0;
818         
819     switch ((ch1 = utf[0]) >> 4) {
820         default: /* 1 byte */
821                 (*utf_ptr)++;
822                 return (u2) ch1;
823         case 0xC: 
824         case 0xD: /* 2 bytes */
825                 if (((ch2 = utf[1]) & 0xC0) == 0x80) {
826                         unsigned char high = ch1 & 0x1F;
827                         unsigned char low  = ch2 & 0x3F;
828                         unicode_char = (high << 6) + low;
829                         len = 2;
830                 }
831                 break;
832
833         case 0xE: /* 2 or 3 bytes */
834                 if (((ch2 = utf[1]) & 0xC0) == 0x80) {
835                         if (((ch3 = utf[2]) & 0xC0) == 0x80) {
836                                 unsigned char low  = ch3 & 0x3f;
837                                 unsigned char mid  = ch2 & 0x3f;
838                                 unsigned char high = ch1 & 0x0f;
839                                 unicode_char = (((high << 6) + mid) << 6) + low;
840                                 len = 3;
841                         } else
842                                 len = 2;                                           
843                 }
844                 break;
845     }
846
847     /* update position in utf-text */
848     *utf_ptr = (char *) (utf + len);
849     return unicode_char;
850 }
851
852
853 /********************* function: is_valid_utf ********************************
854
855     return true if the given string is a valid UTF-8 string
856
857     utf_ptr...points to first character
858     end_pos...points after last character
859
860 ******************************************************************************/
861
862 static unsigned long min_codepoint[6] = {0,1L<<7,1L<<11,1L<<16,1L<<21,1L<<26};
863
864 bool
865 is_valid_utf(char *utf_ptr,char *end_pos)
866 {
867         int bytes;
868         int len,i;
869         char c;
870         unsigned long v;
871
872         if (end_pos < utf_ptr) return false;
873         bytes = end_pos - utf_ptr;
874         while (bytes--) {
875                 c = *utf_ptr++;
876                 /*dolog("%c %02x",c,c);*/
877                 if (!c) return false;                     /* 0x00 is not allowed */
878                 if ((c & 0x80) == 0) continue;            /* ASCII */
879
880                 if      ((c & 0xe0) == 0xc0) len = 1;     /* 110x xxxx */
881                 else if ((c & 0xf0) == 0xe0) len = 2;     /* 1110 xxxx */
882                 else if ((c & 0xf8) == 0xf0) len = 3;     /* 1111 0xxx */
883                 else if ((c & 0xfc) == 0xf8) len = 4;     /* 1111 10xx */
884                 else if ((c & 0xfe) == 0xfc) len = 5;     /* 1111 110x */
885                 else return false;                        /* invalid leading byte */
886
887                 if (len > 2) return false;                /* Java limitation */
888
889                 v = (unsigned long)c & (0x3f >> len);
890                 
891                 if ((bytes -= len) < 0) return false;     /* missing bytes */
892
893                 for (i = len; i--; ) {
894                         c = *utf_ptr++;
895                         /*dolog("    %c %02x",c,c);*/
896                         if ((c & 0xc0) != 0x80)               /* 10xx xxxx */
897                                 return false;
898                         v = (v<<6) | (c & 0x3f);
899                 }
900
901                 /*              dolog("v=%d",v);*/
902
903                 if (v == 0) {
904                         if (len != 1) return false;           /* Java special */
905                 }
906                 else {
907                         /* Sun Java seems to allow overlong UTF-8 encodings */
908                         
909                         if (v < min_codepoint[len]) { /* overlong UTF-8 */
910                                 if (!opt_liberalutf)
911                                         fprintf(stderr,"WARNING: Overlong UTF-8 sequence found.\n");
912                                 /* XXX change this to panic? */
913                         }
914                 }
915
916                 /* surrogates in UTF-8 seem to be allowed in Java classfiles */
917                 /* if (v >= 0xd800 && v <= 0xdfff) return false; */ /* surrogates */
918
919                 /* even these seem to be allowed */
920                 /* if (v == 0xfffe || v == 0xffff) return false; */ /* invalid codepoints */
921         }
922
923         return true;
924 }
925  
/* is_valid_name ***************************************************************

   Returns true if the given string may be used as a class/field/method
   name. Currently this only disallows empty strings, control characters
   (bytes below 0x20) and the two-byte encoding of zero (0xc0 0x80).

   NOTE: The string is assumed to have passed is_valid_utf!

   utf_ptr...points to the first byte
   end_pos...points after the last byte

   The function never reads a byte at or behind end_pos, even if the
   string ends with a dangling 0xc0 byte.

*******************************************************************************/

bool
is_valid_name(char *utf_ptr, char *end_pos)
{
    if (end_pos <= utf_ptr)
        return false;                   /* disallow empty names */

    while (utf_ptr < end_pos) {
        unsigned char c = (unsigned char) *utf_ptr++;

        if (c < 0x20)
            return false;               /* disallow control characters */

        /* Disallow zero. The second byte is only inspected if it is
           still inside the string. */
        if (c == 0xc0 && utf_ptr < end_pos
            && (unsigned char) *utf_ptr == 0x80)
            return false;
    }

    return true;
}
951
952 bool
953 is_valid_name_utf(utf *u)
954 {
955         return is_valid_name(u->text,utf_end(u));
956 }
957
958 /******************** Function: class_new **************************************
959
960     searches for the class with the specified name in the classes hashtable,
961     if there is no such class a new classinfo structure is created and inserted
962     into the list of classes to be loaded
963
964 *******************************************************************************/
965
966 classinfo *class_new_intern(utf *classname)
967 {
968         classinfo *c;     /* hashtable element */
969         u4 key;           /* hashkey computed from classname */
970         u4 slot;          /* slot in hashtable */
971         u2 i;
972
973         key  = utf_hashkey(classname->text, classname->blength);
974         slot = key & (class_hash.size - 1);
975         c    = class_hash.ptr[slot];
976
977         /* search external hash chain for the class */
978         while (c) {
979                 if (c->name->blength == classname->blength) {
980                         for (i = 0; i < classname->blength; i++)
981                                 if (classname->text[i] != c->name->text[i]) goto nomatch;
982                                                 
983                         /* class found in hashtable */
984                         return c;
985                 }
986                         
987         nomatch:
988                 c = c->hashlink; /* next element in external chain */
989         }
990
991         /* location in hashtable found, create new classinfo structure */
992
993 #if defined(STATISTICS)
994         if (opt_stat)
995                 count_class_infos += sizeof(classinfo);
996 #endif
997
998         if (initverbose) {
999                 char logtext[MAXLOGTEXT];
1000                 sprintf(logtext, "Creating class: ");
1001                 utf_sprint_classname(logtext + strlen(logtext), classname);
1002                 log_text(logtext);
1003         }
1004
1005         c = GCNEW(classinfo, 1); /*JOWENN: NEW*/
1006         /*c=NEW(classinfo);*/
1007         c->vmClass = 0;
1008         c->flags = 0;
1009         c->name = classname;
1010         c->packagename = NULL;
1011         c->cpcount = 0;
1012         c->cptags = NULL;
1013         c->cpinfos = NULL;
1014         c->super = NULL;
1015         c->sub = NULL;
1016         c->nextsub = NULL;
1017         c->interfacescount = 0;
1018         c->interfaces = NULL;
1019         c->fieldscount = 0;
1020         c->fields = NULL;
1021         c->methodscount = 0;
1022         c->methods = NULL;
1023         c->linked = false;
1024         c->loaded = false;
1025         c->index = 0;
1026         c->instancesize = 0;
1027         c->header.vftbl = NULL;
1028         c->innerclasscount = 0;
1029         c->innerclass = NULL;
1030         c->vftbl = NULL;
1031         c->initialized = false;
1032         c->initializing = false;
1033         c->classvftbl = false;
1034     c->classUsed = 0;
1035     c->impldBy = NULL;
1036         c->classloader = NULL;
1037         c->sourcefile = NULL;
1038         
1039         /* insert class into the hashtable */
1040         c->hashlink = class_hash.ptr[slot];
1041         class_hash.ptr[slot] = c;
1042
1043         /* update number of hashtable-entries */
1044         class_hash.entries++;
1045
1046         if (class_hash.entries > (class_hash.size * 2)) {
1047
1048                 /* reorganization of hashtable, average length of 
1049                    the external chains is approx. 2                */  
1050
1051                 u4 i;
1052                 classinfo *c;
1053                 hashtable newhash;  /* the new hashtable */
1054
1055                 /* create new hashtable, double the size */
1056                 init_hashtable(&newhash, class_hash.size * 2);
1057                 newhash.entries = class_hash.entries;
1058
1059                 /* transfer elements to new hashtable */
1060                 for (i = 0; i < class_hash.size; i++) {
1061                         c = (classinfo *) class_hash.ptr[i];
1062                         while (c) {
1063                                 classinfo *nextc = c->hashlink;
1064                                 u4 slot = (utf_hashkey(c->name->text, c->name->blength)) & (newhash.size - 1);
1065                                                 
1066                                 c->hashlink = newhash.ptr[slot];
1067                                 newhash.ptr[slot] = c;
1068
1069                                 c = nextc;
1070                         }
1071                 }
1072         
1073                 /* dispose old table */ 
1074                 MFREE(class_hash.ptr, void*, class_hash.size);
1075                 class_hash = newhash;
1076         }
1077
1078     /* Array classes need further initialization. */
1079     if (c->name->text[0] == '[') {
1080                 /* Array classes are not loaded from classfiles. */
1081                 c->loaded = true;
1082         class_new_array(c);
1083                 c->packagename = array_packagename;
1084
1085         } else {
1086                 /* Find the package name */
1087                 /* Classes in the unnamed package keep packagename == NULL. */
1088                 char *p = utf_end(c->name) - 1;
1089                 char *start = c->name->text;
1090                 for (;p > start; --p) {
1091                         if (*p == '.') {
1092                                 c->packagename = utf_new(start, p - start);
1093                                 break;
1094                         }
1095                 }
1096         }
1097 #if defined(USE_THREADS) && defined(NATIVE_THREADS)
1098         initObjectLock(&c->header);
1099 #endif
1100
1101         return c;
1102 }
1103
1104
1105 classinfo *class_new(utf *classname)
1106 {
1107     classinfo *c;
1108
1109 #if defined(USE_THREADS) && defined(NATIVE_THREADS)
1110     tables_lock();
1111 #endif
1112
1113     c = class_new_intern(classname);
1114
1115         /* we support eager class loading and linking on demand */
1116
1117         if (opt_eager) {
1118                 classinfo *tc;
1119                 classinfo *tmp;
1120
1121                 list_init(&unlinkedclasses, OFFSET(classinfo, listnode));
1122
1123                 if (!c->loaded) {
1124                         if (!class_load(c)) {
1125 #if defined(USE_THREADS) && defined(NATIVE_THREADS)
1126                                 tables_unlock();
1127 #endif
1128                                 return c;
1129                         }
1130                 }
1131
1132                 /* link all referenced classes */
1133
1134                 tc = list_first(&unlinkedclasses);
1135
1136                 while (tc) {
1137                         /* skip the current loaded/linked class */
1138                         if (tc != c) {
1139                                 if (!class_link(tc)) {
1140 #if defined(USE_THREADS) && defined(NATIVE_THREADS)
1141                                         tables_unlock();
1142 #endif
1143                                         return c;
1144                                 }
1145                         }
1146
1147                         /* we need a tmp variable here, because list_remove sets prev and
1148                            next to NULL */
1149                         tmp = list_next(&unlinkedclasses, tc);
1150                         list_remove(&unlinkedclasses, tc);
1151                         tc = tmp;
1152                 }
1153
1154                 if (!c->linked) {
1155                         if (!class_link(c)) {
1156 #if defined(USE_THREADS) && defined(NATIVE_THREADS)
1157                                 tables_unlock();
1158 #endif
1159                                 return c;
1160                         }
1161                 }
1162         }
1163
1164 #if defined(USE_THREADS) && defined(NATIVE_THREADS)
1165     tables_unlock();
1166 #endif
1167
1168     return c;
1169 }
1170
1171
1172 /******************** Function: class_get **************************************
1173
1174     searches for the class with the specified name in the classes hashtable
1175     if there is no such class NULL is returned
1176
1177 *******************************************************************************/
1178
1179 classinfo *class_get(utf *classname)
1180 {
1181         classinfo *c;  /* hashtable element */ 
1182         u4 key;        /* hashkey computed from classname */   
1183         u4 slot;       /* slot in hashtable */
1184         u2 i;  
1185
1186         key  = utf_hashkey(classname->text, classname->blength);
1187         slot = key & (class_hash.size-1);
1188         c    = class_hash.ptr[slot];
1189
1190         /* search external hash-chain */
1191         while (c) {
1192                 if (c->name->blength == classname->blength) {
1193                         /* compare classnames */
1194                         for (i = 0; i < classname->blength; i++) 
1195                                 if (classname->text[i] != c->name->text[i])
1196                                         goto nomatch;
1197
1198                         /* class found in hashtable */                          
1199                         return c;
1200                 }
1201                         
1202         nomatch:
1203                 c = c->hashlink;
1204         }
1205
1206         /* class not found */
1207         return NULL;
1208 }
1209
1210
1211 /* class_remove ****************************************************************
1212
1213    removes the class entry wth the specified name in the classes hashtable,
1214    furthermore the class' resources are freed
1215    if there is no such class false is returned
1216
1217 *******************************************************************************/
1218
1219 bool class_remove(classinfo *c)
1220 {
1221         classinfo *tc;  /* hashtable element */
1222         classinfo *pc;
1223         u4 key;         /* hashkey computed from classname */   
1224         u4 slot;        /* slot in hashtable */
1225         u2 i;  
1226
1227         key  = utf_hashkey(c->name->text, c->name->blength);
1228         slot = key & (class_hash.size - 1);
1229         tc   = class_hash.ptr[slot];
1230         pc   = NULL;
1231
1232         /* search external hash-chain */
1233         while (tc) {
1234                 if (tc->name->blength == c->name->blength) {
1235                         
1236                         /* compare classnames */
1237                         for (i = 0; i < c->name->blength; i++)
1238                                 if (tc->name->text[i] != c->name->text[i])
1239                                         goto nomatch;
1240
1241                         /* class found in hashtable */
1242                         if (!pc) {
1243                                 class_hash.ptr[slot] = tc->hashlink;
1244
1245                         } else {
1246                                 pc->hashlink = tc->hashlink;
1247                         }
1248
1249                         class_free(tc);
1250
1251                         return true;
1252                 }
1253                         
1254         nomatch:
1255                 pc = tc;
1256                 tc = tc->hashlink;
1257         }
1258
1259         /* class not found */
1260         return false;
1261 }
1262
1263
1264 /***************** Function: class_array_of ***********************************
1265
1266     Returns an array class with the given component class.
1267     The array class is dynamically created if neccessary.
1268
1269 *******************************************************************************/
1270
1271 classinfo *class_array_of(classinfo *component)
1272 {
1273     int namelen;
1274     char *namebuf;
1275         classinfo *c;
1276
1277     /* Assemble the array class name */
1278     namelen = component->name->blength;
1279     
1280     if (component->name->text[0] == '[') {
1281         /* the component is itself an array */
1282         namebuf = DMNEW(char, namelen + 1);
1283         namebuf[0] = '[';
1284         memcpy(namebuf + 1, component->name->text, namelen);
1285         namelen++;
1286
1287     } else {
1288         /* the component is a non-array class */
1289         namebuf = DMNEW(char, namelen + 3);
1290         namebuf[0] = '[';
1291         namebuf[1] = 'L';
1292         memcpy(namebuf + 2, component->name->text, namelen);
1293         namebuf[2 + namelen] = ';';
1294         namelen += 3;
1295     }
1296
1297         /* load this class ;-) and link it */
1298         c = class_new(utf_new(namebuf, namelen));
1299         c->loaded = 1;
1300         class_link(c);
1301
1302     return c;
1303 }
1304
1305 /*************** Function: class_multiarray_of ********************************
1306
1307     Returns an array class with the given dimension and element class.
1308     The array class is dynamically created if neccessary.
1309
1310 *******************************************************************************/
1311
1312 classinfo *class_multiarray_of(int dim, classinfo *element)
1313 {
1314     int namelen;
1315     char *namebuf;
1316
1317         if (dim < 1)
1318                 panic("Invalid array dimension requested");
1319
1320     /* Assemble the array class name */
1321     namelen = element->name->blength;
1322     
1323     if (element->name->text[0] == '[') {
1324         /* the element is itself an array */
1325         namebuf = DMNEW(char, namelen + dim);
1326         memcpy(namebuf + dim, element->name->text, namelen);
1327         namelen += dim;
1328     }
1329     else {
1330         /* the element is a non-array class */
1331         namebuf = DMNEW(char, namelen + 2 + dim);
1332         namebuf[dim] = 'L';
1333         memcpy(namebuf + dim + 1, element->name->text, namelen);
1334         namelen += (2 + dim);
1335         namebuf[namelen - 1] = ';';
1336     }
1337         memset(namebuf, '[', dim);
1338
1339     return class_new(utf_new(namebuf, namelen));
1340 }
1341
1342 /************************** function: utf_strlen ******************************
1343
1344     determine number of unicode characters in the utf string
1345
1346 *******************************************************************************/
1347
1348 u4 utf_strlen(utf *u) 
1349 {
1350     char *endpos;                   /* points behind utf string       */
1351     char *utf_ptr;                  /* current position in utf text   */
1352     u4 len = 0;                     /* number of unicode characters   */
1353
1354         if (!u) {
1355                 *exceptionptr = new_nullpointerexception();
1356                 return 0;
1357         }
1358
1359         endpos = utf_end(u);
1360         utf_ptr = u->text;
1361
1362     while (utf_ptr < endpos) {
1363                 len++;
1364                 /* next unicode character */
1365                 utf_nextu2(&utf_ptr);
1366     }
1367
1368     if (utf_ptr != endpos)
1369         /* string ended abruptly */
1370                 panic("illegal utf string"); 
1371
1372     return len;
1373 }
1374
1375
1376 /*
1377  * These are local overrides for various environment variables in Emacs.
1378  * Please do not remove this and leave it at the end of the file, where
1379  * Emacs will automagically detect them.
1380  * ---------------------------------------------------------------------
1381  * Local variables:
1382  * mode: c
1383  * indent-tabs-mode: t
1384  * c-basic-offset: 4
1385  * tab-width: 4
1386  * End:
1387  */