X-Git-Url: http://wien.tomnetworks.com/gitweb/?a=blobdiff_plain;f=tables.c;h=9cbd95e9ae8a8ea4091834cbc91f074b72abf05b;hb=8614dad1cecffc80317d6c41be07611d6ff1f1ae;hp=8797971aacf2849963412267cadbc398ffa2d337;hpb=d63183a34c979f60d72fb194dc58996635ee38a1;p=cacao.git diff --git a/tables.c b/tables.c index 8797971aa..9cbd95e9a 100644 --- a/tables.c +++ b/tables.c @@ -1,46 +1,74 @@ -/* -*- mode: c; tab-width: 4; c-basic-offset: 4 -*- */ -/****************************** tables.c *************************************** +/* tables.c - - Copyright (c) 1997 A. Krall, R. Grafl, M. Gschwind, M. Probst + Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003 + R. Grafl, A. Krall, C. Kruegel, C. Oates, R. Obermaisser, + M. Probst, S. Ring, E. Steiner, C. Thalinger, D. Thuernbeck, + P. Tomsich, J. Wenninger - See file COPYRIGHT for information on usage and disclaimer of warranties + This file is part of CACAO. - Enth"alt Supportfunktionen f"ur: - - Lesen von JavaClass-Files - - unicode-Symbole - - den Heap - - zus"atzliche Support-Funktionen + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2, or (at + your option) any later version. - Authors: Reinhard Grafl EMAIL: cacao@complang.tuwien.ac.at - Changes: Mark Probst EMAIL: cacao@complang.tuwien.ac.at - Andreas Krall EMAIL: cacao@complang.tuwien.ac.at - - Last Change: 1998/03/24 + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. -*******************************************************************************/ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + 02111-1307, USA. + + Contact: cacao@complang.tuwien.ac.at + + Authors: Reinhard Grafl + + Changes: Mark Probst + Andreas Krall + + Contains support functions for: + - Reading of Java class files + - Unicode symbols + - the heap + - additional support functions + $Id: tables.c 1482 2004-11-11 14:39:13Z twisti $ + +*/ + +#include "global.h" + +#include +#include #include +#include #include #include -#include "global.h" +#include "builtin.h" +#include "exceptions.h" +#include "types.h" +#include "native.h" +#include "options.h" #include "tables.h" +#include "loader.h" #include "asmpart.h" -#include "callargs.h" - -#include "threads/thread.h" /* schani */ +#include "statistics.h" +#include "threads/thread.h" #include "threads/locks.h" +#include "toolbox/logging.h" +#include "toolbox/memory.h" -bool runverbose = false; - -/* statistics */ -int count_utf_len = 0; /* size of utf hash */ -int count_utf_new = 0; /* calls of utf_new */ -int count_utf_new_found = 0; /* calls of utf_new with fast return */ hashtable utf_hash; /* hashtable for utf8-symbols */ hashtable string_hash; /* hashtable for javastrings */ hashtable class_hash; /* hashtable for classes */ +list unlinkedclasses; /* this is only used for eager class loading */ + + /****************************************************************************** *********************** hashtable functions ********************************** ******************************************************************************/ @@ -64,12 +92,13 @@ void init_hashtable(hashtable *hash, u4 size) hash->entries = 0; hash->size = size; - hash->ptr = MNEW (void*, size); + hash->ptr = MNEW(void*, size); /* clear table */ - for (i=0; iptr[i] = NULL; + for (i = 0; i < size; i++) hash->ptr[i] = NULL; } + /*********************** function: tables_init ***************************** creates hashtables for symboltables @@ -77,95 +106,177 @@ void init_hashtable(hashtable *hash, u4 size) *****************************************************************************/ -void tables_init () +void tables_init() { init_hashtable(&utf_hash, UTF_HASHSTART); /* hashtable for utf8-symbols */ init_hashtable(&string_hash, HASHSTART); /* hashtable for javastrings */ init_hashtable(&class_hash, HASHSTART); /* hashtable for classes */ - -#ifdef STATISTICS - count_utf_len += sizeof(utf*) * utf_hash.size; -#endif +/* if (opt_eager) */ +/* list_init(&unlinkedclasses, OFFSET(classinfo, listnode)); */ + +#if defined(STATISTICS) + if (opt_stat) + count_utf_len += sizeof(utf*) * utf_hash.size; +#endif } + /********************** function: tables_close ****************************** free memory for hashtables *****************************************************************************/ -void tables_close (stringdeleter del) +void tables_close() { - utf *u; + utf *u = NULL; literalstring *s; u4 i; /* dispose utf symbols */ - for (i=0; ihashlink; - MFREE (u->text, u1, u->blength); - FREE (u, utf); + MFREE(u->text, u1, u->blength); + FREE(u, utf); u = nextu; - } - } + } + } /* dispose javastrings */ - for (i=0; ihashlink; - del(s->string); + literalstring_free(s->string); FREE(s, literalstring); s = nexts; - } - } + } + } /* dispose hashtable structures */ - MFREE (utf_hash.ptr, void*, utf_hash.size); - MFREE (string_hash.ptr, void*, string_hash.size); - MFREE (class_hash.ptr, void*, class_hash.size); + MFREE(utf_hash.ptr, void*, utf_hash.size); + MFREE(string_hash.ptr, void*, string_hash.size); + MFREE(class_hash.ptr, void*, class_hash.size); } + /********************* function: utf_display ********************************* write utf symbol to stdout (debugging purposes) -******************************************************************************/ +*******************************************************************************/ -void utf_display (utf *u) +void utf_display(utf *u) { - char *endpos = utf_end(u); /* points behind utf string */ - char *utf_ptr = u->text; /* current position in utf text */ + char *endpos; /* points behind utf string */ + char *utf_ptr; /* current position in utf text */ - while (utf_ptrtext; + + while (utf_ptr < endpos) { /* read next unicode character */ - u2 c = utf_nextu2(&utf_ptr); - if (c>=32 && c<=127) printf ("%c",c); - else printf ("?"); + u2 c = utf_nextu2(&utf_ptr); + if (c >= 32 && c <= 127) printf("%c", c); + else printf("?"); } - fflush (stdout); + fflush(stdout); } -/************************ function: utf_sprint ******************************* + +/* utf_display_classname ******************************************************* + + write utf symbol to stdout (debugging purposes) + +*******************************************************************************/ + +void utf_display_classname(utf *u) +{ + char *endpos; /* points behind utf string */ + char *utf_ptr; /* current position in utf text */ + + if (!u) { + printf("NULL"); + fflush(stdout); + return; + } + + endpos = utf_end(u); + utf_ptr = u->text; + + while (utf_ptr < endpos) { + /* read next unicode character */ + u2 c = utf_nextu2(&utf_ptr); + if (c == '/') c = '.'; + if (c >= 32 && c <= 127) printf("%c", c); + else printf("?"); + } + + fflush(stdout); +} + + +/************************* function: log_utf ********************************* + + log utf symbol + +******************************************************************************/ + +void log_utf(utf *u) +{ + char buf[MAXLOGTEXT]; + utf_sprint(buf, u); + dolog("%s", buf); +} + + +/********************** function: log_plain_utf ****************************** + + log utf symbol (without printing "LOG: " and newline) + +******************************************************************************/ + +void log_plain_utf(utf *u) +{ + char buf[MAXLOGTEXT]; + utf_sprint(buf, u); + dolog_plain("%s", buf); +} + + +/* utf_sprint ****************************************************************** - write utf symbol into c-string (debugging purposes) + write utf symbol into c-string (debugging purposes) -******************************************************************************/ +*******************************************************************************/ -void utf_sprint (char *buffer, utf *u) +void utf_sprint(char *buffer, utf *u) { - char *endpos = utf_end(u); /* points behind utf string */ - char *utf_ptr = u->text; /* current position in utf text */ - u2 pos = 0; /* position in c-string */ + char *endpos; /* points behind utf string */ + char *utf_ptr; /* current position in utf text */ + u2 pos = 0; /* position in c-string */ + + if (!u) { + memcpy(buffer, "NULL", 5); /* 4 chars + terminating \0 */ + return; + } + + endpos = utf_end(u); + utf_ptr = u->text; - while (utf_ptrtext; + + while (utf_ptr < endpos) { + /* copy next unicode character */ + u2 c = utf_nextu2(&utf_ptr); + if (c == '/') c = '.'; + buffer[pos++] = c; + } + + /* terminate string */ + buffer[pos] = '\0'; +} + + /********************* Funktion: utf_fprint ********************************** write utf symbol into file -******************************************************************************/ +******************************************************************************/ -void utf_fprint (FILE *file, utf *u) +void utf_fprint(FILE *file, utf *u) { char *endpos = utf_end(u); /* points behind utf string */ char *utf_ptr = u->text; /* current position in utf text */ - while (utf_ptr= 32 && c <= 127) fprintf(file, "%c", c); + else fprintf(file, "?"); + } +} + + +/********************* Funktion: utf_fprint ********************************** + + write utf symbol into file + +******************************************************************************/ + +void utf_fprint_classname(FILE *file, utf *u) +{ + char *endpos = utf_end(u); /* points behind utf string */ + char *utf_ptr = u->text; /* current position in utf text */ + + if (!u) + return; + + while (utf_ptr < endpos) { + /* read next unicode character */ + u2 c = utf_nextu2(&utf_ptr); + if (c == '/') c = '.'; + + if (c >= 32 && c <= 127) fprintf(file, "%c", c); + else fprintf(file, "?"); + } +} /****************** internal function: utf_hashkey *************************** @@ -202,7 +377,7 @@ void utf_fprint (FILE *file, utf *u) #define nbs(val) ((u4) *(++text) << val) /* get next byte, left shift by val */ #define fbs(val) ((u4) *( text) << val) /* get first byte, left shift by val */ -static u4 utf_hashkey (char *text, u4 length) +static u4 utf_hashkey(char *text, u4 length) { char *start_pos = text; /* pointer to utf text */ u4 a; @@ -221,72 +396,96 @@ static u4 utf_hashkey (char *text, u4 length) case 7: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(5) ^ nbs(6); case 8: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(5) ^ nbs(6) ^ nbs(7); - case 9: a = fbs(0) ^ nbs(1) ^ nbs(2); - text++; - return a ^ nbs(4) ^ nbs(5) ^ nbs(6) ^ nbs(7) ^ nbs(8); - - case 10: a = fbs(0); - text++; - a^= nbs(2) ^ nbs(3) ^ nbs(4); - text++; - return a ^ nbs(6) ^ nbs(7) ^ nbs(8) ^ nbs(9); - - case 11: a = fbs(0); - text++; - a^= nbs(2) ^ nbs(3) ^ nbs(4); - text++; - return a ^ nbs(6) ^ nbs(7) ^ nbs(8) ^ nbs(9) ^ nbs(10); - - case 12: a = fbs(0); - text+=2; - a^= nbs(2) ^ nbs(3); - text+=1; - a^= nbs(5) ^ nbs(6) ^ nbs(7); - text+=1; - return a ^ nbs(9) ^ nbs(10); - - case 13: a = fbs(0) ^ nbs(1); - text+=1; - a^= nbs(3) ^ nbs(4); - text+=2; - a^= nbs(7) ^ nbs(8); - text+=2; - return a ^ nbs(9) ^ nbs(10); - - case 14: a = fbs(0); - text+=2; - a^= nbs(3) ^ nbs(4); - text+=2; - a^= nbs(7) ^ nbs(8); - text+=2; - return a ^ nbs(9) ^ nbs(10) ^ nbs(11); - - case 15: a = fbs(0); - text+=2; - a^= nbs(3) ^ nbs(4); - text+=2; - a^= nbs(7) ^ nbs(8); - text+=2; - return a ^ nbs(9) ^ nbs(10) ^ nbs(11); + case 9: + a = fbs(0); + a ^= nbs(1); + a ^= nbs(2); + text++; + return a ^ nbs(4) ^ nbs(5) ^ nbs(6) ^ nbs(7) ^ nbs(8); + + case 10: + a = fbs(0); + text++; + a ^= nbs(2); + a ^= nbs(3); + a ^= nbs(4); + text++; + return a ^ nbs(6) ^ nbs(7) ^ nbs(8) ^ nbs(9); + + case 11: + a = fbs(0); + text++; + a ^= nbs(2); + a ^= nbs(3); + a ^= nbs(4); + text++; + return a ^ nbs(6) ^ nbs(7) ^ nbs(8) ^ nbs(9) ^ nbs(10); + + case 12: + a = fbs(0); + text += 2; + a ^= nbs(2); + a ^= nbs(3); + text++; + a ^= nbs(5); + a ^= nbs(6); + a ^= nbs(7); + text++; + return a ^ nbs(9) ^ nbs(10); + + case 13: + a = fbs(0); + a ^= nbs(1); + text++; + a ^= nbs(3); + a ^= nbs(4); + text += 2; + a ^= nbs(7); + a ^= nbs(8); + text += 2; + return a ^ nbs(9) ^ nbs(10); + + case 14: + a = fbs(0); + text += 2; + a ^= nbs(3); + a ^= nbs(4); + text += 2; + a ^= nbs(7); + a ^= nbs(8); + text += 2; + return a ^ nbs(9) ^ nbs(10) ^ nbs(11); + + case 15: + a = fbs(0); + text += 2; + a ^= nbs(3); + a ^= nbs(4); + text += 2; + a ^= nbs(7); + a ^= nbs(8); + text += 2; + return a ^ nbs(9) ^ nbs(10) ^ nbs(11); default: /* 3 characters from beginning */ - a = fbs(0); - text+=2; - a^= nbs(3) ^ nbs(4); + a = fbs(0); + text += 2; + a ^= nbs(3); + a ^= nbs(4); - /* 2 characters from middle */ - text = start_pos + (length / 2); - a^= fbs(5); - text+=2; - a^= nbs(6); + /* 2 characters from middle */ + text = start_pos + (length / 2); + a ^= fbs(5); + text += 2; + a ^= nbs(6); - /* 3 characters from end */ - text = start_pos + length - 4; + /* 3 characters from end */ + text = start_pos + length - 4; - a^= fbs(7); - text+=1; + a ^= fbs(7); + text++; - return a ^ nbs(10) ^ nbs(11); + return a ^ nbs(10) ^ nbs(11); } } @@ -297,11 +496,12 @@ static u4 utf_hashkey (char *text, u4 length) ******************************************************************************/ -u4 unicode_hashkey (u2 *text, u2 len) +u4 unicode_hashkey(u2 *text, u2 len) { - utf_hashkey((char*) text, len); + return utf_hashkey((char*) text, len); } + /************************ function: utf_new ********************************** Creates a new utf-symbol, the text of the symbol is passed as a @@ -315,18 +515,19 @@ u4 unicode_hashkey (u2 *text, u2 len) ******************************************************************************/ -utf *utf_new (char *text, u2 length) +utf *utf_new_intern(char *text, u2 length) { u4 key; /* hashkey computed from utf-text */ u4 slot; /* slot in hashtable */ utf *u; /* hashtable element */ u2 i; - + #ifdef STATISTICS - count_utf_new++; + if (opt_stat) + count_utf_new++; #endif - key = utf_hashkey (text, length); + key = utf_hashkey(text, length); slot = key & (utf_hash.size-1); u = utf_hash.ptr[slot]; @@ -335,74 +536,104 @@ utf *utf_new (char *text, u2 length) if (u->blength == length) { /* compare text of hashtable elements */ - for (i=0; itext[i]) goto nomatch; #ifdef STATISTICS - count_utf_new_found++; + if (opt_stat) + count_utf_new_found++; #endif - /* symbol found in hashtable */ +/* log_text("symbol found in hash table");*/ + /* symbol found in hashtable */ +/* utf_display(u); + { + utf blup; + blup.blength=length; + blup.text=text; + utf_display(&blup); + }*/ return u; } - nomatch: + nomatch: u = u->hashlink; /* next element in external chain */ } #ifdef STATISTICS - count_utf_len += sizeof(utf) + length; + if (opt_stat) + count_utf_len += sizeof(utf) + length; #endif /* location in hashtable found, create new utf element */ - u = NEW (utf); - u->blength = length; /* length in bytes of utfstring */ - u->hashlink = utf_hash.ptr[slot]; /* link in external hashchain */ - u->text = mem_alloc(length); /* allocate memory for utf-text */ - memcpy(u->text,text,length); /* copy utf-text */ - utf_hash.ptr[slot] = u; /* insert symbol into table */ + u = NEW(utf); + u->blength = length; /* length in bytes of utfstring */ + u->hashlink = utf_hash.ptr[slot]; /* link in external hashchain */ + u->text = mem_alloc(length + 1);/* allocate memory for utf-text */ + memcpy(u->text, text, length); /* copy utf-text */ + u->text[length] = '\0'; + utf_hash.ptr[slot] = u; /* insert symbol into table */ - utf_hash.entries++; /* update number of entries */ + utf_hash.entries++; /* update number of entries */ - if ( utf_hash.entries > (utf_hash.size*2)) { + if (utf_hash.entries > (utf_hash.size * 2)) { /* reorganization of hashtable, average length of the external chains is approx. 2 */ - u4 i; - utf *u; - hashtable newhash; /* the new hashtable */ + u4 i; + utf *u; + hashtable newhash; /* the new hashtable */ - /* create new hashtable, double the size */ - init_hashtable(&newhash, utf_hash.size*2); - newhash.entries=utf_hash.entries; + /* create new hashtable, double the size */ + init_hashtable(&newhash, utf_hash.size * 2); + newhash.entries = utf_hash.entries; #ifdef STATISTICS - count_utf_len += sizeof(utf*) * utf_hash.size; + if (opt_stat) + count_utf_len += sizeof(utf*) * utf_hash.size; #endif - /* transfer elements to new hashtable */ - for (i=0; i hashlink; - u4 slot = (utf_hashkey(u->text,u->blength)) & (newhash.size-1); + /* transfer elements to new hashtable */ + for (i = 0; i < utf_hash.size; i++) { + u = (utf *) utf_hash.ptr[i]; + while (u) { + utf *nextu = u->hashlink; + u4 slot = utf_hashkey(u->text, u->blength) & (newhash.size - 1); - u->hashlink = (utf*) newhash.ptr[slot]; - newhash.ptr[slot] = u; + u->hashlink = (utf *) newhash.ptr[slot]; + newhash.ptr[slot] = u; - /* follow link in external hash chain */ - u = nextu; + /* follow link in external hash chain */ + u = nextu; } } - /* dispose old table */ - MFREE (utf_hash.ptr, void*, utf_hash.size); - utf_hash = newhash; + /* dispose old table */ + MFREE(utf_hash.ptr, void*, utf_hash.size); + utf_hash = newhash; } - + return u; } +utf *utf_new(char *text, u2 length) +{ + utf *r; + +#if defined(USE_THREADS) && defined(NATIVE_THREADS) + tables_lock(); +#endif + + r = utf_new_intern(text, length); + +#if defined(USE_THREADS) && defined(NATIVE_THREADS) + tables_unlock(); +#endif + + return r; +} + + /********************* function: utf_new_char ******************************** creates a new utf symbol, the text for this symbol is passed @@ -410,11 +641,39 @@ utf *utf_new (char *text, u2 length) ******************************************************************************/ -utf *utf_new_char (char *text) +utf *utf_new_char(char *text) { return utf_new(text, strlen(text)); } + +/********************* function: utf_new_char ******************************** + + creates a new utf symbol, the text for this symbol is passed + as a c-string ( = char* ) + "." characters are going to be replaced by "/". since the above function is + used often, this is a separte function, instead of an if + +******************************************************************************/ + +utf *utf_new_char_classname(char *text) +{ + if (strchr(text, '.')) { + char *txt = strdup(text); + char *end = txt + strlen(txt); + char *c; + utf *tmpRes; + for (c = txt; c < end; c++) + if (*c == '.') *c = '/'; + tmpRes = utf_new(txt, strlen(txt)); + free(txt); + return tmpRes; + + } else + return utf_new(text, strlen(text)); +} + + /************************** Funktion: utf_show ****************************** writes the utf symbols in the utfhash to stdout and @@ -424,7 +683,7 @@ utf *utf_new_char (char *text) *****************************************************************************/ -void utf_show () +void utf_show() { #define CHAIN_LIMIT 20 /* limit for seperated enumeration */ @@ -447,14 +706,14 @@ void utf_show () utf_display (u); printf ("' "); u = u->hashlink; - } + } printf ("\n"); - } - } + + } printf ("UTF-HASH: %d slots for %d entries\n", - (int) utf_hash.size, (int) utf_hash.entries ); + (int) utf_hash.size, (int) utf_hash.entries ); if (utf_hash.entries == 0) @@ -475,7 +734,7 @@ void utf_show () while (u) { u = u->hashlink; chain_length++; - } + } /* update sum of all chainlengths */ sum_chainlength+=chain_length; @@ -492,7 +751,7 @@ void utf_show () /* update number of hashchains of current length */ chain_count[chain_length]++; - } + } /* display results */ for (i=1;itext; /* current position in utf text */ + char logtext[MAXLOGTEXT]; - if (descriptor->blength < 1) panic ("Type-Descriptor is empty string"); + if (descriptor->blength < 1) panic("Type-Descriptor is empty string"); switch (*utf_ptr++) { case 'B': @@ -538,21 +798,22 @@ u2 desc_to_type (utf *descriptor) case '[': return TYPE_ADDRESS; } - sprintf (logtext, "Invalid Type-Descriptor: "); - utf_sprint (logtext+strlen(logtext), descriptor); - error (); + sprintf(logtext, "Invalid Type-Descriptor: "); + utf_sprint(logtext+strlen(logtext), descriptor); + error("%s",logtext); + return 0; } -/********************** Funktion: desc_typesize ******************************* +/********************** Function: desc_typesize ******************************* - Berechnet die L"ange (in Byte) eines Datenelements gegebenen Typs, - der durch den Typdescriptor gegeben ist. + Calculates the lenght in bytes needed for a data element of the type given + by its type descriptor. ******************************************************************************/ -u2 desc_typesize (utf *descriptor) +u2 desc_typesize(utf *descriptor) { switch (desc_to_type(descriptor)) { case TYPE_INT: return 4; @@ -575,48 +836,154 @@ u2 desc_typesize (utf *descriptor) u2 utf_nextu2(char **utf_ptr) { /* uncompressed unicode character */ - u2 unicode_char; + u2 unicode_char = 0; /* current position in utf text */ - unsigned char *utf = (unsigned char *) (*utf_ptr); + unsigned char *utf = (unsigned char *) (*utf_ptr); /* bytes representing the unicode character */ unsigned char ch1, ch2, ch3; /* number of bytes used to represent the unicode character */ - int len; + int len = 0; switch ((ch1 = utf[0]) >> 4) { - default: /* 1 byte */ - (*utf_ptr)++; - return ch1; - case 0xC: - case 0xD: /* 2 bytes */ - if (((ch2 = utf[1]) & 0xC0) == 0x80) { - unsigned char high = ch1 & 0x1F; - unsigned char low = ch2 & 0x3F; - unicode_char = (high << 6) + low; - len = 2; - } - break; - - case 0xE: /* 2 or 3 bytes */ - if (((ch2 = utf[1]) & 0xC0) == 0x80) { - if (((ch3 = utf[2]) & 0xC0) == 0x80) { - unsigned char low = ch3 & 0x3f; - unsigned char mid = ch2 & 0x3f; - unsigned char high = ch1 & 0x0f; - unicode_char = (((high << 6) + mid) << 6) + low; - len = 3; - } else - len = 2; - } - break; + default: /* 1 byte */ + (*utf_ptr)++; + return (u2) ch1; + case 0xC: + case 0xD: /* 2 bytes */ + if (((ch2 = utf[1]) & 0xC0) == 0x80) { + unsigned char high = ch1 & 0x1F; + unsigned char low = ch2 & 0x3F; + unicode_char = (high << 6) + low; + len = 2; + } + break; + + case 0xE: /* 2 or 3 bytes */ + if (((ch2 = utf[1]) & 0xC0) == 0x80) { + if (((ch3 = utf[2]) & 0xC0) == 0x80) { + unsigned char low = ch3 & 0x3f; + unsigned char mid = ch2 & 0x3f; + unsigned char high = ch1 & 0x0f; + unicode_char = (((high << 6) + mid) << 6) + low; + len = 3; + } else + len = 2; + } + break; } /* update position in utf-text */ *utf_ptr = (char *) (utf + len); return unicode_char; } + + +/********************* function: is_valid_utf ******************************** + + return true if the given string is a valid UTF-8 string + + utf_ptr...points to first character + end_pos...points after last character + +******************************************************************************/ + +static unsigned long min_codepoint[6] = {0,1L<<7,1L<<11,1L<<16,1L<<21,1L<<26}; + +bool +is_valid_utf(char *utf_ptr,char *end_pos) +{ + int bytes; + int len,i; + char c; + unsigned long v; + + if (end_pos < utf_ptr) return false; + bytes = end_pos - utf_ptr; + while (bytes--) { + c = *utf_ptr++; + /*dolog("%c %02x",c,c);*/ + if (!c) return false; /* 0x00 is not allowed */ + if ((c & 0x80) == 0) continue; /* ASCII */ + + if ((c & 0xe0) == 0xc0) len = 1; /* 110x xxxx */ + else if ((c & 0xf0) == 0xe0) len = 2; /* 1110 xxxx */ + else if ((c & 0xf8) == 0xf0) len = 3; /* 1111 0xxx */ + else if ((c & 0xfc) == 0xf8) len = 4; /* 1111 10xx */ + else if ((c & 0xfe) == 0xfc) len = 5; /* 1111 110x */ + else return false; /* invalid leading byte */ + + if (len > 2) return false; /* Java limitation */ + + v = (unsigned long)c & (0x3f >> len); + + if ((bytes -= len) < 0) return false; /* missing bytes */ + + for (i = len; i--; ) { + c = *utf_ptr++; + /*dolog(" %c %02x",c,c);*/ + if ((c & 0xc0) != 0x80) /* 10xx xxxx */ + return false; + v = (v<<6) | (c & 0x3f); + } + + /* dolog("v=%d",v);*/ + + if (v == 0) { + if (len != 1) return false; /* Java special */ + } + else { + /* Sun Java seems to allow overlong UTF-8 encodings */ + + if (v < min_codepoint[len]) { /* overlong UTF-8 */ + if (!opt_liberalutf) + fprintf(stderr,"WARNING: Overlong UTF-8 sequence found.\n"); + /* XXX change this to panic? */ + } + } + + /* surrogates in UTF-8 seem to be allowed in Java classfiles */ + /* if (v >= 0xd800 && v <= 0xdfff) return false; */ /* surrogates */ + + /* even these seem to be allowed */ + /* if (v == 0xfffe || v == 0xffff) return false; */ /* invalid codepoints */ + } + + return true; +} -/******************** Funktion: class_new ************************************** +/********************* function: is_valid_name ******************************* + + return true if the given string may be used as a class/field/method name. + (Currently this only disallows empty strings and control characters.) + + NOTE: The string is assumed to have passed is_valid_utf! + + utf_ptr...points to first character + end_pos...points after last character + +******************************************************************************/ + +bool +is_valid_name(char *utf_ptr,char *end_pos) +{ + if (end_pos <= utf_ptr) return false; /* disallow empty names */ + + while (utf_ptr < end_pos) { + unsigned char c = *utf_ptr++; + + if (c < 0x20) return false; /* disallow control characters */ + if (c == 0xc0 && (unsigned char)*utf_ptr == 0x80) return false; /* disallow zero */ + } + return true; +} + +bool +is_valid_name_utf(utf *u) +{ + return is_valid_name(u->text,utf_end(u)); +} + +/******************** Function: class_new ************************************** searches for the class with the specified name in the classes hashtable, if there is no such class a new classinfo structure is created and inserted @@ -624,64 +991,79 @@ u2 utf_nextu2(char **utf_ptr) *******************************************************************************/ -classinfo *class_new (utf *u) +classinfo *class_new_intern(utf *classname) { - classinfo *c; /* hashtable element */ - u4 key; /* hashkey computed from classname */ + classinfo *c; /* hashtable element */ + u4 key; /* hashkey computed from classname */ u4 slot; /* slot in hashtable */ u2 i; - key = utf_hashkey (u->text, u->blength); - slot = key & (class_hash.size-1); + key = utf_hashkey(classname->text, classname->blength); + slot = key & (class_hash.size - 1); c = class_hash.ptr[slot]; /* search external hash chain for the class */ while (c) { - if (c->name->blength == u->blength) { - for (i=0; iblength; i++) - if (u->text[i] != c->name->text[i]) goto nomatch; + if (c->name->blength == classname->blength) { + for (i = 0; i < classname->blength; i++) + if (classname->text[i] != c->name->text[i]) goto nomatch; - /* class found in hashtable */ - return c; - } + /* class found in hashtable */ + return c; + } - nomatch: + nomatch: c = c->hashlink; /* next element in external chain */ - } + } /* location in hashtable found, create new classinfo structure */ -#ifdef STATISTICS - count_class_infos += sizeof(classinfo); +#if defined(STATISTICS) + if (opt_stat) + count_class_infos += sizeof(classinfo); #endif - c = NEW (classinfo); - c -> flags = 0; - c -> name = u; - c -> cpcount = 0; - c -> cptags = NULL; - c -> cpinfos = NULL; - c -> super = NULL; - c -> sub = NULL; - c -> nextsub = NULL; - c -> interfacescount = 0; - c -> interfaces = NULL; - c -> fieldscount = 0; - c -> fields = NULL; - c -> methodscount = 0; - c -> methods = NULL; - c -> linked = false; - c -> index = 0; - c -> instancesize = 0; - c -> header.vftbl = NULL; - c -> innerclasscount = 0; - c -> innerclass = NULL; - c -> vftbl = NULL; - c -> initialized = false; - - /* prepare loading of the class */ - list_addlast (&unloadedclasses, c); + if (initverbose) { + char logtext[MAXLOGTEXT]; + sprintf(logtext, "Creating class: "); + utf_sprint_classname(logtext + strlen(logtext), classname); + log_text(logtext); + } + c = GCNEW(classinfo, 1); /*JOWENN: NEW*/ + /*c=NEW(classinfo);*/ + c->vmClass = 0; + c->flags = 0; + c->name = classname; + c->packagename = NULL; + c->cpcount = 0; + c->cptags = NULL; + c->cpinfos = NULL; + c->super = NULL; + c->sub = NULL; + c->nextsub = NULL; + c->interfacescount = 0; + c->interfaces = NULL; + c->fieldscount = 0; + c->fields = NULL; + c->methodscount = 0; + c->methods = NULL; + c->linked = false; + c->loaded = false; + c->index = 0; + c->instancesize = 0; + c->header.vftbl = NULL; + c->innerclasscount = 0; + c->innerclass = NULL; + c->vftbl = NULL; + c->initialized = false; + c->initializing = false; + c->classvftbl = false; + c->classUsed = 0; + c->impldBy = NULL; + c->classloader = NULL; + c->sourcefile = NULL; + /* insert class into the hashtable */ c->hashlink = class_hash.ptr[slot]; class_hash.ptr[slot] = c; @@ -689,127 +1071,345 @@ classinfo *class_new (utf *u) /* update number of hashtable-entries */ class_hash.entries++; - if ( class_hash.entries > (class_hash.size*2)) { + if (class_hash.entries > (class_hash.size * 2)) { - /* reorganization of hashtable, average length of - the external chains is approx. 2 */ + /* reorganization of hashtable, average length of + the external chains is approx. 2 */ - u4 i; - classinfo *c; - hashtable newhash; /* the new hashtable */ + u4 i; + classinfo *c; + hashtable newhash; /* the new hashtable */ - /* create new hashtable, double the size */ - init_hashtable(&newhash, class_hash.size*2); - newhash.entries = class_hash.entries; + /* create new hashtable, double the size */ + init_hashtable(&newhash, class_hash.size * 2); + newhash.entries = class_hash.entries; - /* transfer elements to new hashtable */ - for (i=0; i hashlink; - u4 slot = (utf_hashkey(c->name->text,c->name->blength)) & (newhash.size-1); + /* transfer elements to new hashtable */ + for (i = 0; i < class_hash.size; i++) { + c = (classinfo *) class_hash.ptr[i]; + while (c) { + classinfo *nextc = c->hashlink; + u4 slot = (utf_hashkey(c->name->text, c->name->blength)) & (newhash.size - 1); - c->hashlink = newhash.ptr[slot]; - newhash.ptr[slot] = c; + c->hashlink = newhash.ptr[slot]; + newhash.ptr[slot] = c; - c = nextc; + c = nextc; } } - /* dispose old table */ - MFREE (class_hash.ptr, void*, class_hash.size); - class_hash = newhash; + /* dispose old table */ + MFREE(class_hash.ptr, void*, class_hash.size); + class_hash = newhash; } - + + /* Array classes need further initialization. */ + if (c->name->text[0] == '[') { + /* Array classes are not loaded from classfiles. */ + c->loaded = true; + class_new_array(c); + c->packagename = array_packagename; + + } else { + /* Find the package name */ + /* Classes in the unnamed package keep packagename == NULL. */ + char *p = utf_end(c->name) - 1; + char *start = c->name->text; + for (;p > start; --p) { + if (*p == '.') { + c->packagename = utf_new(start, p - start); + break; + } + } + } +#if defined(USE_THREADS) && defined(NATIVE_THREADS) + initObjectLock(&c->header); +#endif + return c; } -/******************** Funktion: class_get ************************************** + +classinfo *class_new(utf *classname) +{ + classinfo *c; + +#if defined(USE_THREADS) && defined(NATIVE_THREADS) + tables_lock(); +#endif + + c = class_new_intern(classname); + + /* we support eager class loading and linking on demand */ + + if (opt_eager) { + classinfo *tc; + classinfo *tmp; + + list_init(&unlinkedclasses, OFFSET(classinfo, listnode)); + + if (!c->loaded) { + if (!class_load(c)) { +#if defined(USE_THREADS) && defined(NATIVE_THREADS) + tables_unlock(); +#endif + return c; + } + } + + /* link all referenced classes */ + + tc = list_first(&unlinkedclasses); + + while (tc) { + /* skip the current loaded/linked class */ + if (tc != c) { + if (!class_link(tc)) { +#if defined(USE_THREADS) && defined(NATIVE_THREADS) + tables_unlock(); +#endif + return c; + } + } + + /* we need a tmp variable here, because list_remove sets prev and + next to NULL */ + tmp = list_next(&unlinkedclasses, tc); + list_remove(&unlinkedclasses, tc); + tc = tmp; + } + + if (!c->linked) { + if (!class_link(c)) { +#if defined(USE_THREADS) && defined(NATIVE_THREADS) + tables_unlock(); +#endif + return c; + } + } + } + +#if defined(USE_THREADS) && defined(NATIVE_THREADS) + tables_unlock(); +#endif + + return c; +} + + +/******************** Function: class_get ************************************** searches for the class with the specified name in the classes hashtable if there is no such class NULL is returned *******************************************************************************/ -classinfo *class_get (utf *u) +classinfo *class_get(utf *classname) { classinfo *c; /* hashtable element */ u4 key; /* hashkey computed from classname */ u4 slot; /* slot in hashtable */ u2 i; - key = utf_hashkey (u->text, u->blength); + key = utf_hashkey(classname->text, classname->blength); slot = key & (class_hash.size-1); c = class_hash.ptr[slot]; /* search external hash-chain */ while (c) { - if (c->name->blength == u->blength) { - + if (c->name->blength == classname->blength) { /* compare classnames */ - for (i=0; iblength; i++) - if (u->text[i] != c->name->text[i]) goto nomatch; + for (i = 0; i < classname->blength; i++) + if (classname->text[i] != c->name->text[i]) + goto nomatch; /* class found in hashtable */ return c; - } + } - nomatch: + nomatch: c = c->hashlink; - } + } /* class not found */ return NULL; } -/************************** function: utf_strlen ****************************** +/* class_remove **************************************************************** - determine number of unicode characters in the utf string + removes the class entry wth the specified name in the classes hashtable, + furthermore the class' resources are freed + if there is no such class false is returned *******************************************************************************/ -u4 utf_strlen(utf *u) +bool class_remove(classinfo *c) { - char *endpos = utf_end(u); /* points behind utf string */ - char *utf_ptr = u->text; /* current position in utf text */ - u4 len = 0; /* number of unicode characters */ + classinfo *tc; /* hashtable element */ + classinfo *pc; + u4 key; /* hashkey computed from classname */ + u4 slot; /* slot in hashtable */ + u2 i; - while (utf_ptrname->text, c->name->blength); + slot = key & (class_hash.size - 1); + tc = class_hash.ptr[slot]; + pc = NULL; - if (utf_ptr!=endpos) - /* string ended abruptly */ - panic("illegal utf string"); + /* search external hash-chain */ + while (tc) { + if (tc->name->blength == c->name->blength) { + + /* compare classnames */ + for (i = 0; i < c->name->blength; i++) + if (tc->name->text[i] != c->name->text[i]) + goto nomatch; - return len; -} + /* class found in hashtable */ + if (!pc) { + class_hash.ptr[slot] = tc->hashlink; + } else { + pc->hashlink = tc->hashlink; + } + class_free(tc); + return true; + } + + nomatch: + pc = tc; + tc = tc->hashlink; + } + /* class not found */ + return false; +} +/***************** Function: class_array_of *********************************** - + Returns an array class with the given component class. + The array class is dynamically created if neccessary. - +*******************************************************************************/ +classinfo *class_array_of(classinfo *component) +{ + int namelen; + char *namebuf; + classinfo *c; + + /* Assemble the array class name */ + namelen = component->name->blength; + + if (component->name->text[0] == '[') { + /* the component is itself an array */ + namebuf = DMNEW(char, namelen + 1); + namebuf[0] = '['; + memcpy(namebuf + 1, component->name->text, namelen); + namelen++; + + } else { + /* the component is a non-array class */ + namebuf = DMNEW(char, namelen + 3); + namebuf[0] = '['; + namebuf[1] = 'L'; + memcpy(namebuf + 2, component->name->text, namelen); + namebuf[2 + namelen] = ';'; + namelen += 3; + } + /* load this class ;-) and link it */ + c = class_new(utf_new(namebuf, namelen)); + c->loaded = 1; + class_link(c); + return c; +} +/*************** Function: class_multiarray_of ******************************** + Returns an array class with the given dimension and element class. + The array class is dynamically created if neccessary. +*******************************************************************************/ +classinfo *class_multiarray_of(int dim, classinfo *element) +{ + int namelen; + char *namebuf; + + if (dim < 1) + panic("Invalid array dimension requested"); + + /* Assemble the array class name */ + namelen = element->name->blength; + + if (element->name->text[0] == '[') { + /* the element is itself an array */ + namebuf = DMNEW(char, namelen + dim); + memcpy(namebuf + dim, element->name->text, namelen); + namelen += dim; + } + else { + /* the element is a non-array class */ + namebuf = DMNEW(char, namelen + 2 + dim); + namebuf[dim] = 'L'; + memcpy(namebuf + dim + 1, element->name->text, namelen); + namelen += (2 + dim); + namebuf[namelen - 1] = ';'; + } + memset(namebuf, '[', dim); + + return class_new(utf_new(namebuf, namelen)); +} + +/************************** function: utf_strlen ****************************** + + determine number of unicode characters in the utf string +*******************************************************************************/ +u4 utf_strlen(utf *u) +{ + char *endpos; /* points behind utf string */ + char *utf_ptr; /* current position in utf text */ + u4 len = 0; /* number of unicode characters */ + if (!u) { + *exceptionptr = new_nullpointerexception(); + return 0; + } + endpos = utf_end(u); + utf_ptr = u->text; + while (utf_ptr < endpos) { + len++; + /* next unicode character */ + utf_nextu2(&utf_ptr); + } + if (utf_ptr != endpos) + /* string ended abruptly */ + panic("illegal utf string"); + return len; +} +/* + * These are local overrides for various environment variables in Emacs. + * Please do not remove this and leave it at the end of the file, where + * Emacs will automagically detect them. + * --------------------------------------------------------------------- + * Local variables: + * mode: c + * indent-tabs-mode: t + * c-basic-offset: 4 + * tab-width: 4 + * End: + */