X-Git-Url: http://wien.tomnetworks.com/gitweb/?a=blobdiff_plain;f=tables.c;h=9cbd95e9ae8a8ea4091834cbc91f074b72abf05b;hb=8614dad1cecffc80317d6c41be07611d6ff1f1ae;hp=fd979fd6dd8dba8deb960a37f400b7323095cf94;hpb=3a5f5900ebb2b3f0fc418b5a6a0d551db494fc12;p=cacao.git diff --git a/tables.c b/tables.c index fd979fd6d..9cbd95e9a 100644 --- a/tables.c +++ b/tables.c @@ -1,638 +1,791 @@ -/* -*- mode: c; tab-width: 4; c-basic-offset: 4 -*- */ -/****************************** tables.c *************************************** +/* tables.c - - Copyright (c) 1997 A. Krall, R. Grafl, M. Gschwind, M. Probst + Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003 + R. Grafl, A. Krall, C. Kruegel, C. Oates, R. Obermaisser, + M. Probst, S. Ring, E. Steiner, C. Thalinger, D. Thuernbeck, + P. Tomsich, J. Wenninger - See file COPYRIGHT for information on usage and disclaimer of warranties + This file is part of CACAO. - Enth"alt Supportfunktionen f"ur: - - Lesen von JavaClass-Files - - unicode-Symbole - - den Heap - - zus"atzliche Support-Funktionen + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2, or (at + your option) any later version. - Authors: Reinhard Grafl EMAIL: cacao@complang.tuwien.ac.at - Changes: Mark Probst EMAIL: cacao@complang.tuwien.ac.at - Andreas Krall EMAIL: cacao@complang.tuwien.ac.at + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. - Last Change: 1998/03/24 + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + 02111-1307, USA. -*******************************************************************************/ + Contact: cacao@complang.tuwien.ac.at + + Authors: Reinhard Grafl + + Changes: Mark Probst + Andreas Krall + + Contains support functions for: + - Reading of Java class files + - Unicode symbols + - the heap + - additional support functions + + $Id: tables.c 1482 2004-11-11 14:39:13Z twisti $ + +*/ + +#include "global.h" +#include +#include #include +#include #include #include -#include "global.h" +#include "builtin.h" +#include "exceptions.h" +#include "types.h" +#include "native.h" +#include "options.h" #include "tables.h" +#include "loader.h" #include "asmpart.h" -#include "callargs.h" - -#include "threads/thread.h" /* schani */ +#include "statistics.h" +#include "threads/thread.h" #include "threads/locks.h" +#include "toolbox/logging.h" +#include "toolbox/memory.h" -bool runverbose = false; +hashtable utf_hash; /* hashtable for utf8-symbols */ +hashtable string_hash; /* hashtable for javastrings */ +hashtable class_hash; /* hashtable for classes */ -int count_unicode_len = 0; +list unlinkedclasses; /* this is only used for eager class loading */ /****************************************************************************** -************************* Der Dateien-Sauger ********************************** -******************************************************************************* + *********************** hashtable functions ********************************** + ******************************************************************************/ - dient zum Behandeln von Java-ClassFiles ("offnen, schlie"sen, - einlesen von 8-, 16-, 32-, 64-bit Integers und 32-, 64- bit Floats) - -******************************************************************************/ +/* hashsize must be power of 2 */ -static FILE *classfile = NULL; /* File-handle der gerade gelesenen Datei */ -static char *classpath = ""; /* Suchpfad f"ur die ClassFiles */ +#define UTF_HASHSTART 16384 /* initial size of utf-hash */ +#define HASHSTART 2048 /* initial size of javastring and class-hash */ +/******************** function: init_hashtable ****************************** -/************************** Funktion: suck_init ****************************** + Initializes a hashtable structure and allocates memory. + The parameter size specifies the initial size of the hashtable. + +*****************************************************************************/ - Wird zu Programmstart einmal aufgerufen und setzt den Suchpfad f"ur - Klassenfiles +void init_hashtable(hashtable *hash, u4 size) +{ + u4 i; -******************************************************************************/ + hash->entries = 0; + hash->size = size; + hash->ptr = MNEW(void*, size); -void suck_init (char *cpath) -{ - classfile = NULL; - classpath = cpath; + /* clear table */ + for (i = 0; i < size; i++) hash->ptr[i] = NULL; } -/************************** Funktion: suck_start ****************************** +/*********************** function: tables_init ***************************** - "Offnet die Datei f"ur die Klasse des gegebenen Namens zum Lesen. - Dabei werden alle im Suchpfad angegebenen Verzeichnisse durchsucht, - bis eine entsprechende Datei ( .class) gefunden wird. + creates hashtables for symboltables + (called once at startup) -******************************************************************************/ +*****************************************************************************/ -bool suck_start (unicode *classname) +void tables_init() { -#define MAXFILENAME 1000 /* Maximale Langes des Dateinamens plus Pfad */ - - char filename[MAXFILENAME+10]; /* Platz fuer '.class' */ - u2 filenamelen; - char *pathpos; - u2 i,c; - - - pathpos = classpath; - - while (*pathpos) { - while ( *pathpos == ':' ) pathpos++; - - filenamelen=0; - while ( (*pathpos) && (*pathpos!=':') ) { - PANICIF (filenamelen >= MAXFILENAME, "Filename too long") ; - - filename[filenamelen++] = *(pathpos++); - } + init_hashtable(&utf_hash, UTF_HASHSTART); /* hashtable for utf8-symbols */ + init_hashtable(&string_hash, HASHSTART); /* hashtable for javastrings */ + init_hashtable(&class_hash, HASHSTART); /* hashtable for classes */ - filename[filenamelen++] = '/'; - - for (i=0; i < classname -> length; i++) { - PANICIF (filenamelen >= MAXFILENAME, "Filename too long"); - - c = classname -> text [i]; - if (c=='/') c = '/'; /* Slashes im Namen passen zu UNIX */ - else { - if ( c<=' ' || c>'z') { - c = '?'; - } - } - - filename[filenamelen++] = c; - } - - strcpy (filename+filenamelen, ".class"); +/* if (opt_eager) */ +/* list_init(&unlinkedclasses, OFFSET(classinfo, listnode)); */ - classfile = fopen(filename, "r"); - if (classfile) { - return true; - } - - - } - - sprintf (logtext,"Can not open class file '%s'", filename); - error(); - return false; +#if defined(STATISTICS) + if (opt_stat) + count_utf_len += sizeof(utf*) * utf_hash.size; +#endif } -/************************** Funktion: suck_stop ******************************* +/********************** function: tables_close ****************************** - Schlie"st die offene Datei wieder. + free memory for hashtables -******************************************************************************/ +*****************************************************************************/ -void suck_stop () +void tables_close() { - u4 rest=0; - u1 dummy; + utf *u = NULL; + literalstring *s; + u4 i; - while ( fread (&dummy, 1,1, classfile) > 0) rest++; - if (rest) { - sprintf (logtext,"There are %d access bytes at end of classfile", - (int) rest); - dolog(); - } - - fclose (classfile); - classfile = NULL; + /* dispose utf symbols */ + for (i = 0; i < utf_hash.size; i++) { + u = utf_hash.ptr[i]; + while (u) { + /* process elements in external hash chain */ + utf *nextu = u->hashlink; + MFREE(u->text, u1, u->blength); + FREE(u, utf); + u = nextu; + } + } + + /* dispose javastrings */ + for (i = 0; i < string_hash.size; i++) { + s = string_hash.ptr[i]; + while (u) { + /* process elements in external hash chain */ + literalstring *nexts = s->hashlink; + literalstring_free(s->string); + FREE(s, literalstring); + s = nexts; + } + } + + /* dispose hashtable structures */ + MFREE(utf_hash.ptr, void*, utf_hash.size); + MFREE(string_hash.ptr, void*, string_hash.size); + MFREE(class_hash.ptr, void*, class_hash.size); } - -/************************** Lesefunktionen *********************************** +/********************* function: utf_display ********************************* - Lesen von der Datei in verschieden grossen Paketen - (8,16,32,64-bit Integer oder Float) + write utf symbol to stdout (debugging purposes) -*****************************************************************************/ +*******************************************************************************/ -void suck_nbytes (u1 *buffer, u4 len) +void utf_display(utf *u) { - if ( fread (buffer, 1, len, classfile) != len) panic ("Unexpected EOF"); -} + char *endpos; /* points behind utf string */ + char *utf_ptr; /* current position in utf text */ + if (!u) { + printf("NULL"); + fflush(stdout); + return; + } -void skip_nbytes (u4 len) -{ - u4 i; - for (i=0; itext; + while (utf_ptr < endpos) { + /* read next unicode character */ + u2 c = utf_nextu2(&utf_ptr); + if (c >= 32 && c <= 127) printf("%c", c); + else printf("?"); + } -u1 suck_u1 () -{ - u1 b; - if ( fread (&b, 1,1, classfile) != 1) panic ("Unexpected EOF"); - return b; + fflush(stdout); } -s1 suck_s1 () -{ - s1 b; - if ( fread (&b, 1,1, classfile) != 1) panic ("Unexpected EOF"); - return b; -} +/* utf_display_classname ******************************************************* -u2 suck_u2 () -{ - u1 b[2]; - if ( fread (b, 1,2, classfile) != 2) panic ("Unexpected EOF"); - return (b[0]<<8) + b[1]; -} + write utf symbol to stdout (debugging purposes) + +*******************************************************************************/ -s2 suck_s2 () +void utf_display_classname(utf *u) { - return suck_u2 (); -} + char *endpos; /* points behind utf string */ + char *utf_ptr; /* current position in utf text */ + if (!u) { + printf("NULL"); + fflush(stdout); + return; + } -u4 suck_u4 () -{ - u1 b[4]; - u4 v; - if ( fread (b, 1,4, classfile) != 4) panic ("Unexpected EOF"); - v = ( ((u4)b[0]) <<24) + ( ((u4)b[1])<<16) + ( ((u4)b[2])<<8) + ((u4)b[3]); - return v; -} + endpos = utf_end(u); + utf_ptr = u->text; -s4 suck_s4 () -{ - s4 v = suck_u4 (); - return v; -} + while (utf_ptr < endpos) { + /* read next unicode character */ + u2 c = utf_nextu2(&utf_ptr); + if (c == '/') c = '.'; + if (c >= 32 && c <= 127) printf("%c", c); + else printf("?"); + } -u8 suck_u8 () -{ -#if U8_AVAILABLE - u8 lo,hi; - hi = suck_u4(); - lo = suck_u4(); - return (hi<<32) + lo; -#else - u8 v; - v.high = suck_u4(); - v.low = suck_u4(); - return v; -#endif + fflush(stdout); } -s8 suck_s8 () -{ - return suck_u8 (); -} - -float suck_float () -{ - float f; - -#if !WORDS_BIGENDIAN - u1 buffer[4]; - u2 i; - for (i=0; i<4; i++) buffer[3-i] = suck_u1 (); - memcpy ( (u1*) (&f), buffer, 4); -#else - suck_nbytes ( (u1*) (&f), 4 ); -#endif +/************************* function: log_utf ********************************* - PANICIF (sizeof(float) != 4, "Incompatible float-format"); - - return f; -} + log utf symbol +******************************************************************************/ -double suck_double () +void log_utf(utf *u) { - double d; - -#if !WORDS_BIGENDIAN - u1 buffer[8]; - u2 i; - for (i=0; i<8; i++) buffer[7-i] = suck_u1 (); - memcpy ( (u1*) (&d), buffer, 8); -#else - suck_nbytes ( (u1*) (&d), 8 ); -#endif - - PANICIF (sizeof(double) != 8, "Incompatible double-format" ); - - return d; + char buf[MAXLOGTEXT]; + utf_sprint(buf, u); + dolog("%s", buf); } +/********************** function: log_plain_utf ****************************** + log utf symbol (without printing "LOG: " and newline) -/****************************************************************************** -******************** Der Unicode-Symbol-Verwalter ***************************** -******************************************************************************* - - legt eine Hashtabelle f"ur unicode-Symbole an und verwaltet - das Eintragen neuer Symbole - ******************************************************************************/ +void log_plain_utf(utf *u) +{ + char buf[MAXLOGTEXT]; + utf_sprint(buf, u); + dolog_plain("%s", buf); +} -#define UNICODESTART 2187 /* Startgr"osse: moeglichst gross und prim */ +/* utf_sprint ****************************************************************** + + write utf symbol into c-string (debugging purposes) -static u4 unicodeentries; /* Anzahl der Eintr"age in der Tabelle */ -static u4 unicodehashsize; /* Gr"osse der Tabelle */ -static unicode ** unicodehash; /* Zeiger auf die Tabelle selbst */ +*******************************************************************************/ +void utf_sprint(char *buffer, utf *u) +{ + char *endpos; /* points behind utf string */ + char *utf_ptr; /* current position in utf text */ + u2 pos = 0; /* position in c-string */ -/*********************** Funktion: unicode_init ****************************** + if (!u) { + memcpy(buffer, "NULL", 5); /* 4 chars + terminating \0 */ + return; + } - Initialisiert die unicode-Symboltabelle (muss zu Systemstart einmal - aufgerufen werden) - -*****************************************************************************/ + endpos = utf_end(u); + utf_ptr = u->text; -void unicode_init () -{ - u4 i; - -#ifdef STATISTICS - count_unicode_len += sizeof(unicode*) * unicodehashsize; -#endif + while (utf_ptr < endpos) + /* copy next unicode character */ + buffer[pos++] = utf_nextu2(&utf_ptr); - unicodeentries = 0; - unicodehashsize = UNICODESTART; - unicodehash = MNEW (unicode*, unicodehashsize); - for (i=0; ihashlink; + char *endpos; /* points behind utf string */ + char *utf_ptr; /* current position in utf text */ + u2 pos = 0; /* position in c-string */ - if (u->string) del (u->string); - - MFREE (u->text, u2, u->length); - FREE (u, unicode); - u = nextu; - } - } - MFREE (unicodehash, unicode*, unicodehashsize); + if (!u) { + memcpy(buffer, "NULL", 5); /* 4 chars + terminating \0 */ + return; + } + + endpos = utf_end(u); + utf_ptr = u->text; + + while (utf_ptr < endpos) { + /* copy next unicode character */ + u2 c = utf_nextu2(&utf_ptr); + if (c == '/') c = '.'; + buffer[pos++] = c; + } + + /* terminate string */ + buffer[pos] = '\0'; } -/********************* Funktion: unicode_display ****************************** +/********************* Funktion: utf_fprint ********************************** - Gibt ein unicode-Symbol auf stdout aus (zu Debugzwecken) + write utf symbol into file ******************************************************************************/ -void unicode_display (unicode *u) +void utf_fprint(FILE *file, utf *u) { - u2 i,c; - for (i=0; i < u->length; i++) { - c = u->text[i]; - if (c>=32 && c<=127) printf ("%c",c); - else printf ("?"); - } - fflush (stdout); -} - + char *endpos = utf_end(u); /* points behind utf string */ + char *utf_ptr = u->text; /* current position in utf text */ -/********************* Funktion: unicode_sprint ****************************** - - Schreibt ein unicode-Symbol in einen C-String + if (!u) + return; -******************************************************************************/ + while (utf_ptr < endpos) { + /* read next unicode character */ + u2 c = utf_nextu2(&utf_ptr); -void unicode_sprint (char *buffer, unicode *u) -{ - u2 i; - for (i=0; i < u->length; i++) buffer[i] = u->text[i]; - buffer[i] = '\0'; + if (c >= 32 && c <= 127) fprintf(file, "%c", c); + else fprintf(file, "?"); + } } -/********************* Funktion: unicode_fprint ****************************** +/********************* Funktion: utf_fprint ********************************** - Schreibt ein unicode-Symbol auf eine Datei aus + write utf symbol into file -******************************************************************************/ +******************************************************************************/ -void unicode_fprint (FILE *file, unicode *u) +void utf_fprint_classname(FILE *file, utf *u) { - u2 i; - for (i=0; i < u->length; i++) putc (u->text[i], file); -} + char *endpos = utf_end(u); /* points behind utf string */ + char *utf_ptr = u->text; /* current position in utf text */ + if (!u) + return; -/****************** interne Funktion: u_hashkey ******************************/ + while (utf_ptr < endpos) { + /* read next unicode character */ + u2 c = utf_nextu2(&utf_ptr); + if (c == '/') c = '.'; -static u4 u_hashkey (u2 *text, u2 length) -{ - u4 k = 0; - u2 i,sh=0; - - for (i=0; i= 32 && c <= 127) fprintf(file, "%c", c); + else fprintf(file, "?"); + } } -/*************** interne Funktion: u_reorganizehash **************************/ -static void u_reorganizehash () +/****************** internal function: utf_hashkey *************************** + + The hashkey is computed from the utf-text by using up to 8 characters. + For utf-symbols longer than 15 characters 3 characters are taken from + the beginning and the end, 2 characters are taken from the middle. + +******************************************************************************/ + +#define nbs(val) ((u4) *(++text) << val) /* get next byte, left shift by val */ +#define fbs(val) ((u4) *( text) << val) /* get first byte, left shift by val */ + +static u4 utf_hashkey(char *text, u4 length) { - u4 i; - unicode *u; + char *start_pos = text; /* pointer to utf text */ + u4 a; + + switch (length) { + + case 0: /* empty string */ + return 0; + + case 1: return fbs(0); + case 2: return fbs(0) ^ nbs(3); + case 3: return fbs(0) ^ nbs(3) ^ nbs(5); + case 4: return fbs(0) ^ nbs(2) ^ nbs(4) ^ nbs(6); + case 5: return fbs(0) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(6); + case 6: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(5) ^ nbs(6); + case 7: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(5) ^ nbs(6); + case 8: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(5) ^ nbs(6) ^ nbs(7); + + case 9: + a = fbs(0); + a ^= nbs(1); + a ^= nbs(2); + text++; + return a ^ nbs(4) ^ nbs(5) ^ nbs(6) ^ nbs(7) ^ nbs(8); + + case 10: + a = fbs(0); + text++; + a ^= nbs(2); + a ^= nbs(3); + a ^= nbs(4); + text++; + return a ^ nbs(6) ^ nbs(7) ^ nbs(8) ^ nbs(9); + + case 11: + a = fbs(0); + text++; + a ^= nbs(2); + a ^= nbs(3); + a ^= nbs(4); + text++; + return a ^ nbs(6) ^ nbs(7) ^ nbs(8) ^ nbs(9) ^ nbs(10); + + case 12: + a = fbs(0); + text += 2; + a ^= nbs(2); + a ^= nbs(3); + text++; + a ^= nbs(5); + a ^= nbs(6); + a ^= nbs(7); + text++; + return a ^ nbs(9) ^ nbs(10); + + case 13: + a = fbs(0); + a ^= nbs(1); + text++; + a ^= nbs(3); + a ^= nbs(4); + text += 2; + a ^= nbs(7); + a ^= nbs(8); + text += 2; + return a ^ nbs(9) ^ nbs(10); + + case 14: + a = fbs(0); + text += 2; + a ^= nbs(3); + a ^= nbs(4); + text += 2; + a ^= nbs(7); + a ^= nbs(8); + text += 2; + return a ^ nbs(9) ^ nbs(10) ^ nbs(11); + + case 15: + a = fbs(0); + text += 2; + a ^= nbs(3); + a ^= nbs(4); + text += 2; + a ^= nbs(7); + a ^= nbs(8); + text += 2; + return a ^ nbs(9) ^ nbs(10) ^ nbs(11); + + default: /* 3 characters from beginning */ + a = fbs(0); + text += 2; + a ^= nbs(3); + a ^= nbs(4); + + /* 2 characters from middle */ + text = start_pos + (length / 2); + a ^= fbs(5); + text += 2; + a ^= nbs(6); + + /* 3 characters from end */ + text = start_pos + length - 4; + + a ^= fbs(7); + text++; + + return a ^ nbs(10) ^ nbs(11); + } +} - u4 newhashsize = unicodehashsize*2; - unicode **newhash = MNEW (unicode*, newhashsize); -#ifdef STATISTICS - count_unicode_len += sizeof(unicode*) * unicodehashsize; -#endif +/*************************** function: utf_hashkey *************************** - for (i=0; i hashlink; - u4 slot = (u->key) % newhashsize; - - u->hashlink = newhash[slot]; - newhash[slot] = u; +******************************************************************************/ - u = nextu; - } - } - - MFREE (unicodehash, unicode*, unicodehashsize); - unicodehash = newhash; - unicodehashsize = newhashsize; +u4 unicode_hashkey(u2 *text, u2 len) +{ + return utf_hashkey((char*) text, len); } -/****************** Funktion: unicode_new_u2 ********************************** +/************************ function: utf_new ********************************** - Legt ein neues unicode-Symbol an. Der Text des Symbols wird dieser - Funktion als u2-Array "ubergeben + Creates a new utf-symbol, the text of the symbol is passed as a + u1-array. The function searches the utf-hashtable for a utf-symbol + with this text. On success the element returned, otherwise a new + hashtable element is created. + + If the number of entries in the hashtable exceeds twice the size of the + hashtable slots a reorganization of the hashtable is done and the utf + symbols are copied to a new hashtable with doubled size. ******************************************************************************/ -unicode *unicode_new_u2 (u2 *text, u2 length) +utf *utf_new_intern(char *text, u2 length) { - u4 key = u_hashkey (text, length); - u4 slot = key % unicodehashsize; - unicode *u = unicodehash[slot]; + u4 key; /* hashkey computed from utf-text */ + u4 slot; /* slot in hashtable */ + utf *u; /* hashtable element */ u2 i; +#ifdef STATISTICS + if (opt_stat) + count_utf_new++; +#endif + + key = utf_hashkey(text, length); + slot = key & (utf_hash.size-1); + u = utf_hash.ptr[slot]; + + /* search external hash chain for utf-symbol */ while (u) { - if (u->key == key) { - if (u->length == length) { - for (i=0; itext[i]) goto nomatch; - } - return u; - } - } - nomatch: - u = u->hashlink; - } + if (u->blength == length) { + /* compare text of hashtable elements */ + for (i = 0; i < length; i++) + if (text[i] != u->text[i]) goto nomatch; + #ifdef STATISTICS - count_unicode_len += sizeof(unicode) + 2 * length; + if (opt_stat) + count_utf_new_found++; #endif +/* log_text("symbol found in hash table");*/ + /* symbol found in hashtable */ +/* utf_display(u); + { + utf blup; + blup.blength=length; + blup.text=text; + utf_display(&blup); + }*/ + return u; + } + nomatch: + u = u->hashlink; /* next element in external chain */ + } - u = NEW (unicode); - u->key = key; - u->length = length; - u->text = MNEW (u2, length); - u->class = NULL; - u->string = NULL; - u->hashlink = unicodehash[slot]; - unicodehash[slot] = u; - for (i=0; itext[i] = text[i]; - - unicodeentries++; - - if ( unicodeentries > (unicodehashsize/2)) u_reorganizehash(); - - return u; -} +#ifdef STATISTICS + if (opt_stat) + count_utf_len += sizeof(utf) + length; +#endif + /* location in hashtable found, create new utf element */ + u = NEW(utf); + u->blength = length; /* length in bytes of utfstring */ + u->hashlink = utf_hash.ptr[slot]; /* link in external hashchain */ + u->text = mem_alloc(length + 1);/* allocate memory for utf-text */ + memcpy(u->text, text, length); /* copy utf-text */ + u->text[length] = '\0'; + utf_hash.ptr[slot] = u; /* insert symbol into table */ -/********************* Funktion: unicode_new_char ***************************** + utf_hash.entries++; /* update number of entries */ - Legt ein neues unicode-Symbol an. Der Text des Symbols wird dieser - Funktion als C-String ( = char* ) "ubergeben + if (utf_hash.entries > (utf_hash.size * 2)) { -******************************************************************************/ + /* reorganization of hashtable, average length of + the external chains is approx. 2 */ -unicode *unicode_new_char (char *text) -{ -#define MAXNEWCHAR 500 - u2 buffer[MAXNEWCHAR]; - u2 length = 0; - u1 c; - - while ( (c = *text) != '\0' ) { - if (length>=MAXNEWCHAR) panic ("Text too long in unicode_new_char"); - buffer[length++] = c; - text ++; - } - return unicode_new_u2 (buffer, length); -} + u4 i; + utf *u; + hashtable newhash; /* the new hashtable */ + /* create new hashtable, double the size */ + init_hashtable(&newhash, utf_hash.size * 2); + newhash.entries = utf_hash.entries; -/********************** Funktion: unicode_setclasslink ************************ +#ifdef STATISTICS + if (opt_stat) + count_utf_len += sizeof(utf*) * utf_hash.size; +#endif - H"angt einen Verweis auf eine Klasse an ein unicode-Symbol an. + /* transfer elements to new hashtable */ + for (i = 0; i < utf_hash.size; i++) { + u = (utf *) utf_hash.ptr[i]; + while (u) { + utf *nextu = u->hashlink; + u4 slot = utf_hashkey(u->text, u->blength) & (newhash.size - 1); + + u->hashlink = (utf *) newhash.ptr[slot]; + newhash.ptr[slot] = u; -******************************************************************************/ + /* follow link in external hash chain */ + u = nextu; + } + } + + /* dispose old table */ + MFREE(utf_hash.ptr, void*, utf_hash.size); + utf_hash = newhash; + } -void unicode_setclasslink (unicode *u, classinfo *class) -{ - PANICIF (u->class, "Attempt to attach class to already attached symbol"); - u->class = class; + return u; } -/********************** Funktion: unicode_getclasslink ************************ - - Sucht den Verweis von einem unicode-Symbol auf eine Klasse. - Wenn keine solche Klasse existiert, dann wird ein Fehler - ausgegeben. -******************************************************************************/ - -classinfo *unicode_getclasslink (unicode *u) +utf *utf_new(char *text, u2 length) { - PANICIF (!u->class, "Attempt to get unknown class-reference"); - return u->class; -} - + utf *r; +#if defined(USE_THREADS) && defined(NATIVE_THREADS) + tables_lock(); +#endif -/********************* Funktion: unicode_unlinkclass ************************* + r = utf_new_intern(text, length); - Entfernt den Verweis auf eine Klasse wieder von einem Symbol - -******************************************************************************/ +#if defined(USE_THREADS) && defined(NATIVE_THREADS) + tables_unlock(); +#endif -void unicode_unlinkclass (unicode *u) -{ - PANICIF (!u->class, "Attempt to unlink not yet linked symbol"); - u -> class = NULL; + return r; } +/********************* function: utf_new_char ******************************** -/******************* Funktion> unicode_setstringlink ********************* + creates a new utf symbol, the text for this symbol is passed + as a c-string ( = char* ) - H"angt einen Verweis auf einen konstanten String an ein - Unicode-Symbol - -*************************************************************************/ +******************************************************************************/ -void unicode_setstringlink (unicode *u, java_objectheader *str) +utf *utf_new_char(char *text) { - PANICIF (u->string, "Attempt to attach string to already attached symbol"); - u->string = str; + return utf_new(text, strlen(text)); } -/********************* Funktion: unicode_unlinkstring ************************* +/********************* function: utf_new_char ******************************** + + creates a new utf symbol, the text for this symbol is passed + as a c-string ( = char* ) + "." characters are going to be replaced by "/". since the above function is + used often, this is a separte function, instead of an if - Entfernt den Verweis auf einen String wieder von einem Symbol - ******************************************************************************/ -void unicode_unlinkstring (unicode *u) +utf *utf_new_char_classname(char *text) { - PANICIF (!u->class, "Attempt to unlink not yet linked symbol"); - u -> string = NULL; + if (strchr(text, '.')) { + char *txt = strdup(text); + char *end = txt + strlen(txt); + char *c; + utf *tmpRes; + for (c = txt; c < end; c++) + if (*c == '.') *c = '/'; + tmpRes = utf_new(txt, strlen(txt)); + free(txt); + return tmpRes; + + } else + return utf_new(text, strlen(text)); } +/************************** Funktion: utf_show ****************************** -/*********************** Funktion: unicode_show ****************************** + writes the utf symbols in the utfhash to stdout and + displays the number of external hash chains grouped + according to the chainlength + (debugging purposes) - gibt eine Aufstellung aller Symbol im unicode-hash auf stdout aus. - (nur f"ur Debug-Zwecke) - *****************************************************************************/ -void unicode_show () +void utf_show() { - unicode *u; + +#define CHAIN_LIMIT 20 /* limit for seperated enumeration */ + + u4 chain_count[CHAIN_LIMIT]; /* numbers of chains */ + u4 max_chainlength = 0; /* maximum length of the chains */ + u4 sum_chainlength = 0; /* sum of the chainlengths */ + u4 beyond_limit = 0; /* number of utf-symbols in chains with length>=CHAIN_LIMIT-1 */ u4 i; - printf ("UNICODE-HASH: %d slots for %d entries\n", - (int) unicodehashsize, (int) unicodeentries ); - - for (i=0; istring) printf ("(string) "); u = u->hashlink; - } + } printf ("\n"); - } + } + } + + printf ("UTF-HASH: %d slots for %d entries\n", + (int) utf_hash.size, (int) utf_hash.entries ); + + + if (utf_hash.entries == 0) + return; + + printf("chains:\n chainlength number of chains %% of utfstrings\n"); + + for (i=0;ihashlink; + chain_length++; + } + + /* update sum of all chainlengths */ + sum_chainlength+=chain_length; + + /* determine the maximum length of the chains */ + if (chain_length>max_chainlength) + max_chainlength = chain_length; + + /* update number of utf-symbols in chains with length>=CHAIN_LIMIT-1 */ + if (chain_length>=CHAIN_LIMIT) { + beyond_limit+=chain_length; + chain_length=CHAIN_LIMIT-1; } -} + /* update number of hashchains of current length */ + chain_count[chain_length]++; + } + + /* display results */ + for (i=1;i=%2d %17d %18.2f%%\n",CHAIN_LIMIT-1,chain_count[CHAIN_LIMIT-1],((float) beyond_limit*100)/utf_hash.entries); + + + printf("max. chainlength:%5d\n",max_chainlength); + /* avg. chainlength = sum of chainlengths / number of chains */ + printf("avg. chainlength:%5.2f\n",(float) sum_chainlength / (utf_hash.size-chain_count[0])); +} /****************************************************************************** -*********************** Diverse Support-Funktionen **************************** +*********************** Misc support functions ******************************** ******************************************************************************/ -/******************** Funktion: desc_to_type ********************************** - - Findet zu einem gegebenen Typdescriptor den entsprechenden - Java-Grunddatentyp. +/******************** Function: desc_to_type ********************************** + + Determines the corresponding Java base data type for a given type + descriptor. ******************************************************************************/ -u2 desc_to_type (unicode *descriptor) +u2 desc_to_type(utf *descriptor) { - if (descriptor->length < 1) panic ("Type-Descriptor is empty string"); + char *utf_ptr = descriptor->text; /* current position in utf text */ + char logtext[MAXLOGTEXT]; + + if (descriptor->blength < 1) panic("Type-Descriptor is empty string"); - switch (descriptor->text[0]) { + switch (*utf_ptr++) { case 'B': case 'C': case 'I': @@ -645,21 +798,22 @@ u2 desc_to_type (unicode *descriptor) case '[': return TYPE_ADDRESS; } - sprintf (logtext, "Invalid Type-Descriptor: "); - unicode_sprint (logtext+strlen(logtext), descriptor); - error (); + sprintf(logtext, "Invalid Type-Descriptor: "); + utf_sprint(logtext+strlen(logtext), descriptor); + error("%s",logtext); + return 0; } -/********************** Funktion: desc_typesize ******************************* +/********************** Function: desc_typesize ******************************* - Berechnet die L"ange (in Byte) eines Datenelements gegebenen Typs, - der durch den Typdescriptor gegeben ist. + Calculates the lenght in bytes needed for a data element of the type given + by its type descriptor. ******************************************************************************/ -u2 desc_typesize (unicode *descriptor) +u2 desc_typesize(utf *descriptor) { switch (desc_to_type(descriptor)) { case TYPE_INT: return 4; @@ -672,887 +826,590 @@ u2 desc_typesize (unicode *descriptor) } +/********************** function: utf_nextu2 ********************************* + read the next unicode character from the utf string and + increment the utf-string pointer accordingly - -/****************************************************************************** -********************* Der garbage-collected Heap ****************************** -******************************************************************************* - - verwaltet einen Heap mit automatischer Speicherfreigabe. - - (eine ausf"uhrliche Dokumentation findet sich in der Datei: - collect.doc) - ******************************************************************************/ - -bool collectverbose = false; /* soll Meldung beim GC ausgegeben werden? */ - - -#define BLOCKSIZE 8 /* Gr"osse eines Speicherblockes */ -typedef u8 heapblock; /* Datentyp mit der Gr"osse eines Speicherblocks */ - -#if U8_AVAILABLE && SUPPORT_LONG -#define bitfieldtype u8 -#define BITFIELDBITS 64 -#else -#define bitfieldtype u4 -#define BITFIELDBITS 32 -#endif - -u4 heapsize; /* Gr"osse des Heap in Blocks */ -u4 topofheap; /* Bisherige Obergrenze Heaps */ -heapblock *heap; /* Speicher f"ur den Heap selbst */ - -bitfieldtype *startbits; /* Bitfeld f"ur Bereichsstartkennung */ -bitfieldtype *markbits; /* Bitfeld f"ur Markierung */ -bitfieldtype *referencebits; /* Bitfeld f"ur Folgereferenzenkennung */ - -u4 heapfillgrade; /* Menge der Daten im Heap */ -u4 collectthreashold; /* Schwellwert f"ur n"achstes GC */ - -void **bottom_of_stack; /* Zeiger auf Untergrenze des C-Stacks */ -chain *allglobalreferences; /* Liste f"ur alle globalen Zeiger */ - -typedef struct finalizernode { - struct finalizernode *next; - u4 objstart; - methodinfo *finalizer; - } finalizernode; +u2 utf_nextu2(char **utf_ptr) +{ + /* uncompressed unicode character */ + u2 unicode_char = 0; + /* current position in utf text */ + unsigned char *utf = (unsigned char *) (*utf_ptr); + /* bytes representing the unicode character */ + unsigned char ch1, ch2, ch3; + /* number of bytes used to represent the unicode character */ + int len = 0; -finalizernode *livefinalizees; -finalizernode *deadfinalizees; - + switch ((ch1 = utf[0]) >> 4) { + default: /* 1 byte */ + (*utf_ptr)++; + return (u2) ch1; + case 0xC: + case 0xD: /* 2 bytes */ + if (((ch2 = utf[1]) & 0xC0) == 0x80) { + unsigned char high = ch1 & 0x1F; + unsigned char low = ch2 & 0x3F; + unicode_char = (high << 6) + low; + len = 2; + } + break; + + case 0xE: /* 2 or 3 bytes */ + if (((ch2 = utf[1]) & 0xC0) == 0x80) { + if (((ch3 = utf[2]) & 0xC0) == 0x80) { + unsigned char low = ch3 & 0x3f; + unsigned char mid = ch2 & 0x3f; + unsigned char high = ch1 & 0x0f; + unicode_char = (((high << 6) + mid) << 6) + low; + len = 3; + } else + len = 2; + } + break; + } -typedef struct memarea { /* Datenstruktur f"ur einen Freispeicherbereich */ - struct memarea *next; - } memarea; + /* update position in utf-text */ + *utf_ptr = (char *) (utf + len); + return unicode_char; +} -typedef struct bigmemarea { /* Datenstruktur f"ur einen */ - struct bigmemarea *next; /* Freispeicherbereich variabler L"ange */ - u4 size; - } bigmemarea; - -#define DIFFERENTSIZES 128 /* Anzahl der 'kleinen' Freispeicherlisten */ -memarea *memlist[DIFFERENTSIZES]; /* Die 'kleinen' Freispeicherlisten */ -bitfieldtype memlistbits[DIFFERENTSIZES/BITFIELDBITS]; - /* Bitfeld, in dem jeweils ein Bit gesetzt */ - /* ist, wenn eine Liste noch etwas enth"alt */ -bigmemarea *bigmemlist; /* Liste der gr"osseren Freispeicherbereiche */ +/********************* function: is_valid_utf ******************************** + return true if the given string is a valid UTF-8 string + utf_ptr...points to first character + end_pos...points after last character +******************************************************************************/ -/**************** Hilfsfunktion: lowest ************************************** +static unsigned long min_codepoint[6] = {0,1L<<7,1L<<11,1L<<16,1L<<21,1L<<26}; -liefert als Ergebnis die Nummer des niederwertigsten Bits einer -Zahl vom Typ bitfieldtype, das 1 ist. -Wenn die ganze Zahl keine 1en enth"alt, dann ist egal, was f"ur einen -Wert die Funktion l"iefert. -z.B.: lowest(1) = 0, lowest(12) = 2, lowest(1024) = 10 +bool +is_valid_utf(char *utf_ptr,char *end_pos) +{ + int bytes; + int len,i; + char c; + unsigned long v; + + if (end_pos < utf_ptr) return false; + bytes = end_pos - utf_ptr; + while (bytes--) { + c = *utf_ptr++; + /*dolog("%c %02x",c,c);*/ + if (!c) return false; /* 0x00 is not allowed */ + if ((c & 0x80) == 0) continue; /* ASCII */ + + if ((c & 0xe0) == 0xc0) len = 1; /* 110x xxxx */ + else if ((c & 0xf0) == 0xe0) len = 2; /* 1110 xxxx */ + else if ((c & 0xf8) == 0xf0) len = 3; /* 1111 0xxx */ + else if ((c & 0xfc) == 0xf8) len = 4; /* 1111 10xx */ + else if ((c & 0xfe) == 0xfc) len = 5; /* 1111 110x */ + else return false; /* invalid leading byte */ + + if (len > 2) return false; /* Java limitation */ + + v = (unsigned long)c & (0x3f >> len); + + if ((bytes -= len) < 0) return false; /* missing bytes */ + + for (i = len; i--; ) { + c = *utf_ptr++; + /*dolog(" %c %02x",c,c);*/ + if ((c & 0xc0) != 0x80) /* 10xx xxxx */ + return false; + v = (v<<6) | (c & 0x3f); + } -*****************************************************************************/ + /* dolog("v=%d",v);*/ -static u1 lowesttable[256] = { - 255, 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, - 4, 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, - 5, 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, - 4, 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, - 6, 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, - 4, 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, - 5, 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, - 4, 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, - 7, 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, - 4, 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, - 5, 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, - 4, 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, - 6, 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, - 4, 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, - 5, 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, - 4, 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 }; - -static int lowest(bitfieldtype i) -{ -#if BITFIELDBITS==64 - u8 i1 = i<<32; - if (i1) { - u8 i11 = i1<<16; - if (i11) { - u8 i111 = i11<<8; - if (i111) return lowesttable[i111>>56]; - else return 8+lowesttable[i11>>56]; - } - else { - u8 i112 = i1<<8; - if (i112) return 16+lowesttable[i112>>56]; - else return 24+lowesttable[i1>>56]; - } + if (v == 0) { + if (len != 1) return false; /* Java special */ } - else { - u8 i12 = i<<16; - if (i12) { - u8 i121 = i12<<8; - if (i121) return 32+lowesttable[i121>>56]; - else return 40+lowesttable[i12>>56]; - } else { - u8 i122 = i<<8; - if (i122) return 48+lowesttable[i122>>56]; - else return 56+lowesttable[i>>56]; + /* Sun Java seems to allow overlong UTF-8 encodings */ + + if (v < min_codepoint[len]) { /* overlong UTF-8 */ + if (!opt_liberalutf) + fprintf(stderr,"WARNING: Overlong UTF-8 sequence found.\n"); + /* XXX change this to panic? */ } } -#else - u4 i1 = i<<16; - if (i1) { - u4 i11 = i1<<8; - if (i11) return lowesttable[i11>>24]; - else return 8+lowesttable[i1>>24]; - } - else { - u4 i12 = i<<8; - if (i12) return 16+lowesttable[i12>>24]; - else return 24+lowesttable[i>>24]; - } -#endif -} + /* surrogates in UTF-8 seem to be allowed in Java classfiles */ + /* if (v >= 0xd800 && v <= 0xdfff) return false; */ /* surrogates */ -/******** Funktionen zum Setzen und L"oschen von Bits in Bitfeldern ***********/ - -static void setbit(bitfieldtype *bitfield, u4 bitnumber) -{ - bitfield[bitnumber/BITFIELDBITS] |= - ((bitfieldtype) 1) << (bitnumber%BITFIELDBITS); -} + /* even these seem to be allowed */ + /* if (v == 0xfffe || v == 0xffff) return false; */ /* invalid codepoints */ + } -static void clearbit(bitfieldtype *bitfield, u4 bitnumber) -{ - bitfield[bitnumber/BITFIELDBITS] &= - ~((bitfieldtype) 1) << (bitnumber%BITFIELDBITS); + return true; } + +/********************* function: is_valid_name ******************************* -static bool isbitset(bitfieldtype *bitfield, u4 bitnumber) -{ - return ( bitfield[bitnumber/BITFIELDBITS] & - ( ((bitfieldtype) 1) << (bitnumber%BITFIELDBITS) ) ) != 0; -} + return true if the given string may be used as a class/field/method name. + (Currently this only disallows empty strings and control characters.) -static bool isbitclear(bitfieldtype *bitfield, u4 bitnumber) -{ - return ( bitfield[bitnumber/BITFIELDBITS] & - ( ((bitfieldtype) 1) << (bitnumber%BITFIELDBITS) ) ) == 0; -} + NOTE: The string is assumed to have passed is_valid_utf! + utf_ptr...points to first character + end_pos...points after last character -/***************** Funktion: clearbitfield ************************************ - - l"oscht ein ganzes Bitfeld - ******************************************************************************/ -static void clearbitfield(bitfieldtype *bitfield, u4 fieldsize) -{ - u4 i,t; - t = ALIGN(fieldsize, BITFIELDBITS) / BITFIELDBITS; - for (i=0; i= fieldsize) return fieldsize; - - pattern = bitfield[bitnumber/BITFIELDBITS]; - pattern >>= (bitnumber%BITFIELDBITS); - if (pattern) return bitnumber + lowest(pattern); - - bitnumber = ((bitnumber + BITFIELDBITS) / BITFIELDBITS) * BITFIELDBITS; - } + return is_valid_name(u->text,utf_end(u)); } +/******************** Function: class_new ************************************** -/************ Funktion: findnextcombination_set_unset ************************* + searches for the class with the specified name in the classes hashtable, + if there is no such class a new classinfo structure is created and inserted + into the list of classes to be loaded - funktioniert wie findnextsetbit, allerdings sucht diese Funktion - nach einer Stelle, an der gleichzeitig ein Bit im ersten - Bitfeld gesetzt ist und im zweiten Bitfeld das entsprechende - Bit nicht gesetzt ist. - -******************************************************************************/ +*******************************************************************************/ -static u4 findnextcombination_set_unset - (bitfieldtype *bitfield1, bitfieldtype* bitfield2, u4 fieldsize, u4 bitnumber) +classinfo *class_new_intern(utf *classname) { - bitfieldtype pattern; + classinfo *c; /* hashtable element */ + u4 key; /* hashkey computed from classname */ + u4 slot; /* slot in hashtable */ + u2 i; - for (;;) { - if (bitnumber >= fieldsize) return fieldsize; - - pattern = bitfield1[bitnumber/BITFIELDBITS] - & (~bitfield2[bitnumber/BITFIELDBITS]); - pattern >>= (bitnumber%BITFIELDBITS); - if (pattern) return bitnumber + lowest(pattern); + key = utf_hashkey(classname->text, classname->blength); + slot = key & (class_hash.size - 1); + c = class_hash.ptr[slot]; - bitnumber = ((bitnumber + BITFIELDBITS) / BITFIELDBITS) * BITFIELDBITS; + /* search external hash chain for the class */ + while (c) { + if (c->name->blength == classname->blength) { + for (i = 0; i < classname->blength; i++) + if (classname->text[i] != c->name->text[i]) goto nomatch; + + /* class found in hashtable */ + return c; } -} + + nomatch: + c = c->hashlink; /* next element in external chain */ + } + /* location in hashtable found, create new classinfo structure */ +#if defined(STATISTICS) + if (opt_stat) + count_class_infos += sizeof(classinfo); +#endif -/************** Funktion: memlist_init **************************************** + if (initverbose) { + char logtext[MAXLOGTEXT]; + sprintf(logtext, "Creating class: "); + utf_sprint_classname(logtext + strlen(logtext), classname); + log_text(logtext); + } - initialisiert die Freispeicherlisten (zum nachfolgenden Eintragen - mit memlist_addrange). + c = GCNEW(classinfo, 1); /*JOWENN: NEW*/ + /*c=NEW(classinfo);*/ + c->vmClass = 0; + c->flags = 0; + c->name = classname; + c->packagename = NULL; + c->cpcount = 0; + c->cptags = NULL; + c->cpinfos = NULL; + c->super = NULL; + c->sub = NULL; + c->nextsub = NULL; + c->interfacescount = 0; + c->interfaces = NULL; + c->fieldscount = 0; + c->fields = NULL; + c->methodscount = 0; + c->methods = NULL; + c->linked = false; + c->loaded = false; + c->index = 0; + c->instancesize = 0; + c->header.vftbl = NULL; + c->innerclasscount = 0; + c->innerclass = NULL; + c->vftbl = NULL; + c->initialized = false; + c->initializing = false; + c->classvftbl = false; + c->classUsed = 0; + c->impldBy = NULL; + c->classloader = NULL; + c->sourcefile = NULL; + + /* insert class into the hashtable */ + c->hashlink = class_hash.ptr[slot]; + class_hash.ptr[slot] = c; -******************************************************************************/ + /* update number of hashtable-entries */ + class_hash.entries++; -static void memlist_init () -{ - u4 i; - for (i=0; i (class_hash.size * 2)) { + /* reorganization of hashtable, average length of + the external chains is approx. 2 */ -/************** Funktion: memlist_addrange ************************************ + u4 i; + classinfo *c; + hashtable newhash; /* the new hashtable */ - f"ugt einen Bereich von Heap-Bl"ocken zur Freispeicherliste - hinzu (in die freien Heap-Bl"ocke werden dabei verschiedene - Verkettungszeiger hineingeschrieben). + /* create new hashtable, double the size */ + init_hashtable(&newhash, class_hash.size * 2); + newhash.entries = class_hash.entries; -******************************************************************************/ + /* transfer elements to new hashtable */ + for (i = 0; i < class_hash.size; i++) { + c = (classinfo *) class_hash.ptr[i]; + while (c) { + classinfo *nextc = c->hashlink; + u4 slot = (utf_hashkey(c->name->text, c->name->blength)) & (newhash.size - 1); + + c->hashlink = newhash.ptr[slot]; + newhash.ptr[slot] = c; -static void memlist_addrange (u4 freestart, u4 freesize) -{ - if (freesize>=DIFFERENTSIZES) { - bigmemarea *m = (bigmemarea*) (heap+freestart); - m -> next = bigmemlist; - m -> size = freesize; - bigmemlist = m; - } - else { - if (freesize*BLOCKSIZE>=sizeof(memarea)) { - memarea *m = (memarea*) (heap+freestart); - m -> next = memlist[freesize]; - memlist[freesize] = m; - setbit (memlistbits, freesize); + c = nextc; } } -} - - -/************** Funktion: memlist_getsuitable ********************************* - - sucht in der Freispeicherliste einen Speicherbereich, der m"oglichst - genau die gew"unschte L"ange hat. - Der Bereich wird dabei auf jeden Fall aus der Liste ausgetragen. - (Wenn der Bereich zu lang sein sollte, dann kann der Aufrufer den - Rest wieder mit 'memlist_addrange' in die Liste einh"angen) - - Return (in Referenzparametern): - *freestart: Anfang des freien Speichers - *freelength: L"ange dieses Speicherbereiches - - Wenn kein passender Speicherblock mehr gefunden werden kann, dann - wird in '*freelength' 0 hineingeschrieben. - -******************************************************************************/ - -static void memlist_getsuitable (u4 *freestart, u4 *freelength, u4 length) -{ - bigmemarea *m; - bigmemarea *prevm = NULL; - if (lengthnext; - if (!m->next) clearbit (memlistbits, firstfreelength); - *freestart = ((heapblock*) m) - heap; - *freelength = firstfreelength; - return; + /* Array classes need further initialization. */ + if (c->name->text[0] == '[') { + /* Array classes are not loaded from classfiles. */ + c->loaded = true; + class_new_array(c); + c->packagename = array_packagename; + + } else { + /* Find the package name */ + /* Classes in the unnamed package keep packagename == NULL. */ + char *p = utf_end(c->name) - 1; + char *start = c->name->text; + for (;p > start; --p) { + if (*p == '.') { + c->packagename = utf_new(start, p - start); + break; } } + } +#if defined(USE_THREADS) && defined(NATIVE_THREADS) + initObjectLock(&c->header); +#endif - m = bigmemlist; - while (m) { - if (m->size >= length) { - if (prevm) prevm->next = m->next; - else bigmemlist = m->next; - - *freestart = ((heapblock*) m) - heap; - *freelength = m->size; - return ; - } - - prevm = m; - m = m->next; - } - - *freelength=0; + return c; } - - - -/******************* Funktion: mark ******************************************* - - Markiert ein m"oglicherweise g"ultiges Objekt. - Sollte sich herausstellen, dass der Zeiger gar nicht auf ein Objekt - am Heap zeigt, dann wird eben gar nichts markiert. - -******************************************************************************/ - -static void markreferences (void **rstart, void **rend); - -static void mark (heapblock *obj) +classinfo *class_new(utf *classname) { - u4 blocknum,objectend; - - if ((long) obj & (BLOCKSIZE-1)) return; - - if (obj=(heap+topofheap) ) return; - - blocknum = obj-heap; - if ( isbitclear(startbits, blocknum) ) return; - if ( isbitset(markbits, blocknum) ) return; - - setbit (markbits, blocknum); - - if ( isbitclear(referencebits, blocknum) ) return; - - objectend = findnextsetbit (startbits, topofheap, blocknum+1); - markreferences ((void**)obj, (void**) (heap+objectend) ); -} + classinfo *c; +#if defined(USE_THREADS) && defined(NATIVE_THREADS) + tables_lock(); +#endif -/******************** Funktion: markreferences ******************************** + c = class_new_intern(classname); - Geht einen Speicherbereich durch, und markiert alle Objekte, auf - die von diesem Bereich aus irgendeine Referenz existiert. + /* we support eager class loading and linking on demand */ -******************************************************************************/ + if (opt_eager) { + classinfo *tc; + classinfo *tmp; -static void markreferences (void **rstart, void **rend) -{ - void **ptr; - - for (ptr=rstart; ptrloaded) { + if (!class_load(c)) { +#if defined(USE_THREADS) && defined(NATIVE_THREADS) + tables_unlock(); +#endif + return c; + } + } -/******************* Funktion: markstack ************************************** + /* link all referenced classes */ - Marks all objects that are referenced by the (C-)stacks of - all live threads. + tc = list_first(&unlinkedclasses); - (The stack-bottom is to be specified in the call to heap_init). + while (tc) { + /* skip the current loaded/linked class */ + if (tc != c) { + if (!class_link(tc)) { +#if defined(USE_THREADS) && defined(NATIVE_THREADS) + tables_unlock(); +#endif + return c; + } + } -******************************************************************************/ + /* we need a tmp variable here, because list_remove sets prev and + next to NULL */ + tmp = list_next(&unlinkedclasses, tc); + list_remove(&unlinkedclasses, tc); + tc = tmp; + } -static void markstack () /* schani */ -{ - void *dummy; - void **top_of_stack = &dummy; - -#ifdef USE_THREADS - thread *aThread; - - for (aThread = liveThreads; aThread != 0; aThread = CONTEXT(aThread).nextlive) { - mark((heapblock*)aThread); - if (aThread == currentThread) { - if (top_of_stack > (void**)CONTEXT(aThread).stackEnd) - markreferences ((void**)CONTEXT(aThread).stackEnd, top_of_stack); - else - markreferences (top_of_stack, (void**)CONTEXT(aThread).stackEnd); - } - else { - if (CONTEXT(aThread).usedStackTop > CONTEXT(aThread).stackEnd) - markreferences ((void**)CONTEXT(aThread).stackEnd, - (void**)CONTEXT(aThread).usedStackTop + 16); - else - markreferences ((void**)CONTEXT(aThread).usedStackTop - 16, - (void**)CONTEXT(aThread).stackEnd); + if (!c->linked) { + if (!class_link(c)) { +#if defined(USE_THREADS) && defined(NATIVE_THREADS) + tables_unlock(); +#endif + return c; } } + } - markreferences((void**)&threadQhead[0], (void**)&threadQhead[MAX_THREAD_PRIO]); -#else - if (top_of_stack > bottom_of_stack) - markreferences(bottom_of_stack, top_of_stack); - else - markreferences(top_of_stack, bottom_of_stack); +#if defined(USE_THREADS) && defined(NATIVE_THREADS) + tables_unlock(); #endif -} - - - -/**************** Funktion: searchlivefinalizees ***************************** - - geht die Liste aller Objekte durch, die noch finalisiert werden m"ussen - (livefinalizees), und tr"agt alle nicht mehr markierten in die - Liste deadfinalizess ein (allerdings werden sie vorher noch als - erreicht markiert, damit sie nicht jetzt gleich gel"oscht werden). - -*****************************************************************************/ - -static void searchlivefinalizees () -{ - finalizernode *fn = livefinalizees; - finalizernode *lastlive = NULL; - - while (fn) { - - /* alle zu finalisierenden Objekte, die nicht mehr markiert sind: */ - if (isbitclear (markbits, fn->objstart)) { - finalizernode *nextfn = fn->next; - - mark (heap + fn->objstart); - if (lastlive) lastlive -> next = nextfn; - else livefinalizees = nextfn; - - fn -> next = deadfinalizees; - deadfinalizees = fn; - - fn = nextfn; - } - else { - lastlive = fn; - fn = fn->next; - } - } + return c; } +/******************** Function: class_get ************************************** -/********************** Funktion: finalizedead ******************************* + searches for the class with the specified name in the classes hashtable + if there is no such class NULL is returned - ruft die 'finalize'-Methode aller Objekte in der 'deadfinalizees'- - Liste auf. - Achtung: Es kann hier eventuell zu neuerlichen Speicheranforderungen - (mit potentiell notwendigem GC) kommen, deshalb m"ussen manche - globalen Variablen in lokale Variblen kopiert werden - (Reentrant-F"ahigkeit!!!) - -******************************************************************************/ +*******************************************************************************/ -static void finalizedead() +classinfo *class_get(utf *classname) { - finalizernode *fn = deadfinalizees; - deadfinalizees = NULL; - - while (fn) { - finalizernode *nextfn = fn->next; - - asm_calljavamethod (fn->finalizer, heap+fn->objstart, NULL,NULL,NULL); - FREE (fn, finalizernode); - - fn = nextfn; + classinfo *c; /* hashtable element */ + u4 key; /* hashkey computed from classname */ + u4 slot; /* slot in hashtable */ + u2 i; + + key = utf_hashkey(classname->text, classname->blength); + slot = key & (class_hash.size-1); + c = class_hash.ptr[slot]; + + /* search external hash-chain */ + while (c) { + if (c->name->blength == classname->blength) { + /* compare classnames */ + for (i = 0; i < classname->blength; i++) + if (classname->text[i] != c->name->text[i]) + goto nomatch; + + /* class found in hashtable */ + return c; } + + nomatch: + c = c->hashlink; + } + /* class not found */ + return NULL; } +/* class_remove **************************************************************** -/****************** Funktion: heap_docollect ********************************** - - F"uhrt eine vollst"andige Garbage Collection durch - -******************************************************************************/ - -static void heap_docollect () -{ - u4 freestart,freeend; - void **p; - bitfieldtype *dummy; - u4 heapfreemem; - u4 oldfillgrade; - - - if (runverbose) { - fprintf(stderr, "doing garbage collection\n"); - } - /* alle Markierungsbits l"oschen */ - clearbitfield (markbits, topofheap); + removes the class entry wth the specified name in the classes hashtable, + furthermore the class' resources are freed + if there is no such class false is returned - /* Alle vom Stack referenzierten Objekte markieren */ - asm_dumpregistersandcall (markstack); - - /* Alle von globalen Variablen erreichbaren Objekte markieren */ - p = chain_first (allglobalreferences); - while (p) { - mark (*p); - p = chain_next (allglobalreferences); - } - - /* alle Objekte durchsehen, die eine finalizer-Methode haben */ - searchlivefinalizees(); - - /* alle Reference-Bits l"oschen, deren Objekte nicht */ - /* mehr erreichbar sind */ - maskfieldwithfield (referencebits, markbits, topofheap); - - - /* Freispeicherliste initialisieren */ - memlist_init (); - - - /* Heap schrittweise durchgehen, und alle freien Bl"ocke merken */ - - heapfreemem = 0; - freeend=0; - for (;;) { - /* Anfang des n"achsten freien Bereiches suchen */ - freestart = findnextcombination_set_unset - (startbits, markbits, topofheap, freeend); - - /* Wenn es keinen freien Bereich mehr gibt -> fertig */ - if (freestart>=topofheap) goto freememfinished; - - /* Anfang des freien Bereiches markieren */ - setbit (markbits, freestart); - - /* Ende des freien Bereiches suchen */ - freeend = findnextsetbit (markbits, topofheap, freestart+1); - - /* Freien Bereich in Freispeicherliste einh"angen */ - memlist_addrange (freestart, freeend-freestart); - - /* Menge des freien Speichers mitnotieren */ - heapfreemem += (freeend-freestart); - } +*******************************************************************************/ - freememfinished: +bool class_remove(classinfo *c) +{ + classinfo *tc; /* hashtable element */ + classinfo *pc; + u4 key; /* hashkey computed from classname */ + u4 slot; /* slot in hashtable */ + u2 i; + + key = utf_hashkey(c->name->text, c->name->blength); + slot = key & (class_hash.size - 1); + tc = class_hash.ptr[slot]; + pc = NULL; + + /* search external hash-chain */ + while (tc) { + if (tc->name->blength == c->name->blength) { + + /* compare classnames */ + for (i = 0; i < c->name->blength; i++) + if (tc->name->text[i] != c->name->text[i]) + goto nomatch; - /* Die Rollen von markbits und startbits vertauschen */ - dummy = markbits; - markbits = startbits; - startbits = dummy; + /* class found in hashtable */ + if (!pc) { + class_hash.ptr[slot] = tc->hashlink; + } else { + pc->hashlink = tc->hashlink; + } - /* Threashold-Wert f"ur n"achstes Collect */ - oldfillgrade = heapfillgrade; - heapfillgrade = topofheap - heapfreemem; - collectthreashold = heapfillgrade*3; /* eine nette Heuristik */ + class_free(tc); - /* Eventuell eine Meldung ausgeben */ - if (collectverbose) { - sprintf (logtext, "Garbage Collection: previous/now = %d / %d ", - (int) (oldfillgrade * BLOCKSIZE), - (int) (heapfillgrade * BLOCKSIZE) ); - dolog (); + return true; } + + nomatch: + pc = tc; + tc = tc->hashlink; + } - - - /* alle gerade unerreichbar gewordenen Objekte mit - finalize-Methoden jetzt finalisieren. - Achtung: Diese Funktion kann zu neuerlichem GC f"uhren!! */ - finalizedead (); - + /* class not found */ + return false; } -/************************* Function: gc_init ********************************** - Initializes the garbage collection thread mechanism. - -******************************************************************************/ +/***************** Function: class_array_of *********************************** -#ifdef USE_THREADS -iMux gcStartMutex; -iMux gcThreadMutex; -iCv gcConditionStart; -iCv gcConditionDone; + Returns an array class with the given component class. + The array class is dynamically created if neccessary. -void -gc_init (void) -{ - gcStartMutex.holder = 0; - gcStartMutex.count = 0; - gcStartMutex.muxWaiters = 0; - - gcThreadMutex.holder = 0; - gcThreadMutex.count = 0; - gcThreadMutex.muxWaiters = 0; - - gcConditionStart.cvWaiters = 0; - gcConditionStart.mux = 0; - - gcConditionDone.cvWaiters = 0; - gcConditionDone.mux = 0; -} -#else -void -gc_init (void) -{ -} -#endif - -/************************* Function: gc_thread ******************************** - - In an endless loop waits for a condition to be posted, then - garbage collects the heap. - -******************************************************************************/ +*******************************************************************************/ -#ifdef USE_THREADS -void -gc_thread (void) +classinfo *class_array_of(classinfo *component) { - intsRestore(); /* all threads start with interrupts disabled */ - - assert(blockInts == 0); - - lock_mutex(&gcThreadMutex); - - for (;;) - { - wait_cond(&gcThreadMutex, &gcConditionStart, 0); - - intsDisable(); - heap_docollect(); - intsRestore(); - - signal_cond(&gcConditionDone); + int namelen; + char *namebuf; + classinfo *c; + + /* Assemble the array class name */ + namelen = component->name->blength; + + if (component->name->text[0] == '[') { + /* the component is itself an array */ + namebuf = DMNEW(char, namelen + 1); + namebuf[0] = '['; + memcpy(namebuf + 1, component->name->text, namelen); + namelen++; + + } else { + /* the component is a non-array class */ + namebuf = DMNEW(char, namelen + 3); + namebuf[0] = '['; + namebuf[1] = 'L'; + memcpy(namebuf + 2, component->name->text, namelen); + namebuf[2 + namelen] = ';'; + namelen += 3; } -} -#endif - -void -gc_call (void) -{ -#ifdef USE_THREADS - lock_mutex(&gcThreadMutex); - - signal_cond(&gcConditionStart); - wait_cond(&gcThreadMutex, &gcConditionDone, 0); - unlock_mutex(&gcThreadMutex); -#else - heap_docollect(); -#endif -} - - -/************************* Funktion: heap_init ******************************** - - Initialisiert den Garbage Collector. - Parameter: - heapbytesize: Maximale Gr"osse des Heap (in Bytes) - heapbytestartsize: Gr"osse des Heaps, bei dem zum ersten mal eine - GC durchgef"uhrt werden soll. - stackbottom: Ein Zeiger auf die Untergrenze des Stacks, im dem - Referenzen auf Objekte stehen k"onnen. - -******************************************************************************/ - -void heap_init (u4 heapbytesize, u4 heapbytestartsize, void **stackbottom) -{ + /* load this class ;-) and link it */ + c = class_new(utf_new(namebuf, namelen)); + c->loaded = 1; + class_link(c); -#ifdef TRACECALLARGS - - heapsize = ALIGN (heapbytesize, getpagesize()); - heapsize = heapsize/BLOCKSIZE; - heap = (void*) mmap ((void*) 0x10000000, (size_t) (heapsize * BLOCKSIZE), - PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, (off_t) 0); - if (heap != (void *) 0x10000000) { - perror("mmap"); - printf("address = %p\n", heap); - panic("mmap failed\n"); - } - -#else - - heapsize = ALIGN (heapbytesize, 1024); - heapsize = heapsize/BLOCKSIZE; - heap = MNEW (heapblock, heapsize); - -#endif - - topofheap = 0; - startbits = MNEW (bitfieldtype, heapsize/BITFIELDBITS); - markbits = MNEW (bitfieldtype, heapsize/BITFIELDBITS); - referencebits = MNEW (bitfieldtype, heapsize/BITFIELDBITS); - clearbitfield (startbits, heapsize); - clearbitfield (markbits, heapsize); - clearbitfield (referencebits, heapsize); - - heapfillgrade = 0; - collectthreashold = heapbytestartsize/BLOCKSIZE; - allglobalreferences = chain_new (); - bottom_of_stack = stackbottom; - - livefinalizees = NULL; - deadfinalizees = NULL; -} - - -/****************** Funktion: heap_close ************************************** - - Gibt allen ben"otigten Speicher frei - -******************************************************************************/ - -void heap_close () -{ -#ifndef TRACECALLARGS - MFREE (heap, heapblock, heapsize); */ -#endif - MFREE (startbits, bitfieldtype, heapsize/BITFIELDBITS); - MFREE (markbits, bitfieldtype, heapsize/BITFIELDBITS); - MFREE (referencebits, bitfieldtype, heapsize/BITFIELDBITS); - chain_free (allglobalreferences); - - while (livefinalizees) { - finalizernode *n = livefinalizees->next; - FREE (livefinalizees, finalizernode); - livefinalizees = n; - } + return c; } +/*************** Function: class_multiarray_of ******************************** -/***************** Funktion: heap_addreference ******************************** + Returns an array class with the given dimension and element class. + The array class is dynamically created if neccessary. - Teilt dem GC eine Speicherstelle mit, an der eine globale Referenz - auf Objekte gespeichert sein kann. - -******************************************************************************/ +*******************************************************************************/ -void heap_addreference (void **reflocation) +classinfo *class_multiarray_of(int dim, classinfo *element) { - intsDisable(); /* schani */ - - chain_addlast (allglobalreferences, reflocation); + int namelen; + char *namebuf; + + if (dim < 1) + panic("Invalid array dimension requested"); + + /* Assemble the array class name */ + namelen = element->name->blength; + + if (element->name->text[0] == '[') { + /* the element is itself an array */ + namebuf = DMNEW(char, namelen + dim); + memcpy(namebuf + dim, element->name->text, namelen); + namelen += dim; + } + else { + /* the element is a non-array class */ + namebuf = DMNEW(char, namelen + 2 + dim); + namebuf[dim] = 'L'; + memcpy(namebuf + dim + 1, element->name->text, namelen); + namelen += (2 + dim); + namebuf[namelen - 1] = ';'; + } + memset(namebuf, '[', dim); - intsRestore(); /* schani */ + return class_new(utf_new(namebuf, namelen)); } +/************************** function: utf_strlen ****************************** + determine number of unicode characters in the utf string -/***************** Funktion: heap_allocate ************************************ - - Fordert einen Speicher vom Heap an, der eine gew"unschte Gr"osse - (in Byte angegeben) haben muss. - Wenn kein Speicher mehr vorhanden ist (auch nicht nach einem - Garbage Collection-Durchlauf), dann wird NULL zur"uckgegeben. - Zus"atzlich wird durch den Parameter 'references' angegeben, ob - in das angelegte Objekt Referenzen auf andere Objekte eingetragen - werden k"onnten. - (Wichtig wegen Beschleunigung des Mark&Sweep-Durchlauf) - Im Zweifelsfalle immer references=true verwenden. - -******************************************************************************/ +*******************************************************************************/ -void *heap_allocate (u4 bytelength, bool references, methodinfo *finalizer) +u4 utf_strlen(utf *u) { - u4 freestart,freelength; - u4 length = ALIGN(bytelength, BLOCKSIZE) / BLOCKSIZE; - - intsDisable(); /* schani */ - - memlist_getsuitable (&freestart, &freelength, length); + char *endpos; /* points behind utf string */ + char *utf_ptr; /* current position in utf text */ + u4 len = 0; /* number of unicode characters */ -onemoretry: - if (!freelength) { - if ( (topofheap+length > collectthreashold) - || (topofheap+length > heapsize) ) { - - intsRestore(); - gc_call(); - intsDisable(); - - memlist_getsuitable (&freestart, &freelength, length); - if (freelength) goto onemoretry; - } - - if (topofheap+length > heapsize) { - sprintf (logtext, "Heap-Allocation failed for %d bytes", - (int) bytelength); - dolog(); - intsRestore(); /* schani */ - return NULL; - } - - freestart = topofheap; - freelength = length; - setbit (startbits, topofheap); - topofheap += length; - } - else { - if (freelength>length) { - setbit (startbits, freestart+length); - memlist_addrange (freestart+length, freelength-length); - } - } - - if (references) setbit (referencebits, freestart); + if (!u) { + *exceptionptr = new_nullpointerexception(); + return 0; + } - heapfillgrade += length; + endpos = utf_end(u); + utf_ptr = u->text; - if (finalizer) { - finalizernode *n = NEW(finalizernode); - n -> next = livefinalizees; - n -> objstart = freestart; - n -> finalizer = finalizer; - livefinalizees = n; - } - - intsRestore(); /* schani */ + while (utf_ptr < endpos) { + len++; + /* next unicode character */ + utf_nextu2(&utf_ptr); + } - if (runverbose) { - sprintf (logtext, "new returns: %lx", (long) (heap + freestart)); - dolog (); - } + if (utf_ptr != endpos) + /* string ended abruptly */ + panic("illegal utf string"); - return (void*) (heap + freestart); + return len; } - - +/* + * These are local overrides for various environment variables in Emacs. + * Please do not remove this and leave it at the end of the file, where + * Emacs will automagically detect them. + * --------------------------------------------------------------------- + * Local variables: + * mode: c + * indent-tabs-mode: t + * c-basic-offset: 4 + * tab-width: 4 + * End: + */