Moved string stuff into string.[ch], utf into utf8.[ch] and class stuff

author twisti <none@none>

Thu, 10 Feb 2005 10:54:28 +0000 (10:54 +0000)

committer twisti <none@none>

Thu, 10 Feb 2005 10:54:28 +0000 (10:54 +0000)
author twisti <none@none>
Thu, 10 Feb 2005 10:54:28 +0000 (10:54 +0000)
committer twisti <none@none>
Thu, 10 Feb 2005 10:54:28 +0000 (10:54 +0000)
diff --git a/src/vm/tables.c b/src/vm/tables.c

index 0bbde3683e9fcd6268eeaef3baae39e6f99ac6bb..71e18b637e4447964830be7237ca300dc9b5deb1 100644 (file)
--- a/src/vm/tables.c
+++ b/src/vm/tables.c
@@ -35,10 +35,11 @@
         - the heap
         - additional support functions
  
-   $Id: tables.c 1843 2005-01-04 11:21:02Z twisti $
+   $Id: tables.c 1930 2005-02-10 10:54:28Z twisti $
  
  */
  
+
  #include <string.h>
  #include <stdlib.h>
  #include <assert.h>
@@ -56,14 +57,11 @@
  #include "vm/loader.h"
  #include "vm/options.h"
  #include "vm/statistics.h"
+#include "vm/stringlocal.h"
  #include "vm/tables.h"
  
  
-hashtable utf_hash;     /* hashtable for utf8-symbols */
  hashtable string_hash;  /* hashtable for javastrings  */
-hashtable class_hash;   /* hashtable for classes      */
-
-list unlinkedclasses;   /* this is only used for eager class loading          */
  
  
  /******************************************************************************
@@ -162,609 +160,6 @@ void tables_close()
  }
  
  
-/********************* function: utf_display *********************************
-
-       write utf symbol to stdout (debugging purposes)
-
-*******************************************************************************/
-
-void utf_display(utf *u)
-{
-    char *endpos;                       /* points behind utf string           */
-    char *utf_ptr;                      /* current position in utf text       */
-
-       if (!u) {
-               printf("NULL");
-               fflush(stdout);
-               return;
-       }
-
-    endpos = utf_end(u);
-    utf_ptr = u->text;
-
-    while (utf_ptr < endpos) {
-               /* read next unicode character */                
-               u2 c = utf_nextu2(&utf_ptr);
-               if (c >= 32 && c <= 127) printf("%c", c);
-               else printf("?");
-       }
-
-       fflush(stdout);
-}
-
-
-/* utf_display_classname *******************************************************
-
-   write utf symbol to stdout (debugging purposes)
-
-*******************************************************************************/
-
-void utf_display_classname(utf *u)
-{
-    char *endpos;                       /* points behind utf string           */
-    char *utf_ptr;                      /* current position in utf text       */
-
-       if (!u) {
-               printf("NULL");
-               fflush(stdout);
-               return;
-       }
-
-    endpos = utf_end(u);
-    utf_ptr = u->text;
-
-    while (utf_ptr < endpos) {
-               /* read next unicode character */                
-               u2 c = utf_nextu2(&utf_ptr);
-               if (c == '/') c = '.';
-               if (c >= 32 && c <= 127) printf("%c", c);
-               else printf("?");
-       }
-
-       fflush(stdout);
-}
-
-
-/************************* function: log_utf *********************************
-
-       log utf symbol
-
-******************************************************************************/
-
-void log_utf(utf *u)
-{
-       char buf[MAXLOGTEXT];
-       utf_sprint(buf, u);
-       dolog("%s", buf);
-}
-
-
-/********************** function: log_plain_utf ******************************
-
-       log utf symbol (without printing "LOG: " and newline)
-
-******************************************************************************/
-
-void log_plain_utf(utf *u)
-{
-       char buf[MAXLOGTEXT];
-       utf_sprint(buf, u);
-       dolog_plain("%s", buf);
-}
-
-
-/* utf_sprint ******************************************************************
-       
-   write utf symbol into c-string (debugging purposes)
-
-*******************************************************************************/
-
-void utf_sprint(char *buffer, utf *u)
-{
-    char *endpos;                       /* points behind utf string           */
-    char *utf_ptr;                      /* current position in utf text       */
-    u2 pos = 0;                         /* position in c-string               */
-
-       if (!u) {
-               memcpy(buffer, "NULL", 5);      /* 4 chars + terminating \0           */
-               return;
-       }
-
-    endpos = utf_end(u);
-    utf_ptr = u->text;
-
-    while (utf_ptr < endpos) 
-               /* copy next unicode character */       
-               buffer[pos++] = utf_nextu2(&utf_ptr);
-
-    /* terminate string */
-    buffer[pos] = '\0';
-}
-
-
-/* utf_sprint_classname ********************************************************
-       
-   write utf symbol into c-string (debugging purposes)
-
-*******************************************************************************/
-
-void utf_sprint_classname(char *buffer, utf *u)
-{
-    char *endpos;                       /* points behind utf string           */
-    char *utf_ptr;                      /* current position in utf text       */
-    u2 pos = 0;                         /* position in c-string               */
-
-       if (!u) {
-               memcpy(buffer, "NULL", 5);      /* 4 chars + terminating \0           */
-               return;
-       }
-
-    endpos = utf_end(u);
-    utf_ptr = u->text;
-
-    while (utf_ptr < endpos) {
-               /* copy next unicode character */       
-               u2 c = utf_nextu2(&utf_ptr);
-               if (c == '/') c = '.';
-               buffer[pos++] = c;
-       }
-
-    /* terminate string */
-    buffer[pos] = '\0';
-}
-
-
-/********************* Funktion: utf_fprint **********************************
-       
-    write utf symbol into file         
-
-******************************************************************************/
-
-void utf_fprint(FILE *file, utf *u)
-{
-    char *endpos  = utf_end(u);  /* points behind utf string       */
-    char *utf_ptr = u->text;     /* current position in utf text   */ 
-
-    if (!u)
-               return;
-
-    while (utf_ptr < endpos) { 
-               /* read next unicode character */                
-               u2 c = utf_nextu2(&utf_ptr);                            
-
-               if (c >= 32 && c <= 127) fprintf(file, "%c", c);
-               else fprintf(file, "?");
-       }
-}
-
-
-/********************* Funktion: utf_fprint **********************************
-       
-    write utf symbol into file         
-
-******************************************************************************/
-
-void utf_fprint_classname(FILE *file, utf *u)
-{
-    char *endpos  = utf_end(u);  /* points behind utf string       */
-    char *utf_ptr = u->text;     /* current position in utf text   */ 
-
-    if (!u)
-               return;
-
-    while (utf_ptr < endpos) { 
-               /* read next unicode character */                
-               u2 c = utf_nextu2(&utf_ptr);                            
-               if (c == '/') c = '.';
-
-               if (c >= 32 && c <= 127) fprintf(file, "%c", c);
-               else fprintf(file, "?");
-       }
-}
-
-
-/****************** internal function: utf_hashkey *****************************
-
-       The hashkey is computed from the utf-text by using up to 8 characters.
-       For utf-symbols longer than 15 characters 3 characters are taken from
-       the beginning and the end, 2 characters are taken from the middle.
-
-*******************************************************************************/
-
-#define nbs(val) ((u4) *(++text) << val) /* get next byte, left shift by val  */
-#define fbs(val) ((u4) *(  text) << val) /* get first byte, left shift by val */
-
-static u4 utf_hashkey(const char *text, u4 length)
-{
-       const char *start_pos = text;       /* pointer to utf text                */
-       u4 a;
-
-       switch (length) {               
-               
-       case 0: /* empty string */
-               return 0;
-
-       case 1: return fbs(0);
-       case 2: return fbs(0) ^ nbs(3);
-       case 3: return fbs(0) ^ nbs(3) ^ nbs(5);
-       case 4: return fbs(0) ^ nbs(2) ^ nbs(4) ^ nbs(6);
-       case 5: return fbs(0) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(6);
-       case 6: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(5) ^ nbs(6);
-       case 7: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(5) ^ nbs(6);
-       case 8: return fbs(0) ^ nbs(1) ^ nbs(2) ^ nbs(3) ^ nbs(4) ^ nbs(5) ^ nbs(6) ^ nbs(7);
-
-       case 9:
-               a = fbs(0);
-               a ^= nbs(1);
-               a ^= nbs(2);
-               text++;
-               return a ^ nbs(4) ^ nbs(5) ^ nbs(6) ^ nbs(7) ^ nbs(8);
-
-       case 10:
-               a = fbs(0);
-               text++;
-               a ^= nbs(2);
-               a ^= nbs(3);
-               a ^= nbs(4);
-               text++;
-               return a ^ nbs(6) ^ nbs(7) ^ nbs(8) ^ nbs(9);
-
-       case 11:
-               a = fbs(0);
-               text++;
-               a ^= nbs(2);
-               a ^= nbs(3);
-               a ^= nbs(4);
-               text++;
-               return a ^ nbs(6) ^ nbs(7) ^ nbs(8) ^ nbs(9) ^ nbs(10);
-
-       case 12:
-               a = fbs(0);
-               text += 2;
-               a ^= nbs(2);
-               a ^= nbs(3);
-               text++;
-               a ^= nbs(5);
-               a ^= nbs(6);
-               a ^= nbs(7);
-               text++;
-               return a ^ nbs(9) ^ nbs(10);
-
-       case 13:
-               a = fbs(0);
-               a ^= nbs(1);
-               text++;
-               a ^= nbs(3);
-               a ^= nbs(4);
-               text += 2;      
-               a ^= nbs(7);
-               a ^= nbs(8);
-               text += 2;
-               return a ^ nbs(9) ^ nbs(10);
-
-       case 14:
-               a = fbs(0);
-               text += 2;      
-               a ^= nbs(3);
-               a ^= nbs(4);
-               text += 2;      
-               a ^= nbs(7);
-               a ^= nbs(8);
-               text += 2;
-               return a ^ nbs(9) ^ nbs(10) ^ nbs(11);
-
-       case 15:
-               a = fbs(0);
-               text += 2;      
-               a ^= nbs(3);
-               a ^= nbs(4);
-               text += 2;      
-               a ^= nbs(7);
-               a ^= nbs(8);
-               text += 2;
-               return a ^ nbs(9) ^ nbs(10) ^ nbs(11);
-
-       default:  /* 3 characters from beginning */
-               a = fbs(0);
-               text += 2;
-               a ^= nbs(3);
-               a ^= nbs(4);
-
-               /* 2 characters from middle */
-               text = start_pos + (length / 2);
-               a ^= fbs(5);
-               text += 2;
-               a ^= nbs(6);    
-
-               /* 3 characters from end */
-               text = start_pos + length - 4;
-
-               a ^= fbs(7);
-               text++;
-
-               return a ^ nbs(10) ^ nbs(11);
-    }
-}
-
-
-/*************************** function: utf_hashkey ***************************
-
-    compute the hashkey of a unicode string
-
-******************************************************************************/ 
-
-u4 unicode_hashkey(u2 *text, u2 len)
-{
-       return utf_hashkey((char*) text, len);
-}
-
-
-/************************ function: utf_new **********************************
-
-       Creates a new utf-symbol, the text of the symbol is passed as a 
-       u1-array. The function searches the utf-hashtable for a utf-symbol 
-       with this text. On success the element returned, otherwise a new 
-       hashtable element is created.
-
-       If the number of entries in the hashtable exceeds twice the size of the
-       hashtable slots a reorganization of the hashtable is done and the utf 
-       symbols are copied to a new hashtable with doubled size.
-
-******************************************************************************/
-
-utf *utf_new_intern(const char *text, u2 length);
-
-utf *utf_new(const char *text, u2 length)
-{
-    utf *r;
-
-#if defined(USE_THREADS) && defined(NATIVE_THREADS)
-    tables_lock();
-#endif
-
-    r = utf_new_intern(text, length);
-
-#if defined(USE_THREADS) && defined(NATIVE_THREADS)
-    tables_unlock();
-#endif
-
-    return r;
-}
-
-
-utf *utf_new_intern(const char *text, u2 length)
-{
-       u4 key;            /* hashkey computed from utf-text */
-       u4 slot;           /* slot in hashtable */
-       utf *u;            /* hashtable element */
-       u2 i;
-
-#ifdef STATISTICS
-       if (opt_stat)
-               count_utf_new++;
-#endif
-
-       key  = utf_hashkey(text, length);
-       slot = key & (utf_hash.size-1);
-       u    = utf_hash.ptr[slot];
-
-       /* search external hash chain for utf-symbol */
-       while (u) {
-               if (u->blength == length) {
-
-                       /* compare text of hashtable elements */
-                       for (i = 0; i < length; i++)
-                               if (text[i] != u->text[i]) goto nomatch;
-                       
-#ifdef STATISTICS
-                       if (opt_stat)
-                               count_utf_new_found++;
-#endif
-/*                     log_text("symbol found in hash table");*/
-                       /* symbol found in hashtable */
-/*                                     utf_display(u);
-                                       {
-                                               utf blup;
-                                               blup.blength=length;
-                                               blup.text=text;
-                                               utf_display(&blup);
-                                       }*/
-                       return u;
-               }
-       nomatch:
-               u = u->hashlink; /* next element in external chain */
-       }
-
-#ifdef STATISTICS
-       if (opt_stat)
-               count_utf_len += sizeof(utf) + length;
-#endif
-
-       /* location in hashtable found, create new utf element */
-       u = NEW(utf);
-       u->blength  = length;               /* length in bytes of utfstring       */
-       u->hashlink = utf_hash.ptr[slot];   /* link in external hashchain         */
-       u->text     = mem_alloc(length + 1);/* allocate memory for utf-text       */
-       memcpy(u->text, text, length);      /* copy utf-text                      */
-       u->text[length] = '\0';
-       utf_hash.ptr[slot] = u;             /* insert symbol into table           */
-
-       utf_hash.entries++;                 /* update number of entries           */
-
-       if (utf_hash.entries > (utf_hash.size * 2)) {
-
-        /* reorganization of hashtable, average length of 
-           the external chains is approx. 2                */  
-
-               u4 i;
-               utf *u;
-               hashtable newhash; /* the new hashtable */
-
-               /* create new hashtable, double the size */
-               init_hashtable(&newhash, utf_hash.size * 2);
-               newhash.entries = utf_hash.entries;
-
-#ifdef STATISTICS
-               if (opt_stat)
-                       count_utf_len += sizeof(utf*) * utf_hash.size;
-#endif
-
-               /* transfer elements to new hashtable */
-               for (i = 0; i < utf_hash.size; i++) {
-                       u = (utf *) utf_hash.ptr[i];
-                       while (u) {
-                               utf *nextu = u->hashlink;
-                               u4 slot = utf_hashkey(u->text, u->blength) & (newhash.size - 1);
-                                               
-                               u->hashlink = (utf *) newhash.ptr[slot];
-                               newhash.ptr[slot] = u;
-
-                               /* follow link in external hash chain */
-                               u = nextu;
-                       }
-               }
-       
-               /* dispose old table */
-               MFREE(utf_hash.ptr, void*, utf_hash.size);
-               utf_hash = newhash;
-       }
-
-       return u;
-}
-
-
-/********************* function: utf_new_char ********************************
-
-    creates a new utf symbol, the text for this symbol is passed
-    as a c-string ( = char* )
-
-******************************************************************************/
-
-utf *utf_new_char(const char *text)
-{
-       return utf_new(text, strlen(text));
-}
-
-
-/********************* function: utf_new_char ********************************
-
-    creates a new utf symbol, the text for this symbol is passed
-    as a c-string ( = char* )
-    "." characters are going to be replaced by "/". since the above function is
-    used often, this is a separte function, instead of an if
-
-******************************************************************************/
-
-utf *utf_new_char_classname(const char *text)
-{
-       if (strchr(text, '.')) {
-               char *txt = strdup(text);
-               char *end = txt + strlen(txt);
-               char *c;
-               utf *tmpRes;
-               for (c = txt; c < end; c++)
-                       if (*c == '.') *c = '/';
-               tmpRes = utf_new(txt, strlen(txt));
-               free(txt);
-               return tmpRes;
-
-       } else
-               return utf_new(text, strlen(text));
-}
-
-
-/************************** Funktion: utf_show ******************************
-
-    writes the utf symbols in the utfhash to stdout and
-    displays the number of external hash chains grouped 
-    according to the chainlength
-    (debugging purposes)
-
-*****************************************************************************/
-
-void utf_show(void)
-{
-
-#define CHAIN_LIMIT 20               /* limit for seperated enumeration */
-
-       u4 chain_count[CHAIN_LIMIT]; /* numbers of chains */
-       u4 max_chainlength = 0;      /* maximum length of the chains */
-       u4 sum_chainlength = 0;      /* sum of the chainlengths */
-       u4 beyond_limit = 0;         /* number of utf-symbols in chains with length>=CHAIN_LIMIT-1 */
-       u4 i;
-
-       printf ("UTF-HASH:\n");
-
-       /* show element of utf-hashtable */
-       for (i=0; i<utf_hash.size; i++) {
-               utf *u = utf_hash.ptr[i];
-               if (u) {
-                       printf ("SLOT %d: ", (int) i);
-                       while (u) {
-                               printf ("'");
-                               utf_display (u);
-                               printf ("' ");
-                               u = u->hashlink;
-                       }       
-                       printf ("\n");
-               }
-               
-       }
-
-       printf ("UTF-HASH: %d slots for %d entries\n", 
-                       (int) utf_hash.size, (int) utf_hash.entries );
-
-
-       if (utf_hash.entries == 0)
-               return;
-
-       printf("chains:\n  chainlength    number of chains    %% of utfstrings\n");
-
-       for (i=0;i<CHAIN_LIMIT;i++)
-               chain_count[i]=0;
-
-       /* count numbers of hashchains according to their length */
-       for (i=0; i<utf_hash.size; i++) {
-                 
-               utf *u = (utf*) utf_hash.ptr[i];
-               u4 chain_length = 0;
-
-               /* determine chainlength */
-               while (u) {
-                       u = u->hashlink;
-                       chain_length++;
-               }
-
-               /* update sum of all chainlengths */
-               sum_chainlength+=chain_length;
-
-               /* determine the maximum length of the chains */
-               if (chain_length>max_chainlength)
-                       max_chainlength = chain_length;
-
-               /* update number of utf-symbols in chains with length>=CHAIN_LIMIT-1 */
-               if (chain_length>=CHAIN_LIMIT) {
-                       beyond_limit+=chain_length;
-                       chain_length=CHAIN_LIMIT-1;
-               }
-
-               /* update number of hashchains of current length */
-               chain_count[chain_length]++;
-       }
-
-       /* display results */  
-       for (i=1;i<CHAIN_LIMIT-1;i++) 
-               printf("       %2d %17d %18.2f%%\n",i,chain_count[i],(((float) chain_count[i]*i*100)/utf_hash.entries));
-         
-       printf("     >=%2d %17d %18.2f%%\n",CHAIN_LIMIT-1,chain_count[CHAIN_LIMIT-1],((float) beyond_limit*100)/utf_hash.entries);
-
-
-       printf("max. chainlength:%5d\n",max_chainlength);
-
-       /* avg. chainlength = sum of chainlengths / number of chains */
-       printf("avg. chainlength:%5.2f\n",(float) sum_chainlength / (utf_hash.size-chain_count[0]));
-}
-
  /******************************************************************************
  *********************** Misc support functions ********************************
  ******************************************************************************/
@@ -825,581 +220,6 @@ u2 desc_typesize(utf *descriptor)
  }
  
  
-/********************** function: utf_nextu2 *********************************
-
-    read the next unicode character from the utf string and
-    increment the utf-string pointer accordingly
-
-******************************************************************************/
-
-u2 utf_nextu2(char **utf_ptr) 
-{
-    /* uncompressed unicode character */
-    u2 unicode_char = 0;
-    /* current position in utf text */ 
-    unsigned char *utf = (unsigned char *) (*utf_ptr);
-    /* bytes representing the unicode character */
-    unsigned char ch1, ch2, ch3;
-    /* number of bytes used to represent the unicode character */
-    int len = 0;
-       
-    switch ((ch1 = utf[0]) >> 4) {
-       default: /* 1 byte */
-               (*utf_ptr)++;
-               return (u2) ch1;
-       case 0xC: 
-       case 0xD: /* 2 bytes */
-               if (((ch2 = utf[1]) & 0xC0) == 0x80) {
-                       unsigned char high = ch1 & 0x1F;
-                       unsigned char low  = ch2 & 0x3F;
-                       unicode_char = (high << 6) + low;
-                       len = 2;
-               }
-               break;
-
-       case 0xE: /* 2 or 3 bytes */
-               if (((ch2 = utf[1]) & 0xC0) == 0x80) {
-                       if (((ch3 = utf[2]) & 0xC0) == 0x80) {
-                               unsigned char low  = ch3 & 0x3f;
-                               unsigned char mid  = ch2 & 0x3f;
-                               unsigned char high = ch1 & 0x0f;
-                               unicode_char = (((high << 6) + mid) << 6) + low;
-                               len = 3;
-                       } else
-                               len = 2;                                           
-               }
-               break;
-    }
-
-    /* update position in utf-text */
-    *utf_ptr = (char *) (utf + len);
-    return unicode_char;
-}
-
-
-/********************* function: is_valid_utf ********************************
-
-    return true if the given string is a valid UTF-8 string
-
-    utf_ptr...points to first character
-    end_pos...points after last character
-
-******************************************************************************/
-
-static unsigned long min_codepoint[6] = {0,1L<<7,1L<<11,1L<<16,1L<<21,1L<<26};
-
-bool
-is_valid_utf(char *utf_ptr,char *end_pos)
-{
-       int bytes;
-       int len,i;
-       char c;
-       unsigned long v;
-
-       if (end_pos < utf_ptr) return false;
-       bytes = end_pos - utf_ptr;
-       while (bytes--) {
-               c = *utf_ptr++;
-               /*dolog("%c %02x",c,c);*/
-               if (!c) return false;                     /* 0x00 is not allowed */
-               if ((c & 0x80) == 0) continue;            /* ASCII */
-
-               if      ((c & 0xe0) == 0xc0) len = 1;     /* 110x xxxx */
-               else if ((c & 0xf0) == 0xe0) len = 2;     /* 1110 xxxx */
-               else if ((c & 0xf8) == 0xf0) len = 3;     /* 1111 0xxx */
-               else if ((c & 0xfc) == 0xf8) len = 4;     /* 1111 10xx */
-               else if ((c & 0xfe) == 0xfc) len = 5;     /* 1111 110x */
-               else return false;                        /* invalid leading byte */
-
-               if (len > 2) return false;                /* Java limitation */
-
-               v = (unsigned long)c & (0x3f >> len);
-               
-               if ((bytes -= len) < 0) return false;     /* missing bytes */
-
-               for (i = len; i--; ) {
-                       c = *utf_ptr++;
-                       /*dolog("    %c %02x",c,c);*/
-                       if ((c & 0xc0) != 0x80)               /* 10xx xxxx */
-                               return false;
-                       v = (v<<6) | (c & 0x3f);
-               }
-
-               /*              dolog("v=%d",v);*/
-
-               if (v == 0) {
-                       if (len != 1) return false;           /* Java special */
-               }
-               else {
-                       /* Sun Java seems to allow overlong UTF-8 encodings */
-                       
-                       if (v < min_codepoint[len]) { /* overlong UTF-8 */
-                               if (!opt_liberalutf)
-                                       fprintf(stderr,"WARNING: Overlong UTF-8 sequence found.\n");
-                               /* XXX change this to panic? */
-                       }
-               }
-
-               /* surrogates in UTF-8 seem to be allowed in Java classfiles */
-               /* if (v >= 0xd800 && v <= 0xdfff) return false; */ /* surrogates */
-
-               /* even these seem to be allowed */
-               /* if (v == 0xfffe || v == 0xffff) return false; */ /* invalid codepoints */
-       }
-
-       return true;
-}
- 
-/********************* function: is_valid_name *******************************
-
-    return true if the given string may be used as a class/field/method name.
-    (Currently this only disallows empty strings and control characters.)
-
-    NOTE: The string is assumed to have passed is_valid_utf!
-
-    utf_ptr...points to first character
-    end_pos...points after last character
-
-******************************************************************************/
-
-bool
-is_valid_name(char *utf_ptr,char *end_pos)
-{
-       if (end_pos <= utf_ptr) return false; /* disallow empty names */
-
-       while (utf_ptr < end_pos) {
-               unsigned char c = *utf_ptr++;
-
-               if (c < 0x20) return false; /* disallow control characters */
-               if (c == 0xc0 && (unsigned char)*utf_ptr == 0x80) return false; /* disallow zero */
-       }
-       return true;
-}
-
-bool
-is_valid_name_utf(utf *u)
-{
-       return is_valid_name(u->text,utf_end(u));
-}
-
-/******************** Function: class_new **************************************
-
-    searches for the class with the specified name in the classes hashtable,
-    if there is no such class a new classinfo structure is created and inserted
-    into the list of classes to be loaded
-
-*******************************************************************************/
-
-classinfo *class_new_intern(utf *classname)
-{
-       classinfo *c;     /* hashtable element */
-       u4 key;           /* hashkey computed from classname */
-       u4 slot;          /* slot in hashtable */
-       u2 i;
-
-       key  = utf_hashkey(classname->text, classname->blength);
-       slot = key & (class_hash.size - 1);
-       c    = class_hash.ptr[slot];
-
-       /* search external hash chain for the class */
-       while (c) {
-               if (c->name->blength == classname->blength) {
-                       for (i = 0; i < classname->blength; i++)
-                               if (classname->text[i] != c->name->text[i]) goto nomatch;
-                                               
-                       /* class found in hashtable */
-                       return c;
-               }
-                       
-       nomatch:
-               c = c->hashlink; /* next element in external chain */
-       }
-
-       /* location in hashtable found, create new classinfo structure */
-
-#if defined(STATISTICS)
-       if (opt_stat)
-               count_class_infos += sizeof(classinfo);
-#endif
-
-       if (initverbose) {
-               char logtext[MAXLOGTEXT];
-               sprintf(logtext, "Creating class: ");
-               utf_sprint_classname(logtext + strlen(logtext), classname);
-               log_text(logtext);
-       }
-
-       c = GCNEW(classinfo, 1); /*JOWENN: NEW*/
-       /*c=NEW(classinfo);*/
-       c->vmClass = 0;
-       c->flags = 0;
-       c->name = classname;
-       c->packagename = NULL;
-       c->cpcount = 0;
-       c->cptags = NULL;
-       c->cpinfos = NULL;
-       c->super = NULL;
-       c->sub = NULL;
-       c->nextsub = NULL;
-       c->interfacescount = 0;
-       c->interfaces = NULL;
-       c->fieldscount = 0;
-       c->fields = NULL;
-       c->methodscount = 0;
-       c->methods = NULL;
-       c->linked = false;
-       c->loaded = false;
-       c->index = 0;
-       c->instancesize = 0;
-       c->header.vftbl = NULL;
-       c->innerclasscount = 0;
-       c->innerclass = NULL;
-       c->vftbl = NULL;
-       c->initialized = false;
-       c->initializing = false;
-       c->classvftbl = false;
-    c->classUsed = 0;
-    c->impldBy = NULL;
-       c->classloader = NULL;
-       c->sourcefile = NULL;
-       
-       /* insert class into the hashtable */
-       c->hashlink = class_hash.ptr[slot];
-       class_hash.ptr[slot] = c;
-
-       /* update number of hashtable-entries */
-       class_hash.entries++;
-
-       if (class_hash.entries > (class_hash.size * 2)) {
-
-               /* reorganization of hashtable, average length of 
-                  the external chains is approx. 2                */  
-
-               u4 i;
-               classinfo *c;
-               hashtable newhash;  /* the new hashtable */
-
-               /* create new hashtable, double the size */
-               init_hashtable(&newhash, class_hash.size * 2);
-               newhash.entries = class_hash.entries;
-
-               /* transfer elements to new hashtable */
-               for (i = 0; i < class_hash.size; i++) {
-                       c = (classinfo *) class_hash.ptr[i];
-                       while (c) {
-                               classinfo *nextc = c->hashlink;
-                               u4 slot = (utf_hashkey(c->name->text, c->name->blength)) & (newhash.size - 1);
-                                               
-                               c->hashlink = newhash.ptr[slot];
-                               newhash.ptr[slot] = c;
-
-                               c = nextc;
-                       }
-               }
-       
-               /* dispose old table */ 
-               MFREE(class_hash.ptr, void*, class_hash.size);
-               class_hash = newhash;
-       }
-
-    /* Array classes need further initialization. */
-    if (c->name->text[0] == '[') {
-               /* Array classes are not loaded from classfiles. */
-               c->loaded = true;
-        class_new_array(c);
-               c->packagename = array_packagename;
-
-       } else {
-               /* Find the package name */
-               /* Classes in the unnamed package keep packagename == NULL. */
-               char *p = utf_end(c->name) - 1;
-               char *start = c->name->text;
-               for (;p > start; --p) {
-                       if (*p == '/') {
-                               c->packagename = utf_new (start, p - start);
-                               break;
-                       }
-               }
-       }
-#if defined(USE_THREADS) && defined(NATIVE_THREADS)
-       initObjectLock(&c->header);
-#endif
-
-       return c;
-}
-
-
-/* class_new *******************************************************************
-
-   Looks up or creates the class entry for classname via class_new_intern
-   while holding the tables lock (native threads only). With eager loading
-   enabled (opt_eager) the class is loaded, every class queued on
-   unlinkedclasses is linked and removed from that list, and finally the
-   class itself is linked. The class entry is returned in every case, also
-   when loading or linking fails.
-
-*******************************************************************************/
-
-classinfo *class_new(utf *classname)
-{
-    classinfo *c;
-    classinfo *cur;                     /* list element being linked          */
-    classinfo *succ;                    /* successor of cur, fetched early    */
-
-#if defined(USE_THREADS) && defined(NATIVE_THREADS)
-    tables_lock();
-#endif
-
-    c = class_new_intern(classname);
-
-    /* eager class loading and linking is only done on demand */
-
-    if (!opt_eager)
-        goto done;
-
-    list_init(&unlinkedclasses, OFFSET(classinfo, listnode));
-
-    if (!c->loaded && !class_load(c))
-        goto done;
-
-    /* link all referenced classes, skipping the class handled here */
-
-    for (cur = list_first(&unlinkedclasses); cur; cur = succ) {
-        if (cur != c && !class_link(cur))
-            goto done;
-
-        /* the successor has to be read before list_remove, which sets
-           prev and next of the removed element to NULL */
-        succ = list_next(&unlinkedclasses, cur);
-        list_remove(&unlinkedclasses, cur);
-    }
-
-    if (!c->linked && !class_link(c))
-        goto done;
-
-done:
-#if defined(USE_THREADS) && defined(NATIVE_THREADS)
-    tables_unlock();
-#endif
-
-    return c;
-}
-
-
-/******************** Function: class_get **************************************
-
-    searches for the class with the specified name in the classes hashtable
-    if there is no such class NULL is returned
-
-*******************************************************************************/
-
-classinfo *class_get(utf *classname)
-{
-    classinfo *cand;                    /* candidate on the external chain    */
-    u4         bucket;                  /* slot selected by the name's hash   */
-
-    bucket = utf_hashkey(classname->text, classname->blength)
-             & (class_hash.size - 1);
-
-    /* walk the external hash chain of that slot */
-    for (cand = class_hash.ptr[bucket]; cand; cand = cand->hashlink) {
-        /* names match when both length and all bytes are equal */
-        if (cand->name->blength == classname->blength &&
-            memcmp(classname->text, cand->name->text,
-                   classname->blength) == 0)
-            return cand;
-    }
-
-    /* no class with that name in the table */
-    return NULL;
-}
-
-
-/* class_remove ****************************************************************
-
-   removes the class entry with the specified name in the classes hashtable,
-   furthermore the class' resources are freed
-   if there is no such class false is returned
-
-*******************************************************************************/
-
-/* Unlinks the hashtable element whose name equals c->name from the classes
-   hashtable, frees it with class_free and returns true. Returns false when
-   no element with that name is stored in the table.
-
-   The element count of the table is decremented on removal so that
-   class_hash.entries keeps matching the number of stored elements. */
-
-bool class_remove(classinfo *c)
-{
-    classinfo *tc;                      /* current element of the chain       */
-    classinfo *pc;                      /* predecessor of tc, NULL at head    */
-    u4         slot;                    /* slot in hashtable                  */
-
-    slot = utf_hashkey(c->name->text, c->name->blength)
-           & (class_hash.size - 1);
-    pc   = NULL;
-
-    /* search external hash-chain */
-    for (tc = class_hash.ptr[slot]; tc; pc = tc, tc = tc->hashlink) {
-        /* compare classnames: same length and same bytes */
-        if (tc->name->blength != c->name->blength)
-            continue;
-
-        if (memcmp(tc->name->text, c->name->text, c->name->blength) != 0)
-            continue;
-
-        /* class found in hashtable, take it out of the chain */
-        if (!pc)
-            class_hash.ptr[slot] = tc->hashlink;
-        else
-            pc->hashlink = tc->hashlink;
-
-        /* one element less is stored in the table now */
-        class_hash.entries--;
-
-        class_free(tc);
-
-        return true;
-    }
-
-    /* class not found */
-    return false;
-}
-
-
-/***************** Function: class_array_of ***********************************
-
-    Returns an array class with the given component class.
-    The array class is dynamically created if necessary.
-
-*******************************************************************************/
-
-classinfo *class_array_of(classinfo *component)
-{
-    utf       *cname;                   /* name of the component class        */
-    bool       nested;                  /* component is itself an array       */
-    int        complen;                 /* byte length of the component name  */
-    int        namelen;                 /* byte length of the array name      */
-    char      *namebuf;
-    char      *dst;                     /* write position inside namebuf      */
-    classinfo *c;
-
-    cname   = component->name;
-    complen = cname->blength;
-    nested  = (cname->text[0] == '[');
-
-    /* Assemble the array class name: an array component only gets a
-       leading '[', any other class is wrapped as "[L<name>;" */
-    namelen = complen + (nested ? 1 : 3);
-    namebuf = DMNEW(char, namelen);
-
-    dst = namebuf;
-    *dst++ = '[';
-
-    if (!nested)
-        *dst++ = 'L';
-
-    memcpy(dst, cname->text, complen);
-
-    if (!nested)
-        dst[complen] = ';';
-
-    /* load this class ;-) and link it */
-    c = class_new(utf_new(namebuf, namelen));
-    c->loaded = 1;
-    class_link(c);
-
-    return c;
-}
-
-/*************** Function: class_multiarray_of ********************************
-
-    Returns an array class with the given dimension and element class.
-    The array class is dynamically created if necessary.
-
-*******************************************************************************/
-
-classinfo *class_multiarray_of(int dim, classinfo *element)
-{
-    utf  *ename;                        /* name of the element class          */
-    bool  nested;                       /* element is itself an array         */
-    int   elemlen;                      /* byte length of the element name    */
-    int   namelen;                      /* byte length of the array name      */
-    char *namebuf;
-    char *dst;                          /* write position behind the '['s     */
-
-    if (dim < 1)
-        panic("Invalid array dimension requested");
-
-    ename   = element->name;
-    elemlen = ename->blength;
-    nested  = (ename->text[0] == '[');
-
-    /* Assemble the array class name: dim times '[' followed either by the
-       array element name as is, or by "L<name>;" for any other class */
-    namelen = elemlen + dim + (nested ? 0 : 2);
-    namebuf = DMNEW(char, namelen);
-
-    memset(namebuf, '[', dim);
-    dst = namebuf + dim;
-
-    if (!nested)
-        *dst++ = 'L';
-
-    memcpy(dst, ename->text, elemlen);
-
-    if (!nested)
-        namebuf[namelen - 1] = ';';
-
-    return class_new(utf_new(namebuf, namelen));
-}
-
-/************************** function: utf_strlen ******************************
-
-    determine number of unicode characters in the utf string
-
-*******************************************************************************/
-
-u4 utf_strlen(utf *u)
-{
-    char *limit;                    /* first position behind the utf text */
-    char *cursor;                   /* read position in the utf text      */
-    u4    count;                    /* unicode characters seen so far     */
-
-    /* a missing string raises a NullPointerException and counts as empty */
-    if (u == NULL) {
-        *exceptionptr = new_nullpointerexception();
-        return 0;
-    }
-
-    limit = utf_end(u);
-    count = 0;
-
-    /* every utf_nextu2 call advances the cursor by one unicode character */
-    for (cursor = u->text; cursor < limit; count++)
-        utf_nextu2(&cursor);
-
-    /* the cursor must stop exactly at the end, otherwise the last
-       character ran past the string */
-    if (cursor != limit)
-        panic("illegal utf string"); 
-
-    return count;
-}
-
-
  /*
   * These are local overrides for various environment variables in Emacs.
   * Please do not remove this and leave it at the end of the file, where
diff --git a/src/vm/tables.h b/src/vm/tables.h

index fcb980081906a3403df0c482b0a710f54ccbebe1..e0c8b7c1f71df397b7ae910b216fe489dba53f95 100644 (file)
--- a/src/vm/tables.h
+++ b/src/vm/tables.h
@@ -26,7 +26,7 @@
  
     Authors: Reinhard Grafl
  
-   $Id: tables.h 1843 2005-01-04 11:21:02Z twisti $
+   $Id: tables.h 1930 2005-02-10 10:54:28Z twisti $
  
  */
  
@@ -39,6 +39,69 @@
  #include "vm/global.h"
  
  
+/* data structures for hashtables ********************************************
+
+
+   All utf-symbols, javastrings and classes are stored in global
+   hashtables, so every symbol exists only once. Equal symbols have
+   identical pointers.  The functions for adding hashtable elements
+   search the table for the element with the specified name/text and
+   return it on success. Otherwise a new hashtable element is created.
+
+   The hashtables use external linking for handling collisions. The
+   hashtable structure contains a pointer <ptr> to the array of
+   hashtable slots. The number of hashtable slots and therefore the
+   size of this array is specified by the element <size> of hashtable
+   structure. <entries> contains the number of all hashtable elements
+   stored in the table, including those in the external chains.  The
+   hashtable element structures (utf, literalstring, classinfo)
+   contain both a pointer to the next hashtable element as a link for
+   the external hash chain and the key of the element. The key is
+   computed from the text of the string or the classname by using up
+   to 8 characters.
+       
+   If the number of entries in the hashtable exceeds twice the size of
+   the hashtableslot-array it is supposed that the average length of
+   the external chains has reached a value beyond 2. Therefore the
+   functions for adding hashtable elements (utf_new, class_new,
+   literalstring_new) double the hashtableslot-array. In this
+   restructuring process all elements have to be inserted into the new
+   hashtable and new external chains must be built.
+
+   Example for the layout of a hashtable:
+
+hashtable.ptr-->+-------------------+
+                |                   |
+                         ...
+                |                   |
+                +-------------------+   +-------------------+   +-------------------+
+                | hashtable element |-->| hashtable element |-->| hashtable element |-->NULL
+                +-------------------+   +-------------------+   +-------------------+
+                | hashtable element |
+                +-------------------+   +-------------------+   
+                | hashtable element |-->| hashtable element |-->NULL
+                +-------------------+   +-------------------+   
+                | hashtable element |-->NULL
+                +-------------------+
+                |                   |
+                         ...
+                |                   |
+                +-------------------+
+
+*/
+
+
+/* data structure for accessing hashtables ************************************/
+
+typedef struct hashtable hashtable;
+
+struct hashtable {            
+       u4     size;                        /* number of slots in the ptr array   */
+       u4     entries;                     /* number of entries in the table     */
+       void **ptr;                         /* pointer to array of hashtable slots */
+};
+
+
  #define CLASS(name)     (unicode_getclasslink(unicode_new_char(name)))
  
  /* to determine the end of utf strings */
@@ -46,9 +109,6 @@
  
  extern hashtable utf_hash;     /* hashtable for utf8-symbols */
  extern hashtable string_hash;  /* hashtable for javastrings  */
-extern hashtable class_hash;   /* hashtable for classes      */
-
-extern list unlinkedclasses;   /* this is only used for eager class loading   */
  
  
  /* creates hashtables for symboltables */
@@ -57,73 +117,15 @@ void tables_init(void);
  /* free memory for hashtables */ 
  void tables_close(void);
  
-/* check if a UTF-8 string is valid */
-bool is_valid_utf(char *utf_ptr, char *end_pos);
-
-/* check if a UTF-8 string may be used as a class/field/method name */
-bool is_valid_name(char *utf_ptr, char *end_pos);
-bool is_valid_name_utf(utf *u);
-
-/* write utf symbol to file/buffer */
-void utf_sprint(char *buffer, utf *u);
-void utf_sprint_classname(char *buffer, utf *u);
-void utf_fprint(FILE *file, utf *u);
-void utf_fprint_classname(FILE *file, utf *u);
-void utf_display(utf *u);
-void utf_display_classname(utf *u);
-
-/* write utf symbol to logfile/stdout */
-void log_utf(utf *u);
-void log_plain_utf(utf *u);
-
-/* create new utf-symbol */
-utf *utf_new(const char *text, u2 length);
-
-/* without locking (caller already holding lock) */
-utf *utf_new_intern(const char *text, u2 length);
-
-utf *utf_new_char(const char *text);
-utf *utf_new_char_classname(const char *text);
-
-/* show utf-table */
-void utf_show(void);
-
-/* get next unicode character of a utf-string */
-u2 utf_nextu2(char **utf);
-
-/* get number of unicode characters of a utf string */
-u4 utf_strlen(utf *u);
-
-/* search for class and create it if not found */
-classinfo *class_new(utf *u);
-
-/* without locking (caller already holding lock) */
-classinfo *class_new_intern(utf *u);
-
-/* return an array class with the given component class */
-classinfo *class_array_of(classinfo *component);
-
-/* return an array class with the given dimension and element class */
-classinfo *class_multiarray_of(int dim, classinfo *element);
-
  /* get javatype according to a typedescriptor */
  u2 desc_to_type(utf *descriptor);
  
  /* get length of a datatype */
  u2 desc_typesize(utf *descriptor);
  
-/* determine hashkey of a unicode-symbol */
-u4 unicode_hashkey(u2 *text, u2 length);
-
  /* create hashtable */
  void init_hashtable(hashtable *hash, u4 size);
  
-/* search for class in classtable */
-classinfo *class_get(utf *u);
-
-/* remove class from classtable */
-bool class_remove(classinfo *c);
-
  #if defined(USE_THREADS) && defined(NATIVE_THREADS)
  void tables_lock(void);
  void tables_unlock(void);
author	twisti <none@none>
	Thu, 10 Feb 2005 10:54:28 +0000 (10:54 +0000)
committer	twisti <none@none>
	Thu, 10 Feb 2005 10:54:28 +0000 (10:54 +0000)
src/vm/tables.c		patch \| blob \| history
src/vm/tables.h		patch \| blob \| history