From 384c06185b0720f715549d1868997c49087969a3 Mon Sep 17 00:00:00 2001 From: edwin Date: Fri, 5 May 2006 15:14:18 +0000 Subject: [PATCH] * src/vm/string.c, src/vm/stringlocal.h (javastring_new_from_utf_buffer): New function. (javastring_new_from_utf_string): New function. * src/vm/utf8.c (utf_get_number_of_u2s_for_buffer): New function. (utf_get_number_of_u2s): Documented that this function may throw exceptions. * src/vm/utf8.h (utf_get_number_of_u2s_for_buffer): New function. --- src/vm/string.c | 73 +++++++++++++++++++++++++++++++++++++++++++- src/vm/stringlocal.h | 8 +++-- src/vm/utf8.c | 47 +++++++++++++++++++++++++++- src/vm/utf8.h | 3 +- 4 files changed, 126 insertions(+), 5 deletions(-) diff --git a/src/vm/string.c b/src/vm/string.c index 9e230fb89..ad1be8563 100644 --- a/src/vm/string.c +++ b/src/vm/string.c @@ -30,7 +30,7 @@ Changes: Christian Thalinger - $Id: string.c 4874 2006-05-05 14:36:18Z edwin $ + $Id: string.c 4875 2006-05-05 15:14:18Z edwin $ */ @@ -267,6 +267,77 @@ void stringtable_update(void) } +/* javastring_new_from_utf_buffer ********************************************** + + Create a new object of type java/lang/String with the text from + the specified utf8 buffer. 
+ + IN: + buffer.......points to first char in the buffer + blength......number of bytes to read from the buffer + + RETURN VALUE: + the java.lang.String object, or + NULL if an exception has been thrown + +*******************************************************************************/ + +java_lang_String *javastring_new_from_utf_buffer(const char *buffer, u4 blength) +{ + const char *utf_ptr; /* read cursor into buffer, advanced by utf_nextu2 */ + u4 utflength; /* number of u2s the buffer decodes to */ + java_lang_String *s; /* the String object that is returned */ + java_chararray *a; /* char array that becomes s->value */ + u4 i; /* index of the next u2 to store in a->data */ + + assert(buffer); + + utflength = utf_get_number_of_u2s_for_buffer(buffer,blength); + + s = (java_lang_String *) builtin_new(class_java_lang_String); + a = builtin_newarray_char(utflength); + + /* bail out if either the String object or its char array is missing */ + if (!a || !s) + return NULL; + + /* decode the buffer into the array, one u2 per utf_nextu2 call */ + utf_ptr = buffer; + for (i = 0; i < utflength; i++) + a->data[i] = utf_nextu2((char **)&utf_ptr); /* cast drops const to fit utf_nextu2's char ** parameter */ + + /* the new String covers the whole array */ + s->value = a; + s->offset = 0; + s->count = utflength; + + return s; +} + + +/* javastring_new_from_utf_string ********************************************** + + Create a new object of type java/lang/String with the text from + the specified zero-terminated utf8 string. 
+ + IN: + utfstr.......points to the zero-terminated utf8 string to convert + (its length is determined with strlen) + + RETURN VALUE: + the java.lang.String object, or + NULL if an exception has been thrown + +*******************************************************************************/ + +java_lang_String *javastring_new_from_utf_string(const char *utfstr) +{ + assert(utfstr); + + return javastring_new_from_utf_buffer(utfstr, strlen(utfstr)); +} + + /* javastring_new ************************************************************** creates a new object of type java/lang/String with the text of diff --git a/src/vm/stringlocal.h b/src/vm/stringlocal.h index 3ce6775b4..7bc7e25c4 100644 --- a/src/vm/stringlocal.h +++ b/src/vm/stringlocal.h @@ -28,7 +28,7 @@ Changes: - $Id: stringlocal.h 4874 2006-05-05 14:36:18Z edwin $ + $Id: stringlocal.h 4875 2006-05-05 15:14:18Z edwin $ */ @@ -127,9 +127,13 @@ java_lang_String *javastring_new(utf *text); /* creates a new object of type java/lang/String from a utf-text, changes slashes to dots */ java_lang_String *javastring_new_slash_to_dot(utf *text); -/* creates a new object of type java/lang/String from a c-string */ +/* creates a new object of type java/lang/String from an ASCII c-string */ java_lang_String *javastring_new_from_ascii(const char *text); +/* creates a new object of type java/lang/String from UTF-8 */ +java_lang_String *javastring_new_from_utf_buffer(const char *buffer, u4 blength); +java_lang_String *javastring_new_from_utf_string(const char *utfstr); + /* make c-string from a javastring (debugging) */ char *javastring_tochar(java_objectheader *s); diff --git a/src/vm/utf8.c b/src/vm/utf8.c index 89c9d4600..10c142bab 100644 --- a/src/vm/utf8.c +++ b/src/vm/utf8.c @@ -30,7 +30,7 @@ Andreas Krall Christian Thalinger - $Id: utf8.c 4873 2006-05-05 13:56:35Z edwin $ + $Id: utf8.c 4875 2006-05-05 15:14:18Z edwin $ */ @@ -808,6 +808,48 @@ u4 utf_bytes(utf *u) return u->blength; } +/* utf_get_number_of_u2s_for_buffer 
******************************************** + + Determine number of UTF-16 u2s in the given UTF-8 buffer + + CAUTION: Use this function *only* when you want to convert an UTF-8 buffer + to an array of u2s (UTF-16) and want to know how many of them you will get. + All other uses of this function are probably wrong. + + IN: + buffer........points to first char in buffer + blength.......number of _bytes_ in the buffer + + OUT: + the number of u2s needed to hold this string in UTF-16 encoding. + There is _no_ terminating zero included in this count. + + NOTE: Unlike utf_get_number_of_u2s, this function never throws an + exception. + +*******************************************************************************/ + +u4 utf_get_number_of_u2s_for_buffer(const char *buffer, u4 blength) +{ + const char *endpos; /* one past the last byte of the buffer (buffer + blength) */ + const char *utf_ptr; /* read cursor, advanced by utf_nextu2 */ + u4 len = 0; /* u2s (UTF-16 units) counted so far */ + + utf_ptr = buffer; + endpos = utf_ptr + blength; + + while (utf_ptr < endpos) { + len++; + /* decode one u2 only to advance utf_ptr; the value is discarded */ + utf_nextu2((char **)&utf_ptr); + } + + assert(utf_ptr == endpos); /* utf_ptr > endpos here means the last utf_nextu2 call stepped past the buffer end */ + + return len; +} + + /* utf_get_number_of_u2s ******************************************************* Determine number of UTF-16 u2s in the utf string. @@ -822,6 +864,7 @@ u4 utf_bytes(utf *u) OUT: the number of u2s needed to hold this string in UTF-16 encoding. There is _no_ terminating zero included in this count. + XXX 0 if a NullPointerException has been thrown (see below) *******************************************************************************/ @@ -831,6 +874,8 @@ u4 utf_get_number_of_u2s(utf *u) char *utf_ptr; /* current position in utf text */ u4 len = 0; /* number of unicode characters */ + /* XXX this is probably not checked by most callers! 
Review this after */ + /* the invalid uses of this function have been eliminated */ if (!u) { exceptions_throw_nullpointerexception(); return 0; diff --git a/src/vm/utf8.h b/src/vm/utf8.h index 600fdfa48..4a834f02a 100644 --- a/src/vm/utf8.h +++ b/src/vm/utf8.h @@ -28,7 +28,7 @@ Changes: - $Id: utf8.h 4873 2006-05-05 13:56:35Z edwin $ + $Id: utf8.h 4875 2006-05-05 15:14:18Z edwin $ */ @@ -191,6 +191,7 @@ u2 utf_nextu2(char **utf); /* get number of unicode characters of a utf string */ u4 utf_get_number_of_u2s(utf *u); +u4 utf_get_number_of_u2s_for_buffer(const char *buffer, u4 blength); /* determine utf length in bytes of a u2 array */ u4 u2_utflength(u2 *text, u4 u2_length); -- 2.25.1