(javastring_new_from_utf_buffer): New function.
(javastring_new_from_utf_string): New function.
* src/vm/utf8.c (utf_get_number_of_u2s_for_buffer): New function.
(utf_get_number_of_u2s): Documented that this function may throw
exceptions.
* src/vm/utf8.h (utf_get_number_of_u2s_for_buffer): New function.
Changes: Christian Thalinger
- $Id: string.c 4874 2006-05-05 14:36:18Z edwin $
+ $Id: string.c 4875 2006-05-05 15:14:18Z edwin $
*/
}
+/* javastring_new_from_utf_buffer **********************************************
+
+ Create a new object of type java/lang/String with the text from
+ the specified utf8 buffer.
+
+ The buffer is decoded into a newly created char array, which becomes
+ the value of the new string (offset 0, count = number of decoded u2s).
+
+ IN:
+ buffer.......points to first char in the buffer (must not be NULL,
+ this is checked by an assertion only)
+ blength......number of bytes to read from the buffer
+
+ RETURN VALUE:
+ the java.lang.String object, or
+ NULL if an exception has been thrown
+
+*******************************************************************************/
+
+java_lang_String *javastring_new_from_utf_buffer(const char *buffer, u4 blength)
+{
+ const char *utf_ptr; /* current utf character in utf string */
+ u4 utflength; /* number of u2s the buffer decodes to */
+ java_lang_String *s; /* result-string */
+ java_chararray *a; /* char array receiving the decoded u2s */
+ u4 i;
+
+ assert(buffer);
+
+ utflength = utf_get_number_of_u2s_for_buffer(buffer,blength); /* sizes the char array */
+
+ s = (java_lang_String *) builtin_new(class_java_lang_String);
+ a = builtin_newarray_char(utflength);
+
+ /* javastring or character-array could not be created */
+ if (!a || !s)
+ return NULL;
+
+ /* decompress utf-string: one utf_nextu2 call per u2 counted above */
+ utf_ptr = buffer;
+ for (i = 0; i < utflength; i++)
+ a->data[i] = utf_nextu2((char **)&utf_ptr); /* cast drops const to fit the char ** argument */
+
+ /* set fields of the javastring-object */
+ s->value = a;
+ s->offset = 0; /* the text starts at the first array element */
+ s->count = utflength;
+
+ return s;
+}
+
+
+/* javastring_new_from_utf_string **********************************************
+
+ Create a new object of type java/lang/String with the text from
+ the specified zero-terminated utf8 string.
+
+ The byte length is determined with strlen, so the text ends at the
+ first zero byte; the conversion itself is done by
+ javastring_new_from_utf_buffer.
+
+ IN:
+ utfstr.......points to the zero-terminated utf8 string (must not be
+ NULL, this is checked by an assertion only)
+
+ RETURN VALUE:
+ the java.lang.String object, or
+ NULL if an exception has been thrown
+
+*******************************************************************************/
+
+java_lang_String *javastring_new_from_utf_string(const char *utfstr)
+{
+ assert(utfstr);
+
+ return javastring_new_from_utf_buffer(utfstr, strlen(utfstr));
+}
+
+
/* javastring_new **************************************************************
creates a new object of type java/lang/String with the text of
Changes:
- $Id: stringlocal.h 4874 2006-05-05 14:36:18Z edwin $
+ $Id: stringlocal.h 4875 2006-05-05 15:14:18Z edwin $
*/
/* creates a new object of type java/lang/String from a utf-text, changes slashes to dots */
java_lang_String *javastring_new_slash_to_dot(utf *text);
-/* creates a new object of type java/lang/String from a c-string */
+/* creates a new object of type java/lang/String from an ASCII c-string */
java_lang_String *javastring_new_from_ascii(const char *text);
+/* creates a new object of type java/lang/String from UTF-8 */
+java_lang_String *javastring_new_from_utf_buffer(const char *buffer, u4 blength);
+java_lang_String *javastring_new_from_utf_string(const char *utfstr);
+
/* make c-string from a javastring (debugging) */
char *javastring_tochar(java_objectheader *s);
Andreas Krall
Christian Thalinger
- $Id: utf8.c 4873 2006-05-05 13:56:35Z edwin $
+ $Id: utf8.c 4875 2006-05-05 15:14:18Z edwin $
*/
return u->blength;
}
+/* utf_get_number_of_u2s_for_buffer ********************************************
+
+ Determine number of UTF-16 u2s in the given UTF-8 buffer
+
+ CAUTION: Use this function *only* when you want to convert a UTF-8 buffer
+ to an array of u2s (UTF-16) and want to know how many of them you will get.
+ All other uses of this function are probably wrong.
+
+ IN:
+ buffer........points to first char in buffer
+ blength.......number of _bytes_ in the buffer
+
+ OUT:
+ the number of u2s needed to hold this string in UTF-16 encoding.
+ There is _no_ terminating zero included in this count.
+
+ NOTE: Unlike utf_get_number_of_u2s, this function never throws an
+ exception.
+
+ NOTE: Decoding is expected to stop exactly at the end of the buffer;
+ this is checked by an assertion only.
+ NOTE(review): a buffer ending in the middle of a multi-byte sequence
+ would presumably make utf_nextu2 read beyond the buffer -- confirm
+ against utf_nextu2.
+
+*******************************************************************************/
+
+u4 utf_get_number_of_u2s_for_buffer(const char *buffer, u4 blength)
+{
+ const char *endpos; /* points behind utf string */
+ const char *utf_ptr; /* current position in utf text */
+ u4 len = 0; /* number of u2s counted so far */
+
+ utf_ptr = buffer;
+ endpos = utf_ptr + blength;
+
+ while (utf_ptr < endpos) {
+ len++;
+ /* decode one u2 only to advance utf_ptr; the value itself is discarded */
+ utf_nextu2((char **)&utf_ptr); /* cast drops const to fit the char ** argument */
+ }
+
+ assert(utf_ptr == endpos); /* decoding must end exactly at the buffer end */
+
+ return len;
+}
+
+
/* utf_get_number_of_u2s *******************************************************
Determine number of UTF-16 u2s in the utf string.
OUT:
the number of u2s needed to hold this string in UTF-16 encoding.
There is _no_ terminating zero included in this count.
+ XXX 0 if a NullPointerException has been thrown (see below)
*******************************************************************************/
char *utf_ptr; /* current position in utf text */
u4 len = 0; /* number of unicode characters */
+ /* XXX this is probably not checked by most callers! Review this after */
+ /* the invalid uses of this function have been eliminated */
if (!u) {
exceptions_throw_nullpointerexception();
return 0;
Changes:
- $Id: utf8.h 4873 2006-05-05 13:56:35Z edwin $
+ $Id: utf8.h 4875 2006-05-05 15:14:18Z edwin $
*/
/* get number of unicode characters of a utf string */
u4 utf_get_number_of_u2s(utf *u);
+u4 utf_get_number_of_u2s_for_buffer(const char *buffer, u4 blength);
/* determine utf length in bytes of a u2 array */
u4 u2_utflength(u2 *text, u4 u2_length);