From 032ac1857d0d33607a638684584da6079f0c74c1 Mon Sep 17 00:00:00 2001
From: Bernhard Urban
Date: Sun, 20 May 2012 12:32:07 +0200
Subject: [PATCH] string: allocate a real java object for strings

incl. fieldtable, array, ...

also see:
http://www.javamex.com/tutorials/memory/string_memory_usage.shtml
---
Review notes (ignored by git-am, not part of the commit message):
 * Mate/Strings.hs: the scratch buffer returned by newArray is now released
   with free once its bytes have been copied; it used to be leaked on every
   string allocation.
 * ffi/native.c: chararray.str is a C99 flexible array member instead of a
   single char, so the natives pass fmt->value->str rather than the address
   of a one-byte member. The offset of str inside the struct is unchanged.
 * Hunk sizes and the diffstat below were recomputed for the added lines; the
   blob ids on the index lines still refer to the pre-review contents.

 Mate/Strings.hs       | 65 +++++++++++++++++++++++++++++++++++--------
 ffi/native.c          | 47 ++++++++++++++++++++++---------
 java/lang/String.java | 11 ++++++++
 3 files changed, 99 insertions(+), 24 deletions(-)
 create mode 100644 java/lang/String.java

diff --git a/Mate/Strings.hs b/Mate/Strings.hs
index f1762b0..2ebda14 100644
--- a/Mate/Strings.hs
+++ b/Mate/Strings.hs
@@ -15,11 +15,11 @@ import Text.Printf
 
 import JVM.ClassFile
 
-import Foreign.Ptr
-import Foreign.Marshal.Utils
-import Foreign.Marshal.Array
+import Foreign
+import Foreign.C.Types
 
 import Mate.Types
+import Mate.ClassPool
 import Mate.Debug
 import Mate.GarbageAlloc
 
@@ -36,13 +36,54 @@ getUniqueStringAddr str = do
 
+-- | Allocate a java\/lang\/String object for @str@ (method table pointer,
+-- fields and backing byte array) and return the address of the object.
 allocateJavaString :: B.ByteString -> IO Word32
 allocateJavaString str = do
-  -- TODO(bernhard): is this also true for UTF8 stuff?
-  let strlen = fromIntegral $ B.length str
+  {- we have to build a java object layout here, where String object looks like
+   -
+   -   this -+
+   -         |
+   -         v
+   -  +-------------+-------+-------+----------------+--------+
+   -  | MethodTable | value | count | cachedhashcode | offset |
+   -  +-------------+-------+-------+----------------+--------+
+   -         |          |
+   -         |          +------------+
+   -         v                       v
+   -  java/lang/String           +--------+--------+--------+-----+------------------+
+   -                             | length | str[0] | str[1] | ... | str [length - 1] |
+   -                             +--------+--------+--------+-----+------------------+
+   -  all cells are 32bit wide, except str[i] of course (they're 8bit [but
+   -  should be 16bit, TODO]).
+   -}
+  -- build object layout
+  fsize <- getObjectSize "java/lang/String"
+  printfStr "string: fsize: %d (should be 4 * 5)\n" fsize
+  tblptr <- mallocObject $ fromIntegral fsize
+  let ptr = intPtrToPtr (fromIntegral tblptr) :: Ptr CUInt
+  mtbl <- getMethodTable "java/lang/String"
+  poke ptr $ fromIntegral mtbl
+
+  -- build array layout
+  let strlen = (fromIntegral $ B.length str)
+  -- (+1) for \0, (+4) for length
+  newstr <- mallocString (strlen + 5)
+  BI.memset newstr 0 (fromIntegral $ strlen + 5)
   arr <- newArray ((map fromIntegral $ B.unpack str) :: [Word8])
-  -- (+1) for \0
-  newstr <- mallocString (strlen + 1)
-  BI.memset newstr 0 (fromIntegral $ strlen + 1)
-  copyBytes newstr arr strlen
-  let w32_ptr = fromIntegral $ ptrToIntPtr newstr
-  printfStr "new str ptr: 0x%08x (%s)@%d\n" w32_ptr (toString str) strlen
-  return w32_ptr
+  copyBytes (plusPtr newstr 4) arr strlen
+  -- arr is only a malloc'd scratch copy (newArray); release it once copied
+  free arr
+  printfStr "new str ptr: (%s)@%d\n" (toString str) strlen
+
+  let newstr_length = castPtr newstr :: Ptr CUInt
+  poke newstr_length $ fromIntegral strlen
+
+  -- set value pointer
+  poke (plusPtr ptr 4) (fromIntegral (ptrToIntPtr newstr) :: CUInt)
+  -- set count field
+  poke (plusPtr ptr 8) (fromIntegral strlen :: CUInt)
+  -- set hash code (TODO)
+  poke (plusPtr ptr 12) (0 :: CUInt)
+  -- set offset
+  poke (plusPtr ptr 16) (0 :: CUInt)
+
+  return $ fromIntegral tblptr
diff --git a/ffi/native.c b/ffi/native.c
index 25e0d1c..a3e877c 100644
--- a/ffi/native.c
+++ b/ffi/native.c
@@ -49,39 +49,62 @@ struct integer {
 	int value;
 };
 
+/*
+ * Native view of a java/lang/String object as laid out by
+ * allocateJavaString (Mate/Strings.hs). Only the first two cells, which are
+ * the ones the natives below read, are declared here.
+ */
+struct string {
+	unsigned int method_table_ptr;
+	struct chararray *value;
+};
+
+/* Backing array: 32bit length cell followed by the NUL-terminated bytes. */
+struct chararray {
+	unsigned int length;
+	char str[];
+};
+
 void java_io_PrintStream__printf_0___Ljava_lang_String__V
-	(const char *fmt)
+	(struct string *fmt)
 {
-	printf("%s", fmt);
+	printf("%s", fmt->value->str);
 }
 
 void java_io_PrintStream__printf_1___Ljava_lang_String_Ljava_lang_Object__V
-	(struct integer *a1, const char *fmt)
+	(struct integer *a1, struct string *fmt)
 {
-	printf(fmt, a1->value);
+#if 0
+	printf("fmt: 0x%08x\n", (unsigned int) fmt);
+	printf("fmt->value: 0x%08x\n", (unsigned int) fmt->value);
+	printf("fmt->val.len: 0x%08x\n", (unsigned int) (*fmt->value).length);
+	printf("first: %d\n", fmt->value->length);
+	printf("*fmt: %s\n", fmt->value->str);
+#endif
+	printf(fmt->value->str, a1->value);
 }
 
 void java_io_PrintStream__printf_2___Ljava_lang_String_Ljava_lang_Object_Ljava_lang_Object__V
-	(struct integer *a2, struct integer *a1, const char *fmt)
+	(struct integer *a2, struct integer *a1, struct string *fmt)
 {
-	printf(fmt, a1->value, a2->value);
+	printf(fmt->value->str, a1->value, a2->value);
 }
 
 void java_io_PrintStream__printf_3___Ljava_lang_String_Ljava_lang_Object_Ljava_lang_Object_Ljava_lang_Object__V
-	(struct integer *a3, struct integer *a2, struct integer *a1, const char *fmt)
+	(struct integer *a3, struct integer *a2, struct integer *a1, struct string *fmt)
 {
-	printf(fmt, a1->value, a2->value, a3->value);
+	printf(fmt->value->str, a1->value, a2->value, a3->value);
 }
 
 void java_io_PrintStream__printf_4___Ljava_lang_String_Ljava_lang_Object_Ljava_lang_Object_Ljava_lang_Object_Ljava_lang_Object__V
-	(struct integer *a4, struct integer *a3, struct integer *a2, struct integer *a1, const char *fmt)
+	(struct integer *a4, struct integer *a3, struct integer *a2, struct integer *a1, struct string *fmt)
 {
-	printf(fmt, a1->value, a2->value, a3->value, a4->value);
+	printf(fmt->value->str, a1->value, a2->value, a3->value, a4->value);
 }
 
 void java_io_PrintStream__printf_5___Ljava_lang_String_Ljava_lang_Object_Ljava_lang_Object_Ljava_lang_Object_Ljava_lang_Object_Ljava_lang_Object__V
-	(struct integer *a5, struct integer *a4, struct integer *a3, struct integer *a2, struct integer *a1, const char *fmt)
+	(struct integer *a5, struct integer *a4, struct integer *a3, struct integer *a2, struct integer *a1, struct string *fmt)
 {
-	printf(fmt, a1->value, a2->value, a3->value, a4->value, a5->value);
+	printf(fmt->value->str, a1->value, a2->value, a3->value, a4->value, a5->value);
 }
 
diff --git a/java/lang/String.java b/java/lang/String.java
new file mode 100644
index 0000000..a12998f
--- /dev/null
+++ b/java/lang/String.java
@@ -0,0 +1,11 @@
+package java.lang;
+
+// NOTE(review): allocateJavaString in Mate/Strings.hs writes these fields at
+// fixed byte offsets 4/8/12/16 in exactly this order; presumably the
+// declaration order here has to stay in sync -- confirm against the layout code.
+public class String {
+	public char[] value;
+	public int count;
+	public int cachedHashCode;
+	public int offset;
+}
-- 
2.25.1