module JVM.Assembler
(Instruction (..),
ArrayType (..),
+ CodeException (..),
Code (..),
IMM (..),
- CMP (..)
+ CMP (..),
+ atype2byte,
+ encodeInstructions,
+ encodeMethod,
+ decodeMethod
)
where
import Control.Monad
import Control.Applicative
import Data.Ix (inRange)
-import Data.List (intercalate)
import Data.Word
-import Data.Bits
import qualified Data.Binary as Binary
-import qualified Data.Binary.Get as Get
-import Data.Char
import qualified Data.ByteString.Lazy as B
-import Data.Array
-import qualified Data.Set as S
-import qualified Data.Map as M
import Data.BinaryState
import JVM.ClassFile
-import JVM.Types
-- | Immediate constant. Corresponding value will be added to base opcode.
data IMM =
codeExceptionsN :: Word16,
codeExceptions :: [CodeException],
codeAttrsN :: Word16,
- codeAttributes :: [AttributeInfo] }
+ codeAttributes :: Attributes File }
deriving (Eq, Show)
-- | Exception descriptor
get = CodeException <$> get <*> get <*> get <*> get
-instance BinaryState Integer AttributeInfo where
+instance BinaryState Integer Attribute where
put a = do
let sz = 6 + attributeLength a -- full size of AttributeInfo structure
liftOffset (fromIntegral sz) Binary.put a
put codeExceptionsN
forM_ codeExceptions put
put codeAttrsN
- forM_ codeAttributes put
+ forM_ (attributesList codeAttributes) put
get = do
stackSz <- get
excs <- replicateM (fromIntegral excn) get
nAttrs <- get
attrs <- replicateM (fromIntegral nAttrs) get
- return $ Code stackSz locals len code excn excs nAttrs attrs
+ return $ Code stackSz locals len code excn excs nAttrs (AP attrs)
-- | Read sequence of instructions (to end of stream)
readInstructions :: GetState Integer [Instruction]
next <- readInstructions
return (x: next)
--- | JVM instruction set
+-- | JVM instruction set. For comments, see JVM specification.
data Instruction =
NOP -- ^ 0
| ACONST_NULL -- ^ 1
| LCMP -- ^ 148
| FCMP CMP -- ^ 149, 150
| DCMP CMP -- ^ 151, 152
- | IF CMP -- ^ 153, 154, 155, 156, 157, 158
+ | IF CMP Word16 -- ^ 153, 154, 155, 156, 157, 158
| IF_ICMP CMP Word16 -- ^ 159, 160, 161, 162, 163, 164
| IF_ACMP CMP Word16 -- ^ 165, 166
- | GOTO -- ^ 167
+ | GOTO Word16 -- ^ 167
| JSR Word16 -- ^ 168
| RET -- ^ 169
| TABLESWITCH Word32 Word32 Word32 [Word32] -- ^ 170
| JSR_W Word32 -- ^ 201
deriving (Eq, Show)
--- ^ JVM array type (primitive types)
+-- | JVM array type (primitive types)
data ArrayType =
T_BOOLEAN -- ^ 4
| T_CHAR -- ^ 5
| T_LONG -- ^ 11
deriving (Eq, Show, Enum)
--- ^ Parse opcode with immediate constant
+-- | Parse opcode with immediate constant
imm :: Word8 -- ^ Base opcode
-> (IMM -> Instruction) -- ^ Instruction constructor
-> Word8 -- ^ Opcode to parse
-> GetState s Instruction
imm base constr x = return $ constr $ toEnum $ fromIntegral (x-base)
--- ^ Put opcode with immediate constant
+-- | Put opcode with immediate constant
putImm :: Word8 -- ^ Base opcode
-> IMM -- ^ Constant to add to opcode
-> PutState Integer ()
put t = putByte (atype2byte t)
--- ^ Put opcode with one argument
+-- | Put opcode with one argument
put1 :: (BinaryState Integer a)
=> Word8 -- ^ Opcode
-> a -- ^ First argument
put (DCMP C_LT) = putByte 151
put (DCMP C_GT) = putByte 152
put (DCMP c) = fail $ "No such instruction: DCMP " ++ show c
- put (IF c) = putByte (fromIntegral $ 153 + fromEnum c)
+ put (IF c x) = putByte (fromIntegral $ 153 + fromEnum c) >> put x
put (IF_ACMP C_EQ x) = put1 165 x
put (IF_ACMP C_NE x) = put1 166 x
put (IF_ACMP c _) = fail $ "No such instruction: IF_ACMP " ++ show c
put (IF_ICMP c x) = putByte (fromIntegral $ 159 + fromEnum c) >> put x
- put GOTO = putByte 167
+ put (GOTO x) = put1 167 x
put (JSR x) = put1 168 x
put RET = putByte 169
put (TABLESWITCH def low high offs) = do
152 -> return $ DCMP C_GT
165 -> IF_ACMP C_EQ <$> get
166 -> IF_ACMP C_NE <$> get
- 167 -> return GOTO
+ 167 -> GOTO <$> get
168 -> JSR <$> get
169 -> return RET
170 -> do
| inRange (34, 37) c -> imm 34 FLOAD_ c
| inRange (38, 41) c -> imm 38 DLOAD_ c
| inRange (42, 45) c -> imm 42 ALOAD_ c
- | inRange (153, 158) c -> return $ IF (toEnum $ fromIntegral $ c-153)
+ | inRange (153, 158) c -> IF (toEnum $ fromIntegral $ c-153) <$> get
| inRange (159, 164) c -> IF_ICMP (toEnum $ fromIntegral $ c-159) <$> get
| otherwise -> fail $ "Unknown instruction byte code: " ++ show c
+
+-- | Encode list of instructions
+encodeInstructions :: [Instruction] -> B.ByteString
+encodeInstructions code =
+ let p list = forM_ list put
+ in encodeWith p (0 :: Integer) code
+-- | Decode Java method
+decodeMethod :: B.ByteString -> Code
+decodeMethod str = decodeS (0 :: Integer) str
+
+-- | Encode Java method
+encodeMethod :: Code -> B.ByteString
+encodeMethod code = encodeS (0 :: Integer) code
+