diff --git a/mono/arch/x86/x86-codegen.h b/mono/arch/x86/x86-codegen.h
index af3e3c6f5580f05afa3d1f1528a870daf0f69de7..ad6282f64090332eb6f3b23a97f9589f4f9e594e 100644
--- a/mono/arch/x86/x86-codegen.h
+++ b/mono/arch/x86/x86-codegen.h
 #include <assert.h>
 
 #ifdef __native_client_codegen__
-#define kNaClAlignment 32
-#define kNaClAlignmentMask (kNaClAlignment - 1)
-extern guint8 nacl_align_byte;
+extern gint8 nacl_align_byte;
 #endif /* __native_client_codegen__ */
 
 
 #if defined( __native_client_codegen__ ) && defined( TARGET_X86 )
 #define x86_codegen_pre(inst_ptr_ptr, inst_len) do { mono_nacl_align_inst(inst_ptr_ptr, inst_len); } while (0)
-#define x86_call_sequence_pre(inst) guint8* _code_start = (inst);
-#define x86_call_sequence_post(inst) \
-  (mono_nacl_align_call(&_code_start, &(inst)), _code_start);
 #define x86_call_sequence_pre_val(inst) guint8* _code_start = (inst);
 #define x86_call_sequence_post_val(inst) \
   (mono_nacl_align_call(&_code_start, &(inst)), _code_start);
+#define x86_call_sequence_pre(inst) x86_call_sequence_pre_val((inst))
+#define x86_call_sequence_post(inst) x86_call_sequence_post_val((inst))
 #else
 #define x86_codegen_pre(inst_ptr_ptr, inst_len) do {} while (0)
-#define x86_call_sequence_pre(inst)
-#define x86_call_sequence_post(inst)
+/* Two variants are needed to avoid warnings */
 #define x86_call_sequence_pre_val(inst) guint8* _code_start = (inst);
 #define x86_call_sequence_post_val(inst) _code_start
+#define x86_call_sequence_pre(inst)
+#define x86_call_sequence_post(inst)
 #endif  /* __native_client_codegen__ */
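
For reference, here is what the bracketed sequence amounts to on NaCl/x86, written out as a plain function instead of macros. `emit_call_sketch` is a hypothetical stand-in (not a function in this header), and mono_nacl_align_call's padding step is only noted in a comment:

/* Sketch of the call sequence the pre/post pair brackets. */
static unsigned char *
emit_call_sketch (unsigned char *code, unsigned char *target)
{
	unsigned char *start = code;              /* x86_call_sequence_pre_val */
	int off = (int)(target - (code + 5));     /* rel32 is relative to the next IP */
	*code++ = 0xe8;                           /* call rel32 */
	*code++ = (unsigned char)(off & 0xff);
	*code++ = (unsigned char)((off >> 8) & 0xff);
	*code++ = (unsigned char)((off >> 16) & 0xff);
	*code++ = (unsigned char)((off >> 24) & 0xff);
	/* x86_call_sequence_post_val would invoke mono_nacl_align_call() here,
	 * padding so the call ends on a bundle boundary, and hand back `start'. */
	return start;
}
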
 
 
@@ -305,7 +303,7 @@ typedef union {
 
 #define kMaxMembaseEmitPadding 6
 
-#define x86_membase_emit(inst,r,basereg,disp)  do {\
+#define x86_membase_emit_body(inst,r,basereg,disp)     do {\
        if ((basereg) == X86_ESP) {     \
                if ((disp) == 0) {      \
                        x86_address_byte ((inst), 0, (r), X86_ESP);     \
@@ -334,6 +332,18 @@ typedef union {
        }       \
        } while (0)
 
+#if defined(__native_client_codegen__) && defined(TARGET_AMD64)
+#define x86_membase_emit(inst,r,basereg,disp) \
+       do { \
+               amd64_nacl_membase_handler(&(inst), (basereg), (disp), (r)) ; \
+       } while (0)
+#else /* __default_codegen__ || 32-bit NaCl codegen */
+#define x86_membase_emit(inst,r,basereg,disp) \
+       do { \
+               x86_membase_emit_body((inst),(r),(basereg),(disp)); \
+       } while (0)
+#endif
+
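
The body macro's ModR/M selection is easier to follow as a plain function. A sketch under the usual encoding rules (ModR/M packs (mod << 6) | (reg << 3) | rm; an ESP base always forces a SIB byte; an EBP base has no mod=00 form), which also shows where the worst case of 6 bytes, kMaxMembaseEmitPadding, comes from:

/* Sketch of x86_membase_emit_body's encoding choices. */
static unsigned char *
emit_membase_sketch (unsigned char *code, int r, int basereg, int disp)
{
	int needs_sib = (basereg == 4);                    /* X86_ESP */
	int mod = (disp == 0 && basereg != 5) ? 0          /* X86_EBP has no mod=00 form */
	        : (disp >= -128 && disp <= 127) ? 1 : 2;
	*code++ = (unsigned char)((mod << 6) | ((r & 7) << 3) | (basereg & 7));
	if (needs_sib)
		*code++ = 0x24;                            /* scale=0, index=none, base=ESP */
	if (mod == 1) {
		*code++ = (unsigned char)disp;
	} else if (mod == 2) {
		*code++ = (unsigned char)(disp & 0xff);
		*code++ = (unsigned char)((disp >> 8) & 0xff);
		*code++ = (unsigned char)((disp >> 16) & 0xff);
		*code++ = (unsigned char)((disp >> 24) & 0xff);
	}
	return code;  /* at most 1 + 1 + 4 = 6 bytes: kMaxMembaseEmitPadding */
}
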
 #define kMaxMemindexEmitPadding 6
 
 #define x86_memindex_emit(inst,r,basereg,disp,indexreg,shift)  \
@@ -351,7 +361,7 @@ typedef union {
                        x86_imm_emit8 ((inst), (disp)); \
                } else {        \
                        x86_address_byte ((inst), 2, (r), 4);   \
-                       x86_address_byte ((inst), (shift), (indexreg), 5);      \
+                       x86_address_byte ((inst), (shift), (indexreg), (basereg));      \
                        x86_imm_emit32 ((inst), (disp));        \
                }       \
        } while (0)
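
The one-character change above fixes a real encoding bug: in a SIB byte under mod=10, a base field of 101 selects EBP, so the hard-coded 5 silently rebased every large-displacement memindex operand on EBP. Packing the byte explicitly makes this visible (sketch):

/* SIB byte layout: scale(2 bits) | index(3 bits) | base(3 bits). */
static unsigned char
sib_byte_sketch (int shift, int indexreg, int basereg)
{
	/* With mod=10, base=101 means EBP -- not "disp32, no base" as under
	 * mod=00 -- which is why (basereg) must be encoded here. */
	return (unsigned char)(((shift & 3) << 6) | ((indexreg & 7) << 3) | (basereg & 7));
}
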
@@ -438,12 +448,23 @@ typedef union {
     } while ( in_nop );  \
   } while (0)
 
+#if defined(__native_client__)
 #define x86_patch(ins,target) \
   do { \
     unsigned char* inst = (ins); \
+    guint8* new_target = nacl_modify_patch_target((target)); \
     x86_skip_nops((inst)); \
-    x86_do_patch((inst), (target)); \
+    x86_do_patch((inst), new_target); \
   } while (0)
+#else /* __native_client__ */
+#define x86_patch(ins,target) \
+  do { \
+    unsigned char* inst = (ins); \
+    guint8* new_target = (target); \
+    x86_skip_nops((inst)); \
+    x86_do_patch((inst), new_target); \
+  } while (0)
+#endif /* __native_client__ */
 
 #else
 #define x86_patch(ins,target) do { x86_do_patch((ins), (target)); } while (0)
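
The nacl_modify_patch_target step exists because under Native Client the bytes being patched are presumably not at the address the code will execute from, so the raw target pointer must be translated before x86_do_patch rewrites the displacement at the site. For the common jmp/call rel32 case that rewrite looks roughly like this (hypothetical helper; the real macro also handles jcc and 8-bit displacements):

static void
patch_rel32_sketch (unsigned char *site, unsigned char *target)
{
	int rel = (int)(target - (site + 5));   /* displacement is next-IP relative */
	site[1] = (unsigned char)(rel & 0xff);  /* site[0] is the e8/e9 opcode */
	site[2] = (unsigned char)((rel >> 8) & 0xff);
	site[3] = (unsigned char)((rel >> 16) & 0xff);
	site[4] = (unsigned char)((rel >> 24) & 0xff);
}
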
@@ -472,6 +493,13 @@ typedef union {
 #define x86_movsl(inst) do { *(inst)++ =(unsigned char)0xa5; } while (0)
 #define x86_movsd(inst) x86_movsl((inst))
 
+#if defined(__default_codegen__)
+#define x86_prefix(inst,p) \
+       do { \
+               *(inst)++ =(unsigned char) (p); \
+       } while (0)
+#elif defined(__native_client_codegen__)
+#if defined(TARGET_X86)
 /* kNaClAlignment - 1 is the max value we can pass into x86_codegen_pre. */
 /* This keeps us from having to call x86_codegen_pre with specific       */
 /* knowledge of the size of the instruction that follows it, and         */
@@ -481,9 +509,22 @@ typedef union {
                x86_codegen_pre(&(inst), kNaClAlignment - 1); \
                *(inst)++ =(unsigned char) (p); \
        } while (0)
+#elif defined(TARGET_AMD64)
+/* We need to tag any prefixes so we can perform proper membase sandboxing */
+/* See: mini-amd64.c:amd64_nacl_membase_handler for verbose details        */
+#define x86_prefix(inst,p) \
+       do { \
+               amd64_nacl_tag_legacy_prefix((inst)); \
+               *(inst)++ =(unsigned char) (p); \
+       } while (0)
+
+#endif /* TARGET_AMD64 */
+
+#endif /* __native_client_codegen__ */
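
The tagging matters because the NaCl/amd64 sandboxing pass rewrites memory operands after the bytes have been emitted, and it must treat a legacy prefix and its opcode as one instruction; this is also why the mov hunks further down replace every raw `*(inst)++ = 0x66` with x86_prefix. A hypothetical 16-bit store emitter, assuming this header's macros are in scope:

static unsigned char *
emit_store16_sketch (unsigned char *code, int reg, int basereg, int disp)
{
	x86_prefix (code, X86_OPERAND_PREFIX);   /* 0x66, tagged on NaCl/amd64 */
	*code++ = 0x89;                          /* mov r/m16, r16 */
	x86_membase_emit (code, reg, basereg, disp);
	return code;
}
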
 
 #define x86_rdtsc(inst) \
        do {    \
+               x86_codegen_pre(&(inst), 2); \
                *(inst)++ = 0x0f;       \
                *(inst)++ = 0x31;       \
        } while (0)
@@ -544,7 +585,7 @@ typedef union {
 
 #define x86_xadd_reg_reg(inst,dreg,reg,size)   \
        do {    \
-               x86_codegen_pre(&(inst), 4); \
+               x86_codegen_pre(&(inst), 3); \
                *(inst)++ = (unsigned char)0x0F;     \
                if ((size) == 1)        \
                        *(inst)++ = (unsigned char)0xC0;        \
@@ -630,14 +671,14 @@ typedef union {
 
 #define x86_neg_mem(inst,mem)  \
        do {    \
-               x86_codegen_pre(&(inst), 2); \
+               x86_codegen_pre(&(inst), 6); \
                *(inst)++ = (unsigned char)0xf7;        \
                x86_mem_emit ((inst), 3, (mem));        \
        } while (0)
 
 #define x86_neg_membase(inst,basereg,disp)     \
        do {    \
-               x86_codegen_pre(&(inst), 6); \
+               x86_codegen_pre(&(inst), 1 + kMaxMembaseEmitPadding); \
                *(inst)++ = (unsigned char)0xf7;        \
                x86_membase_emit ((inst), 3, (basereg), (disp));        \
        } while (0)
@@ -773,6 +814,14 @@ typedef union {
                x86_imm_emit32 ((inst), (imm)); \
        } while (0)
 
+#define x86_test_mem_imm8(inst,mem,imm)        \
+       do {    \
+               x86_codegen_pre(&(inst), 7); \
+               *(inst)++ = (unsigned char)0xf6;        \
+               x86_mem_emit ((inst), 0, (mem));        \
+               x86_imm_emit8 ((inst), (imm));  \
+       } while (0)
+
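
The new 8-bit form uses 0xf6 /0 ib (`test r/m8, imm8`), so testing a single flag byte takes a 1-byte immediate instead of the 4-byte one x86_test_mem_imm emits; the padding of 7 covers opcode + ModR/M + disp32 + imm8. Hypothetical usage, assuming this header is included:

static unsigned char *
emit_flag_test_sketch (unsigned char *code)
{
	/* testb $0x01, 0x1000 -- 0x1000 is a made-up absolute address */
	x86_test_mem_imm8 (code, 0x1000, 0x01);
	return code;   /* 7 bytes emitted: f6 05 00 10 00 00 01 */
}
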
 #define x86_test_mem_imm(inst,mem,imm) \
        do {    \
                x86_codegen_pre(&(inst), 10); \
@@ -841,11 +890,11 @@ typedef union {
 #define x86_shift_membase_imm(inst,opc,basereg,disp,imm)       \
        do {    \
                if ((imm) == 1) {       \
-                       x86_codegen_pre(&(inst), 6); \
+                       x86_codegen_pre(&(inst), 1 + kMaxMembaseEmitPadding); \
                        *(inst)++ = (unsigned char)0xd1;        \
                        x86_membase_emit ((inst), (opc), (basereg), (disp));    \
                } else {        \
-                       x86_codegen_pre(&(inst), 7); \
+                       x86_codegen_pre(&(inst), 2 + kMaxMembaseEmitPadding); \
                        *(inst)++ = (unsigned char)0xc1;        \
                        x86_membase_emit ((inst), (opc), (basereg), (disp));    \
                        x86_imm_emit8 ((inst), (imm));  \
@@ -990,7 +1039,7 @@ typedef union {
                } else {        \
                        x86_codegen_pre(&(inst), 6); \
                        *(inst)++ = (unsigned char)0x69;        \
-                       x86_reg_emit ((inst), (reg), (mem));    \
+                       x86_mem_emit ((inst), (reg), (mem));    \
                        x86_imm_emit32 ((inst), (imm)); \
                }       \
        } while (0)
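
The imul change is also a bug fix: x86_reg_emit produces a mod=11 (register-direct) ModR/M, so the absolute address in `mem` was being encoded as a register number, while the 0x69 form (`imul r32, r/m32, imm32`) needs the mod=00, r/m=101, disp32 absolute form that x86_mem_emit produces. In ModR/M terms (sketch):

static unsigned char
modrm_sketch (int mod, int reg, int rm)
{
	return (unsigned char)((mod << 6) | ((reg & 7) << 3) | (rm & 7));
}
/* before: modrm_sketch (3, reg, mem)  -- register-direct, wrong for an address
 * after:  modrm_sketch (0, reg, 5)    -- [disp32], followed by the address    */
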
@@ -1041,7 +1090,7 @@ typedef union {
                x86_codegen_pre(&(inst), 7); \
                switch ((size)) {       \
                case 1: *(inst)++ = (unsigned char)0x88; break; \
-               case 2: *(inst)++ = (unsigned char)0x66; /* fall through */     \
+               case 2: x86_prefix((inst), X86_OPERAND_PREFIX); /* fall through */      \
                case 4: *(inst)++ = (unsigned char)0x89; break; \
                default: assert (0);    \
                }       \
@@ -1053,7 +1102,7 @@ typedef union {
                x86_codegen_pre(&(inst), 3); \
                switch ((size)) {       \
                case 1: *(inst)++ = (unsigned char)0x88; break; \
-               case 2: *(inst)++ = (unsigned char)0x66; /* fall through */     \
+               case 2: x86_prefix((inst), X86_OPERAND_PREFIX); /* fall through */      \
                case 4: *(inst)++ = (unsigned char)0x89; break; \
                default: assert (0);    \
                }       \
@@ -1065,7 +1114,7 @@ typedef union {
                x86_codegen_pre(&(inst), 2 + kMaxMembaseEmitPadding); \
                switch ((size)) {       \
                case 1: *(inst)++ = (unsigned char)0x88; break; \
-               case 2: *(inst)++ = (unsigned char)0x66; /* fall through */     \
+               case 2: x86_prefix((inst), X86_OPERAND_PREFIX); /* fall through */      \
                case 4: *(inst)++ = (unsigned char)0x89; break; \
                default: assert (0);    \
                }       \
@@ -1077,7 +1126,7 @@ typedef union {
                x86_codegen_pre(&(inst), 2 + kMaxMemindexEmitPadding); \
                switch ((size)) {       \
                case 1: *(inst)++ = (unsigned char)0x88; break; \
-               case 2: *(inst)++ = (unsigned char)0x66; /* fall through */     \
+               case 2: x86_prefix((inst), X86_OPERAND_PREFIX); /* fall through */      \
                case 4: *(inst)++ = (unsigned char)0x89; break; \
                default: assert (0);    \
                }       \
@@ -1089,7 +1138,7 @@ typedef union {
                x86_codegen_pre(&(inst), 3); \
                switch ((size)) {       \
                case 1: *(inst)++ = (unsigned char)0x8a; break; \
-               case 2: *(inst)++ = (unsigned char)0x66; /* fall through */     \
+               case 2: x86_prefix((inst), X86_OPERAND_PREFIX); /* fall through */      \
                case 4: *(inst)++ = (unsigned char)0x8b; break; \
                default: assert (0);    \
                }       \
@@ -1101,7 +1150,7 @@ typedef union {
                x86_codegen_pre(&(inst), 7); \
                switch ((size)) {       \
                case 1: *(inst)++ = (unsigned char)0x8a; break; \
-               case 2: *(inst)++ = (unsigned char)0x66; /* fall through */     \
+               case 2: x86_prefix((inst), X86_OPERAND_PREFIX); /* fall through */      \
                case 4: *(inst)++ = (unsigned char)0x8b; break; \
                default: assert (0);    \
                }       \
@@ -1115,7 +1164,7 @@ typedef union {
                x86_codegen_pre(&(inst), kMovRegMembasePadding); \
                switch ((size)) {       \
                case 1: *(inst)++ = (unsigned char)0x8a; break; \
-               case 2: *(inst)++ = (unsigned char)0x66; /* fall through */     \
+               case 2: x86_prefix((inst), X86_OPERAND_PREFIX); /* fall through */      \
                case 4: *(inst)++ = (unsigned char)0x8b; break; \
                default: assert (0);    \
                }       \
@@ -1127,7 +1176,7 @@ typedef union {
                x86_codegen_pre(&(inst), 2 + kMaxMemindexEmitPadding); \
                switch ((size)) {       \
                case 1: *(inst)++ = (unsigned char)0x8a; break; \
-               case 2: *(inst)++ = (unsigned char)0x66; /* fall through */     \
+               case 2: x86_prefix((inst), X86_OPERAND_PREFIX); /* fall through */      \
                case 4: *(inst)++ = (unsigned char)0x8b; break; \
                default: assert (0);    \
                }       \
@@ -1155,7 +1204,7 @@ typedef union {
                        x86_imm_emit8 ((inst), (imm));  \
                } else if ((size) == 2) {       \
                        x86_codegen_pre(&(inst), 9); \
-                       *(inst)++ = (unsigned char)0x66;        \
+                       x86_prefix((inst), X86_OPERAND_PREFIX); \
                        *(inst)++ = (unsigned char)0xc7;        \
                        x86_mem_emit ((inst), 0, (mem));        \
                        x86_imm_emit16 ((inst), (imm)); \
@@ -1176,7 +1225,7 @@ typedef union {
                        x86_imm_emit8 ((inst), (imm));  \
                } else if ((size) == 2) {       \
                        x86_codegen_pre(&(inst), 4 + kMaxMembaseEmitPadding); \
-                       *(inst)++ = (unsigned char)0x66;        \
+                       x86_prefix((inst), X86_OPERAND_PREFIX); \
                        *(inst)++ = (unsigned char)0xc7;        \
                        x86_membase_emit ((inst), 0, (basereg), (disp));        \
                        x86_imm_emit16 ((inst), (imm)); \
@@ -1197,7 +1246,7 @@ typedef union {
                        x86_imm_emit8 ((inst), (imm));  \
                } else if ((size) == 2) {       \
                        x86_codegen_pre(&(inst), 4 + kMaxMemindexEmitPadding); \
-                       *(inst)++ = (unsigned char)0x66;        \
+                       x86_prefix((inst), X86_OPERAND_PREFIX); \
                        *(inst)++ = (unsigned char)0xc7;        \
                        x86_memindex_emit ((inst), 0, (basereg), (disp), (indexreg), (shift));  \
                        x86_imm_emit16 ((inst), (imm)); \
@@ -1681,6 +1730,7 @@ typedef union {
                x86_imm_emit8 ((inst), (imm));  \
        } while (0)
 
+#if defined(TARGET_X86)
 #define x86_jump32(inst,imm)   \
        do {    \
                x86_codegen_pre(&(inst), 5); \
@@ -1694,9 +1744,27 @@ typedef union {
                *(inst)++ = (unsigned char)0xeb;        \
                x86_imm_emit8 ((inst), (imm));  \
        } while (0)
+#elif defined(TARGET_AMD64)
+/* These macros are used directly from mini-amd64.c and other      */
+/* amd64 specific files, so they need to be instrumented directly. */
+#define x86_jump32(inst,imm)   \
+       do {    \
+               amd64_codegen_pre(inst); \
+               *(inst)++ = (unsigned char)0xe9;        \
+               x86_imm_emit32 ((inst), (imm)); \
+               amd64_codegen_post(inst); \
+       } while (0)
 
+#define x86_jump8(inst,imm)    \
+       do {    \
+               amd64_codegen_pre(inst); \
+               *(inst)++ = (unsigned char)0xeb;        \
+               x86_imm_emit8 ((inst), (imm));  \
+               amd64_codegen_post(inst); \
+       } while (0)
+#endif
 
-#ifdef __native_client_codegen__
+#if defined( __native_client_codegen__ ) && defined( TARGET_X86 )
 #define x86_jump_reg(inst,reg) do {    \
     x86_codegen_pre(&(inst), 5);                       \
     *(inst)++ = (unsigned char)0x83;  /* and */                \
@@ -1747,7 +1815,7 @@ typedef union {
 /*
  * target is a pointer in our buffer.
  */
-#define x86_jump_code(inst,target)     \
+#define x86_jump_code_body(inst,target)        \
        do {    \
                int t; \
                x86_codegen_pre(&(inst), 2); \
@@ -1761,6 +1829,31 @@ typedef union {
                }       \
        } while (0)
 
+#if defined(__default_codegen__)
+#define x86_jump_code(inst,target) \
+       do { \
+               x86_jump_code_body((inst),(target)); \
+       } while (0)
+#elif defined(__native_client_codegen__) && defined(TARGET_X86)
+#define x86_jump_code(inst,target) \
+       do { \
+               guint8* jump_start = (inst); \
+               x86_jump_code_body((inst),(target)); \
+               x86_patch(jump_start, (target)); \
+       } while (0)
+#elif defined(__native_client_codegen__) && defined(TARGET_AMD64)
+#define x86_jump_code(inst,target) \
+       do { \
+               /* jump_code_body is used twice because there are offsets */ \
+               /* calculated based on the IP, which can change after the */ \
+               /* call to amd64_codegen_post                             */ \
+               amd64_codegen_pre(inst); \
+               x86_jump_code_body((inst),(target)); \
+               inst = amd64_codegen_post(inst); \
+               x86_jump_code_body((inst),(target)); \
+       } while (0)
+#endif /* __native_client_codegen__ */
+
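
The double emission on NaCl/amd64 is not redundant: a jump's rel32 is measured from the end of the instruction, so if amd64_codegen_post moves `inst` to keep the instruction inside a bundle, the first-pass displacement is stale and the body must be re-emitted at the final address. The dependency is easy to see written out (hypothetical helper):

static unsigned char *
emit_jmp_rel32_sketch (unsigned char *code, unsigned char *target)
{
	int off = (int)(target - (code + 5));   /* valid only for this exact `code' */
	*code++ = 0xe9;                         /* jmp rel32 */
	*code++ = (unsigned char)(off & 0xff);
	*code++ = (unsigned char)((off >> 8) & 0xff);
	*code++ = (unsigned char)((off >> 16) & 0xff);
	*code++ = (unsigned char)((off >> 24) & 0xff);
	return code;  /* move the instruction and `off' must be recomputed */
}
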
 #define x86_jump_disp(inst,disp)       \
        do {    \
                int t = (disp) - 2;     \
@@ -1772,6 +1865,7 @@ typedef union {
                }       \
        } while (0)
 
+#if defined(TARGET_X86)
 #define x86_branch8(inst,cond,imm,is_signed)   \
        do {    \
                x86_codegen_pre(&(inst), 2); \
@@ -1792,12 +1886,40 @@ typedef union {
                        *(inst)++ = x86_cc_unsigned_map [(cond)] + 0x10;        \
                x86_imm_emit32 ((inst), (imm)); \
        } while (0)
+#elif defined(TARGET_AMD64)
+/* These macros are used directly from mini-amd64.c and other      */
+/* amd64 specific files, so they need to be instrumented directly. */
+#define x86_branch8(inst,cond,imm,is_signed)   \
+       do {    \
+               amd64_codegen_pre(inst); \
+               if ((is_signed))        \
+                       *(inst)++ = x86_cc_signed_map [(cond)]; \
+               else    \
+                       *(inst)++ = x86_cc_unsigned_map [(cond)];       \
+               x86_imm_emit8 ((inst), (imm));  \
+               amd64_codegen_post(inst); \
+       } while (0)
+#define x86_branch32(inst,cond,imm,is_signed)  \
+       do {    \
+               amd64_codegen_pre(inst); \
+               *(inst)++ = (unsigned char)0x0f;        \
+               if ((is_signed))        \
+                       *(inst)++ = x86_cc_signed_map [(cond)] + 0x10;  \
+               else    \
+                       *(inst)++ = x86_cc_unsigned_map [(cond)] + 0x10;        \
+               x86_imm_emit32 ((inst), (imm)); \
+               amd64_codegen_post(inst); \
+       } while (0)
+#endif
 
+#if defined(TARGET_X86)
 #define x86_branch(inst,cond,target,is_signed) \
        do {    \
                int offset;                                      \
+               guint8* branch_start; \
                x86_codegen_pre(&(inst), 2); \
                offset = (target) - (inst) - 2; \
+               branch_start = (inst); \
                if (x86_is_imm8 ((offset)))     \
                        x86_branch8 ((inst), (cond), offset, (is_signed));      \
                else {  \
@@ -1805,7 +1927,42 @@ typedef union {
                        offset = (target) - (inst) - 6; \
                        x86_branch32 ((inst), (cond), offset, (is_signed));     \
                }       \
+               x86_patch(branch_start, (target)); \
        } while (0)
+#elif defined(TARGET_AMD64)
+/* This macro is used directly from mini-amd64.c and other        */
+/* amd64 specific files, so it needs to be instrumented directly. */
+
+#define x86_branch_body(inst,cond,target,is_signed)    \
+       do {    \
+               int offset = (target) - (inst) - 2;     \
+               if (x86_is_imm8 ((offset)))     \
+                       x86_branch8 ((inst), (cond), offset, (is_signed));      \
+               else {  \
+                       offset = (target) - (inst) - 6; \
+                       x86_branch32 ((inst), (cond), offset, (is_signed));     \
+               }       \
+       } while (0)
+
+#if defined(__default_codegen__)
+#define x86_branch(inst,cond,target,is_signed) \
+       do { \
+               x86_branch_body((inst),(cond),(target),(is_signed)); \
+       } while (0)
+#elif defined(__native_client_codegen__)
+#define x86_branch(inst,cond,target,is_signed) \
+       do {    \
+               /* branch_body is used twice because there are offsets */ \
+               /* calculated based on the IP, which can change after  */ \
+               /* the call to amd64_codegen_post                      */ \
+               amd64_codegen_pre(inst); \
+               x86_branch_body((inst),(cond),(target),(is_signed)); \
+               inst = amd64_codegen_post(inst); \
+               x86_branch_body((inst),(cond),(target),(is_signed)); \
+       } while (0)
+#endif /* __native_client_codegen__ */
+
+#endif /* TARGET_AMD64 */
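
Both the x86 and amd64 paths pick the branch form the same way: the 2-byte jcc rel8 when the displacement fits in a signed byte, otherwise the 6-byte 0f 8x rel32 form, recomputing the offset as target - inst - 6 for the longer instruction. A sketch of that selection, assuming x86_is_imm8 tests the signed-byte range:

/* Returns the encoded length the branch will need. */
static int
branch_length_sketch (unsigned char *code, unsigned char *target)
{
	int off = (int)(target - code - 2);   /* displacement if the 2-byte form fits */
	return (off >= -128 && off <= 127) ? 2 : 6;
}
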
 
 #define x86_branch_disp(inst,cond,disp,is_signed)      \
        do {    \
@@ -1865,10 +2022,10 @@ typedef union {
                x86_call_sequence_post((inst)); \
        } while (0)
 
-#ifdef __native_client_codegen__
+
+#if defined( __native_client_codegen__ ) && defined( TARGET_X86 )
 #define x86_call_reg_internal(inst,reg)        \
   do {                                                 \
-    x86_codegen_pre(&(inst), 5);                       \
     *(inst)++ = (unsigned char)0x83;  /* and */                \
     x86_reg_emit ((inst), 4, (reg));  /* reg */                \
     *(inst)++ = (unsigned char)nacl_align_byte;                \
@@ -1914,20 +2071,23 @@ typedef union {
 #endif  /* __native_client_codegen__ */
 
 
-#ifdef __native_client_codegen__
+#if defined( __native_client_codegen__ ) && defined( TARGET_X86 )
 
 #define x86_call_code(inst,target)     \
        do {    \
                int _x86_offset; \
+               guint8* call_start; \
                guint8* _aligned_start; \
-               x86_call_sequence_pre_val ((inst)); \
+               x86_call_sequence_pre_val((inst)); \
                _x86_offset = (unsigned char*)(target) - (inst);        \
                _x86_offset -= 5;       \
                x86_call_imm_body ((inst), _x86_offset);        \
-               _aligned_start = x86_call_sequence_post_val ((inst)); \
+               _aligned_start = x86_call_sequence_post_val((inst)); \
+               call_start = _aligned_start; \
                _x86_offset = (unsigned char*)(target) - (_aligned_start);      \
                _x86_offset -= 5;       \
                x86_call_imm_body ((_aligned_start), _x86_offset);      \
+               x86_patch(call_start, (target)); \
        } while (0)
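
The call is emitted twice here because NaCl wants the return address bundle-aligned: x86_call_sequence_post_val pads the sequence so the call ends exactly on a bundle boundary, which changes the call's own address and therefore its rel32, so x86_call_imm_body runs again at the final location; the added x86_patch keeps NaCl's target translation in the loop. The padding arithmetic, assuming kNaClAlignment is 32 as in the old definition removed above:

/* Pad bytes needed so a 5-byte call ends on a 32-byte bundle boundary. */
static unsigned int
call_pad_sketch (unsigned char *code)
{
	unsigned long end = (unsigned long)code + 5;    /* e8 + rel32 */
	return (unsigned int)((32 - (end & 31)) & 31);  /* nop bytes to insert */
}
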
 
 #define SIZE_OF_RET 6
@@ -2062,9 +2222,9 @@ typedef union {
 
 #ifdef __native_client_codegen__
 
-#define kNaClLengthOfCallReg 5
-#define kNaClLengthOfCallImm 5
-#define kNaClLengthOfCallMembase (kNaClLengthOfCallReg + 6)
+#define kx86NaClLengthOfCallReg 5
+#define kx86NaClLengthOfCallImm 5
+#define kx86NaClLengthOfCallMembase (kx86NaClLengthOfCallReg + 6)
 
 #endif  /* __native_client_codegen__ */
 
@@ -2229,6 +2389,17 @@ typedef enum {
        X86_SSE_PEXTRB = 0x14,/*sse41*/
        X86_SSE_PEXTRW = 0xC5,
        X86_SSE_PEXTRD = 0x16,/*sse41*/
+
+       X86_SSE_SHUFP = 0xC6,
+
+       X86_SSE_CVTDQ2PD = 0xE6,
+       X86_SSE_CVTDQ2PS = 0x5B,
+       X86_SSE_CVTPD2DQ = 0xE6,
+       X86_SSE_CVTPD2PS = 0x5A,
+       X86_SSE_CVTPS2DQ = 0x5B,
+       X86_SSE_CVTPS2PD = 0x5A,
+       X86_SSE_CVTTPD2DQ = 0xE6,
+       X86_SSE_CVTTPS2DQ = 0x5B,
 } X86_SSE_Opcode;
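
Several of the new conversion entries share an opcode byte (0xE6 for CVTDQ2PD/CVTPD2DQ/CVTTPD2DQ, 0x5B for CVTDQ2PS/CVTPS2DQ/CVTTPS2DQ, 0x5A for CVTPD2PS/CVTPS2PD) because the real distinction is the mandatory prefix -- none, 0x66, 0xF2, or 0xF3 -- selected by which x86_sse_alu_* emitter is used. A sketch with hypothetical registers, assuming the plain x86_sse_alu_reg_reg variant alongside the pd one shown below:

static unsigned char *
emit_cvt_sketch (unsigned char *code, int dreg, int sreg)
{
	x86_sse_alu_reg_reg    (code, X86_SSE_CVTPS2PD, dreg, sreg); /* 0f 5a    */
	x86_sse_alu_pd_reg_reg (code, X86_SSE_CVTPD2PS, dreg, sreg); /* 66 0f 5a */
	return code;
}
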
 
 
@@ -2275,6 +2446,21 @@ typedef enum {
                x86_membase_emit ((inst), (reg), (basereg), (disp));    \
        } while (0)
 
+#define x86_sse_alu_reg_reg_imm8(inst,opc,dreg,reg, imm8)      \
+       do {    \
+               x86_codegen_pre(&(inst), 4); \
+               *(inst)++ = (unsigned char)0x0F;        \
+               *(inst)++ = (unsigned char)(opc);       \
+               x86_reg_emit ((inst), (dreg), (reg));   \
+               *(inst)++ = (unsigned char)(imm8);      \
+       } while (0)
+
+#define x86_sse_alu_pd_reg_reg_imm8(inst,opc,dreg,reg, imm8)       \
+       do {    \
+               x86_codegen_pre(&(inst), 5); \
+               *(inst)++ = (unsigned char)0x66;        \
+               x86_sse_alu_reg_reg_imm8 ((inst), (opc), (dreg), (reg), (imm8)); \
+       } while (0)
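
The imm8 variants are what SHUFP-style opcodes need: a ModR/M pair followed by a one-byte control immediate. Hypothetical usage for shufpd (66 0f c6 /r ib):

static unsigned char *
emit_shufpd_sketch (unsigned char *code, int dreg, int sreg)
{
	/* shufpd dreg, sreg, 0x1 -- the imm8 picks which lane of each operand
	 * feeds the result */
	x86_sse_alu_pd_reg_reg_imm8 (code, X86_SSE_SHUFP, dreg, sreg, 0x1);
	return code;
}
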
 
 #define x86_sse_alu_pd_reg_reg(inst,opc,dreg,reg)       \
        do {    \