- Minor fixes for handling structure constants and array values that are not sdecls
[coreboot.git] / util / romcc / romcc.c
index feebdbdab45dc5c7ba6fcec2f2e1c7e85e6ae5e6..48632a8fc6cfab5199ebd7bcd4abf35cfd406056 100644 (file)
 #define DEBUG_COLOR_GRAPH 0
 #define DEBUG_SCC 0
 #define DEBUG_CONSISTENCY 2
+#define DEBUG_RANGE_CONFLICTS 0
+#define DEBUG_COALESCING 0
+#define DEBUG_SDP_BLOCKS 0
+#define DEBUG_TRIPLE_COLOR 0
 
 #warning "FIXME boundary cases with small types in larger registers"
 #warning "FIXME give clear error messages about unused variables"
+#warning "FIXME properly handle multi dimensional arrays"
+#warning "FIXME fix scc_transform"
 
 /*  Control flow graph of a loop without goto.
  * 
@@ -259,23 +265,25 @@ struct token {
 /* Operations on general purpose registers.
  */
 
-#define OP_SMUL       0
-#define OP_UMUL       1
-#define OP_SDIV       2
-#define OP_UDIV       3
-#define OP_SMOD       4
-#define OP_UMOD       5
-#define OP_ADD        6
-#define OP_SUB        7
-#define OP_SL         8
-#define OP_USR        9
-#define OP_SSR       10 
-#define OP_AND       11 
-#define OP_XOR       12
-#define OP_OR        13
-#define OP_POS       14 /* Dummy positive operator don't use it */
-#define OP_NEG       15
-#define OP_INVERT    16
+#define OP_SDIVT      0
+#define OP_UDIVT      1
+#define OP_SMUL       2
+#define OP_UMUL       3
+#define OP_SDIV       4
+#define OP_UDIV       5
+#define OP_SMOD       6
+#define OP_UMOD       7
+#define OP_ADD        8
+#define OP_SUB        9
+#define OP_SL        10
+#define OP_USR       11
+#define OP_SSR       12 
+#define OP_AND       13 
+#define OP_XOR       14
+#define OP_OR        15
+#define OP_POS       16 /* Dummy positive operator don't use it */
+#define OP_NEG       17
+#define OP_INVERT    18
                     
 #define OP_EQ        20
 #define OP_NOTEQ     21
@@ -293,6 +301,10 @@ struct token {
 
 #define OP_LOAD      32
 #define OP_STORE     33
+/* For OP_STORE ->type holds the type
+ * RHS(0) holds the destination address
+ * RHS(1) holds the value to store.
+ */
 
 #define OP_NOOP      34
 
@@ -300,6 +312,9 @@ struct token {
 #define OP_MAX_CONST 59
 #define IS_CONST_OP(X) (((X) >= OP_MIN_CONST) && ((X) <= OP_MAX_CONST))
 #define OP_INTCONST  50
+/* For OP_INTCONST ->type holds the type.
+ * ->u.cval holds the constant value.
+ */
 #define OP_BLOBCONST 51
 /* For OP_BLOBCONST ->type holds the layout and size
  * information.  u.blob holds a pointer to the raw binary
@@ -313,8 +328,8 @@ struct token {
 
 #define OP_WRITE     60 
 /* OP_WRITE moves one pseudo register to another.
- * LHS(0) holds the destination pseudo register, which must be an OP_DECL.
- * RHS(0) holds the psuedo to move.
+ * RHS(0) holds the destination pseudo register, which must be an OP_DECL.
+ * RHS(1) holds the pseudo to move.
  */
 
 #define OP_READ      61
@@ -504,6 +519,8 @@ struct op_info {
        .targ = (TARG), \
         }
 static const struct op_info table_ops[] = {
+[OP_SDIVT      ] = OP( 2,  2, 0, 0, PURE | BLOCK , "sdivt"),
+[OP_UDIVT      ] = OP( 2,  2, 0, 0, PURE | BLOCK , "udivt"),
 [OP_SMUL       ] = OP( 0,  2, 0, 0, PURE | DEF | BLOCK , "smul"),
 [OP_UMUL       ] = OP( 0,  2, 0, 0, PURE | DEF | BLOCK , "umul"),
 [OP_SDIV       ] = OP( 0,  2, 0, 0, PURE | DEF | BLOCK , "sdiv"),
@@ -536,7 +553,7 @@ static const struct op_info table_ops[] = {
 [OP_LTRUE      ] = OP( 0,  1, 0, 0, PURE | DEF | BLOCK , "ltrue"),
 
 [OP_LOAD       ] = OP( 0,  1, 0, 0, IMPURE | DEF | BLOCK, "load"),
-[OP_STORE      ] = OP( 1,  1, 0, 0, IMPURE | BLOCK , "store"),
+[OP_STORE      ] = OP( 0,  2, 0, 0, IMPURE | BLOCK , "store"),
 
 [OP_NOOP       ] = OP( 0,  0, 0, 0, PURE | BLOCK, "noop"),
 
@@ -544,7 +561,7 @@ static const struct op_info table_ops[] = {
 [OP_BLOBCONST  ] = OP( 0,  0, 0, 0, PURE, "blobconst"),
 [OP_ADDRCONST  ] = OP( 0,  0, 1, 0, PURE | DEF, "addrconst"),
 
-[OP_WRITE      ] = OP( 1,  1, 0, 0, PURE | BLOCK, "write"),
+[OP_WRITE      ] = OP( 0,  2, 0, 0, PURE | BLOCK, "write"),
 [OP_READ       ] = OP( 0,  1, 0, 0, PURE | DEF | BLOCK, "read"),
 [OP_COPY       ] = OP( 0,  1, 0, 0, PURE | DEF | BLOCK, "copy"),
 [OP_PIECE      ] = OP( 0,  0, 1, 0, PURE | DEF, "piece"),
@@ -631,9 +648,9 @@ struct triple_set {
 };
 
 #define MAX_LHS  15
-#define MAX_RHS  15
-#define MAX_MISC 15
-#define MAX_TARG 15
+#define MAX_RHS  250
+#define MAX_MISC 3
+#define MAX_TARG 3
 
 struct occurance {
        int count;
@@ -651,19 +668,19 @@ struct triple {
        unsigned char template_id;
        unsigned short sizes;
 #define TRIPLE_LHS(SIZES)  (((SIZES) >>  0) & 0x0f)
-#define TRIPLE_RHS(SIZES)  (((SIZES) >>  4) & 0x0f)
-#define TRIPLE_MISC(SIZES) (((SIZES) >>  8) & 0x0f)
-#define TRIPLE_TARG(SIZES) (((SIZES) >> 12) & 0x0f)
+#define TRIPLE_RHS(SIZES)  (((SIZES) >>  4) & 0xff)
+#define TRIPLE_MISC(SIZES) (((SIZES) >> 12) & 0x03)
+#define TRIPLE_TARG(SIZES) (((SIZES) >> 14) & 0x03)
 #define TRIPLE_SIZE(SIZES) \
-       ((((SIZES) >> 0) & 0x0f) + \
-       (((SIZES) >>  4) & 0x0f) + \
-       (((SIZES) >>  8) & 0x0f) + \
-       (((SIZES) >> 12) & 0x0f))
+       (TRIPLE_LHS(SIZES)  + \
+        TRIPLE_RHS(SIZES)  + \
+        TRIPLE_MISC(SIZES) + \
+        TRIPLE_TARG(SIZES))
 #define TRIPLE_SIZES(LHS, RHS, MISC, TARG) \
        ((((LHS) & 0x0f) <<  0) | \
-       (((RHS) & 0x0f)  <<  4) | \
-       (((MISC) & 0x0f) <<  8) | \
-       (((TARG) & 0x0f) << 12))
+       (((RHS)  & 0xff) <<  4) | \
+       (((MISC) & 0x03) << 12) | \
+       (((TARG) & 0x03) << 14))
 #define TRIPLE_LHS_OFF(SIZES)  (0)
 #define TRIPLE_RHS_OFF(SIZES)  (TRIPLE_LHS_OFF(SIZES) + TRIPLE_LHS(SIZES))
 #define TRIPLE_MISC_OFF(SIZES) (TRIPLE_RHS_OFF(SIZES) + TRIPLE_RHS(SIZES))
@@ -751,7 +768,6 @@ struct compile_state {
        const char *label_prefix;
        const char *ofilename;
        FILE *output;
-       struct triple *vars;
        struct file_state *file;
        struct occurance *last_occurance;
        const char *function;
@@ -860,9 +876,9 @@ struct type {
 #define MAX_REG_EQUIVS     16
 #define REGISTER_BITS      16
 #define MAX_VIRT_REGISTERS (1<<REGISTER_BITS)
-#define TEMPLATE_BITS      6
+#define TEMPLATE_BITS      7
 #define MAX_TEMPLATES      (1<<TEMPLATE_BITS)
-#define MAX_REGC           12
+#define MAX_REGC           14
 #define REG_UNSET          0
 #define REG_UNNEEDED       1
 #define REG_VIRT0          (MAX_REGISTERS + 0)
@@ -890,6 +906,7 @@ struct type {
 
 static unsigned arch_reg_regcm(struct compile_state *state, int reg);
 static unsigned arch_regcm_normalize(struct compile_state *state, unsigned regcm);
+static unsigned arch_regcm_reg_normalize(struct compile_state *state, unsigned regcm);
 static void arch_reg_equivs(
        struct compile_state *state, unsigned *equiv, int reg);
 static int arch_select_free_register(
@@ -956,7 +973,7 @@ static int get_col(struct file_state *file)
 static void loc(FILE *fp, struct compile_state *state, struct triple *triple)
 {
        int col;
-       if (triple) {
+       if (triple && triple->occurance) {
                struct occurance *spot;
                spot = triple->occurance;
                while(spot->parent) {
@@ -1312,16 +1329,20 @@ static struct triple zero_triple = {
        .op        = OP_INTCONST,
        .sizes     = TRIPLE_SIZES(0, 0, 0, 0),
        .id        = -1, /* An invalid id */
-       .u = { .cval   = 0, },
+       .u = { .cval = 0, },
        .occurance = &dummy_occurance,
-       .param { [0] = 0, [1] = 0, },
+       .param { [0] = 0, [1] = 0, },
 };
 
 
 static unsigned short triple_sizes(struct compile_state *state,
-       int op, struct type *type, int lhs_wanted, int rhs_wanted)
+       int op, struct type *type, int lhs_wanted, int rhs_wanted,
+       struct occurance *occurance)
 {
        int lhs, rhs, misc, targ;
+       struct triple dummy;
+       dummy.op = op;
+       dummy.occurance = occurance;
        valid_op(state, op);
        lhs = table_ops[op].lhs;
        rhs = table_ops[op].rhs;
@@ -1356,16 +1377,16 @@ static unsigned short triple_sizes(struct compile_state *state,
                lhs = lhs_wanted;
        }
        if ((rhs < 0) || (rhs > MAX_RHS)) {
-               internal_error(state, 0, "bad rhs");
+               internal_error(state, &dummy, "bad rhs %d", rhs);
        }
        if ((lhs < 0) || (lhs > MAX_LHS)) {
-               internal_error(state, 0, "bad lhs");
+               internal_error(state, &dummy, "bad lhs");
        }
        if ((misc < 0) || (misc > MAX_MISC)) {
-               internal_error(state, 0, "bad misc");
+               internal_error(state, &dummy, "bad misc");
        }
        if ((targ < 0) || (targ > MAX_TARG)) {
-               internal_error(state, 0, "bad targs");
+               internal_error(state, &dummy, "bad targs");
        }
        return TRIPLE_SIZES(lhs, rhs, misc, targ);
 }
@@ -1376,7 +1397,7 @@ static struct triple *alloc_triple(struct compile_state *state,
 {
        size_t size, sizes, extra_count, min_count;
        struct triple *ret;
-       sizes = triple_sizes(state, op, type, lhs, rhs);
+       sizes = triple_sizes(state, op, type, lhs, rhs, occurance);
 
        min_count = sizeof(ret->param)/sizeof(ret->param[0]);
        extra_count = TRIPLE_SIZE(sizes);
@@ -1552,7 +1573,7 @@ static struct triple *post_triple(struct compile_state *state,
        }
        /* If I have a left hand side skip over it */
        zlhs = TRIPLE_LHS(base->sizes);
-       if (zlhs && (base->op != OP_WRITE) && (base->op != OP_STORE)) {
+       if (zlhs) {
                base = LHS(base, zlhs - 1);
        }
 
@@ -1621,6 +1642,14 @@ static void display_triple(FILE *fp, struct triple *ins)
                        ptr->col);
        }
        fprintf(fp, "\n");
+#if 0
+       {
+               struct triple_set *user;
+               for(user = ptr->use; user; user = user->next) {
+                       fprintf(fp, "use: %p\n", user->member);
+               }
+       }
+#endif
        fflush(fp);
 }
 
@@ -1651,6 +1680,23 @@ static int triple_is_branch(struct compile_state *state, struct triple *ins)
        return is_branch;
 }
 
+/* Report whether ins is a conditional branch, that is a branch
+ * triple carrying exactly one RHS operand: the condition it tests.
+ * Returns 1 for a conditional branch and 0 for anything else.
+ */
+static int triple_is_cond_branch(struct compile_state *state, struct triple *ins)
+{
+       if (!triple_is_branch(state, ins)) {
+               return 0;
+       }
+       return TRIPLE_RHS(ins->sizes) == 1;
+}
+
+/* Report whether ins is an unconditional branch, that is a branch
+ * triple with no RHS operands at all.
+ * Returns 1 for an unconditional branch and 0 for anything else.
+ */
+static int triple_is_uncond_branch(struct compile_state *state, struct triple *ins)
+{
+       if (!triple_is_branch(state, ins)) {
+               return 0;
+       }
+       return TRIPLE_RHS(ins->sizes) == 0;
+}
+
 static int triple_is_def(struct compile_state *state, struct triple *ins)
 {
        /* This function is used to determine which triples need
@@ -4001,7 +4047,7 @@ static size_t size_of(struct compile_state *state, struct type *type)
                size = size_of(state, type->left);
                break;
        default:
-               error(state, 0, "sizeof not yet defined for type\n");
+               internal_error(state, 0, "sizeof not yet defined for type\n");
                break;
        }
        return size;
@@ -4010,25 +4056,26 @@ static size_t size_of(struct compile_state *state, struct type *type)
 static size_t field_offset(struct compile_state *state, 
        struct type *type, struct hash_entry *field)
 {
+       struct type *member;
        size_t size, align;
        if ((type->type & TYPE_MASK) != TYPE_STRUCT) {
                internal_error(state, 0, "field_offset only works on structures");
        }
        size = 0;
-       type = type->left;
-       while((type->type & TYPE_MASK) == TYPE_PRODUCT) {
-               align = align_of(state, type->left);
+       member = type->left;
+       while((member->type & TYPE_MASK) == TYPE_PRODUCT) {
+               align = align_of(state, member->left);
                size += needed_padding(size, align);
-               if (type->left->field_ident == field) {
-                       type = type->left;
+               if (member->left->field_ident == field) {
+                       member = member->left;
                        break;
                }
-               size += size_of(state, type->left);
-               type = type->right;
+               size += size_of(state, member->left);
+               member = member->right;
        }
-       align = align_of(state, type);
+       align = align_of(state, member);
        size += needed_padding(size, align);
-       if (type->field_ident != field) {
+       if (member->field_ident != field) {
                error(state, 0, "member %s not present", field->name);
        }
        return size;
@@ -4037,48 +4084,50 @@ static size_t field_offset(struct compile_state *state,
+/* Look up the member called field in the structure type and return
+ * that member's type.
+ *
+ * The members hang off type->left as a chain of TYPE_PRODUCT nodes:
+ * ->left of each node is one member and ->right is the rest of the
+ * chain, with the final member stored directly rather than inside a
+ * TYPE_PRODUCT.  An internal error is raised when type is not a
+ * structure, and an error is reported when no member carries the
+ * requested identifier.
+ */
 static struct type *field_type(struct compile_state *state, 
        struct type *type, struct hash_entry *field)
 {
+       /* Walk with a separate cursor so that type itself is left alone */
+       struct type *member;
        if ((type->type & TYPE_MASK) != TYPE_STRUCT) {
                internal_error(state, 0, "field_type only works on structures");
        }
-       type = type->left;
-       while((type->type & TYPE_MASK) == TYPE_PRODUCT) {
-               if (type->left->field_ident == field) {
-                       type = type->left;
+       member = type->left;
+       while((member->type & TYPE_MASK) == TYPE_PRODUCT) {
+               if (member->left->field_ident == field) {
+                       member = member->left;
                        break;
                }
-               type = type->right;
+               member = member->right;
        }
-       if (type->field_ident != field) {
+       /* Either the match found above or the last member of the chain */
+       if (member->field_ident != field) {
                error(state, 0, "member %s not present", field->name);
        }
-       return type;
+       return member;
 }
 
+/* Step through the members of the structure type one at a time.
+ *
+ * With prev_member == 0 the first member is returned, otherwise the
+ * member that follows prev_member in the TYPE_PRODUCT chain hanging
+ * off type->left.  When prev_member is already the final member of
+ * the chain that same member is returned again.  An internal error
+ * is raised when type is not a structure or when prev_member is not
+ * one of its members.
+ */
 static struct type *next_field(struct compile_state *state,
        struct type *type, struct type *prev_member) 
 {
+       /* Walk with a separate cursor so that type itself is left alone */
+       struct type *member;
        if ((type->type & TYPE_MASK) != TYPE_STRUCT) {
                internal_error(state, 0, "next_field only works on structures");
        }
-       type = type->left;
-       while((type->type & TYPE_MASK) == TYPE_PRODUCT) {
+       member = type->left;
+       while((member->type & TYPE_MASK) == TYPE_PRODUCT) {
+               /* prev_member is cleared once it has been passed, so the
+                * next member visited is the one to hand back.
+                */
                if (!prev_member) {
-                       type = type->left;
+                       member = member->left;
                        break;
                }
-               if (type->left == prev_member) {
+               if (member->left == prev_member) {
                        prev_member = 0;
                }
-               type = type->right;
+               member = member->right;
        }
-       if (type == prev_member) {
+       if (member == prev_member) {
                prev_member = 0;
        }
        if (prev_member) {
                internal_error(state, 0, "prev_member %s not present", 
                        prev_member->field_ident->name);
        }
-       return type;
+       return member;
 }
 
 static struct triple *struct_field(struct compile_state *state,
@@ -4183,6 +4232,10 @@ static int equiv_types(struct type *left, struct type *right)
                return 0;
        }
        type = left->type & TYPE_MASK;
+       /* If the basic types match and it is a void type we are done */
+       if (type == TYPE_VOID) {
+               return 1;
+       }
        /* if the basic types match and it is an arithmetic type we are done */
        if (TYPE_ARITHMETIC(type)) {
                return 1;
@@ -4477,6 +4530,8 @@ static struct triple *do_mk_addr_expr(struct compile_state *state,
        struct triple *result;
        clvalue(state, expr);
 
+       type = new_type(TYPE_POINTER | (type->type & QUAL_MASK), type, 0);
+
        result = 0;
        if (expr->op == OP_ADECL) {
                error(state, expr, "address of auto variables not supported");
@@ -4497,13 +4552,7 @@ static struct triple *do_mk_addr_expr(struct compile_state *state,
+/* Build the address of expr plus offset by forwarding to
+ * do_mk_addr_expr() with the type of expr itself.  The pointer type
+ * is no longer constructed here; do_mk_addr_expr() wraps the type it
+ * is handed in a TYPE_POINTER.
+ */
 static struct triple *mk_addr_expr(
        struct compile_state *state, struct triple *expr, ulong_t offset)
 {
-       struct type *type;
-       
-       type = new_type(
-               TYPE_POINTER | (expr->type->type & QUAL_MASK),
-               expr->type, 0);
-
-       return do_mk_addr_expr(state, expr, type, offset);
+       return do_mk_addr_expr(state, expr, expr->type, offset);
 }
 
 static struct triple *mk_deref_expr(
@@ -4515,6 +4564,29 @@ static struct triple *mk_deref_expr(
        return triple(state, OP_DEREF, base_type, expr, 0);
 }
 
+/* Convert an array valued triple into a pointer to its first element.
+ *
+ * A def whose type is not TYPE_ARRAY is returned unchanged.  For an
+ * OP_SDECL or a constant an OP_ADDRCONST referring to def is built
+ * (any constant other than an OP_BLOBCONST is an internal error).
+ * Every other array value is wrapped in an OP_COPY of pointer type.
+ */
+static struct triple *array_to_pointer(struct compile_state *state, struct triple *def)
+{
+       struct type *ptr_type;
+       struct triple *addr;
+
+       /* Only arrays need converting */
+       if ((def->type->type & TYPE_MASK) != TYPE_ARRAY) {
+               return def;
+       }
+       /* Pointer to the element type, keeping the array's qualifiers */
+       ptr_type = new_type(
+               TYPE_POINTER | (def->type->type & QUAL_MASK),
+               def->type->left, 0);
+       if ((def->op != OP_SDECL) && !is_const(def)) {
+               return triple(state, OP_COPY, ptr_type, def, 0);
+       }
+       if ((def->op != OP_SDECL) && (def->op != OP_BLOBCONST)) {
+               internal_error(state, def, "bad array constant");
+       }
+       addr = triple(state, OP_ADDRCONST, ptr_type, 0, 0);
+       MISC(addr, 0) = def;
+       return addr;
+}
+
 static struct triple *deref_field(
        struct compile_state *state, struct triple *expr, struct hash_entry *field)
 {
@@ -4555,16 +4627,10 @@ static struct triple *read_expr(struct compile_state *state, struct triple *def)
                return def;
        }
        /* Tranform an array to a pointer to the first element */
+       
 #warning "CHECK_ME is this the right place to transform arrays to pointers?"
        if ((def->type->type & TYPE_MASK) == TYPE_ARRAY) {
-               struct type *type;
-               struct triple *result;
-               type = new_type(
-                       TYPE_POINTER | (def->type->type & QUAL_MASK),
-                       def->type->left, 0);
-               result = triple(state, OP_ADDRCONST, type, 0, 0);
-               MISC(result, 0) = def;
-               return result;
+               return array_to_pointer(state, def);
        }
        if (is_in_reg(state, def)) {
                op = OP_READ;
@@ -5119,13 +5185,6 @@ static struct triple *flatten(
                        return ptr;
                }
                switch(ptr->op) {
-               case OP_WRITE:
-               case OP_STORE:
-                       RHS(ptr, 0) = flatten(state, first, RHS(ptr, 0));
-                       LHS(ptr, 0) = flatten(state, first, LHS(ptr, 0));
-                       use_triple(LHS(ptr, 0), ptr);
-                       use_triple(RHS(ptr, 0), ptr);
-                       break;
                case OP_COMMA:
                        RHS(ptr, 0) = flatten(state, first, RHS(ptr, 0));
                        ptr = RHS(ptr, 1);
@@ -5461,6 +5520,22 @@ static int is_one(struct triple *ins)
        return is_const(ins) && (ins->u.cval == 1);
 }
 
+/* Return the number of bits that are set in value. */
+static long_t bit_count(ulong_t value)
+{
+       int set_bits;
+       ulong_t rest;
+       set_bits = 0;
+       /* Peel off the low bit until no set bits remain */
+       for(rest = value; rest != 0; rest >>= 1) {
+               if (rest & 1) {
+                       set_bits++;
+               }
+       }
+       return set_bits;
+}
 static long_t bsr(ulong_t value)
 {
        int i;
@@ -5622,6 +5697,9 @@ static void mkconst(struct compile_state *state,
 static void mkaddr_const(struct compile_state *state,
        struct triple *ins, struct triple *sdecl, ulong_t value)
 {
+       if (sdecl->op != OP_SDECL) {
+               internal_error(state, ins, "bad base for addrconst");
+       }
        wipe_ins(state, ins);
        ins->op = OP_ADDRCONST;
        ins->sizes = TRIPLE_SIZES(0, 0, 1, 0);
@@ -5687,8 +5765,8 @@ static void flatten_structures(struct compile_state *state)
                                ulong_t i;
 
                                op = ins->op;
-                               src = RHS(ins, 0);
-                               dst = LHS(ins, 0);
+                               src = RHS(ins, 1);
+                               dst = RHS(ins, 0);
                                get_occurance(ins->occurance);
                                next = alloc_triple(state, OP_VAL_VEC, ins->type, -1, -1,
                                        ins->occurance);
@@ -5734,7 +5812,8 @@ static void flatten_structures(struct compile_state *state)
        ins = first;
        do {
                ins->id &= ~TRIPLE_FLAG_FLATTENED;
-               if ((ins->type->type & TYPE_MASK) == TYPE_STRUCT) {
+               if ((ins->op != OP_BLOBCONST) && (ins->op != OP_SDECL) &&
+                       ((ins->type->type & TYPE_MASK) == TYPE_STRUCT)) {
                        internal_error(state, ins, "STRUCT_TYPE remains?");
                }
                if (ins->op == OP_DOT) {
@@ -5936,13 +6015,13 @@ static void simplify_add(struct compile_state *state, struct triple *ins)
                RHS(ins, 1) = tmp;
        }
        if (is_const(RHS(ins, 0)) && is_const(RHS(ins, 1))) {
-               if (!is_pointer(RHS(ins, 0))) {
+               if (RHS(ins, 0)->op == OP_INTCONST) {
                        ulong_t left, right;
                        left  = read_const(state, ins, &RHS(ins, 0));
                        right = read_const(state, ins, &RHS(ins, 1));
                        mkconst(state, ins, left + right);
                }
-               else /* op == OP_ADDRCONST */ {
+               else if (RHS(ins, 0)->op == OP_ADDRCONST) {
                        struct triple *sdecl;
                        ulong_t left, right;
                        sdecl = MISC(RHS(ins, 0), 0);
@@ -5950,6 +6029,9 @@ static void simplify_add(struct compile_state *state, struct triple *ins)
                        right = RHS(ins, 1)->u.cval;
                        mkaddr_const(state, ins, sdecl, left + right);
                }
+               else {
+                       internal_warning(state, ins, "Optimize me!");
+               }
        }
        else if (is_const(RHS(ins, 0)) && !is_const(RHS(ins, 1))) {
                struct triple *tmp;
@@ -5962,13 +6044,13 @@ static void simplify_add(struct compile_state *state, struct triple *ins)
 static void simplify_sub(struct compile_state *state, struct triple *ins)
 {
        if (is_const(RHS(ins, 0)) && is_const(RHS(ins, 1))) {
-               if (!is_pointer(RHS(ins, 0))) {
+               if (RHS(ins, 0)->op == OP_INTCONST) {
                        ulong_t left, right;
                        left  = read_const(state, ins, &RHS(ins, 0));
                        right = read_const(state, ins, &RHS(ins, 1));
                        mkconst(state, ins, left - right);
                }
-               else /* op == OP_ADDRCONST */ {
+               else if (RHS(ins, 0)->op == OP_ADDRCONST) {
                        struct triple *sdecl;
                        ulong_t left, right;
                        sdecl = MISC(RHS(ins, 0), 0);
@@ -5976,6 +6058,9 @@ static void simplify_sub(struct compile_state *state, struct triple *ins)
                        right = RHS(ins, 1)->u.cval;
                        mkaddr_const(state, ins, sdecl, left - right);
                }
+               else {
+                       internal_warning(state, ins, "Optimize me!");
+               }
        }
 }
 
@@ -6359,6 +6444,65 @@ static void simplify_branch(struct compile_state *state, struct triple *ins)
        }
 }
 
+/* Report whether any instruction of block is a phi function.
+ *
+ * Returns 0 for a null block.  Otherwise the instructions from
+ * block->first through block->last inclusive are examined; 1 is
+ * returned as soon as an OP_PHI is seen and 0 if there is none.
+ */
+int phi_present(struct block *block)
+{
+       struct triple *ptr;
+       if (!block) {
+               return 0;
+       }
+       /* Test for the end of the block only after looking at the
+        * current instruction.  That way block->last is examined too,
+        * and a block whose first and last instruction coincide does
+        * not send the walk on into the instructions that follow it.
+        */
+       for(ptr = block->first; ; ptr = ptr->next) {
+               if (ptr->op == OP_PHI) {
+                       return 1;
+               }
+               if (ptr == block->last) {
+                       break;
+               }
+       }
+       return 0;
+}
+
+/* Retire labels that serve no purpose.
+ *
+ * The first and last instructions of the main function are never
+ * touched.  A label without users is turned into an OP_NOOP.  A label
+ * that directly follows another label is also turned into an OP_NOOP,
+ * with its users retargeted to the preceding label, provided the phi
+ * check below allows it.
+ *
+ * NOTE: further down, table_simplify is built after a
+ * "#define simplify_label simplify_noop", so the OP_LABEL entry does
+ * not reach this routine yet (see the FIXME warning below).
+ */
+static void simplify_label(struct compile_state *state, struct triple *ins)
+{
+#warning "FIXME enable simplify_label"
+       struct triple *first, *last;
+       first = RHS(state->main_function, 0);
+       last = first->prev;
+       /* Ignore the first and last instructions */
+       if ((ins == first) || (ins == last)) {
+               return;
+       }
+       /* Nothing refers to this label */
+       if (ins->use == 0) {
+               ins->op = OP_NOOP;
+       }
+       else if (ins->prev->op == OP_LABEL) {
+               struct block *block;
+               block = ins->prev->u.block;
+               /* In general it is not safe to merge one label that
+                * immediately follows another.  The problem is that the empty
+                * looking block may have phi functions that depend on it.
+                */
+               /* block->left and block->right are presumably the blocks
+                * control can reach from here -- TODO confirm.
+                */
+               if (!block || 
+                       (!phi_present(block->left) && 
+                       !phi_present(block->right))) 
+               {
+                       struct triple_set *user, *next;
+                       ins->op = OP_NOOP;
+                       for(user = ins->use; user; user = next) {
+                               struct triple *use;
+                               /* Remember the following entry before the use
+                                * list of ins is modified below.
+                                */
+                               next = user->next;
+                               use = user->member;
+                               /* Only a reference held in TARG 0 is redirected */
+                               if (TARG(use, 0) == ins) {
+                                       TARG(use, 0) = ins->prev;
+                                       unuse_triple(ins, use);
+                                       use_triple(ins->prev, use);
+                               }
+                       }
+                       /* Any user left over was not redirected above */
+                       if (ins->use) {
+                               internal_error(state, ins, "noop use != 0");
+                       }
+               }
+       }
+}
+
 static void simplify_phi(struct compile_state *state, struct triple *ins)
 {
        struct triple **expr;
@@ -6401,6 +6545,10 @@ static void simplify_bsr(struct compile_state *state, struct triple *ins)
 
 typedef void (*simplify_t)(struct compile_state *state, struct triple *ins);
 static const simplify_t table_simplify[] = {
+#if 1
+#define simplify_sdivt    simplify_noop
+#define simplify_udivt    simplify_noop
+#endif
 #if 0
 #define simplify_smul     simplify_noop
 #define simplify_umul    simplify_noop
@@ -6459,6 +6607,9 @@ static const simplify_t table_simplify[] = {
 #if 0
 #define simplify_branch          simplify_noop
 #endif
+#if 1
+#define simplify_label   simplify_noop
+#endif
 
 #if 0
 #define simplify_phi     simplify_noop
@@ -6469,6 +6620,8 @@ static const simplify_t table_simplify[] = {
 #define simplify_bsr      simplify_noop
 #endif
 
+[OP_SDIVT      ] = simplify_sdivt,
+[OP_UDIVT      ] = simplify_udivt,
 [OP_SMUL       ] = simplify_smul,
 [OP_UMUL       ] = simplify_umul,
 [OP_SDIV       ] = simplify_sdiv,
@@ -6520,7 +6673,7 @@ static const simplify_t table_simplify[] = {
 
 [OP_LIST       ] = simplify_noop,
 [OP_BRANCH     ] = simplify_branch,
-[OP_LABEL      ] = simplify_noop,
+[OP_LABEL      ] = simplify_label,
 [OP_ADECL      ] = simplify_noop,
 [OP_SDECL      ] = simplify_noop,
 [OP_PHI        ] = simplify_phi,
@@ -6568,7 +6721,7 @@ static void simplify_all(struct compile_state *state)
        do {
                simplify(state, ins);
                ins = ins->next;
-       } while(ins != first);
+       }while(ins != first);
 }
 
 /*
@@ -6745,8 +6898,32 @@ static struct type *register_builtin_type(struct compile_state *state,
 
 static void register_builtins(struct compile_state *state)
 {
+       struct type *div_type, *ldiv_type;
+       struct type *udiv_type, *uldiv_type;
        struct type *msr_type;
 
+       div_type = register_builtin_type(state, "__builtin_div_t",
+               partial_struct(state, "quot", &int_type,
+               partial_struct(state, "rem",  &int_type, 0)));
+       ldiv_type = register_builtin_type(state, "__builtin_ldiv_t",
+               partial_struct(state, "quot", &long_type,
+               partial_struct(state, "rem",  &long_type, 0)));
+       udiv_type = register_builtin_type(state, "__builtin_udiv_t",
+               partial_struct(state, "quot", &uint_type,
+               partial_struct(state, "rem",  &uint_type, 0)));
+       uldiv_type = register_builtin_type(state, "__builtin_uldiv_t",
+               partial_struct(state, "quot", &ulong_type,
+               partial_struct(state, "rem",  &ulong_type, 0)));
+
+       register_builtin_function(state, "__builtin_div",   OP_SDIVT, div_type,
+               &int_type, &int_type);
+       register_builtin_function(state, "__builtin_ldiv",  OP_SDIVT, ldiv_type,
+               &long_type, &long_type);
+       register_builtin_function(state, "__builtin_udiv",  OP_UDIVT, udiv_type,
+               &uint_type, &uint_type);
+       register_builtin_function(state, "__builtin_uldiv", OP_UDIVT, uldiv_type,
+               &ulong_type, &ulong_type);
+
        register_builtin_function(state, "__builtin_inb", OP_INB, &uchar_type, 
                &ushort_type);
        register_builtin_function(state, "__builtin_inw", OP_INW, &ushort_type,
@@ -8391,24 +8568,23 @@ static struct type *enum_specifier(
        return type;
 }
 
-#if 0
+/* Parse the declarator of a single structure member.
+ *
+ * Unless the next token is a colon an ordinary declarator is parsed
+ * and its type and identifier are returned through the result and
+ * *ident.  When a colon is present (a bitfield width) the colon and
+ * the constant expression after it are consumed and an error is
+ * reported, since bitfields are not implemented.
+ */
 static struct type *struct_declarator(
        struct compile_state *state, struct type *type, struct hash_entry **ident)
 {
        int tok;
-#warning "struct_declarator is complicated because of bitfields, kill them?"
        tok = peek(state);
        if (tok != TOK_COLON) {
                type = declarator(state, type, ident, 1);
        }
        if ((tok == TOK_COLON) || (peek(state) == TOK_COLON)) {
+               /* The width is parsed and stored but not used yet */
+               struct triple *value;
                eat(state, TOK_COLON);
-               constant_expr(state);
+               value = constant_expr(state);
+#warning "FIXME implement bitfields to reduce register usage"
+               error(state, 0, "bitfields not yet implemented");
        }
-       FINISHME();
        return type;
 }
-#endif
 
 static struct type *struct_or_union_specifier(
        struct compile_state *state, unsigned int spec)
@@ -8441,19 +8617,19 @@ static struct type *struct_or_union_specifier(
        }
        if (!ident || (peek(state) == TOK_LBRACE)) {
                ulong_t elements;
+               struct type **next;
                elements = 0;
                eat(state, TOK_LBRACE);
+               next = &struct_type;
                do {
                        struct type *base_type;
-                       struct type **next;
                        int done;
                        base_type = specifier_qualifier_list(state);
-                       next = &struct_type;
                        do {
                                struct type *type;
                                struct hash_entry *fident;
                                done = 1;
-                               type = declarator(state, base_type, &fident, 1);
+                               type = struct_declarator(state, base_type, &fident);
                                elements++;
                                if (peek(state) == TOK_COMMA) {
                                        done = 0;
@@ -9107,6 +9283,9 @@ static struct triple *do_decl(struct compile_state *state,
        default:
                internal_error(state, 0, "Undefined storage class");
        }
+       if ((type->type & TYPE_MASK) == TYPE_FUNCTION) {
+               error(state, 0, "Function prototypes not supported");
+       }
        if (ident && 
                ((type->type & STOR_MASK) == STOR_STATIC) &&
                ((type->type & QUAL_CONST) == 0)) {
@@ -9347,14 +9526,6 @@ static int do_print_triple(struct compile_state *state, struct triple *ins, int
        if ((ins->op == OP_BRANCH) && ins->use) {
                internal_error(state, ins, "branch used?");
        }
-#if 0
-       {
-               struct triple_set *user;
-               for(user = ins->use; user; user = user->next) {
-                       printf("use: %p\n", user->member);
-               }
-       }
-#endif
        if (triple_is_branch(state, ins)) {
                printf("\n");
        }
@@ -9481,13 +9652,16 @@ static void walk_blocks(struct compile_state *state,
        ptr = first;
        do {
                struct block *block;
-               if (ptr->op == OP_LABEL) {
+               if (triple_stores_block(state, ptr)) {
                        block = ptr->u.block;
                        if (block && (block != last_block)) {
                                cb(state, block, arg);
                        }
                        last_block = block;
                }
+               if (block && (block->last == ptr)) {
+                       block = 0;
+               }
                ptr = ptr->next;
        } while(ptr != first);
 }
@@ -9495,10 +9669,11 @@ static void walk_blocks(struct compile_state *state,
 static void print_block(
        struct compile_state *state, struct block *block, void *arg)
 {
+       struct block_set *user;
        struct triple *ptr;
        FILE *fp = arg;
 
-       fprintf(fp, "\nblock: %p (%d), %p<-%p %p<-%p\n", 
+       fprintf(fp, "\nblock: %p (%d)  %p<-%p %p<-%p\n", 
                block, 
                block->vertex,
                block->left, 
@@ -9509,51 +9684,17 @@ static void print_block(
                fprintf(fp, "%p:\n", block->first);
        }
        for(ptr = block->first; ; ptr = ptr->next) {
-               struct triple_set *user;
-               int op = ptr->op;
-               
-               if (triple_stores_block(state, ptr)) {
-                       if (ptr->u.block != block) {
-                               internal_error(state, ptr, 
-                                       "Wrong block pointer: %p\n",
-                                       ptr->u.block);
-                       }
-               }
-               if (op == OP_ADECL) {
-                       for(user = ptr->use; user; user = user->next) {
-                               if (!user->member->u.block) {
-                                       internal_error(state, user->member, 
-                                               "Use %p not in a block?\n",
-                                               user->member);
-                               }
-                       }
-               }
                display_triple(fp, ptr);
-
-#if 0
-               for(user = ptr->use; user; user = user->next) {
-                       fprintf(fp, "use: %p\n", user->member);
-               }
-#endif
-
-               /* Sanity checks... */
-               valid_ins(state, ptr);
-               for(user = ptr->use; user; user = user->next) {
-                       struct triple *use;
-                       use = user->member;
-                       valid_ins(state, use);
-                       if (triple_stores_block(state, user->member) &&
-                               !user->member->u.block) {
-                               internal_error(state, user->member,
-                                       "Use %p not in a block?",
-                                       user->member);
-                       }
-               }
-
                if (ptr == block->last)
                        break;
        }
-       fprintf(fp,"\n");
+       fprintf(fp, "users %d: ", block->users);
+       for(user = block->use; user; user = user->next) {
+               fprintf(fp, "%p (%d) ", 
+                       user->member,
+                       user->member->vertex);
+       }
+       fprintf(fp,"\n\n");
 }
 
 
@@ -9579,6 +9720,9 @@ static void prune_nonblock_triples(struct compile_state *state)
                if (!block) {
                        release_triple(state, ins);
                }
+               if (block && block->last == ins) {
+                       block = 0;
+               }
                ins = next;
        } while(ins != first);
 }
@@ -9599,10 +9743,6 @@ static void setup_basic_blocks(struct compile_state *state)
        if (!state->last_block) {
                internal_error(state, 0, "end not used?");
        }
-       /* Insert an extra unused edge from start to the end 
-        * This helps with reverse control flow calculations.
-        */
-       use_block(state->first_block, state->last_block);
        /* If we are debugging print what I have just done */
        if (state->debug & DEBUG_BASIC_BLOCKS) {
                print_blocks(state, stdout);
@@ -9754,7 +9894,8 @@ static int initialize_sdblock(struct sdom_block *sd,
        return vertex;
 }
 
-static int initialize_sdpblock(struct sdom_block *sd,
+static int initialize_sdpblock(
+       struct compile_state *state, struct sdom_block *sd,
        struct block *parent, struct block *block, int vertex)
 {
        struct block_set *user;
@@ -9771,7 +9912,38 @@ static int initialize_sdpblock(struct sdom_block *sd,
        sd[vertex].ancestor = 0;
        sd[vertex].vertex   = vertex;
        for(user = block->use; user; user = user->next) {
-               vertex = initialize_sdpblock(sd, block, user->member, vertex);
+               vertex = initialize_sdpblock(state, sd, block, user->member, vertex);
+       }
+       return vertex;
+}
+
+static int setup_sdpblocks(struct compile_state *state, struct sdom_block *sd)
+{      /* Fill in sd[] for the post dominator calculation, starting
+        * from state->last_block, and return the vertex value handed
+        * back by the last initialize_sdpblock() call.
+        */
+       struct block *block;
+       int vertex;
+       /* Setup as many sdpblocks as possible without using fake edges */
+       vertex = initialize_sdpblock(state, sd, 0, state->last_block, 0);
+
+       /* Walk through the graph and find unconnected blocks.  If 
+        * we can, add a fake edge from the unconnected blocks to the
+        * end of the graph.
+        */
+       block = state->first_block->last->next->u.block;
+       for(; block && block != state->first_block; block =  block->last->next->u.block) {
+               if (sd[block->vertex].block == block) { /* sd[] already refers to this block */
+                       continue;
+               }
+               if (block->left != 0) { /* left is occupied, and the fake edge is stored in left */
+                       continue;
+               }
+
+#if DEBUG_SDP_BLOCKS
+               fprintf(stderr, "Adding %d\n", vertex +1);
+#endif
+
+               block->left = state->last_block;
+               use_block(block->left, block);
+               vertex = initialize_sdpblock(state, sd, state->last_block, block, vertex);
+       }
        return vertex;
 }
@@ -10011,10 +10183,15 @@ static void find_immediate_dominators(struct compile_state *state)
 static void find_post_dominators(struct compile_state *state)
 {
        struct sdom_block *sd;
+       int vertex;
        /* Step 1 initialize the basic block information */
        sd = xcmalloc(sizeof(*sd) * (state->last_vertex + 1), "sdom_state");
 
-       initialize_sdpblock(sd, 0, state->last_block, 0);
+       vertex = setup_sdpblocks(state, sd);
+       if (vertex != state->last_vertex) {
+               internal_error(state, 0, "missing %d blocks\n",
+                       state->last_vertex - vertex);
+       }
 
        /* Step 2 compute the semidominators */
        /* Step 3 implicitly define the immediate dominator of each vertex */
@@ -10254,7 +10431,7 @@ static void insert_phi_operations(struct compile_state *state)
        int *has_already, *work;
        struct block *work_list, **work_list_tail;
        int iter;
-       struct triple *var;
+       struct triple *var, *vnext;
 
        size = sizeof(int) * (state->last_vertex + 1);
        has_already = xcmalloc(size, "has_already");
@@ -10262,16 +10439,18 @@ static void insert_phi_operations(struct compile_state *state)
        iter = 0;
 
        first = RHS(state->main_function, 0);
-       for(var = first->next; var != first ; var = var->next) {
+       for(var = first->next; var != first ; var = vnext) {
                struct block *block;
-               struct triple_set *user;
+               struct triple_set *user, *unext;
+               vnext = var->next;
                if ((var->op != OP_ADECL) || !var->use) {
                        continue;
                }
                iter += 1;
                work_list = 0;
                work_list_tail = &work_list;
-               for(user = var->use; user; user = user->next) {
+               for(user = var->use; user; user = unext) {
+                       unext = user->next;
                        if (user->member->op == OP_READ) {
                                continue;
                        }
@@ -10282,6 +10461,8 @@ static void insert_phi_operations(struct compile_state *state)
                        block = user->member->u.block;
                        if (!block) {
                                warning(state, user->member, "dead code");
+                               release_triple(state, user->member);
+                               continue;
                        }
                        if (work[block->vertex] >= iter) {
                                continue;
@@ -10419,16 +10600,30 @@ static void rename_block_variables(
                }
                /* LHS(A) */
                if (ptr->op == OP_WRITE) {
-                       struct triple *var, *val;
-                       var = LHS(ptr, 0);
-                       val = RHS(ptr, 0);
+                       struct triple *var, *val, *tval;
+                       var = RHS(ptr, 0);
+                       tval = val = RHS(ptr, 1);
                        if ((val->op == OP_WRITE) || (val->op == OP_READ)) {
-                               internal_error(state, val, "bad value in write");
+                               internal_error(state, ptr, "bad value in write");
                        }
-                       propogate_use(state, ptr, val);
+                       /* Insert a copy if the types differ */
+                       if (!equiv_types(ptr->type, val->type)) {
+                               if (val->op == OP_INTCONST) {
+                                       tval = pre_triple(state, ptr, OP_INTCONST, ptr->type, 0, 0);
+                                       tval->u.cval = val->u.cval;
+                               }
+                               else {
+                                       tval = pre_triple(state, ptr, OP_COPY, ptr->type, val, 0);
+                                       use_triple(val, tval);
+                               }
+                               unuse_triple(val, ptr);
+                               RHS(ptr, 1) = tval;
+                               use_triple(tval, ptr);
+                       }
+                       propogate_use(state, ptr, tval);
                        unuse_triple(var, ptr);
                        /* Push OP_WRITE ptr->right onto a stack of variable uses */
-                       push_triple(var, val);
+                       push_triple(var, tval);
                }
                if (ptr->op == OP_PHI) {
                        struct triple *var;
@@ -10457,9 +10652,9 @@ static void rename_block_variables(
                }
                if (ptr->op == OP_WRITE) {
                        struct triple *var;
-                       var = LHS(ptr, 0);
+                       var = RHS(ptr, 0);
                        /* Pop OP_WRITE ptr->right from the stack of variable uses */
-                       pop_triple(var, RHS(ptr, 0));
+                       pop_triple(var, RHS(ptr, 1));
                        release_triple(state, ptr);
                        continue;
                }
@@ -10611,6 +10806,7 @@ static void transform_from_ssa_form(struct compile_state *state)
                        unuse_triple(phi, use->member);
                }
 
+#warning "CHECK_ME does the OP_ADECL need to be placed somewhere that dominates all of the incoming phi edges?"
                /* A variable to replace the phi function */
                var = post_triple(state, phi, OP_ADECL, phi->type, 0,0);
                /* A read of the single value that is set into the variable */
@@ -10625,7 +10821,7 @@ static void transform_from_ssa_form(struct compile_state *state)
                for(edge = 0, set = block->use; set; set = set->next, edge++) {
                        struct block *eblock;
                        struct triple *move;
-                       struct triple *val;
+                       struct triple *val, *base;
                        eblock = set->member;
                        val = slot[edge];
                        slot[edge] = 0;
@@ -10637,20 +10833,21 @@ static void transform_from_ssa_form(struct compile_state *state)
                                continue;
                        }
                        
-                       move = post_triple(state, 
-                               val, OP_WRITE, phi->type, var, val);
+                       /* Make certain the write is placed in the edge block... */
+                       base = eblock->first;
+                       if (block_of_triple(state, val) == eblock) {
+                               base = val;
+                       }
+                       move = post_triple(state, base, OP_WRITE, phi->type, var, val);
                        use_triple(val, move);
                        use_triple(var, move);
                }               
                /* See if there are any writers of var */
                used = 0;
                for(use = var->use; use; use = use->next) {
-                       struct triple **expr;
-                       expr = triple_lhs(state, use->member, 0);
-                       for(; expr; expr = triple_lhs(state, use->member, expr)) {
-                               if (*expr == var) {
-                                       used = 1;
-                               }
+                       if ((use->member->op == OP_WRITE) &&
+                               (RHS(use->member, 0) == var)) {
+                               used = 1;
                        }
                }
                /* If var is not used free it */
@@ -10758,7 +10955,7 @@ static struct reg_info find_lhs_post_color(
        struct triple_set *set;
        struct reg_info info;
        struct triple *lhs;
-#if 0
+#if DEBUG_TRIPLE_COLOR
        fprintf(stderr, "find_lhs_post_color(%p, %d)\n",
                ins, index);
 #endif
@@ -10802,7 +10999,7 @@ static struct reg_info find_lhs_post_color(
                        info.regcm &= rinfo.regcm;
                }
        }
-#if 0
+#if DEBUG_TRIPLE_COLOR
        fprintf(stderr, "find_lhs_post_color(%p, %d) -> ( %d, %x)\n",
                ins, index, info.reg, info.regcm);
 #endif
@@ -10814,7 +11011,7 @@ static struct reg_info find_rhs_post_color(
 {
        struct reg_info info, rinfo;
        int zlhs, i;
-#if 0
+#if DEBUG_TRIPLE_COLOR
        fprintf(stderr, "find_rhs_post_color(%p, %d)\n",
                ins, index);
 #endif
@@ -10837,7 +11034,7 @@ static struct reg_info find_rhs_post_color(
                        if (tinfo.reg >= MAX_REGISTERS) {
                                tinfo.reg = REG_UNSET;
                        }
-                       info.regcm &= linfo.reg;
+                       info.regcm &= linfo.regcm;
                        info.regcm &= tinfo.regcm;
                        if (info.reg != REG_UNSET) {
                                internal_error(state, ins, "register conflict");
@@ -10848,7 +11045,7 @@ static struct reg_info find_rhs_post_color(
                        info.reg = tinfo.reg;
                }
        }
-#if 0
+#if DEBUG_TRIPLE_COLOR
        fprintf(stderr, "find_rhs_post_color(%p, %d) -> ( %d, %x)\n",
                ins, index, info.reg, info.regcm);
 #endif
@@ -10859,7 +11056,7 @@ static struct reg_info find_lhs_color(
        struct compile_state *state, struct triple *ins, int index)
 {
        struct reg_info pre, post, info;
-#if 0
+#if DEBUG_TRIPLE_COLOR
        fprintf(stderr, "find_lhs_color(%p, %d)\n",
                ins, index);
 #endif
@@ -10875,9 +11072,10 @@ static struct reg_info find_lhs_color(
        if (info.reg == REG_UNSET) {
                info.reg = post.reg;
        }
-#if 0
-       fprintf(stderr, "find_lhs_color(%p, %d) -> ( %d, %x)\n",
-               ins, index, info.reg, info.regcm);
+#if DEBUG_TRIPLE_COLOR
+       fprintf(stderr, "find_lhs_color(%p, %d) -> ( %d, %x) ... (%d, %x) (%d, %x)\n",
+               ins, index, info.reg, info.regcm,
+               pre.reg, pre.regcm, post.reg, post.regcm);
 #endif
        return info;
 }
@@ -10912,25 +11110,38 @@ static struct triple *post_copy(struct compile_state *state, struct triple *ins)
        return out;
 }
 
-static struct triple *pre_copy(
-       struct compile_state *state, struct triple *ins, int index)
+static struct triple *typed_pre_copy(
+       struct compile_state *state, struct type *type, struct triple *ins, int index)
 {
        /* Carefully insert enough operations so that I can
         * enter any operation with a GPR32.
         */
        struct triple *in;
        struct triple **expr;
+       unsigned classes;       /* result of arch_type_to_regcm() for type */
+       struct reg_info info;   /* result of arch_reg_rhs() for operand index of ins */
        if (ins->op == OP_PHI) {
                internal_error(state, ins, "pre_copy on a phi?");
        }
+       classes = arch_type_to_regcm(state, type);
+       info = arch_reg_rhs(state, ins, index);
        expr = &RHS(ins, index);
-       in = pre_triple(state, ins, OP_COPY, (*expr)->type, *expr, 0);
+       if ((info.regcm & classes) == 0) {      /* the two register class masks share no bit */
+               internal_error(state, ins, "pre_copy with no register classes");
+       }
+       in = pre_triple(state, ins, OP_COPY, type, *expr, 0);   /* the copy gets the requested type */
        unuse_triple(*expr, ins);
        *expr = in;
        use_triple(RHS(in, 0), in);
        use_triple(in, ins);
        transform_to_arch_instruction(state, in);
        return in;
+       
+}
+static struct triple *pre_copy(
+       struct compile_state *state, struct triple *ins, int index)
+{      /* Same as typed_pre_copy(), with the copy typed like the
+        * operand RHS(ins, index) that it replaces.  Returns what
+        * typed_pre_copy() returns.
+        */
+       return typed_pre_copy(state, RHS(ins, index)->type, ins, index);
 }
 
 
@@ -10947,7 +11158,7 @@ static void insert_copies_to_phi(struct compile_state *state)
        for(phi = first->next; phi != first ; phi = phi->next) {
                struct block_set *set;
                struct block *block;
-               struct triple **slot;
+               struct triple **slot, *copy;
                int edge;
                if (phi->op != OP_PHI) {
                        continue;
@@ -10955,6 +11166,13 @@ static void insert_copies_to_phi(struct compile_state *state)
                phi->id |= TRIPLE_FLAG_POST_SPLIT;
                block = phi->u.block;
                slot  = &RHS(phi, 0);
+               /* Phi's that feed into mandatory live range joins
+                * cause nasty complications.  Insert a copy of
+                * the phi value so I never have to deal with
+                * that in the rest of the code.
+                */
+               copy = post_copy(state, phi);
+               copy->id |= TRIPLE_FLAG_PRE_SPLIT;
                /* Walk all of the incoming edges/blocks and insert moves.
                 */
                for(edge = 0, set = block->use; set; set = set->next, edge++) {
@@ -11449,6 +11667,12 @@ static void eliminate_inefectual_code(struct compile_state *state)
                if (!triple_is_pure(state, ins) || triple_is_branch(state, ins)) {
                        awaken(state, dtriple, &ins, &work_list_tail);
                }
+#if 1
+               /* Unconditionally keep the very last instruction */
+               else if (ins->next == first) {
+                       awaken(state, dtriple, &ins, &work_list_tail);
+               }
+#endif
                i++;
                ins = ins->next;
        } while(ins != first);
@@ -11567,6 +11791,14 @@ static void insert_mandatory_copies(struct compile_state *state)
                        if (regcm == 0) {
                                do_pre_copy = 1;
                        }
+                       /* Always use pre_copies for constants.
+                        * They do not take up any registers until a
+                        * copy places them in one.
+                        */
+                       if ((info.reg == REG_UNNEEDED) && 
+                               (rinfo.reg != REG_UNNEEDED)) {
+                               do_pre_copy = 1;
+                       }
                }
                do_post_copy =
                        !do_pre_copy &&
@@ -11577,7 +11809,7 @@ static void insert_mandatory_copies(struct compile_state *state)
 
                reg = info.reg;
                regcm = info.regcm;
-               /* Walk through the uses of insert and do a pre_copy or see if a post_copy is warranted */
+               /* Walk through the uses of ins and do a pre_copy or see if a post_copy is warranted */
                for(entry = ins->use; entry; entry = next) {
                        struct reg_info rinfo;
                        int i;
@@ -11705,34 +11937,168 @@ struct reg_state {
 };
 
 
-static unsigned regc_max_size(struct compile_state *state, int classes)
-{
-       unsigned max_size;
-       int i;
-       max_size = 0;
-       for(i = 0; i < MAX_REGC; i++) {
-               if (classes & (1 << i)) {
-                       unsigned size;
-                       size = arch_regc_size(state, i);
-                       if (size > max_size) {
-                               max_size = size;
-                       }
-               }
-       }
-       return max_size;
-}
 
-static int reg_is_reg(struct compile_state *state, int reg1, int reg2)
+struct print_interference_block_info { /* Argument bundle that
+        * print_interference_blocks() hands to
+        * print_interference_block() through walk_blocks().
+        */
+       struct reg_state *rstate;       /* register allocator state to report on */
+       FILE *fp;                       /* stream the report is written to */
+       int need_edges;                 /* non-zero: also print each live range and its edges */
+};
+/* Print one basic block annotated with register allocation state.
+ *
+ * Used as a walk_blocks() callback by print_interference_blocks().
+ *   state - the compile state
+ *   block - the basic block to print
+ *   arg   - a struct print_interference_block_info giving the
+ *           reg_state to read, the output stream and whether the
+ *           live range definitions and edges are wanted.
+ *
+ * Printed in order: the block header, the live in set, the phi
+ * operands for each incoming edge (only if the block contains a
+ * phi), every triple shown with its original id and the color of
+ * its live range, and the live out set.  Inconsistencies found on
+ * the way are reported with internal_error().
+ */
+static void print_interference_block(
+       struct compile_state *state, struct block *block, void *arg)
+{
+       struct print_interference_block_info *info = arg;
+       struct reg_state *rstate = info->rstate;
+       FILE *fp = info->fp;
+       struct reg_block *rb;
+       struct triple *ptr;
+       int phi_present;
+       int done;
+       rb = &rstate->blocks[block->vertex];
+
+       fprintf(fp, "\nblock: %p (%d), %p<-%p %p<-%p\n",
+               block,
+               block->vertex,
+               block->left,
+               block->left && block->left->use?block->left->use->member : 0,
+               block->right,
+               block->right && block->right->use?block->right->use->member : 0);
+       if (rb->in) {
+               struct triple_reg_set *in_set;
+               fprintf(fp, "        in:");
+               for(in_set = rb->in; in_set; in_set = in_set->next) {
+                       fprintf(fp, " %-10p", in_set->member);
+               }
+               fprintf(fp, "\n");
+       }
+       /* See if the block holds at least one phi function */
+       phi_present = 0;
+       for(done = 0, ptr = block->first; !done; ptr = ptr->next) {
+               done = (ptr == block->last);
+               if (ptr->op == OP_PHI) {
+                       phi_present = 1;
+                       break;
+               }
+       }
+       if (phi_present) {
+               /* One line per incoming edge, holding the operand
+                * each phi of the block takes on that edge.
+                */
+               int edge;
+               for(edge = 0; edge < block->users; edge++) {
+                       fprintf(fp, "     in(%d):", edge);
+                       for(done = 0, ptr = block->first; !done; ptr = ptr->next) {
+                               struct triple **slot;
+                               done = (ptr == block->last);
+                               if (ptr->op != OP_PHI) {
+                                       continue;
+                               }
+                               slot = &RHS(ptr, 0);
+                               fprintf(fp, " %-10p", slot[edge]);
+                       }
+                       fprintf(fp, "\n");
+               }
+       }
+       if (block->first->op == OP_LABEL) {
+               fprintf(fp, "%p:\n", block->first);
+       }
+       for(done = 0, ptr = block->first; !done; ptr = ptr->next) {
+               struct live_range *lr;
+               unsigned id;
+               done = (ptr == block->last);
+
+               /* Check the id before it is used to index rstate->lrd */
+               if ((ptr->id < 0) || (ptr->id > rstate->defs)) {
+                       internal_error(state, ptr, "Invalid triple id: %d",
+                               ptr->id);
+               }
+               lr = rstate->lrd[ptr->id].lr;
+
+               /* Display the triple with its original id and the
+                * color of its live range, then put the id back.
+                */
+               id = ptr->id;
+               ptr->id = rstate->lrd[id].orig_id;
+               SET_REG(ptr->id, lr->color);
+               display_triple(fp, ptr);
+               ptr->id = id;
+
+               if (triple_is_def(state, ptr) && (lr->defs == 0)) {
+                       internal_error(state, ptr, "lr has no defs!");
+               }
+               if (info->need_edges) {
+                       if (lr->defs) {
+                               /* lr->defs is a circular list */
+                               struct live_range_def *lrd;
+                               fprintf(fp, "       range:");
+                               lrd = lr->defs;
+                               do {
+                                       fprintf(fp, " %-10p", lrd->def);
+                                       lrd = lrd->next;
+                               } while(lrd != lr->defs);
+                               fprintf(fp, "\n");
+                       }
+                       /* edges is a pointer, so test it against null
+                        * instead of ordering it against zero.
+                        */
+                       if (lr->edges) {
+                               struct live_range_edge *edge;
+                               fprintf(fp, "       edges:");
+                               for(edge = lr->edges; edge; edge = edge->next) {
+                                       struct live_range_def *lrd;
+                                       lrd = edge->node->defs;
+                                       do {
+                                               fprintf(fp, " %-10p", lrd->def);
+                                               lrd = lrd->next;
+                                       } while(lrd != edge->node->defs);
+                                       fprintf(fp, "|");
+                               }
+                               fprintf(fp, "\n");
+                       }
+               }
+               /* Do the remaining sanity checks */
+               valid_ins(state, ptr);
+       }
+       if (rb->out) {
+               struct triple_reg_set *out_set;
+               fprintf(fp, "       out:");
+               for(out_set = rb->out; out_set; out_set = out_set->next) {
+                       fprintf(fp, " %-10p", out_set->member);
+               }
+               fprintf(fp, "\n");
+       }
+       fprintf(fp, "\n");
+}
+
+static void print_interference_blocks(
+       struct compile_state *state, struct reg_state *rstate, FILE *fp, int need_edges)
+{      /* Write a per block report to fp: a heading, then
+        * print_interference_block() is run by walk_blocks() with
+        * rstate, fp and need_edges passed along in info.
+        */
+       struct print_interference_block_info info;
+       info.rstate = rstate;
+       info.fp = fp;
+       info.need_edges = need_edges;
+       fprintf(fp, "\nlive variables by block\n");
+       walk_blocks(state, print_interference_block, &info);
+
+}
+
+static unsigned regc_max_size(struct compile_state *state, int classes)
+{      /* Return the largest arch_regc_size() among the register
+        * classes whose bit is set in classes, or 0 when none of the
+        * bits below MAX_REGC is set.
+        */
+       unsigned max_size;
+       int i;
+       max_size = 0;
+       for(i = 0; i < MAX_REGC; i++) {
+               if (classes & (1 << i)) {       /* bit i set: class i is in the mask */
+                       unsigned size;
+                       size = arch_regc_size(state, i);
+                       if (size > max_size) {
+                               max_size = size;
+                       }
+               }
+       }
+       return max_size;
+}
+
+static int reg_is_reg(struct compile_state *state, int reg1, int reg2)
+{
+       unsigned equivs[MAX_REG_EQUIVS];
+       int i;
+       if ((reg1 < 0) || (reg1 >= MAX_REGISTERS)) {
+               internal_error(state, 0, "invalid register");
+       }
+       if ((reg2 < 0) || (reg2 >= MAX_REGISTERS)) {
+               internal_error(state, 0, "invalid register");
+       }
+       arch_reg_equivs(state, equivs, reg1);
        for(i = 0; (i < MAX_REG_EQUIVS) && equivs[i] != REG_UNSET; i++) {
                if (equivs[i] == reg2) {
                        return 1;
@@ -12015,6 +12381,21 @@ static struct live_range *coalesce_ranges(
                internal_error(state, lr1->defs->def,
                        "cannot coalesce live ranges with dissimilar register classes");
        }
+#if DEBUG_COALESCING
+       fprintf(stderr, "coalescing:");
+       lrd = lr1->defs;
+       do {
+               fprintf(stderr, " %p", lrd->def);
+               lrd = lrd->next;
+       } while(lrd != lr1->defs);
+       fprintf(stderr, " |");
+       lrd = lr2->defs;
+       do {
+               fprintf(stderr, " %p", lrd->def);
+               lrd = lrd->next;
+       } while(lrd != lr2->defs);
+       fprintf(stderr, "\n");
+#endif
        /* If there is a clear dominate live range put it in lr1,
         * For purposes of this test phi functions are
         * considered dominated by the definitions that feed into
@@ -12050,7 +12431,6 @@ static struct live_range *coalesce_ranges(
                lr2->color);
 #endif
        
-       lr1->classes = classes;
        /* Append lr2 onto lr1 */
 #warning "FIXME should this be a merge instead of a splice?"
        /* This FIXME item applies to the correctness of live_range_end 
@@ -12138,9 +12518,9 @@ static void initialize_live_ranges(
         */
        count = count_triples(state);
        /* Potentially I need one live range definitions for each
-        * instruction, plus an extra for the split routines.
+        * instruction.
         */
-       rstate->defs = count + 1;
+       rstate->defs = count;
        /* Potentially I need one live range for each instruction
         * plus an extra for the dummy live range.
         */
@@ -12162,7 +12542,6 @@ static void initialize_live_ranges(
                        struct reg_info info;
                        /* Find the architecture specific color information */
                        info = find_def_color(state, ins);
-
                        i++;
                        rstate->lr[i].defs    = &rstate->lrd[j];
                        rstate->lr[i].color   = info.reg;
@@ -12186,7 +12565,6 @@ static void initialize_live_ranges(
                ins = ins->next;
        } while(ins != first);
        rstate->ranges = i;
-       rstate->defs -= 1;
 
        /* Make a second pass to handle achitecture specific register
         * constraints.
@@ -12204,7 +12582,11 @@ static void initialize_live_ranges(
                        zlhs = 1;
                }
                zrhs = TRIPLE_RHS(ins->sizes);
-               
+
+#if DEBUG_COALESCING > 1
+               fprintf(stderr, "mandatory coalesce: %p %d %d\n",
+                       ins, zlhs, zrhs);
+#endif         
                for(i = 0; i < zlhs; i++) {
                        struct reg_info linfo;
                        struct live_range_def *lhs;
@@ -12217,6 +12599,11 @@ static void initialize_live_ranges(
                        } else {
                                lhs = &rstate->lrd[LHS(ins, i)->id];
                        }
+#if DEBUG_COALESCING > 1
+                       fprintf(stderr, "coalesce lhs(%d): %p %d\n",
+                               i, lhs, linfo.reg);
+               
+#endif         
                        for(j = 0; j < zrhs; j++) {
                                struct reg_info rinfo;
                                struct live_range_def *rhs;
@@ -12224,7 +12611,12 @@ static void initialize_live_ranges(
                                if (rinfo.reg < MAX_REGISTERS) {
                                        continue;
                                }
-                               rhs = &rstate->lrd[RHS(ins, i)->id];
+                               rhs = &rstate->lrd[RHS(ins, j)->id];
+#if DEBUG_COALESCING > 1
+                               fprintf(stderr, "coalesce rhs(%d): %p %d\n",
+                                       j, rhs, rinfo.reg);
+               
+#endif         
                                if (rinfo.reg == linfo.reg) {
                                        coalesce_ranges(state, rstate, 
                                                lhs->lr, rhs->lr);
@@ -12516,6 +12908,7 @@ static void fix_coalesce_conflicts(struct compile_state *state,
        struct reg_block *blocks, struct triple_reg_set *live,
        struct reg_block *rb, struct triple *ins, void *arg)
 {
+       int *conflicts = arg;
        int zlhs, zrhs, i, j;
 
        /* See if we have a mandatory coalesce operation between
@@ -12555,12 +12948,22 @@ static void fix_coalesce_conflicts(struct compile_state *state,
                                struct triple *copy;
                                copy = pre_copy(state, ins, j);
                                copy->id |= TRIPLE_FLAG_PRE_SPLIT;
+                               (*conflicts)++;
                        }
                }
        }
        return;
 }
 
+static int correct_coalesce_conflicts(
+       struct compile_state *state, struct reg_block *blocks)
+{
+       /* Tally of pre-copies that fix_coalesce_conflicts inserts during the walk. */
+       int inserted = 0;
+       walk_variable_lifetimes(state, blocks, fix_coalesce_conflicts, &inserted);
+       return inserted;
+}
+
 static void replace_set_use(struct compile_state *state,
        struct triple_reg_set *head, struct triple *orig, struct triple *new)
 {
@@ -12733,317 +13136,85 @@ static int correct_tangles(
        return tangles;
 }
 
-struct least_conflict {
-       struct reg_state *rstate;
-       struct live_range *ref_range;
-       struct triple *ins;
-       struct triple_reg_set *live;
-       size_t count;
-       int constraints;
-};
-static void least_conflict(struct compile_state *state,
-       struct reg_block *blocks, struct triple_reg_set *live,
-       struct reg_block *rb, struct triple *ins, void *arg)
-{
-       struct least_conflict *conflict = arg;
-       struct live_range_edge *edge;
-       struct triple_reg_set *set;
-       size_t count;
-       int constraints;
-
-#warning "FIXME handle instructions with left hand sides..."
-       /* Only instructions that introduce a new definition
-        * can be the conflict instruction.
-        */
-       if (!triple_is_def(state, ins)) {
-               return;
-       }
-
-       /* See if live ranges at this instruction are a
-        * strict subset of the live ranges that are in conflict.
-        */
-       count = 0;
-       for(set = live; set; set = set->next) {
-               struct live_range *lr;
-               lr = conflict->rstate->lrd[set->member->id].lr;
-               /* Ignore it if there cannot be an edge between these two nodes */
-               if (!arch_regcm_intersect(conflict->ref_range->classes, lr->classes)) {
-                       continue;
-               }
-               for(edge = conflict->ref_range->edges; edge; edge = edge->next) {
-                       if (edge->node == lr) {
-                               break;
-                       }
-               }
-               if (!edge && (lr != conflict->ref_range)) {
-                       return;
-               }
-               count++;
-       }
-       if (count <= 1) {
-               return;
-       }
 
-#if 0
-       /* See if there is an uncolored member in this subset. 
-        */
-        for(set = live; set; set = set->next) {
-               struct live_range *lr;
-               lr = conflict->rstate->lrd[set->member->id].lr;
-               if (lr->color == REG_UNSET) {
-                       break;
-               }
-       }
-       if (!set && (conflict->ref_range != REG_UNSET)) {
-               return;
-       }
-#endif
+static void ids_from_rstate(struct compile_state *state, struct reg_state *rstate);
+static void cleanup_rstate(struct compile_state *state, struct reg_state *rstate);
 
-       /* See if any of the live registers are constrained,
-        * if not it won't be productive to pick this as
-        * a conflict instruction.
-        */
-       constraints = 0;
-       for(set = live; set; set = set->next) {
-               struct triple_set *uset;
+struct triple *find_constrained_def(
+       struct compile_state *state, struct live_range *range, struct triple *constrained)
+{
+       struct live_range_def *lrd;
+       lrd = range->defs;
+       do {
                struct reg_info info;
-               unsigned classes;
-               unsigned cur_size, size;
-               /* Skip this instruction */
-               if (set->member == ins) {
-                       continue;
-               }
-               /* Find how many registers this value can potentially 
-                * be assigned to.
+               unsigned regcm;
+               int is_constrained;
+               regcm = arch_type_to_regcm(state, lrd->def->type);
+               info = find_lhs_color(state, lrd->def, 0);
+               regcm      = arch_regcm_reg_normalize(state, regcm);
+               info.regcm = arch_regcm_reg_normalize(state, info.regcm);
+               /* If the 2 register class masks are not equal then
+                * the current register class is constrained.
                 */
-               classes = arch_type_to_regcm(state, set->member->type);
-               size = regc_max_size(state, classes);
+               is_constrained = regcm != info.regcm;
                
-               /* Find how many registers we allow this value to
-                * be assigned to.
-                */
-               info = arch_reg_lhs(state, set->member, 0);
-               
-               /* If the value does not live in a register it
-                * isn't constrained.
-                */
-               if (info.reg == REG_UNNEEDED) {
-                       continue;
-               }
-               
-               if ((info.reg == REG_UNSET) || (info.reg >= MAX_REGISTERS)) {
-                       cur_size = regc_max_size(state, info.regcm);
-               } else {
-                       cur_size = 1;
-               }
-
-               /* If there is no difference between potential and
-                * actual register count there is not a constraint
+               /* Of the constrained live ranges deal with the
+                * least dominated one first.
                 */
-               if (cur_size >= size) {
-                       continue;
-               }
-               
-               /* If this live_range feeds into conflict->inds
-                * it isn't a constraint we can relieve.
-                */
-               for(uset = set->member->use; uset; uset = uset->next) {
-                       if (uset->member == ins) {
-                               break;
-                       }
-               }
-               if (uset) {
-                       continue;
-               }
-               constraints = 1;
-               break;
-       }
-       /* Don't drop canidates with constraints */
-       if (conflict->constraints && !constraints) {
-               return;
-       }
-
-
-#if 0
-       fprintf(stderr, "conflict ins? %p %s count: %d constraints: %d\n",
-               ins, tops(ins->op), count, constraints);
+               if (is_constrained) {
+#if DEBUG_RANGE_CONFLICTS
+                       fprintf(stderr, "canidate: %p %-8s regcm: %x %x\n",
+                               lrd->def, tops(lrd->def->op), regcm, info.regcm);
 #endif
-       /* Find the instruction with the largest possible subset of
-        * conflict ranges and that dominates any other instruction
-        * with an equal sized set of conflicting ranges.
-        */
-       if ((count > conflict->count) ||
-               ((count == conflict->count) &&
-                       tdominates(state, ins, conflict->ins))) {
-               struct triple_reg_set *next;
-               /* Remember the canidate instruction */
-               conflict->ins = ins;
-               conflict->count = count;
-               conflict->constraints = constraints;
-               /* Free the old collection of live registers */
-               for(set = conflict->live; set; set = next) {
-                       next = set->next;
-                       do_triple_unset(&conflict->live, set->member);
-               }
-               conflict->live = 0;
-               /* Rember the registers that are alive but do not feed
-                * into or out of conflict->ins.
-                */
-               for(set = live; set; set = set->next) {
-                       struct triple **expr;
-                       if (set->member == ins) {
-                               goto next;
-                       }
-                       expr = triple_rhs(state, ins, 0);
-                       for(;expr; expr = triple_rhs(state, ins, expr)) {
-                               if (*expr == set->member) {
-                                       goto next;
-                               }
+                       if (!constrained || 
+                               tdominates(state, lrd->def, constrained))
+                       {
+                               constrained = lrd->def;
                        }
-                       expr = triple_lhs(state, ins, 0);
-                       for(; expr; expr = triple_lhs(state, ins, expr)) {
-                               if (*expr == set->member) {
-                                       goto next;
-                               }
-                       }
-                       do_triple_set(&conflict->live, set->member, set->new);
-               next:
-                       ;
                }
-       }
-       return;
+               lrd = lrd->next;
+       } while(lrd != range->defs);
+       return constrained;
 }
 
-static void find_range_conflict(struct compile_state *state,
-       struct reg_state *rstate, char *used, struct live_range *ref_range,
-       struct least_conflict *conflict)
+static int split_constrained_ranges(
+       struct compile_state *state, struct reg_state *rstate, 
+       struct live_range *range)
 {
-
-       /* there are 3 kinds ways conflicts can occure.
-        * 1) the life time of 2 values simply overlap.
-        * 2) the 2 values feed into the same instruction.
-        * 3) the 2 values feed into a phi function.
-        */
-
-       /* find the instruction where the problematic conflict comes
-        * into existance.  that the instruction where all of
-        * the values are alive, and among such instructions it is
-        * the least dominated one.
-        *
-        * a value is alive an an instruction if either;
-        * 1) the value defintion dominates the instruction and there
-        *    is a use at or after that instrction
-        * 2) the value definition feeds into a phi function in the
-        *    same block as the instruction.  and the phi function
-        *    is at or after the instruction.
+       /* Walk through the edges in conflict and our current live
+        * range, and find definitions that are more severely constrained
+        * than the type of data they contain requires.
+        * 
+        * Then pick one of those ranges and relax the constraints.
         */
-       memset(conflict, 0, sizeof(*conflict));
-       conflict->rstate      = rstate;
-       conflict->ref_range   = ref_range;
-       conflict->ins         = 0;
-       conflict->live        = 0;
-       conflict->count       = 0;
-       conflict->constraints = 0;
-       walk_variable_lifetimes(state, rstate->blocks, least_conflict, conflict);
+       struct live_range_edge *edge;
+       struct triple *constrained;
 
-       if (!conflict->ins) {
-               internal_error(state, ref_range->defs->def, "No conflict ins?");
+       constrained = 0;
+       for(edge = range->edges; edge; edge = edge->next) {
+               constrained = find_constrained_def(state, edge->node, constrained);
        }
-       if (!conflict->live) {
-               internal_error(state, ref_range->defs->def, "No conflict live?");
+       if (!constrained) {
+               constrained = find_constrained_def(state, range, constrained);
        }
-#if 0
-       fprintf(stderr, "conflict ins: %p %s count: %d constraints: %d\n", 
-               conflict->ins, tops(conflict->ins->op),
-               conflict->count, conflict->constraints);
+#if DEBUG_RANGE_CONFLICTS
+       fprintf(stderr, "constrained: %p %-8s\n",
+               constrained, tops(constrained->op));
 #endif
-       return;
-}
-
-static struct triple *split_constrained_range(struct compile_state *state, 
-       struct reg_state *rstate, char *used, struct least_conflict *conflict)
-{
-       unsigned constrained_size;
-       struct triple *new, *constrained;
-       struct triple_reg_set *cset;
-       /* Find a range that is having problems because it is
-        * artificially constrained.
-        */
-       constrained_size = ~0;
-       constrained = 0;
-       new = 0;
-       for(cset = conflict->live; cset; cset = cset->next) {
-               struct triple_set *set;
-               struct reg_info info;
-               unsigned classes;
-               unsigned cur_size, size;
-               /* Skip the live range that starts with conflict->ins */
-               if (cset->member == conflict->ins) {
-                       continue;
-               }
-               /* Find how many registers this value can potentially
-                * be assigned to.
-                */
-               classes = arch_type_to_regcm(state, cset->member->type);
-               size = regc_max_size(state, classes);
-
-               /* Find how many registers we allow this value to
-                * be assigned to.
-                */
-               info = arch_reg_lhs(state, cset->member, 0);
-
-               /* If the register doesn't need a register 
-                * splitting it can't help.
-                */
-               if (info.reg == REG_UNNEEDED) {
-                       continue;
-               }
-#warning "FIXME do I need a call to arch_reg_rhs around here somewhere?"
-               if ((info.reg == REG_UNSET) || (info.reg >= MAX_REGISTERS)) {
-                       cur_size = regc_max_size(state, info.regcm);
-               } else {
-                       cur_size = 1;
-               }
-               /* If this live_range feeds into conflict->ins
-                * splitting it is unlikely to help.
-                */
-               for(set = cset->member->use; set; set = set->next) {
-                       if (set->member == conflict->ins) {
-                               goto next;
-                       }
-               }
-
-               /* If there is no difference between potential and
-                * actual register count there is nothing to do.
-                */
-               if (cur_size >= size) {
-                       continue;
-               }
-               /* Of the constrained registers deal with the
-                * most constrained one first.
-                */
-               if (!constrained ||
-                       (size < constrained_size)) {
-                       constrained = cset->member;
-                       constrained_size = size;
-               }
-       next:
-               ;
-       }
        if (constrained) {
-               new = post_copy(state, constrained);
-               new->id |= TRIPLE_FLAG_POST_SPLIT;
+               ids_from_rstate(state, rstate);
+               cleanup_rstate(state, rstate);
+               resolve_tangle(state, constrained);
        }
-       return new;
+       return !!constrained;
 }
-
+       
 static int split_ranges(
-       struct compile_state *state, struct reg_state *rstate, 
+       struct compile_state *state, struct reg_state *rstate,
        char *used, struct live_range *range)
 {
-       struct triple *new;
-
-#if 0
+       int split;
+#if DEBUG_RANGE_CONFLICTS
        fprintf(stderr, "split_ranges %d %s %p\n", 
                rstate->passes, tops(range->defs->def->op), range->defs->def);
 #endif
@@ -13051,71 +13222,41 @@ static int split_ranges(
                (rstate->passes >= rstate->max_passes)) {
                return 0;
        }
-       new = 0;
-       /* If I can't allocate a register something needs to be split */
-       if (arch_select_free_register(state, used, range->classes) == REG_UNSET) {
-               struct least_conflict conflict;
-
-#if 0
-       fprintf(stderr, "find_range_conflict\n");
-#endif
-               /* Find where in the set of registers the conflict
-                * actually occurs.
-                */
-               find_range_conflict(state, rstate, used, range, &conflict);
+       split = split_constrained_ranges(state, rstate, range);
 
-               /* If a range has been artifically constrained split it */
-               new = split_constrained_range(state, rstate, used, &conflict);
-               
-               if (!new) {
-               /* Ideally I would split the live range that will not be used
-                * for the longest period of time in hopes that this will 
-                * (a) allow me to spill a register or
-                * (b) allow me to place a value in another register.
-                *
-                * So far I don't have a test case for this, the resolving
-                * of mandatory constraints has solved all of my
-                * know issues.  So I have choosen not to write any
-                * code until I cat get a better feel for cases where
-                * it would be useful to have.
-                *
-                */
+       /* Ideally I would split the live range that will not be used
+        * for the longest period of time in hopes that this will 
+        * (a) allow me to spill a register or
+        * (b) allow me to place a value in another register.
+        *
+        * So far I don't have a test case for this, the resolving
+        * of mandatory constraints has solved all of my
+        * known issues.  So I have chosen not to write any
+        * code until I can get a better feel for cases where
+        * it would be useful to have.
+        *
+        */
 #warning "WISHLIST implement live range splitting..."
-#if 0
-                       print_blocks(state, stderr);
-                       print_dominators(state, stderr);
-
-#endif
-                       return 0;
-               }
-       }
-       if (new) {
-               rstate->lrd[rstate->defs].orig_id = new->id;
-               new->id = rstate->defs;
-               rstate->defs++;
-#if 0
-               fprintf(stderr, "new: %p old: %s %p\n", 
-                       new, tops(RHS(new, 0)->op), RHS(new, 0));
-#endif
-#if 0
-               print_blocks(state, stderr);
+       if ((DEBUG_RANGE_CONFLICTS > 1) && 
+               (!split || (DEBUG_RANGE_CONFLICTS > 2))) {
+               print_interference_blocks(state, rstate, stderr, 0);
                print_dominators(state, stderr);
-
-#endif
-               return 1;
        }
-       return 0;
+       return split;
 }
 
 #if DEBUG_COLOR_GRAPH > 1
 #define cgdebug_printf(...) fprintf(stdout, __VA_ARGS__)
 #define cgdebug_flush() fflush(stdout)
+#define cgdebug_loc(STATE, TRIPLE) loc(stdout, STATE, TRIPLE)
 #elif DEBUG_COLOR_GRAPH == 1
 #define cgdebug_printf(...) fprintf(stderr, __VA_ARGS__)
 #define cgdebug_flush() fflush(stderr)
+#define cgdebug_loc(STATE, TRIPLE) loc(stderr, STATE, TRIPLE)
 #else
 #define cgdebug_printf(...)
 #define cgdebug_flush()
+#define cgdebug_loc(STATE, TRIPLE)
 #endif
 
        
@@ -13161,8 +13302,6 @@ static int select_free_color(struct compile_state *state,
        }       
 #endif
 
-#warning "FIXME detect conflicts caused by the source and destination being the same register"
-
        /* If a color is already assigned see if it will work */
        if (range->color != REG_UNSET) {
                struct live_range_def *lrd;
@@ -13203,6 +13342,7 @@ static int select_free_color(struct compile_state *state,
                entry = lrd->def->use;
                for(;(range->color == REG_UNSET) && entry; entry = entry->next) {
                        struct live_range_def *insd;
+                       unsigned regcm;
                        insd = &rstate->lrd[entry->member->id];
                        if (insd->lr->defs == 0) {
                                continue;
@@ -13211,8 +13351,11 @@ static int select_free_color(struct compile_state *state,
                                !interfere(rstate, range, insd->lr)) {
                                phi = insd;
                        }
-                       if ((insd->lr->color == REG_UNSET) ||
-                               ((insd->lr->classes & range->classes) == 0) ||
+                       if (insd->lr->color == REG_UNSET) {
+                               continue;
+                       }
+                       regcm = insd->lr->classes;
+                       if (((regcm & range->classes) == 0) ||
                                (used[insd->lr->color])) {
                                continue;
                        }
@@ -13235,12 +13378,16 @@ static int select_free_color(struct compile_state *state,
                        expr = triple_rhs(state, phi->def, 0);
                        for(; expr; expr = triple_rhs(state, phi->def, expr)) {
                                struct live_range *lr;
+                               unsigned regcm;
                                if (!*expr) {
                                        continue;
                                }
                                lr = rstate->lrd[(*expr)->id].lr;
-                               if ((lr->color == REG_UNSET) || 
-                                       ((lr->classes & range->classes) == 0) ||
+                               if (lr->color == REG_UNSET) {
+                                       continue;
+                               }
+                               regcm = lr->classes;
+                               if (((regcm & range->classes) == 0) ||
                                        (used[lr->color])) {
                                        continue;
                                }
@@ -13257,12 +13404,16 @@ static int select_free_color(struct compile_state *state,
                expr = triple_rhs(state, lrd->def, 0);
                for(; expr; expr = triple_rhs(state, lrd->def, expr)) {
                        struct live_range *lr;
+                       unsigned regcm;
                        if (!*expr) {
                                continue;
                        }
-                       lr = rstate->lrd[(*expr)->id].lr;
-                       if ((lr->color == -1) || 
-                               ((lr->classes & range->classes) == 0) ||
+                       lr = rstate->lrd[(*expr)->id].lr;
+                       if (lr->color == REG_UNSET) {
+                               continue;
+                       }
+                       regcm = lr->classes;
+                       if (((regcm & range->classes) == 0) ||
                                (used[lr->color])) {
                                continue;
                        }
@@ -13291,16 +13442,16 @@ static int select_free_color(struct compile_state *state,
                                arch_reg_str(edge->node->color));
                        lrd = edge->node->defs;
                        do {
-                               warning(state, lrd->def, " %s",
-                                       tops(lrd->def->op));
+                               warning(state, lrd->def, " %s %p",
+                                       tops(lrd->def->op), lrd->def);
                                lrd = lrd->next;
                        } while(lrd != edge->node->defs);
                }
                warning(state, range->defs->def, "range: ");
                lrd = range->defs;
                do {
-                       warning(state, lrd->def, " %s",
-                               tops(lrd->def->op));
+                       warning(state, lrd->def, " %s %p",
+                               tops(lrd->def->op), lrd->def);
                        lrd = lrd->next;
                } while(lrd != range->defs);
                        
@@ -13318,8 +13469,8 @@ static int select_free_color(struct compile_state *state,
                internal_error(state, range->defs->def, "too few registers");
 #endif
        }
-       range->classes = arch_reg_regcm(state, range->color);
-       if (range->color == -1) {
+       range->classes &= arch_reg_regcm(state, range->color);
+       if ((range->color == REG_UNSET) || (range->classes == 0)) {
                internal_error(state, range->defs->def, "select_free_color did not?");
        }
        return 1;
@@ -13398,9 +13549,8 @@ static int color_graph(struct compile_state *state, struct reg_state *rstate)
        }
        colored = color_graph(state, rstate);
        if (colored) {
-               cgdebug_printf("Coloring %d @%s:%d.%d:", 
-                       range - rstate->lr,
-                       range->def->filename, range->def->line, range->def->col);
+               cgdebug_printf("Coloring %d @", range - rstate->lr);
+               cgdebug_loc(state, range->defs->def);
                cgdebug_flush();
                colored = select_free_color(state, rstate, range);
                cgdebug_printf(" %s\n", arch_reg_str(range->color));
@@ -13462,154 +13612,6 @@ static void color_triples(struct compile_state *state, struct reg_state *rstate)
        } while (ins != first);
 }
 
-static void print_interference_block(
-       struct compile_state *state, struct block *block, void *arg)
-
-{
-       struct reg_state *rstate = arg;
-       struct reg_block *rb;
-       struct triple *ptr;
-       int phi_present;
-       int done;
-       rb = &rstate->blocks[block->vertex];
-
-       printf("\nblock: %p (%d), %p<-%p %p<-%p\n", 
-               block, 
-               block->vertex,
-               block->left, 
-               block->left && block->left->use?block->left->use->member : 0,
-               block->right, 
-               block->right && block->right->use?block->right->use->member : 0);
-       if (rb->in) {
-               struct triple_reg_set *in_set;
-               printf("        in:");
-               for(in_set = rb->in; in_set; in_set = in_set->next) {
-                       printf(" %-10p", in_set->member);
-               }
-               printf("\n");
-       }
-       phi_present = 0;
-       for(done = 0, ptr = block->first; !done; ptr = ptr->next) {
-               done = (ptr == block->last);
-               if (ptr->op == OP_PHI) {
-                       phi_present = 1;
-                       break;
-               }
-       }
-       if (phi_present) {
-               int edge;
-               for(edge = 0; edge < block->users; edge++) {
-                       printf("     in(%d):", edge);
-                       for(done = 0, ptr = block->first; !done; ptr = ptr->next) {
-                               struct triple **slot;
-                               done = (ptr == block->last);
-                               if (ptr->op != OP_PHI) {
-                                       continue;
-                               }
-                               slot = &RHS(ptr, 0);
-                               printf(" %-10p", slot[edge]);
-                       }
-                       printf("\n");
-               }
-       }
-       if (block->first->op == OP_LABEL) {
-               printf("%p:\n", block->first);
-       }
-       for(done = 0, ptr = block->first; !done; ptr = ptr->next) {
-               struct triple_set *user;
-               struct live_range *lr;
-               unsigned id;
-               int op;
-               op = ptr->op;
-               done = (ptr == block->last);
-               lr = rstate->lrd[ptr->id].lr;
-               
-               if (triple_stores_block(state, ptr)) {
-                       if (ptr->u.block != block) {
-                               internal_error(state, ptr, 
-                                       "Wrong block pointer: %p",
-                                       ptr->u.block);
-                       }
-               }
-               if (op == OP_ADECL) {
-                       for(user = ptr->use; user; user = user->next) {
-                               if (!user->member->u.block) {
-                                       internal_error(state, user->member, 
-                                               "Use %p not in a block?",
-                                               user->member);
-                               }
-                               
-                       }
-               }
-               id = ptr->id;
-               ptr->id = rstate->lrd[id].orig_id;
-               SET_REG(ptr->id, lr->color);
-               display_triple(stdout, ptr);
-               ptr->id = id;
-
-               if (triple_is_def(state, ptr) && (lr->defs == 0)) {
-                       internal_error(state, ptr, "lr has no defs!");
-               }
-
-               if (lr->defs) {
-                       struct live_range_def *lrd;
-                       printf("       range:");
-                       lrd = lr->defs;
-                       do {
-                               printf(" %-10p", lrd->def);
-                               lrd = lrd->next;
-                       } while(lrd != lr->defs);
-                       printf("\n");
-               }
-               if (lr->edges > 0) {
-                       struct live_range_edge *edge;
-                       printf("       edges:");
-                       for(edge = lr->edges; edge; edge = edge->next) {
-                               struct live_range_def *lrd;
-                               lrd = edge->node->defs;
-                               do {
-                                       printf(" %-10p", lrd->def);
-                                       lrd = lrd->next;
-                               } while(lrd != edge->node->defs);
-                               printf("|");
-                       }
-                       printf("\n");
-               }
-               /* Do a bunch of sanity checks */
-               valid_ins(state, ptr);
-               if ((ptr->id < 0) || (ptr->id > rstate->defs)) {
-                       internal_error(state, ptr, "Invalid triple id: %d",
-                               ptr->id);
-               }
-               for(user = ptr->use; user; user = user->next) {
-                       struct triple *use;
-                       struct live_range *ulr;
-                       use = user->member;
-                       valid_ins(state, use);
-                       if ((use->id < 0) || (use->id > rstate->defs)) {
-                               internal_error(state, use, "Invalid triple id: %d",
-                                       use->id);
-                       }
-                       ulr = rstate->lrd[user->member->id].lr;
-                       if (triple_stores_block(state, user->member) &&
-                               !user->member->u.block) {
-                               internal_error(state, user->member,
-                                       "Use %p not in a block?",
-                                       user->member);
-                       }
-               }
-       }
-       if (rb->out) {
-               struct triple_reg_set *out_set;
-               printf("       out:");
-               for(out_set = rb->out; out_set; out_set = out_set->next) {
-                       printf(" %-10p", out_set->member);
-               }
-               printf("\n");
-       }
-       printf("\n");
-}
-
 static struct live_range *merge_sort_lr(
        struct live_range *first, struct live_range *last)
 {
@@ -13724,10 +13726,11 @@ static void allocate_registers(struct compile_state *state)
 
        do {
                struct live_range **point, **next;
+               int conflicts;
                int tangles;
                int coalesced;
 
-#if 0
+#if DEBUG_RANGE_CONFLICTS
                fprintf(stderr, "pass: %d\n", rstate.passes);
 #endif
 
@@ -13741,8 +13744,7 @@ static void allocate_registers(struct compile_state *state)
                rstate.blocks = compute_variable_lifetimes(state);
 
                /* Fix invalid mandatory live range coalesce conflicts */
-               walk_variable_lifetimes(
-                       state, rstate.blocks, fix_coalesce_conflicts, 0);
+               conflicts = correct_coalesce_conflicts(state, rstate.blocks);
 
                /* Fix two simultaneous uses of the same register.
                 * In a few pathlogical cases a partial untangle moves
@@ -13770,7 +13772,7 @@ static void allocate_registers(struct compile_state *state)
                 *  yields some benefit.
                 */
                do {
-#if 0
+#if DEBUG_COALESCING
                        fprintf(stderr, "coalescing\n");
 #endif                 
                        /* Remove any previous live edge calculations */
@@ -13782,8 +13784,7 @@ static void allocate_registers(struct compile_state *state)
                        
                        /* Display the interference graph if desired */
                        if (state->debug & DEBUG_INTERFERENCE) {
-                               printf("\nlive variables by block\n");
-                               walk_blocks(state, print_interference_block, &rstate);
+                               print_interference_blocks(state, &rstate, stdout, 1);
                                printf("\nlive variables by instruction\n");
                                walk_variable_lifetimes(
                                        state, rstate.blocks, 
@@ -13792,7 +13793,7 @@ static void allocate_registers(struct compile_state *state)
                        
                        coalesced = coalesce_live_ranges(state, &rstate);
 
-#if 0
+#if DEBUG_COALESCING
                        fprintf(stderr, "coalesced: %d\n", coalesced);
 #endif
                } while(coalesced);
@@ -14639,24 +14640,99 @@ static void verify_uses(struct compile_state *state)
                ins = ins->next;
        } while(ins != first);
        
+}
+/* Walk the triple list that starts at RHS(state->main_function, 0) and
+ * raise an internal error for any block-storing triple whose u.block is unset.
+ */
+static void verify_blocks_present(struct compile_state *state)
+{
+       struct triple *head, *cur;
+       if (!state->first_block) {
+               return; /* no first block recorded, nothing to check */
+       }
+       /* Follow ->next from the first triple until we are back at it. */
+       head = RHS(state->main_function, 0);
+       cur = head;
+       do {
+               valid_ins(state, cur);
+               if (triple_stores_block(state, cur) && !cur->u.block) {
+                       internal_error(state, cur,
+                               "%p not in a block?\n", cur);
+               }
+               cur = cur->next;
+       } while(cur != head);
+}
 static void verify_blocks(struct compile_state *state)
 {
        struct triple *ins;
        struct block *block;
+       int blocks;
        block = state->first_block;
        if (!block) {
                return;
        }
+       blocks = 0;
        do {
+               int users;
+               struct block_set *user;
+               blocks++;
                for(ins = block->first; ins != block->last->next; ins = ins->next) {
-                       if (!triple_stores_block(state, ins)) {
+                       if (triple_stores_block(state, ins) && (ins->u.block != block)) {
+                               internal_error(state, ins, "inconsitent block specified");
+                       }
+                       valid_ins(state, ins);
+               }
+               users = 0;
+               for(user = block->use; user; user = user->next) {
+                       users++;
+                       if ((block == state->last_block) &&
+                               (user->member == state->first_block)) {
                                continue;
                        }
-                       if (ins->u.block != block) {
-                               internal_error(state, ins, "inconsitent block specified");
+                       if ((user->member->left != block) &&
+                               (user->member->right != block)) {
+                               internal_error(state, user->member->first,
+                                       "user does not use block");
+                       }
+               }
+               if (triple_is_branch(state, block->last) &&
+                       (block->right != block_of_triple(state, TARG(block->last, 0))))
+               {
+                       internal_error(state, block->last, "block->right != TARG(0)");
+               }
+               if (!triple_is_uncond_branch(state, block->last) &&
+                       (block != state->last_block) &&
+                       (block->left != block_of_triple(state, block->last->next)))
+               {
+                       internal_error(state, block->last, "block->left != block->last->next");
+               }
+               if (block->left) {
+                       for(user = block->left->use; user; user = user->next) {
+                               if (user->member == block) {
+                                       break;
+                               }
+                       }
+                       if (!user || user->member != block) {
+                               internal_error(state, block->first,
+                                       "block does not use left");
+                       }
+               }
+               if (block->right) {
+                       for(user = block->right->use; user; user = user->next) {
+                               if (user->member == block) {
+                                       break;
+                               }
+                       }
+                       if (!user || user->member != block) {
+                               internal_error(state, block->first,
+                                       "block does not use right");
                        }
                }
+               if (block->users != users) {
+                       internal_error(state, block->first, 
+                               "computed users %d != stored users %d\n",
+                               users, block->users);
+               }
                if (!triple_stores_block(state, block->last->next)) {
                        internal_error(state, block->last->next, 
                                "cannot find next block");
@@ -14667,6 +14743,10 @@ static void verify_blocks(struct compile_state *state)
                                "bad next block");
                }
        } while(block != state->first_block);
+       if (blocks != state->last_vertex) { /* walked count must match stored count */
+               internal_error(state, 0, "computed blocks %d != stored blocks %d\n",
+                       blocks, state->last_vertex);
+       }
 }
 
 static void verify_domination(struct compile_state *state)
@@ -14711,9 +14791,6 @@ static void verify_piece(struct compile_state *state)
                struct triple *ptr;
                int lhs, i;
                lhs = TRIPLE_LHS(ins->sizes);
-               if ((ins->op == OP_WRITE) || (ins->op == OP_STORE)) {
-                       lhs = 0;
-               }
                for(ptr = ins->next, i = 0; i < lhs; i++, ptr = ptr->next) {
                        if (ptr != LHS(ins, i)) {
                                internal_error(state, ins, "malformed lhs on %s",
@@ -14744,6 +14821,7 @@ static void verify_ins_colors(struct compile_state *state)
 static void verify_consistency(struct compile_state *state)
 {
        verify_uses(state);
+       verify_blocks_present(state);
        verify_blocks(state);
        verify_domination(state);
        verify_piece(state);
@@ -14768,8 +14846,19 @@ static void optimize(struct compile_state *state)
        setup_basic_blocks(state);
        analyze_idominators(state);
        analyze_ipdominators(state);
-       /* Transform the code to ssa form */
+
+       /* Transform the code to ssa form. */
+       /*
+        * The transformation to ssa form puts a phi function
+        * on each edge of a dominance frontier where that
+        * phi function might be needed.  At -O2 if we don't
+        * eliminate the excess phi functions we can get
+        * exponential code size growth.  So I kill the extra
+        * phi functions early and I kill them often.
+        */
        transform_to_ssa_form(state);
+       eliminate_inefectual_code(state);
+
        verify_consistency(state);
        if (state->debug & DEBUG_CODE_ELIMINATION) {
                fprintf(stdout, "After transform_to_ssa_form\n");
@@ -14778,11 +14867,21 @@ static void optimize(struct compile_state *state)
        /* Do strength reduction and simple constant optimizations */
        if (state->optimize >= 1) {
                simplify_all(state);
+               transform_from_ssa_form(state);
+               free_basic_blocks(state);
+               setup_basic_blocks(state);
+               analyze_idominators(state);
+               analyze_ipdominators(state);
+               transform_to_ssa_form(state);
+               eliminate_inefectual_code(state);
+       }
+       if (state->debug & DEBUG_CODE_ELIMINATION) {
+               fprintf(stdout, "After simplify_all\n");
+               print_blocks(state, stdout);
        }
        verify_consistency(state);
        /* Propogate constants throughout the code */
        if (state->optimize >= 2) {
-#warning "FIXME fix scc_transform"
                scc_transform(state);
                transform_from_ssa_form(state);
                free_basic_blocks(state);
@@ -14790,6 +14889,7 @@ static void optimize(struct compile_state *state)
                analyze_idominators(state);
                analyze_ipdominators(state);
                transform_to_ssa_form(state);
+               eliminate_inefectual_code(state);
        }
        verify_consistency(state);
 #warning "WISHLIST implement single use constants (least possible register pressure)"
@@ -14905,37 +15005,41 @@ static void print_op_asm(struct compile_state *state,
 #define CPU_DEFAULT  CPU_I386
 
 /* The x86 register classes */
-#define REGC_FLAGS    0
-#define REGC_GPR8     1
-#define REGC_GPR16    2
-#define REGC_GPR32    3
-#define REGC_GPR64    4
-#define REGC_MMX      5
-#define REGC_XMM      6
-#define REGC_GPR32_8  7
-#define REGC_GPR16_8  8
-#define REGC_IMM32    9
-#define REGC_IMM16   10
-#define REGC_IMM8    11
+#define REGC_FLAGS       0
+#define REGC_GPR8        1 /* REG_AL .. REG_DH */
+#define REGC_GPR16       2
+#define REGC_GPR32       3 /* REG_EAX .. REG_ESP */
+#define REGC_DIVIDEND64  4 /* REG_EDXEAX only */
+#define REGC_DIVIDEND32  5 /* REG_DXAX only */
+#define REGC_MMX         6 /* REG_MMX0 .. REG_MMX7 */
+#define REGC_XMM         7 /* REG_XMM0 .. REG_XMM7 */
+#define REGC_GPR32_8     8
+#define REGC_GPR16_8     9
+#define REGC_GPR8_LO    10 /* REG_AL .. REG_DL */
+#define REGC_IMM32      11 /* immediate classes: regc_size is 0 */
+#define REGC_IMM16      12
+#define REGC_IMM8       13
 #define LAST_REGC  REGC_IMM8
 #if LAST_REGC >= MAX_REGC
 #error "MAX_REGC is to low"
 #endif
 
 /* Register class masks */
-#define REGCM_FLAGS   (1 << REGC_FLAGS)
-#define REGCM_GPR8    (1 << REGC_GPR8)
-#define REGCM_GPR16   (1 << REGC_GPR16)
-#define REGCM_GPR32   (1 << REGC_GPR32)
-#define REGCM_GPR64   (1 << REGC_GPR64)
-#define REGCM_MMX     (1 << REGC_MMX)
-#define REGCM_XMM     (1 << REGC_XMM)
-#define REGCM_GPR32_8 (1 << REGC_GPR32_8)
-#define REGCM_GPR16_8 (1 << REGC_GPR16_8)
-#define REGCM_IMM32   (1 << REGC_IMM32)
-#define REGCM_IMM16   (1 << REGC_IMM16)
-#define REGCM_IMM8    (1 << REGC_IMM8)
-#define REGCM_ALL     ((1 << (LAST_REGC + 1)) - 1)
+#define REGCM_FLAGS      (1 << REGC_FLAGS) /* one bit per REGC_* class number */
+#define REGCM_GPR8       (1 << REGC_GPR8)
+#define REGCM_GPR16      (1 << REGC_GPR16)
+#define REGCM_GPR32      (1 << REGC_GPR32)
+#define REGCM_DIVIDEND64 (1 << REGC_DIVIDEND64)
+#define REGCM_DIVIDEND32 (1 << REGC_DIVIDEND32)
+#define REGCM_MMX        (1 << REGC_MMX)
+#define REGCM_XMM        (1 << REGC_XMM)
+#define REGCM_GPR32_8    (1 << REGC_GPR32_8)
+#define REGCM_GPR16_8    (1 << REGC_GPR16_8)
+#define REGCM_GPR8_LO    (1 << REGC_GPR8_LO)
+#define REGCM_IMM32      (1 << REGC_IMM32)
+#define REGCM_IMM16      (1 << REGC_IMM16)
+#define REGCM_IMM8       (1 << REGC_IMM8)
+#define REGCM_ALL        ((1 << (LAST_REGC + 1)) - 1) /* bits 0 .. LAST_REGC all set */
 
 /* The x86 registers */
 #define REG_EFLAGS  2
@@ -14949,12 +15053,10 @@ static void print_op_asm(struct compile_state *state,
 #define REG_BH      8
 #define REG_CH      9
 #define REG_DH      10
+#define REGC_GPR8_LO_FIRST REG_AL
+#define REGC_GPR8_LO_LAST  REG_DL
 #define REGC_GPR8_FIRST  REG_AL
-#if X86_4_8BIT_GPRS
-#define REGC_GPR8_LAST   REG_DL
-#else 
 #define REGC_GPR8_LAST   REG_DH
-#endif
 #define REG_AX     11
 #define REG_BX     12
 #define REG_CX     13
@@ -14976,26 +15078,29 @@ static void print_op_asm(struct compile_state *state,
 #define REGC_GPR32_FIRST REG_EAX
 #define REGC_GPR32_LAST  REG_ESP
 #define REG_EDXEAX 27
-#define REGC_GPR64_FIRST REG_EDXEAX
-#define REGC_GPR64_LAST  REG_EDXEAX
-#define REG_MMX0   28
-#define REG_MMX1   29
-#define REG_MMX2   30
-#define REG_MMX3   31
-#define REG_MMX4   32
-#define REG_MMX5   33
-#define REG_MMX6   34
-#define REG_MMX7   35
+#define REGC_DIVIDEND64_FIRST REG_EDXEAX
+#define REGC_DIVIDEND64_LAST  REG_EDXEAX
+#define REG_DXAX   28
+#define REGC_DIVIDEND32_FIRST REG_DXAX
+#define REGC_DIVIDEND32_LAST  REG_DXAX
+#define REG_MMX0   29
+#define REG_MMX1   30
+#define REG_MMX2   31
+#define REG_MMX3   32
+#define REG_MMX4   33
+#define REG_MMX5   34
+#define REG_MMX6   35
+#define REG_MMX7   36
 #define REGC_MMX_FIRST REG_MMX0
 #define REGC_MMX_LAST  REG_MMX7
-#define REG_XMM0   36
-#define REG_XMM1   37
-#define REG_XMM2   38
-#define REG_XMM3   39
-#define REG_XMM4   40
-#define REG_XMM5   41
-#define REG_XMM6   42
-#define REG_XMM7   43
+#define REG_XMM0   37
+#define REG_XMM1   38
+#define REG_XMM2   39
+#define REG_XMM3   40
+#define REG_XMM4   41
+#define REG_XMM5   42
+#define REG_XMM6   43
+#define REG_XMM7   44
 #define REGC_XMM_FIRST REG_XMM0
 #define REGC_XMM_LAST  REG_XMM7
 #warning "WISHLIST figure out how to use pinsrw and pextrw to better use extended regs"
@@ -15019,35 +15124,39 @@ static void print_op_asm(struct compile_state *state,
 
 
 static unsigned regc_size[LAST_REGC +1] = {
-       [REGC_FLAGS]   = REGC_FLAGS_LAST   - REGC_FLAGS_FIRST + 1,
-       [REGC_GPR8]    = REGC_GPR8_LAST    - REGC_GPR8_FIRST + 1,
-       [REGC_GPR16]   = REGC_GPR16_LAST   - REGC_GPR16_FIRST + 1,
-       [REGC_GPR32]   = REGC_GPR32_LAST   - REGC_GPR32_FIRST + 1,
-       [REGC_GPR64]   = REGC_GPR64_LAST   - REGC_GPR64_FIRST + 1,
-       [REGC_MMX]     = REGC_MMX_LAST     - REGC_MMX_FIRST + 1,
-       [REGC_XMM]     = REGC_XMM_LAST     - REGC_XMM_FIRST + 1,
-       [REGC_GPR32_8] = REGC_GPR32_8_LAST - REGC_GPR32_8_FIRST + 1,
-       [REGC_GPR16_8] = REGC_GPR16_8_LAST - REGC_GPR16_8_FIRST + 1,
-       [REGC_IMM32]   = 0,
-       [REGC_IMM16]   = 0,
-       [REGC_IMM8]    = 0,
+       [REGC_FLAGS]      = REGC_FLAGS_LAST      - REGC_FLAGS_FIRST + 1,
+       [REGC_GPR8]       = REGC_GPR8_LAST       - REGC_GPR8_FIRST + 1,
+       [REGC_GPR16]      = REGC_GPR16_LAST      - REGC_GPR16_FIRST + 1,
+       [REGC_GPR32]      = REGC_GPR32_LAST      - REGC_GPR32_FIRST + 1,
+       [REGC_DIVIDEND64] = REGC_DIVIDEND64_LAST - REGC_DIVIDEND64_FIRST + 1,
+       [REGC_DIVIDEND32] = REGC_DIVIDEND32_LAST - REGC_DIVIDEND32_FIRST + 1,
+       [REGC_MMX]        = REGC_MMX_LAST        - REGC_MMX_FIRST + 1,
+       [REGC_XMM]        = REGC_XMM_LAST        - REGC_XMM_FIRST + 1,
+       [REGC_GPR32_8]    = REGC_GPR32_8_LAST    - REGC_GPR32_8_FIRST + 1,
+       [REGC_GPR16_8]    = REGC_GPR16_8_LAST    - REGC_GPR16_8_FIRST + 1,
+       [REGC_GPR8_LO]    = REGC_GPR8_LO_LAST    - REGC_GPR8_LO_FIRST + 1,
+       [REGC_IMM32]      = 0,
+       [REGC_IMM16]      = 0,
+       [REGC_IMM8]       = 0,
 };
 
 static const struct {
        int first, last;
 } regcm_bound[LAST_REGC + 1] = {
-       [REGC_FLAGS]   = { REGC_FLAGS_FIRST,   REGC_FLAGS_LAST },
-       [REGC_GPR8]    = { REGC_GPR8_FIRST,    REGC_GPR8_LAST },
-       [REGC_GPR16]   = { REGC_GPR16_FIRST,   REGC_GPR16_LAST },
-       [REGC_GPR32]   = { REGC_GPR32_FIRST,   REGC_GPR32_LAST },
-       [REGC_GPR64]   = { REGC_GPR64_FIRST,   REGC_GPR64_LAST },
-       [REGC_MMX]     = { REGC_MMX_FIRST,     REGC_MMX_LAST },
-       [REGC_XMM]     = { REGC_XMM_FIRST,     REGC_XMM_LAST },
-       [REGC_GPR32_8] = { REGC_GPR32_8_FIRST, REGC_GPR32_8_LAST },
-       [REGC_GPR16_8] = { REGC_GPR16_8_FIRST, REGC_GPR16_8_LAST },
-       [REGC_IMM32]   = { REGC_IMM32_FIRST,   REGC_IMM32_LAST },
-       [REGC_IMM16]   = { REGC_IMM16_FIRST,   REGC_IMM16_LAST },
-       [REGC_IMM8]    = { REGC_IMM8_FIRST,    REGC_IMM8_LAST },
+       [REGC_FLAGS]      = { REGC_FLAGS_FIRST,      REGC_FLAGS_LAST },
+       [REGC_GPR8]       = { REGC_GPR8_FIRST,       REGC_GPR8_LAST },
+       [REGC_GPR16]      = { REGC_GPR16_FIRST,      REGC_GPR16_LAST },
+       [REGC_GPR32]      = { REGC_GPR32_FIRST,      REGC_GPR32_LAST },
+       [REGC_DIVIDEND64] = { REGC_DIVIDEND64_FIRST, REGC_DIVIDEND64_LAST },
+       [REGC_DIVIDEND32] = { REGC_DIVIDEND32_FIRST, REGC_DIVIDEND32_LAST },
+       [REGC_MMX]        = { REGC_MMX_FIRST,        REGC_MMX_LAST },
+       [REGC_XMM]        = { REGC_XMM_FIRST,        REGC_XMM_LAST },
+       [REGC_GPR32_8]    = { REGC_GPR32_8_FIRST,    REGC_GPR32_8_LAST },
+       [REGC_GPR16_8]    = { REGC_GPR16_8_FIRST,    REGC_GPR16_8_LAST },
+       [REGC_GPR8_LO]    = { REGC_GPR8_LO_FIRST,    REGC_GPR8_LO_LAST },
+       [REGC_IMM32]      = { REGC_IMM32_FIRST,      REGC_IMM32_LAST },
+       [REGC_IMM16]      = { REGC_IMM16_FIRST,      REGC_IMM16_LAST },
+       [REGC_IMM8]       = { REGC_IMM8_FIRST,       REGC_IMM8_LAST },
 };
 
 static int arch_encode_cpu(const char *cpu)
@@ -15079,11 +15188,13 @@ static unsigned arch_regc_size(struct compile_state *state, int class)
        }
        return regc_size[class];
 }
+
 static int arch_regcm_intersect(unsigned regcm1, unsigned regcm2)
 {
        /* See if two register classes may have overlapping registers */
-       unsigned gpr_mask = REGCM_GPR8 | REGCM_GPR16_8 | REGCM_GPR16 |
-               REGCM_GPR32_8 | REGCM_GPR32 | REGCM_GPR64;
+       unsigned gpr_mask = REGCM_GPR8 | REGCM_GPR8_LO | REGCM_GPR16_8 | REGCM_GPR16 |
+               REGCM_GPR32_8 | REGCM_GPR32 | 
+               REGCM_DIVIDEND32 | REGCM_DIVIDEND64;
 
        /* Special case for the immediates */
        if ((regcm1 & (REGCM_IMM32 | REGCM_IMM16 | REGCM_IMM8)) &&
@@ -15110,6 +15221,7 @@ static void arch_reg_equivs(
 #endif
                *equiv++ = REG_AX;
                *equiv++ = REG_EAX;
+               *equiv++ = REG_DXAX;
                *equiv++ = REG_EDXEAX;
                break;
        case REG_AH:
@@ -15118,6 +15230,7 @@ static void arch_reg_equivs(
 #endif
                *equiv++ = REG_AX;
                *equiv++ = REG_EAX;
+               *equiv++ = REG_DXAX;
                *equiv++ = REG_EDXEAX;
                break;
        case REG_BL:  
@@ -15156,6 +15269,7 @@ static void arch_reg_equivs(
 #endif
                *equiv++ = REG_DX;
                *equiv++ = REG_EDX;
+               *equiv++ = REG_DXAX;
                *equiv++ = REG_EDXEAX;
                break;
        case REG_DH:
@@ -15164,12 +15278,14 @@ static void arch_reg_equivs(
 #endif
                *equiv++ = REG_DX;
                *equiv++ = REG_EDX;
+               *equiv++ = REG_DXAX;
                *equiv++ = REG_EDXEAX;
                break;
        case REG_AX:
                *equiv++ = REG_AL;
                *equiv++ = REG_AH;
                *equiv++ = REG_EAX;
+               *equiv++ = REG_DXAX;
                *equiv++ = REG_EDXEAX;
                break;
        case REG_BX:
@@ -15186,6 +15302,7 @@ static void arch_reg_equivs(
                *equiv++ = REG_DL;
                *equiv++ = REG_DH;
                *equiv++ = REG_EDX;
+               *equiv++ = REG_DXAX;
                *equiv++ = REG_EDXEAX;
                break;
        case REG_SI:  
@@ -15204,6 +15321,7 @@ static void arch_reg_equivs(
                *equiv++ = REG_AL;
                *equiv++ = REG_AH;
                *equiv++ = REG_AX;
+               *equiv++ = REG_DXAX;
                *equiv++ = REG_EDXEAX;
                break;
        case REG_EBX:
@@ -15220,6 +15338,7 @@ static void arch_reg_equivs(
                *equiv++ = REG_DL;
                *equiv++ = REG_DH;
                *equiv++ = REG_DX;
+               *equiv++ = REG_DXAX;
                *equiv++ = REG_EDXEAX;
                break;
        case REG_ESI: 
@@ -15234,6 +15353,17 @@ static void arch_reg_equivs(
        case REG_ESP: 
                *equiv++ = REG_SP;
                break;
+       case REG_DXAX: 
+               *equiv++ = REG_AL;
+               *equiv++ = REG_AH;
+               *equiv++ = REG_DL;
+               *equiv++ = REG_DH;
+               *equiv++ = REG_AX;
+               *equiv++ = REG_DX;
+               *equiv++ = REG_EAX;
+               *equiv++ = REG_EDX;
+               *equiv++ = REG_EDXEAX;
+               break;
        case REG_EDXEAX: 
                *equiv++ = REG_AL;
                *equiv++ = REG_AH;
@@ -15243,6 +15373,7 @@ static void arch_reg_equivs(
                *equiv++ = REG_DX;
                *equiv++ = REG_EAX;
                *equiv++ = REG_EDX;
+               *equiv++ = REG_DXAX;
                break;
        }
        *equiv++ = REG_UNSET; 
@@ -15251,8 +15382,10 @@ static void arch_reg_equivs(
 static unsigned arch_avail_mask(struct compile_state *state)
 {
        unsigned avail_mask;
-       avail_mask = REGCM_GPR8 | REGCM_GPR16_8 | REGCM_GPR16 | 
-               REGCM_GPR32 | REGCM_GPR32_8 | REGCM_GPR64 |
+       /* REGCM_GPR8 is not available */
+       avail_mask = REGCM_GPR8_LO | REGCM_GPR16_8 | REGCM_GPR16 | 
+               REGCM_GPR32 | REGCM_GPR32_8 | 
+               REGCM_DIVIDEND32 | REGCM_DIVIDEND64 |
                REGCM_IMM32 | REGCM_IMM16 | REGCM_IMM8 | REGCM_FLAGS;
        switch(state->cpu) {
        case CPU_P3:
@@ -15264,12 +15397,6 @@ static unsigned arch_avail_mask(struct compile_state *state)
                avail_mask |= REGCM_MMX | REGCM_XMM;
                break;
        }
-#if 0
-       /* Don't enable 8 bit values until I can force both operands
-        * to be 8bits simultaneously.
-        */
-       avail_mask &= ~(REGCM_GPR8 | REGCM_GPR16_8 | REGCM_GPR16);
-#endif
        return avail_mask;
 }
 
@@ -15278,7 +15405,6 @@ static unsigned arch_regcm_normalize(struct compile_state *state, unsigned regcm
        unsigned mask, result;
        int class, class2;
        result = regcm;
-       result &= arch_avail_mask(state);
 
        for(class = 0, mask = 1; mask; mask <<= 1, class++) {
                if ((result & mask) == 0) {
@@ -15294,9 +15420,20 @@ static unsigned arch_regcm_normalize(struct compile_state *state, unsigned regcm
                        }
                }
        }
+       result &= arch_avail_mask(state);
        return result;
 }
 
+/* Normalize regcm with arch_regcm_normalize, then clear the immediate
+ * classes so that no REGCM_IMM* bit remains in the returned mask.
+ */
+static unsigned arch_regcm_reg_normalize(struct compile_state *state, unsigned regcm)
+{
+       const unsigned imm_classes = REGCM_IMM32 | REGCM_IMM16 | REGCM_IMM8;
+       unsigned normalized = arch_regcm_normalize(state, regcm);
+       return normalized & ~imm_classes;
+}
+
 static unsigned arch_reg_regcm(struct compile_state *state, int reg)
 {
        unsigned mask;
@@ -15322,19 +15459,19 @@ static struct reg_info arch_reg_constraint(
                unsigned int mask;
                unsigned int reg;
        } constraints[] = {
-               { 'r', REGCM_GPR32, REG_UNSET },
-               { 'g', REGCM_GPR32, REG_UNSET },
-               { 'p', REGCM_GPR32, REG_UNSET },
-               { 'q', REGCM_GPR8, REG_UNSET },
+               { 'r', REGCM_GPR32,   REG_UNSET },
+               { 'g', REGCM_GPR32,   REG_UNSET },
+               { 'p', REGCM_GPR32,   REG_UNSET },
+               { 'q', REGCM_GPR8_LO, REG_UNSET },
                { 'Q', REGCM_GPR32_8, REG_UNSET },
-               { 'x', REGCM_XMM,   REG_UNSET },
-               { 'y', REGCM_MMX,   REG_UNSET },
-               { 'a', REGCM_GPR32, REG_EAX },
-               { 'b', REGCM_GPR32, REG_EBX },
-               { 'c', REGCM_GPR32, REG_ECX },
-               { 'd', REGCM_GPR32, REG_EDX },
-               { 'D', REGCM_GPR32, REG_EDI },
-               { 'S', REGCM_GPR32, REG_ESI },
+               { 'x', REGCM_XMM,     REG_UNSET },
+               { 'y', REGCM_MMX,     REG_UNSET },
+               { 'a', REGCM_GPR32,   REG_EAX },
+               { 'b', REGCM_GPR32,   REG_EBX },
+               { 'c', REGCM_GPR32,   REG_ECX },
+               { 'd', REGCM_GPR32,   REG_EDX },
+               { 'D', REGCM_GPR32,   REG_EDI },
+               { 'S', REGCM_GPR32,   REG_ESI },
                { '\0', 0, REG_UNSET },
        };
        unsigned int regcm;
@@ -15451,21 +15588,28 @@ static int do_select_reg(struct compile_state *state,
 static int arch_select_free_register(
        struct compile_state *state, char *used, int classes)
 {
-       /* Preference: flags, 8bit gprs, 32bit gprs, other 32bit reg
-        * other types of registers.
+       /* Live ranges with the most neighbors are colored first.
+        *
+        * Generally it does not matter which colors are given
+        * as the register allocator attempts to color live ranges
+        * in an order where you are guaranteed not to run out of colors.
+        *
+        * Occasionally the register allocator cannot find an order
+        * of register selection that will find a free color.  To
+        * increase the odds that the register allocator succeeds when
+        * it has to guess, first give out registers from the register
+        * classes least likely to run out of registers.
+        * 
         */
        int i, reg;
        reg = REG_UNSET;
-       for(i = REGC_FLAGS_FIRST; (reg == REG_UNSET) && (i <= REGC_FLAGS_LAST); i++) {
-               reg = do_select_reg(state, used, i, classes);
-       }
-       for(i = REGC_GPR32_FIRST; (reg == REG_UNSET) && (i <= REGC_GPR32_LAST); i++) {
+       for(i = REGC_XMM_FIRST; (reg == REG_UNSET) && (i <= REGC_XMM_LAST); i++) {
                reg = do_select_reg(state, used, i, classes);
        }
        for(i = REGC_MMX_FIRST; (reg == REG_UNSET) && (i <= REGC_MMX_LAST); i++) {
                reg = do_select_reg(state, used, i, classes);
        }
-       for(i = REGC_XMM_FIRST; (reg == REG_UNSET) && (i <= REGC_XMM_LAST); i++) {
+       for(i = REGC_GPR32_LAST; (reg == REG_UNSET) && (i >= REGC_GPR32_FIRST); i--) {
                reg = do_select_reg(state, used, i, classes);
        }
        for(i = REGC_GPR16_FIRST; (reg == REG_UNSET) && (i <= REGC_GPR16_LAST); i++) {
@@ -15474,7 +15618,16 @@ static int arch_select_free_register(
        for(i = REGC_GPR8_FIRST; (reg == REG_UNSET) && (i <= REGC_GPR8_LAST); i++) {
                reg = do_select_reg(state, used, i, classes);
        }
-       for(i = REGC_GPR64_FIRST; (reg == REG_UNSET) && (i <= REGC_GPR64_LAST); i++) {
+       for(i = REGC_GPR8_LO_FIRST; (reg == REG_UNSET) && (i <= REGC_GPR8_LO_LAST); i++) {
+               reg = do_select_reg(state, used, i, classes);
+       }
+       for(i = REGC_DIVIDEND32_FIRST; (reg == REG_UNSET) && (i <= REGC_DIVIDEND32_LAST); i++) {
+               reg = do_select_reg(state, used, i, classes);
+       }
+       for(i = REGC_DIVIDEND64_FIRST; (reg == REG_UNSET) && (i <= REGC_DIVIDEND64_LAST); i++) {
+               reg = do_select_reg(state, used, i, classes);
+       }
+       for(i = REGC_FLAGS_FIRST; (reg == REG_UNSET) && (i <= REGC_FLAGS_LAST); i++) {
                reg = do_select_reg(state, used, i, classes);
        }
        return reg;
@@ -15484,10 +15637,8 @@ static int arch_select_free_register(
 static unsigned arch_type_to_regcm(struct compile_state *state, struct type *type) 
 {
 #warning "FIXME force types smaller (if legal) before I get here"
-       unsigned avail_mask;
        unsigned mask;
        mask = 0;
-       avail_mask = arch_avail_mask(state);
        switch(type->type & TYPE_MASK) {
        case TYPE_ARRAY:
        case TYPE_VOID: 
@@ -15495,10 +15646,10 @@ static unsigned arch_type_to_regcm(struct compile_state *state, struct type *typ
                break;
        case TYPE_CHAR:
        case TYPE_UCHAR:
-               mask = REGCM_GPR8 | 
+               mask = REGCM_GPR8 | REGCM_GPR8_LO |
                        REGCM_GPR16 | REGCM_GPR16_8 | 
                        REGCM_GPR32 | REGCM_GPR32_8 |
-                       REGCM_GPR64 |
+                       REGCM_DIVIDEND32 | REGCM_DIVIDEND64 |
                        REGCM_MMX | REGCM_XMM |
                        REGCM_IMM32 | REGCM_IMM16 | REGCM_IMM8;
                break;
@@ -15506,7 +15657,7 @@ static unsigned arch_type_to_regcm(struct compile_state *state, struct type *typ
        case TYPE_USHORT:
                mask =  REGCM_GPR16 | REGCM_GPR16_8 |
                        REGCM_GPR32 | REGCM_GPR32_8 |
-                       REGCM_GPR64 |
+                       REGCM_DIVIDEND32 | REGCM_DIVIDEND64 |
                        REGCM_MMX | REGCM_XMM |
                        REGCM_IMM32 | REGCM_IMM16;
                break;
@@ -15516,14 +15667,15 @@ static unsigned arch_type_to_regcm(struct compile_state *state, struct type *typ
        case TYPE_ULONG:
        case TYPE_POINTER:
                mask =  REGCM_GPR32 | REGCM_GPR32_8 |
-                       REGCM_GPR64 | REGCM_MMX | REGCM_XMM |
+                       REGCM_DIVIDEND32 | REGCM_DIVIDEND64 |
+                       REGCM_MMX | REGCM_XMM |
                        REGCM_IMM32;
                break;
        default:
                internal_error(state, 0, "no register class for type");
                break;
        }
-       mask &= avail_mask;
+       mask = arch_regcm_normalize(state, mask);
        return mask;
 }
 
@@ -15574,52 +15726,80 @@ static int get_imm8(struct triple *ins, struct triple **expr)
        return 1;
 }
 
-#define TEMPLATE_NOP         0
-#define TEMPLATE_INTCONST8   1
-#define TEMPLATE_INTCONST32  2
-#define TEMPLATE_COPY_REG    3
-#define TEMPLATE_COPY_IMM32  4
-#define TEMPLATE_COPY_IMM16  5
-#define TEMPLATE_COPY_IMM8   6
-#define TEMPLATE_PHI         7
-#define TEMPLATE_STORE8      8
-#define TEMPLATE_STORE16     9
-#define TEMPLATE_STORE32    10
-#define TEMPLATE_LOAD8      11
-#define TEMPLATE_LOAD16     12
-#define TEMPLATE_LOAD32     13
-#define TEMPLATE_BINARY_REG 14
-#define TEMPLATE_BINARY_IMM 15
-#define TEMPLATE_SL_CL      16
-#define TEMPLATE_SL_IMM     17
-#define TEMPLATE_UNARY      18
-#define TEMPLATE_CMP_REG    19
-#define TEMPLATE_CMP_IMM    20
-#define TEMPLATE_TEST       21
-#define TEMPLATE_SET        22
-#define TEMPLATE_JMP        23
-#define TEMPLATE_INB_DX     24
-#define TEMPLATE_INB_IMM    25
-#define TEMPLATE_INW_DX     26
-#define TEMPLATE_INW_IMM    27
-#define TEMPLATE_INL_DX     28
-#define TEMPLATE_INL_IMM    29
-#define TEMPLATE_OUTB_DX    30
-#define TEMPLATE_OUTB_IMM   31
-#define TEMPLATE_OUTW_DX    32
-#define TEMPLATE_OUTW_IMM   33
-#define TEMPLATE_OUTL_DX    34
-#define TEMPLATE_OUTL_IMM   35
-#define TEMPLATE_BSF        36
-#define TEMPLATE_RDMSR      37
-#define TEMPLATE_WRMSR      38
-#define LAST_TEMPLATE       TEMPLATE_WRMSR
+#define TEMPLATE_NOP           0
+#define TEMPLATE_INTCONST8     1
+#define TEMPLATE_INTCONST32    2
+#define TEMPLATE_COPY8_REG     3
+#define TEMPLATE_COPY16_REG    4
+#define TEMPLATE_COPY32_REG    5
+#define TEMPLATE_COPY_IMM8     6
+#define TEMPLATE_COPY_IMM16    7
+#define TEMPLATE_COPY_IMM32    8
+#define TEMPLATE_PHI8          9
+#define TEMPLATE_PHI16        10
+#define TEMPLATE_PHI32        11
+#define TEMPLATE_STORE8       12
+#define TEMPLATE_STORE16      13
+#define TEMPLATE_STORE32      14
+#define TEMPLATE_LOAD8        15
+#define TEMPLATE_LOAD16       16
+#define TEMPLATE_LOAD32       17
+#define TEMPLATE_BINARY8_REG  18
+#define TEMPLATE_BINARY16_REG 19
+#define TEMPLATE_BINARY32_REG 20
+#define TEMPLATE_BINARY8_IMM  21
+#define TEMPLATE_BINARY16_IMM 22
+#define TEMPLATE_BINARY32_IMM 23
+#define TEMPLATE_SL8_CL       24
+#define TEMPLATE_SL16_CL      25
+#define TEMPLATE_SL32_CL      26
+#define TEMPLATE_SL8_IMM      27
+#define TEMPLATE_SL16_IMM     28
+#define TEMPLATE_SL32_IMM     29
+#define TEMPLATE_UNARY8       30
+#define TEMPLATE_UNARY16      31
+#define TEMPLATE_UNARY32      32
+#define TEMPLATE_CMP8_REG     33
+#define TEMPLATE_CMP16_REG    34
+#define TEMPLATE_CMP32_REG    35
+#define TEMPLATE_CMP8_IMM     36
+#define TEMPLATE_CMP16_IMM    37
+#define TEMPLATE_CMP32_IMM    38
+#define TEMPLATE_TEST8        39
+#define TEMPLATE_TEST16       40
+#define TEMPLATE_TEST32       41
+#define TEMPLATE_SET          42
+#define TEMPLATE_JMP          43
+#define TEMPLATE_INB_DX       44
+#define TEMPLATE_INB_IMM      45
+#define TEMPLATE_INW_DX       46
+#define TEMPLATE_INW_IMM      47
+#define TEMPLATE_INL_DX       48
+#define TEMPLATE_INL_IMM      49
+#define TEMPLATE_OUTB_DX      50
+#define TEMPLATE_OUTB_IMM     51
+#define TEMPLATE_OUTW_DX      52
+#define TEMPLATE_OUTW_IMM     53
+#define TEMPLATE_OUTL_DX      54
+#define TEMPLATE_OUTL_IMM     55
+#define TEMPLATE_BSF          56
+#define TEMPLATE_RDMSR        57
+#define TEMPLATE_WRMSR        58
+#define TEMPLATE_UMUL8        59
+#define TEMPLATE_UMUL16       60
+#define TEMPLATE_UMUL32       61
+#define TEMPLATE_DIV8         62
+#define TEMPLATE_DIV16        63
+#define TEMPLATE_DIV32        64
+#define LAST_TEMPLATE       TEMPLATE_DIV32
 #if LAST_TEMPLATE >= MAX_TEMPLATES
 #error "MAX_TEMPLATES to low"
 #endif
 
-#define COPY_REGCM (REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8 | REGCM_MMX | REGCM_XMM)
-#define COPY32_REGCM (REGCM_GPR32 | REGCM_MMX | REGCM_XMM)
+#define COPY8_REGCM     (REGCM_DIVIDEND64 | REGCM_DIVIDEND32 | REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8_LO | REGCM_MMX | REGCM_XMM)
+#define COPY16_REGCM    (REGCM_DIVIDEND64 | REGCM_DIVIDEND32 | REGCM_GPR32 | REGCM_GPR16 | REGCM_MMX | REGCM_XMM)  
+#define COPY32_REGCM    (REGCM_DIVIDEND64 | REGCM_DIVIDEND32 | REGCM_GPR32 | REGCM_MMX | REGCM_XMM)
+
 
 static struct ins_template templates[] = {
        [TEMPLATE_NOP]      = {},
@@ -15629,56 +15809,110 @@ static struct ins_template templates[] = {
        [TEMPLATE_INTCONST32] = { 
                .lhs = { [0] = { REG_UNNEEDED, REGCM_IMM32 } },
        },
-       [TEMPLATE_COPY_REG] = {
-               .lhs = { [0] = { REG_UNSET, COPY_REGCM } },
-               .rhs = { [0] = { REG_UNSET, COPY_REGCM }  },
+       [TEMPLATE_COPY8_REG] = {
+               .lhs = { [0] = { REG_UNSET, COPY8_REGCM } },
+               .rhs = { [0] = { REG_UNSET, COPY8_REGCM }  },
        },
-       [TEMPLATE_COPY_IMM32] = {
-               .lhs = { [0] = { REG_UNSET, COPY32_REGCM } },
-               .rhs = { [0] = { REG_UNNEEDED, REGCM_IMM32 } },
+       [TEMPLATE_COPY16_REG] = {
+               .lhs = { [0] = { REG_UNSET, COPY16_REGCM } },
+               .rhs = { [0] = { REG_UNSET, COPY16_REGCM }  },
        },
-       [TEMPLATE_COPY_IMM16] = {
-               .lhs = { [0] = { REG_UNSET, COPY32_REGCM | REGCM_GPR16 } },
-               .rhs = { [0] = { REG_UNNEEDED, REGCM_IMM16 } },
+       [TEMPLATE_COPY32_REG] = {
+               .lhs = { [0] = { REG_UNSET, COPY32_REGCM } },
+               .rhs = { [0] = { REG_UNSET, COPY32_REGCM }  },
        },
        [TEMPLATE_COPY_IMM8] = {
-               .lhs = { [0] = { REG_UNSET, COPY_REGCM } },
+               .lhs = { [0] = { REG_UNSET, COPY8_REGCM } },
                .rhs = { [0] = { REG_UNNEEDED, REGCM_IMM8 } },
        },
-       [TEMPLATE_PHI] = { 
-               .lhs = { [0] = { REG_VIRT0, COPY_REGCM } },
+       [TEMPLATE_COPY_IMM16] = {
+               .lhs = { [0] = { REG_UNSET, COPY16_REGCM } },
+               .rhs = { [0] = { REG_UNNEEDED, REGCM_IMM16 | REGCM_IMM8 } },
+       },
+       [TEMPLATE_COPY_IMM32] = {
+               .lhs = { [0] = { REG_UNSET, COPY32_REGCM } },
+               .rhs = { [0] = { REG_UNNEEDED, REGCM_IMM32 | REGCM_IMM16 | REGCM_IMM8 } },
+       },
+       [TEMPLATE_PHI8] = { 
+               .lhs = { [0] = { REG_VIRT0, COPY8_REGCM } },
+               .rhs = { 
+                       [ 0] = { REG_VIRT0, COPY8_REGCM },
+                       [ 1] = { REG_VIRT0, COPY8_REGCM },
+                       [ 2] = { REG_VIRT0, COPY8_REGCM },
+                       [ 3] = { REG_VIRT0, COPY8_REGCM },
+                       [ 4] = { REG_VIRT0, COPY8_REGCM },
+                       [ 5] = { REG_VIRT0, COPY8_REGCM },
+                       [ 6] = { REG_VIRT0, COPY8_REGCM },
+                       [ 7] = { REG_VIRT0, COPY8_REGCM },
+                       [ 8] = { REG_VIRT0, COPY8_REGCM },
+                       [ 9] = { REG_VIRT0, COPY8_REGCM },
+                       [10] = { REG_VIRT0, COPY8_REGCM },
+                       [11] = { REG_VIRT0, COPY8_REGCM },
+                       [12] = { REG_VIRT0, COPY8_REGCM },
+                       [13] = { REG_VIRT0, COPY8_REGCM },
+                       [14] = { REG_VIRT0, COPY8_REGCM },
+                       [15] = { REG_VIRT0, COPY8_REGCM },
+               }, },
+       [TEMPLATE_PHI16] = { 
+               .lhs = { [0] = { REG_VIRT0, COPY16_REGCM } },
+               .rhs = { 
+                       [ 0] = { REG_VIRT0, COPY16_REGCM },
+                       [ 1] = { REG_VIRT0, COPY16_REGCM },
+                       [ 2] = { REG_VIRT0, COPY16_REGCM },
+                       [ 3] = { REG_VIRT0, COPY16_REGCM },
+                       [ 4] = { REG_VIRT0, COPY16_REGCM },
+                       [ 5] = { REG_VIRT0, COPY16_REGCM },
+                       [ 6] = { REG_VIRT0, COPY16_REGCM },
+                       [ 7] = { REG_VIRT0, COPY16_REGCM },
+                       [ 8] = { REG_VIRT0, COPY16_REGCM },
+                       [ 9] = { REG_VIRT0, COPY16_REGCM },
+                       [10] = { REG_VIRT0, COPY16_REGCM },
+                       [11] = { REG_VIRT0, COPY16_REGCM },
+                       [12] = { REG_VIRT0, COPY16_REGCM },
+                       [13] = { REG_VIRT0, COPY16_REGCM },
+                       [14] = { REG_VIRT0, COPY16_REGCM },
+                       [15] = { REG_VIRT0, COPY16_REGCM },
+               }, },
+       [TEMPLATE_PHI32] = { 
+               .lhs = { [0] = { REG_VIRT0, COPY32_REGCM } },
                .rhs = { 
-                       [ 0] = { REG_VIRT0, COPY_REGCM },
-                       [ 1] = { REG_VIRT0, COPY_REGCM },
-                       [ 2] = { REG_VIRT0, COPY_REGCM },
-                       [ 3] = { REG_VIRT0, COPY_REGCM },
-                       [ 4] = { REG_VIRT0, COPY_REGCM },
-                       [ 5] = { REG_VIRT0, COPY_REGCM },
-                       [ 6] = { REG_VIRT0, COPY_REGCM },
-                       [ 7] = { REG_VIRT0, COPY_REGCM },
-                       [ 8] = { REG_VIRT0, COPY_REGCM },
-                       [ 9] = { REG_VIRT0, COPY_REGCM },
-                       [10] = { REG_VIRT0, COPY_REGCM },
-                       [11] = { REG_VIRT0, COPY_REGCM },
-                       [12] = { REG_VIRT0, COPY_REGCM },
-                       [13] = { REG_VIRT0, COPY_REGCM },
-                       [14] = { REG_VIRT0, COPY_REGCM },
-                       [15] = { REG_VIRT0, COPY_REGCM },
+                       [ 0] = { REG_VIRT0, COPY32_REGCM },
+                       [ 1] = { REG_VIRT0, COPY32_REGCM },
+                       [ 2] = { REG_VIRT0, COPY32_REGCM },
+                       [ 3] = { REG_VIRT0, COPY32_REGCM },
+                       [ 4] = { REG_VIRT0, COPY32_REGCM },
+                       [ 5] = { REG_VIRT0, COPY32_REGCM },
+                       [ 6] = { REG_VIRT0, COPY32_REGCM },
+                       [ 7] = { REG_VIRT0, COPY32_REGCM },
+                       [ 8] = { REG_VIRT0, COPY32_REGCM },
+                       [ 9] = { REG_VIRT0, COPY32_REGCM },
+                       [10] = { REG_VIRT0, COPY32_REGCM },
+                       [11] = { REG_VIRT0, COPY32_REGCM },
+                       [12] = { REG_VIRT0, COPY32_REGCM },
+                       [13] = { REG_VIRT0, COPY32_REGCM },
+                       [14] = { REG_VIRT0, COPY32_REGCM },
+                       [15] = { REG_VIRT0, COPY32_REGCM },
                }, },
        [TEMPLATE_STORE8] = {
-               .lhs = { [0] = { REG_UNSET, REGCM_GPR32 } },
-               .rhs = { [0] = { REG_UNSET, REGCM_GPR8 } },
+               .rhs = { 
+                       [0] = { REG_UNSET, REGCM_GPR32 },
+                       [1] = { REG_UNSET, REGCM_GPR8_LO },
+               },
        },
        [TEMPLATE_STORE16] = {
-               .lhs = { [0] = { REG_UNSET, REGCM_GPR32 } },
-               .rhs = { [0] = { REG_UNSET, REGCM_GPR16 } },
+               .rhs = { 
+                       [0] = { REG_UNSET, REGCM_GPR32 },
+                       [1] = { REG_UNSET, REGCM_GPR16 },
+               },
        },
        [TEMPLATE_STORE32] = {
-               .lhs = { [0] = { REG_UNSET, REGCM_GPR32 } },
-               .rhs = { [0] = { REG_UNSET, REGCM_GPR32 } },
+               .rhs = { 
+                       [0] = { REG_UNSET, REGCM_GPR32 },
+                       [1] = { REG_UNSET, REGCM_GPR32 },
+               },
        },
        [TEMPLATE_LOAD8] = {
-               .lhs = { [0] = { REG_UNSET, REGCM_GPR8 } },
+               .lhs = { [0] = { REG_UNSET, REGCM_GPR8_LO } },
                .rhs = { [0] = { REG_UNSET, REGCM_GPR32 } },
        },
        [TEMPLATE_LOAD16] = {
@@ -15689,69 +15923,169 @@ static struct ins_template templates[] = {
                .lhs = { [0] = { REG_UNSET, REGCM_GPR32 } },
                .rhs = { [0] = { REG_UNSET, REGCM_GPR32 } },
        },
-       [TEMPLATE_BINARY_REG] = {
+       [TEMPLATE_BINARY8_REG] = {
+               .lhs = { [0] = { REG_VIRT0, REGCM_GPR8_LO } },
+               .rhs = { 
+                       [0] = { REG_VIRT0, REGCM_GPR8_LO },
+                       [1] = { REG_UNSET, REGCM_GPR8_LO },
+               },
+       },
+       [TEMPLATE_BINARY16_REG] = {
+               .lhs = { [0] = { REG_VIRT0, REGCM_GPR16 } },
+               .rhs = { 
+                       [0] = { REG_VIRT0, REGCM_GPR16 },
+                       [1] = { REG_UNSET, REGCM_GPR16 },
+               },
+       },
+       [TEMPLATE_BINARY32_REG] = {
                .lhs = { [0] = { REG_VIRT0, REGCM_GPR32 } },
                .rhs = { 
                        [0] = { REG_VIRT0, REGCM_GPR32 },
                        [1] = { REG_UNSET, REGCM_GPR32 },
                },
        },
-       [TEMPLATE_BINARY_IMM] = {
+       [TEMPLATE_BINARY8_IMM] = {
+               .lhs = { [0] = { REG_VIRT0, REGCM_GPR8_LO } },
+               .rhs = { 
+                       [0] = { REG_VIRT0,    REGCM_GPR8_LO },
+                       [1] = { REG_UNNEEDED, REGCM_IMM8 },
+               },
+       },
+       [TEMPLATE_BINARY16_IMM] = {
+               .lhs = { [0] = { REG_VIRT0, REGCM_GPR16 } },
+               .rhs = { 
+                       [0] = { REG_VIRT0,    REGCM_GPR16 },
+                       [1] = { REG_UNNEEDED, REGCM_IMM16 },
+               },
+       },
+       [TEMPLATE_BINARY32_IMM] = {
                .lhs = { [0] = { REG_VIRT0, REGCM_GPR32 } },
                .rhs = { 
                        [0] = { REG_VIRT0,    REGCM_GPR32 },
                        [1] = { REG_UNNEEDED, REGCM_IMM32 },
                },
        },
-       [TEMPLATE_SL_CL] = {
-               .lhs = { [0] = { REG_VIRT0, REGCM_GPR32 } },
+       [TEMPLATE_SL8_CL] = {
+               .lhs = { [0] = { REG_VIRT0, REGCM_GPR8_LO } },
+               .rhs = { 
+                       [0] = { REG_VIRT0, REGCM_GPR8_LO },
+                       [1] = { REG_CL, REGCM_GPR8_LO },
+               },
+       },
+       [TEMPLATE_SL16_CL] = {
+               .lhs = { [0] = { REG_VIRT0, REGCM_GPR16 } },
+               .rhs = { 
+                       [0] = { REG_VIRT0, REGCM_GPR16 },
+                       [1] = { REG_CL, REGCM_GPR8_LO },
+               },
+       },
+       [TEMPLATE_SL32_CL] = {
+               .lhs = { [0] = { REG_VIRT0, REGCM_GPR32 } },
+               .rhs = { 
+                       [0] = { REG_VIRT0, REGCM_GPR32 },
+                       [1] = { REG_CL, REGCM_GPR8_LO },
+               },
+       },
+       [TEMPLATE_SL8_IMM] = {
+               .lhs = { [0] = { REG_VIRT0, REGCM_GPR8_LO } },
+               .rhs = { 
+                       [0] = { REG_VIRT0,    REGCM_GPR8_LO },
+                       [1] = { REG_UNNEEDED, REGCM_IMM8 },
+               },
+       },
+       [TEMPLATE_SL16_IMM] = {
+               .lhs = { [0] = { REG_VIRT0, REGCM_GPR16 } },
                .rhs = { 
-                       [0] = { REG_VIRT0, REGCM_GPR32 },
-                       [1] = { REG_CL, REGCM_GPR8 },
+                       [0] = { REG_VIRT0,    REGCM_GPR16 },
+                       [1] = { REG_UNNEEDED, REGCM_IMM8 },
                },
        },
-       [TEMPLATE_SL_IMM] = {
+       [TEMPLATE_SL32_IMM] = {
                .lhs = { [0] = { REG_VIRT0, REGCM_GPR32 } },
                .rhs = { 
                        [0] = { REG_VIRT0,    REGCM_GPR32 },
                        [1] = { REG_UNNEEDED, REGCM_IMM8 },
                },
        },
-       [TEMPLATE_UNARY] = {
+       [TEMPLATE_UNARY8] = {
+               .lhs = { [0] = { REG_VIRT0, REGCM_GPR8_LO } },
+               .rhs = { [0] = { REG_VIRT0, REGCM_GPR8_LO } },
+       },
+       [TEMPLATE_UNARY16] = {
+               .lhs = { [0] = { REG_VIRT0, REGCM_GPR16 } },
+               .rhs = { [0] = { REG_VIRT0, REGCM_GPR16 } },
+       },
+       [TEMPLATE_UNARY32] = {
                .lhs = { [0] = { REG_VIRT0, REGCM_GPR32 } },
                .rhs = { [0] = { REG_VIRT0, REGCM_GPR32 } },
        },
-       [TEMPLATE_CMP_REG] = {
+       [TEMPLATE_CMP8_REG] = {
+               .lhs = { [0] = { REG_EFLAGS, REGCM_FLAGS } },
+               .rhs = {
+                       [0] = { REG_UNSET, REGCM_GPR8_LO },
+                       [1] = { REG_UNSET, REGCM_GPR8_LO },
+               },
+       },
+       [TEMPLATE_CMP16_REG] = {
+               .lhs = { [0] = { REG_EFLAGS, REGCM_FLAGS } },
+               .rhs = {
+                       [0] = { REG_UNSET, REGCM_GPR16 },
+                       [1] = { REG_UNSET, REGCM_GPR16 },
+               },
+       },
+       [TEMPLATE_CMP32_REG] = {
                .lhs = { [0] = { REG_EFLAGS, REGCM_FLAGS } },
                .rhs = {
                        [0] = { REG_UNSET, REGCM_GPR32 },
                        [1] = { REG_UNSET, REGCM_GPR32 },
                },
        },
-       [TEMPLATE_CMP_IMM] = {
+       [TEMPLATE_CMP8_IMM] = {
+               .lhs = { [0] = { REG_EFLAGS, REGCM_FLAGS } },
+               .rhs = {
+                       [0] = { REG_UNSET, REGCM_GPR8_LO },
+                       [1] = { REG_UNNEEDED, REGCM_IMM8 },
+               },
+       },
+       [TEMPLATE_CMP16_IMM] = {
+               .lhs = { [0] = { REG_EFLAGS, REGCM_FLAGS } },
+               .rhs = {
+                       [0] = { REG_UNSET, REGCM_GPR16 },
+                       [1] = { REG_UNNEEDED, REGCM_IMM16 },
+               },
+       },
+       [TEMPLATE_CMP32_IMM] = {
                .lhs = { [0] = { REG_EFLAGS, REGCM_FLAGS } },
                .rhs = {
                        [0] = { REG_UNSET, REGCM_GPR32 },
                        [1] = { REG_UNNEEDED, REGCM_IMM32 },
                },
        },
-       [TEMPLATE_TEST] = {
+       [TEMPLATE_TEST8] = {
+               .lhs = { [0] = { REG_EFLAGS, REGCM_FLAGS } },
+               .rhs = { [0] = { REG_UNSET, REGCM_GPR8_LO } },
+       },
+       [TEMPLATE_TEST16] = {
+               .lhs = { [0] = { REG_EFLAGS, REGCM_FLAGS } },
+               .rhs = { [0] = { REG_UNSET, REGCM_GPR16 } },
+       },
+       [TEMPLATE_TEST32] = {
                .lhs = { [0] = { REG_EFLAGS, REGCM_FLAGS } },
                .rhs = { [0] = { REG_UNSET, REGCM_GPR32 } },
        },
        [TEMPLATE_SET] = {
-               .lhs = { [0] = { REG_UNSET, REGCM_GPR8 } },
+               .lhs = { [0] = { REG_UNSET, REGCM_GPR8_LO } },
                .rhs = { [0] = { REG_EFLAGS, REGCM_FLAGS } },
        },
        [TEMPLATE_JMP] = {
                .rhs = { [0] = { REG_EFLAGS, REGCM_FLAGS } },
        },
        [TEMPLATE_INB_DX] = {
-               .lhs = { [0] = { REG_AL,  REGCM_GPR8 } },  
+               .lhs = { [0] = { REG_AL,  REGCM_GPR8_LO } },  
                .rhs = { [0] = { REG_DX, REGCM_GPR16 } },
        },
        [TEMPLATE_INB_IMM] = {
-               .lhs = { [0] = { REG_AL,  REGCM_GPR8 } },  
+               .lhs = { [0] = { REG_AL,  REGCM_GPR8_LO } },  
                .rhs = { [0] = { REG_UNNEEDED, REGCM_IMM8 } },
        },
        [TEMPLATE_INW_DX]  = { 
@@ -15772,13 +16106,13 @@ static struct ins_template templates[] = {
        },
        [TEMPLATE_OUTB_DX] = { 
                .rhs = {
-                       [0] = { REG_AL,  REGCM_GPR8 },
+                       [0] = { REG_AL,  REGCM_GPR8_LO },
                        [1] = { REG_DX, REGCM_GPR16 },
                },
        },
        [TEMPLATE_OUTB_IMM] = { 
                .rhs = {
-                       [0] = { REG_AL,  REGCM_GPR8 },  
+                       [0] = { REG_AL,  REGCM_GPR8_LO },  
                        [1] = { REG_UNNEEDED, REGCM_IMM8 },
                },
        },
@@ -15824,6 +16158,57 @@ static struct ins_template templates[] = {
                        [2] = { REG_EDX, REGCM_GPR32 },
                },
        },
+       [TEMPLATE_UMUL8] = {
+               .lhs = { [0] = { REG_AX, REGCM_GPR16 } },
+               .rhs = { 
+                       [0] = { REG_AL, REGCM_GPR8_LO },
+                       [1] = { REG_UNSET, REGCM_GPR8_LO },
+               },
+       },
+       [TEMPLATE_UMUL16] = {
+               .lhs = { [0] = { REG_DXAX, REGCM_DIVIDEND32 } },
+               .rhs = { 
+                       [0] = { REG_AX, REGCM_GPR16 },
+                       [1] = { REG_UNSET, REGCM_GPR16 },
+               },
+       },
+       [TEMPLATE_UMUL32] = {
+               .lhs = { [0] = { REG_EDXEAX, REGCM_DIVIDEND64 } },
+               .rhs = { 
+                       [0] = { REG_EAX, REGCM_GPR32 },
+                       [1] = { REG_UNSET, REGCM_GPR32 },
+               },
+       },
+       [TEMPLATE_DIV8] = {
+               .lhs = { 
+                       [0] = { REG_AL, REGCM_GPR8_LO },
+                       [1] = { REG_AH, REGCM_GPR8 },
+               },
+               .rhs = {
+                       [0] = { REG_AX, REGCM_GPR16 },
+                       [1] = { REG_UNSET, REGCM_GPR8_LO },
+               },
+       },
+       [TEMPLATE_DIV16] = {
+               .lhs = { 
+                       [0] = { REG_AX, REGCM_GPR16 },
+                       [1] = { REG_DX, REGCM_GPR16 },
+               },
+               .rhs = {
+                       [0] = { REG_DXAX, REGCM_DIVIDEND32 },
+                       [1] = { REG_UNSET, REGCM_GPR16 },
+               },
+       },
+       [TEMPLATE_DIV32] = {
+               .lhs = { 
+                       [0] = { REG_EAX, REGCM_GPR32 },
+                       [1] = { REG_EDX, REGCM_GPR32 },
+               },
+               .rhs = {
+                       [0] = { REG_EDXEAX, REGCM_DIVIDEND64 },
+                       [1] = { REG_UNSET, REGCM_GPR32 },
+               },
+       },
 };
 
 static void fixup_branches(struct compile_state *state,
@@ -15846,11 +16231,11 @@ static void fixup_branches(struct compile_state *state,
                        branch = entry->member;
                        test = pre_triple(state, branch,
                                cmp->op, cmp->type, left, right);
-                       test->template_id = TEMPLATE_TEST; 
+                       test->template_id = TEMPLATE_TEST32; /* default; switched to a CMP32 template just below when cmp->op is OP_CMP */
                        if (cmp->op == OP_CMP) {
-                               test->template_id = TEMPLATE_CMP_REG;
+                               test->template_id = TEMPLATE_CMP32_REG;
                                if (get_imm32(test, &RHS(test, 1))) {
-                                       test->template_id = TEMPLATE_CMP_IMM;
+                                       test->template_id = TEMPLATE_CMP32_IMM;
                                }
                        }
                        use_triple(RHS(test, 0), test);
@@ -15877,17 +16262,17 @@ static void bool_cmp(struct compile_state *state,
 
        /* Modify the comparison operator */
        ins->op = cmp_op;
-       ins->template_id = TEMPLATE_TEST;
+       ins->template_id = TEMPLATE_TEST32;
        if (cmp_op == OP_CMP) {
-               ins->template_id = TEMPLATE_CMP_REG;
+               ins->template_id = TEMPLATE_CMP32_REG;
                if (get_imm32(ins, &RHS(ins, 1))) {
-                       ins->template_id =  TEMPLATE_CMP_IMM;
+                       ins->template_id =  TEMPLATE_CMP32_IMM;
                }
        }
        /* Generate the instruction sequence that will transform the
         * result of the comparison into a logical value.
         */
-       set = post_triple(state, ins, set_op, ins->type, ins, 0);
+       set = post_triple(state, ins, set_op, &char_type, ins, 0);
        use_triple(ins, set);
        set->template_id = TEMPLATE_SET;
 
@@ -15992,16 +16377,58 @@ struct reg_info arch_reg_rhs(struct compile_state *state, struct triple *ins, in
        return result;
 }
 
+static struct triple *mod_div(struct compile_state *state,
+       struct triple *ins, int div_op, int index)
+{
+       struct triple *div, *piece0, *piece1;
+       /* Swap ins for a two-result div_op triple; index (0 = quotient, 1 = remainder) picks the result that replaces ins. */
+       /* Generate a piece to hold the remainder (it becomes LHS slot 1 of div below) */
+       piece1 = post_triple(state, ins, OP_PIECE, ins->type, 0, 0);
+       piece1->u.cval = 1;
+
+       /* Generate a piece to hold the quotient (it becomes LHS slot 0 of div below) */
+       piece0 = post_triple(state, ins, OP_PIECE, ins->type, 0, 0);
+       piece0->u.cval = 0;
+
+       /* Generate the division itself, taking over both operands of ins (NOTE(review): presumably post_triple inserts right after ins, so div lands ahead of both pieces -- confirm) */
+       div = post_triple(state, ins, div_op, ins->type, 0, 0);
+       RHS(div, 0) = RHS(ins, 0);
+       RHS(div, 1) = RHS(ins, 1);
+       LHS(div, 0) = piece0;
+       LHS(div, 1) = piece1;
+       div->template_id  = TEMPLATE_DIV32; /* NOTE(review): always the 32-bit template, whatever the size of ins->type */
+       use_triple(RHS(div, 0), div);
+       use_triple(RHS(div, 1), div);
+       use_triple(LHS(div, 0), div);
+       use_triple(LHS(div, 1), div);
+
+       /* Hook on piece0: point it back at div and record the use */
+       MISC(piece0, 0) = div;
+       use_triple(div, piece0);
+
+       /* Hook on piece1: point it back at div and record the use */
+       MISC(piece1, 0) = div;
+       use_triple(div, piece1);
+       
+       /* Replace uses of ins with the appropriate piece of the div, then drop ins */
+       propogate_use(state, ins, LHS(div, index));
+       release_triple(state, ins);
+
+       /* Return the triple that follows piece1; the callers store it as the next instruction to visit */
+       return piece1->next;
+}
+
 static struct triple *transform_to_arch_instruction(
        struct compile_state *state, struct triple *ins)
 {
        /* Transform from generic 3 address instructions
         * to archtecture specific instructions.
-        * And apply architecture specific constrains to instructions.
+        * And apply architecture specific constraints to instructions.
         * Copies are inserted to preserve the register flexibility
         * of 3 address instructions.
         */
        struct triple *next;
+       size_t size;
        next = ins->next;
        switch(ins->op) {
        case OP_INTCONST:
@@ -16020,22 +16447,46 @@ static struct triple *transform_to_arch_instruction(
                ins->template_id = TEMPLATE_NOP;
                break;
        case OP_COPY:
-               ins->template_id = TEMPLATE_COPY_REG;
-               if (is_imm8(RHS(ins, 0))) {
+               size = size_of(state, ins->type);
+               if (is_imm8(RHS(ins, 0)) && (size <= 1)) {
                        ins->template_id = TEMPLATE_COPY_IMM8;
                }
-               else if (is_imm16(RHS(ins, 0))) {
+               else if (is_imm16(RHS(ins, 0)) && (size <= 2)) {
                        ins->template_id = TEMPLATE_COPY_IMM16;
                }
-               else if (is_imm32(RHS(ins, 0))) {
+               else if (is_imm32(RHS(ins, 0)) && (size <= 4)) {
                        ins->template_id = TEMPLATE_COPY_IMM32;
                }
                else if (is_const(RHS(ins, 0))) {
                        internal_error(state, ins, "bad constant passed to copy");
                }
+               else if (size <= 1) {
+                       ins->template_id = TEMPLATE_COPY8_REG;
+               }
+               else if (size <= 2) {
+                       ins->template_id = TEMPLATE_COPY16_REG;
+               }
+               else if (size <= 4) {
+                       ins->template_id = TEMPLATE_COPY32_REG;
+               }
+               else {
+                       internal_error(state, ins, "bad type passed to copy");
+               }
                break;
        case OP_PHI:
-               ins->template_id = TEMPLATE_PHI;
+               size = size_of(state, ins->type);
+               if (size <= 1) {
+                       ins->template_id = TEMPLATE_PHI8;
+               }
+               else if (size <= 2) {
+                       ins->template_id = TEMPLATE_PHI16;
+               }
+               else if (size <= 4) {
+                       ins->template_id = TEMPLATE_PHI32;
+               }
+               else {
+                       internal_error(state, ins, "bad type passed to phi");
+               }
                break;
        case OP_STORE:
                switch(ins->type->type & TYPE_MASK) {
@@ -16082,22 +16533,45 @@ static struct triple *transform_to_arch_instruction(
        case OP_XOR:
        case OP_OR:
        case OP_SMUL:
-               ins->template_id = TEMPLATE_BINARY_REG;
+               ins->template_id = TEMPLATE_BINARY32_REG;
                if (get_imm32(ins, &RHS(ins, 1))) {
-                       ins->template_id = TEMPLATE_BINARY_IMM;
+                       ins->template_id = TEMPLATE_BINARY32_IMM;
                }
                break;
+       case OP_SDIVT:
+       case OP_UDIVT:
+               ins->template_id = TEMPLATE_DIV32;
+               next = after_lhs(state, ins);
+               break;
+               /* FIXME UMUL does not work yet.. */
+       case OP_UMUL:
+               ins->template_id = TEMPLATE_UMUL32;
+               break;
+       case OP_UDIV:
+               next = mod_div(state, ins, OP_UDIVT, 0);
+               break;
+       case OP_SDIV:
+               next = mod_div(state, ins, OP_SDIVT, 0);
+               break;
+       case OP_UMOD:
+               next = mod_div(state, ins, OP_UDIVT, 1);
+               break;
+       case OP_SMOD:
+               next = mod_div(state, ins, OP_SDIVT, 1);
+               break;
        case OP_SL:
        case OP_SSR:
        case OP_USR:
-               ins->template_id = TEMPLATE_SL_CL;
+               ins->template_id = TEMPLATE_SL32_CL;
                if (get_imm8(ins, &RHS(ins, 1))) {
-                       ins->template_id = TEMPLATE_SL_IMM;
+                       ins->template_id = TEMPLATE_SL32_IMM;
+               } else if (size_of(state, RHS(ins, 1)->type) > 1) {
+                       typed_pre_copy(state, &char_type, ins, 1);
                }
                break;
        case OP_INVERT:
        case OP_NEG:
-               ins->template_id = TEMPLATE_UNARY;
+               ins->template_id = TEMPLATE_UNARY32;
                break;
        case OP_EQ: 
                bool_cmp(state, ins, OP_CMP, OP_JMP_EQ, OP_SET_EQ); 
@@ -16186,12 +16660,12 @@ static struct triple *transform_to_arch_instruction(
                break;
                /* Already transformed instructions */
        case OP_TEST:
-               ins->template_id = TEMPLATE_TEST;
+               ins->template_id = TEMPLATE_TEST32;
                break;
        case OP_CMP:
-               ins->template_id = TEMPLATE_CMP_REG;
+               ins->template_id = TEMPLATE_CMP32_REG;
                if (get_imm32(ins, &RHS(ins, 1))) {
-                       ins->template_id = TEMPLATE_CMP_IMM;
+                       ins->template_id = TEMPLATE_CMP32_IMM;
                }
                break;
        case OP_JMP_EQ:      case OP_JMP_NOTEQ:
@@ -16218,18 +16692,21 @@ static struct triple *transform_to_arch_instruction(
        return next;
 }
 
+static long next_label(struct compile_state *state)
+{
+       static long label_counter = 0; /* function-local static: keeps its value between calls; state is not consulted */
+       return ++label_counter; /* pre-increment, so the first number handed out is 1 and 0 is never returned */
+}
 static void generate_local_labels(struct compile_state *state)
 {
        struct triple *first, *label;
-       int label_counter;
-       label_counter = 0;
        first = RHS(state->main_function, 0);
        label = first;
        do {
                if ((label->op == OP_LABEL) || 
                        (label->op == OP_SDECL)) {
                        if (label->use) {
-                               label->u.cval = ++label_counter;
+                               label->u.cval = next_label(state);
                        } else {
                                label->u.cval = 0;
                        }
@@ -16258,6 +16735,9 @@ static int check_reg(struct compile_state *state,
 
 static const char *arch_reg_str(int reg)
 {
+#if REG_XMM7 != 44
+#error "Registers have renumberd fix arch_reg_str"
+#endif
        static const char *regs[] = {
                "%unset",
                "%unneeded",
@@ -16266,6 +16746,7 @@ static const char *arch_reg_str(int reg)
                "%ax", "%bx", "%cx", "%dx", "%si", "%di", "%bp", "%sp",
                "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi", "%ebp", "%esp",
                "%edx:%eax",
+               "%dx:%ax",
                "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7",
                "%xmm0", "%xmm1", "%xmm2", "%xmm3", 
                "%xmm4", "%xmm5", "%xmm6", "%xmm7",
@@ -16309,6 +16790,12 @@ static void print_const_val(
                        (long_t)(ins->u.cval));
                break;
        case OP_ADDRCONST:
+               if (MISC(ins, 0)->op != OP_SDECL) {
+                       internal_error(state, ins, "bad base for addrconst");
+               }
+               if (MISC(ins, 0)->u.cval <= 0) {
+                       internal_error(state, ins, "unlabeled constant");
+               }
                fprintf(fp, " $L%s%lu+%lu ",
                        state->label_prefix, 
                        MISC(ins, 0)->u.cval,
@@ -16320,11 +16807,81 @@ static void print_const_val(
        }
 }
 
+/* Write the constant triple `ins` to `fp` as assembler data directives. */
+static void print_const(struct compile_state *state,
+       struct triple *ins, FILE *fp)
+{
+       switch(ins->op) {
+       case OP_INTCONST:       /* directive picked by the constant's basic type */
+               switch(ins->type->type & TYPE_MASK) {
+               case TYPE_CHAR:
+               case TYPE_UCHAR:
+                       fprintf(fp, ".byte 0x%02lx\n", ins->u.cval);
+                       break;
+               case TYPE_SHORT:
+               case TYPE_USHORT:
+                       fprintf(fp, ".short 0x%04lx\n", ins->u.cval);
+                       break;
+               case TYPE_INT:
+               case TYPE_UINT:
+               case TYPE_LONG:
+               case TYPE_ULONG:
+                       fprintf(fp, ".int %lu\n", ins->u.cval);
+                       break;
+               default:
+                       internal_error(state, ins, "Unknown constant type");
+               }
+               break;
+       case OP_ADDRCONST:      /* label of the base sdecl plus an offset */
+       {
+               struct triple *base = MISC(ins, 0);
+               if (base->op != OP_SDECL) {
+                       internal_error(state, ins, "bad base for addrconst");
+               }
+               if (base->u.cval <= 0) {
+                       internal_error(state, ins, "unlabeled constant");
+               }
+               fprintf(fp, ".int L%s%lu+%lu\n",
+                       state->label_prefix, base->u.cval, ins->u.cval);
+               break;
+       }
+       case OP_BLOBCONST:      /* raw bytes, one .byte directive each */
+       {
+               size_t len = size_of(state, ins->type);
+               unsigned char *bytes = ins->u.blob;
+               size_t idx;
+               for(idx = 0; idx < len; idx++) {
+                       fprintf(fp, ".byte 0x%02x\n", bytes[idx]);
+               }
+               break;
+       }
+       default:
+               internal_error(state, ins, "Unknown constant type");
+               break;
+       }
+}
+
+#define TEXT_SECTION ".rom.text"
+#define DATA_SECTION ".rom.data"
+
+/* Emit constant `ins` under a fresh label in DATA_SECTION; return the label number. */
+static long get_const_pool_ref(
+       struct compile_state *state, struct triple *ins, FILE *fp)
+{
+       long ref = next_label(state);
+       fprintf(fp, ".section \"" DATA_SECTION "\"\n");
+       fprintf(fp, ".balign %d\n", (int)align_of(state, ins->type)); /* %d needs an int */
+       fprintf(fp, "L%s%lu:\n", state->label_prefix, (unsigned long)ref); /* %lu is unsigned */
+       print_const(state, ins, fp);
+       fprintf(fp, ".section \"" TEXT_SECTION "\"\n"); /* switch back to the text section */
+       return ref;
+}
+
 static void print_binary_op(struct compile_state *state,
        const char *op, struct triple *ins, FILE *fp) 
 {
        unsigned mask;
-       mask = REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8;
+       mask = REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8_LO;
        if (RHS(ins, 0)->id != ins->id) {
                internal_error(state, ins, "invalid register assignment");
        }
@@ -16352,7 +16909,7 @@ static void print_unary_op(struct compile_state *state,
        const char *op, struct triple *ins, FILE *fp)
 {
        unsigned mask;
-       mask = REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8;
+       mask = REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8_LO;
        fprintf(fp, "\t%s %s\n",
                op,
                reg(state, RHS(ins, 0), mask));
@@ -16362,7 +16919,7 @@ static void print_op_shift(struct compile_state *state,
        const char *op, struct triple *ins, FILE *fp)
 {
        unsigned mask;
-       mask = REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8;
+       mask = REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8_LO;
        if (RHS(ins, 0)->id != ins->id) {
                internal_error(state, ins, "invalid register assignment");
        }
@@ -16375,7 +16932,7 @@ static void print_op_shift(struct compile_state *state,
        else {
                fprintf(fp, "\t%s %s, %s\n",
                        op,
-                       reg(state, RHS(ins, 1), REGCM_GPR8),
+                       reg(state, RHS(ins, 1), REGCM_GPR8_LO),
                        reg(state, RHS(ins, 0), mask));
        }
 }
@@ -16387,7 +16944,7 @@ static void print_op_in(struct compile_state *state, struct triple *ins, FILE *f
        int dreg;
        mask = 0;
        switch(ins->op) {
-       case OP_INB: op = "inb", mask = REGCM_GPR8; break;
+       case OP_INB: op = "inb", mask = REGCM_GPR8_LO; break;
        case OP_INW: op = "inw", mask = REGCM_GPR16; break;
        case OP_INL: op = "inl", mask = REGCM_GPR32; break;
        default:
@@ -16425,7 +16982,7 @@ static void print_op_out(struct compile_state *state, struct triple *ins, FILE *
        int lreg;
        mask = 0;
        switch(ins->op) {
-       case OP_OUTB: op = "outb", mask = REGCM_GPR8; break;
+       case OP_OUTB: op = "outb", mask = REGCM_GPR8_LO; break;
        case OP_OUTW: op = "outw", mask = REGCM_GPR16; break;
        case OP_OUTL: op = "outl", mask = REGCM_GPR32; break;
        default:
@@ -16470,10 +17027,6 @@ static void print_op_move(struct compile_state *state,
                src = RHS(ins, 0);
                dst = ins;
        }
-       else if (ins->op == OP_WRITE) {
-               dst = LHS(ins, 0);
-               src = RHS(ins, 0);
-       }
        else {
                internal_error(state, ins, "unknown move operation");
                src = dst = 0;
@@ -16481,13 +17034,13 @@ static void print_op_move(struct compile_state *state,
        if (!is_const(src)) {
                int src_reg, dst_reg;
                int src_regcm, dst_regcm;
-               src_reg = ID_REG(src->id);
+               src_reg   = ID_REG(src->id);
                dst_reg   = ID_REG(dst->id);
                src_regcm = arch_reg_regcm(state, src_reg);
-               dst_regcm   = arch_reg_regcm(state, dst_reg);
+               dst_regcm = arch_reg_regcm(state, dst_reg);
                /* If the class is the same just move the register */
                if (src_regcm & dst_regcm & 
-                       (REGCM_GPR8 | REGCM_GPR16 | REGCM_GPR32)) {
+                       (REGCM_GPR8_LO | REGCM_GPR16 | REGCM_GPR32)) {
                        if ((src_reg != dst_reg) || !omit_copy) {
                                fprintf(fp, "\tmov %s, %s\n",
                                        reg(state, src, src_regcm),
@@ -16504,9 +17057,19 @@ static void print_op_move(struct compile_state *state,
                                        arch_reg_str(dst_reg));
                        }
                }
+               /* Move from 32bit gprs to 16bit gprs */
+               else if ((src_regcm & REGCM_GPR32) &&
+                       (dst_regcm & REGCM_GPR16)) {
+                       dst_reg = (dst_reg - REGC_GPR16_FIRST) + REGC_GPR32_FIRST;
+                       if ((src_reg != dst_reg) || !omit_copy) {
+                               fprintf(fp, "\tmov %s, %s\n",
+                                       arch_reg_str(src_reg),
+                                       arch_reg_str(dst_reg));
+                       }
+               }
                /* Move 32bit to 8bit */
                else if ((src_regcm & REGCM_GPR32_8) &&
-                       (dst_regcm & REGCM_GPR8))
+                       (dst_regcm & REGCM_GPR8_LO))
                {
                        src_reg = (src_reg - REGC_GPR32_8_FIRST) + REGC_GPR8_FIRST;
                        if ((src_reg != dst_reg) || !omit_copy) {
@@ -16517,7 +17080,7 @@ static void print_op_move(struct compile_state *state,
                }
                /* Move 16bit to 8bit */
                else if ((src_regcm & REGCM_GPR16_8) &&
-                       (dst_regcm & REGCM_GPR8))
+                       (dst_regcm & REGCM_GPR8_LO))
                {
                        src_reg = (src_reg - REGC_GPR16_8_FIRST) + REGC_GPR8_FIRST;
                        if ((src_reg != dst_reg) || !omit_copy) {
@@ -16527,7 +17090,7 @@ static void print_op_move(struct compile_state *state,
                        }
                }
                /* Move 8/16bit to 16/32bit */
-               else if ((src_regcm & (REGCM_GPR8 | REGCM_GPR16)) && 
+               else if ((src_regcm & (REGCM_GPR8_LO | REGCM_GPR16)) && 
                        (dst_regcm & (REGCM_GPR16 | REGCM_GPR32))) {
                        const char *op;
                        op = is_signed(src->type)? "movsx": "movzx";
@@ -16544,15 +17107,26 @@ static void print_op_move(struct compile_state *state,
                                        reg(state, dst, dst_regcm));
                        }
                }
-               /* Move between mmx registers or mmx & sse  registers */
-               else if ((src_regcm & (REGCM_MMX | REGCM_XMM)) &&
-                       (dst_regcm & (REGCM_MMX | REGCM_XMM))) {
+               /* Move between mmx registers */
+               else if ((src_regcm & dst_regcm & REGCM_MMX)) {
                        if ((src_reg != dst_reg) || !omit_copy) {
                                fprintf(fp, "\tmovq %s, %s\n",
                                        reg(state, src, src_regcm),
                                        reg(state, dst, dst_regcm));
                        }
                }
+               /* Move from sse to mmx registers */
+               else if ((src_regcm & REGCM_XMM) && (dst_regcm & REGCM_MMX)) {
+                       fprintf(fp, "\tmovdq2q %s, %s\n",
+                               reg(state, src, src_regcm),
+                               reg(state, dst, dst_regcm));
+               }
+               /* Move from mmx to sse registers */
+               else if ((src_regcm & REGCM_MMX) && (dst_regcm & REGCM_XMM)) {
+                       fprintf(fp, "\tmovq2dq %s, %s\n",
+                               reg(state, src, src_regcm),
+                               reg(state, dst, dst_regcm));
+               }
                /* Move between 32bit gprs & mmx/sse registers */
                else if ((src_regcm & (REGCM_GPR32 | REGCM_MMX | REGCM_XMM)) &&
                        (dst_regcm & (REGCM_GPR32 | REGCM_MMX | REGCM_XMM))) {
@@ -16560,9 +17134,71 @@ static void print_op_move(struct compile_state *state,
                                reg(state, src, src_regcm),
                                reg(state, dst, dst_regcm));
                }
+               /* Move from 16bit gprs to mmx/sse registers */
+               else if ((src_regcm & REGCM_GPR16) &&
+                       (dst_regcm & (REGCM_MMX | REGCM_XMM))) {
+                       const char *op;
+                       int mid_reg;
+                       op = is_signed(src->type)? "movsx":"movzx";
+                       mid_reg = (src_reg - REGC_GPR16_FIRST) + REGC_GPR32_FIRST;
+                       fprintf(fp, "\t%s %s, %s\n\tmovd %s, %s\n",
+                               op,
+                               arch_reg_str(src_reg),
+                               arch_reg_str(mid_reg),
+                               arch_reg_str(mid_reg),
+                               arch_reg_str(dst_reg));
+               }
+               /* Move from mmx/sse registers to 16bit gprs */
+               else if ((src_regcm & (REGCM_MMX | REGCM_XMM)) &&
+                       (dst_regcm & REGCM_GPR16)) {
+                       dst_reg = (dst_reg - REGC_GPR16_FIRST) + REGC_GPR32_FIRST;
+                       fprintf(fp, "\tmovd %s, %s\n",
+                               arch_reg_str(src_reg),
+                               arch_reg_str(dst_reg));
+               }
+               /* Move from gpr to 64bit dividend */
+               else if ((src_regcm & (REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8_LO))  &&
+                       (dst_regcm & REGCM_DIVIDEND64)) {
+                       const char *extend;
+                       extend = is_signed(src->type)? "cltd":"movl $0, %edx";
+                       fprintf(fp, "\tmov %s, %%eax\n\t%s\n",
+                               arch_reg_str(src_reg), 
+                               extend);
+               }
+               /* Move from 64bit dividend to gpr */
+               else if ((src_regcm & REGCM_DIVIDEND64) &&
+                       (dst_regcm & (REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8_LO))) {
+                       if (dst_regcm & REGCM_GPR32) {
+                               src_reg = REG_EAX;
+                       } 
+                       else if (dst_regcm & REGCM_GPR16) {
+                               src_reg = REG_AX;
+                       }
+                       else if (dst_regcm & REGCM_GPR8_LO) {
+                               src_reg = REG_AL;
+                       }
+                       fprintf(fp, "\tmov %s, %s\n",
+                               arch_reg_str(src_reg),
+                               arch_reg_str(dst_reg));
+               }
+               /* Move from mmx/sse registers to 64bit dividend */
+               else if ((src_regcm & (REGCM_MMX | REGCM_XMM)) &&
+                       (dst_regcm & REGCM_DIVIDEND64)) {
+                       const char *extend;
+                       extend = is_signed(src->type)? "cltd": "movl $0, %edx";
+                       fprintf(fp, "\tmovd %s, %%eax\n\t%s\n",
+                               arch_reg_str(src_reg),
+                               extend);
+               }
+               /* Move from 64bit dividend to mmx/sse register */
+               else if ((src_regcm & REGCM_DIVIDEND64) &&
+                       (dst_regcm & (REGCM_XMM | REGCM_MMX))) {
+                       fprintf(fp, "\tmovd %%eax, %s\n",
+                               arch_reg_str(dst_reg));
+               }
 #if X86_4_8BIT_GPRS
                /* Move from 8bit gprs to  mmx/sse registers */
-               else if ((src_regcm & REGCM_GPR8) && (src_reg <= REG_DL) &&
+               else if ((src_regcm & REGCM_GPR8_LO) && (src_reg <= REG_DL) &&
                        (dst_regcm & (REGCM_MMX | REGCM_XMM))) {
                        const char *op;
                        int mid_reg;
@@ -16577,26 +17213,16 @@ static void print_op_move(struct compile_state *state,
                }
                /* Move from mmx/sse registers and 8bit gprs */
                else if ((src_regcm & (REGCM_MMX | REGCM_XMM)) &&
-                       (dst_regcm & REGCM_GPR8) && (dst_reg <= REG_DL)) {
+                       (dst_regcm & REGCM_GPR8_LO) && (dst_reg <= REG_DL)) {
                        int mid_reg;
                        mid_reg = (dst_reg - REGC_GPR8_FIRST) + REGC_GPR32_FIRST;
                        fprintf(fp, "\tmovd %s, %s\n",
                                reg(state, src, src_regcm),
                                arch_reg_str(mid_reg));
                }
-               /* Move from 32bit gprs to 16bit gprs */
-               else if ((src_regcm & REGCM_GPR32) &&
-                       (dst_regcm & REGCM_GPR16)) {
-                       dst_reg = (dst_reg - REGC_GPR16_FIRST) + REGC_GPR32_FIRST;
-                       if ((src_reg != dst_reg) || !omit_copy) {
-                               fprintf(fp, "\tmov %s, %s\n",
-                                       arch_reg_str(src_reg),
-                                       arch_reg_str(dst_reg));
-                       }
-               }
                /* Move from 32bit gprs to 8bit gprs */
                else if ((src_regcm & REGCM_GPR32) &&
-                       (dst_regcm & REGCM_GPR8)) {
+                       (dst_regcm & REGCM_GPR8_LO)) {
                        dst_reg = (dst_reg - REGC_GPR8_FIRST) + REGC_GPR32_FIRST;
                        if ((src_reg != dst_reg) || !omit_copy) {
                                fprintf(fp, "\tmov %s, %s\n",
@@ -16606,7 +17232,7 @@ static void print_op_move(struct compile_state *state,
                }
                /* Move from 16bit gprs to 8bit gprs */
                else if ((src_regcm & REGCM_GPR16) &&
-                       (dst_regcm & REGCM_GPR8)) {
+                       (dst_regcm & REGCM_GPR8_LO)) {
                        dst_reg = (dst_reg - REGC_GPR8_FIRST) + REGC_GPR16_FIRST;
                        if ((src_reg != dst_reg) || !omit_copy) {
                                fprintf(fp, "\tmov %s, %s\n",
@@ -16620,10 +17246,44 @@ static void print_op_move(struct compile_state *state,
                }
        }
        else {
-               fprintf(fp, "\tmov ");
-               print_const_val(state, src, fp);
-               fprintf(fp, ", %s\n",
-                       reg(state, dst, REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8));
+               int dst_reg;
+               int dst_regcm;
+               dst_reg = ID_REG(dst->id);
+               dst_regcm = arch_reg_regcm(state, dst_reg);
+               if (dst_regcm & (REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8_LO)) {
+                       fprintf(fp, "\tmov ");
+                       print_const_val(state, src, fp);
+                       fprintf(fp, ", %s\n",
+                               reg(state, dst, REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8_LO));
+               }
+               else if (dst_regcm & REGCM_DIVIDEND64) {
+                       if (size_of(state, dst->type) > 4) {
+                               internal_error(state, ins, "64bit constant...");
+                       }
+                       fprintf(fp, "\tmov $0, %%edx\n");
+                       fprintf(fp, "\tmov ");
+                       print_const_val(state, src, fp);
+                       fprintf(fp, ", %%eax\n");
+               }
+               else if (dst_regcm & REGCM_DIVIDEND32) {
+                       if (size_of(state, dst->type) > 2) {
+                               internal_error(state, ins, "32bit constant...");
+                       }
+                       /* Zero %dx, then load the constant into %ax; the line must end in \n. */
+                       fprintf(fp, "\tmov $0, %%dx\n\tmov ");
+                       print_const_val(state, src, fp);
+                       fprintf(fp, ", %%ax\n");
+               }
+               else if (dst_regcm & (REGCM_XMM | REGCM_MMX)) {
+                       long ref;
+                       ref = get_const_pool_ref(state, src, fp);
+                       fprintf(fp, "\tmovq L%s%lu, %s\n",
+                               state->label_prefix, ref,
+                               reg(state, dst, (REGCM_XMM | REGCM_MMX)));
+               }
+               else {
+                       internal_error(state, ins, "unknown copy immediate type");
+               }
        }
 }
 
@@ -16638,7 +17298,7 @@ static void print_op_load(struct compile_state *state,
        }
        fprintf(fp, "\tmov (%s), %s\n",
                reg(state, src, REGCM_GPR32),
-               reg(state, dst, REGCM_GPR8 | REGCM_GPR16 | REGCM_GPR32));
+               reg(state, dst, REGCM_GPR8_LO | REGCM_GPR16 | REGCM_GPR32));
 }
 
 
@@ -16646,8 +17306,8 @@ static void print_op_store(struct compile_state *state,
        struct triple *ins, FILE *fp)
 {
        struct triple *dst, *src;
-       dst = LHS(ins, 0);
-       src = RHS(ins, 0);
+       dst = RHS(ins, 0);
+       src = RHS(ins, 1);
        if (is_const(src) && (src->op == OP_INTCONST)) {
                long_t value;
                value = (long_t)(src->u.cval);
@@ -16659,7 +17319,7 @@ static void print_op_store(struct compile_state *state,
        else if (is_const(dst) && (dst->op == OP_INTCONST)) {
                fprintf(fp, "\tmov%s %s, 0x%08lx\n",
                        type_suffix(state, src->type),
-                       reg(state, src, REGCM_GPR8 | REGCM_GPR16 | REGCM_GPR32),
+                       reg(state, src, REGCM_GPR8_LO | REGCM_GPR16 | REGCM_GPR32),
                        dst->u.cval);
        }
        else {
@@ -16668,7 +17328,7 @@ static void print_op_store(struct compile_state *state,
                }
                fprintf(fp, "\tmov%s %s, (%s)\n",
                        type_suffix(state, src->type),
-                       reg(state, src, REGCM_GPR8 | REGCM_GPR16 | REGCM_GPR32),
+                       reg(state, src, REGCM_GPR8_LO | REGCM_GPR16 | REGCM_GPR32),
                        reg(state, dst, REGCM_GPR32));
        }
        
@@ -16695,7 +17355,7 @@ static void print_op_cmp(struct compile_state *state,
 {
        unsigned mask;
        int dreg;
-       mask = REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8;
+       mask = REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8_LO;
        dreg = check_reg(state, ins, REGCM_FLAGS);
        if (!reg_is_reg(state, dreg, REG_EFLAGS)) {
                internal_error(state, ins, "bad dest register for cmp");
@@ -16723,7 +17383,7 @@ static void print_op_test(struct compile_state *state,
        struct triple *ins, FILE *fp)
 {
        unsigned mask;
-       mask = REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8;
+       mask = REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8_LO;
        fprintf(fp, "\ttest %s, %s\n",
                reg(state, RHS(ins, 0), mask),
                reg(state, RHS(ins, 0), mask));
@@ -16810,7 +17470,7 @@ static void print_op_set(struct compile_state *state,
                break;
        }
        fprintf(fp, "\t%s %s\n",
-               sop, reg(state, set, REGCM_GPR8));
+               sop, reg(state, set, REGCM_GPR8_LO));
 }
 
 static void print_op_bit_scan(struct compile_state *state, 
@@ -16836,50 +17496,6 @@ static void print_op_bit_scan(struct compile_state *state,
                reg(state, ins, REGCM_GPR32));
 }
 
-static void print_const(struct compile_state *state,
-       struct triple *ins, FILE *fp)
-{
-       switch(ins->op) {
-       case OP_INTCONST:
-               switch(ins->type->type & TYPE_MASK) {
-               case TYPE_CHAR:
-               case TYPE_UCHAR:
-                       fprintf(fp, ".byte 0x%02lx\n", ins->u.cval);
-                       break;
-               case TYPE_SHORT:
-               case TYPE_USHORT:
-                       fprintf(fp, ".short 0x%04lx\n", ins->u.cval);
-                       break;
-               case TYPE_INT:
-               case TYPE_UINT:
-               case TYPE_LONG:
-               case TYPE_ULONG:
-                       fprintf(fp, ".int %lu\n", ins->u.cval);
-                       break;
-               default:
-                       internal_error(state, ins, "Unknown constant type");
-               }
-               break;
-       case OP_BLOBCONST:
-       {
-               unsigned char *blob;
-               size_t size, i;
-               size = size_of(state, ins->type);
-               blob = ins->u.blob;
-               for(i = 0; i < size; i++) {
-                       fprintf(fp, ".byte 0x%02x\n",
-                               blob[i]);
-               }
-               break;
-       }
-       default:
-               internal_error(state, ins, "Unknown constant type");
-               break;
-       }
-}
-
-#define TEXT_SECTION ".rom.text"
-#define DATA_SECTION ".rom.data"
 
 static void print_sdecl(struct compile_state *state,
        struct triple *ins, FILE *fp)
@@ -16923,7 +17539,6 @@ static void print_instruction(struct compile_state *state,
        case OP_SDECL:
                print_sdecl(state, ins, fp);
                break;
-       case OP_WRITE: 
        case OP_COPY:   
                print_op_move(state, ins, fp);
                break;
@@ -16973,6 +17588,15 @@ static void print_instruction(struct compile_state *state,
        case OP_HLT:
                fprintf(fp, "\thlt\n");
                break;
+       case OP_SDIVT:
+               fprintf(fp, "\tidiv %s\n", reg(state, RHS(ins, 1), REGCM_GPR32));
+               break;
+       case OP_UDIVT:
+               fprintf(fp, "\tdiv %s\n", reg(state, RHS(ins, 1), REGCM_GPR32));
+               break;
+       case OP_UMUL:
+               fprintf(fp, "\tmul %s\n", reg(state, RHS(ins, 1), REGCM_GPR32));
+               break;
        case OP_LABEL:
                if (!ins->use) {
                        return;
@@ -16982,11 +17606,9 @@ static void print_instruction(struct compile_state *state,
                /* Ignore OP_PIECE */
        case OP_PIECE:
                break;
-               /* Operations I am not yet certain how to handle */
-       case OP_UMUL:
+               /* Operations that should never get here */
        case OP_SDIV: case OP_UDIV:
        case OP_SMOD: case OP_UMOD:
-               /* Operations that should never get here */
        case OP_LTRUE:   case OP_LFALSE:  case OP_EQ:      case OP_NOTEQ:
        case OP_SLESS:   case OP_ULESS:   case OP_SMORE:   case OP_UMORE:
        case OP_SLESSEQ: case OP_ULESSEQ: case OP_SMOREEQ: case OP_UMOREEQ:
@@ -17003,6 +17625,8 @@ static void print_instructions(struct compile_state *state)
        int print_location;
        struct occurance *last_occurance;
        FILE *fp;
+       int max_inline_depth;
+       max_inline_depth = 0;
        print_location = 1;
        last_occurance = 0;
        fp = state->output;
@@ -17021,8 +17645,11 @@ static void print_instructions(struct compile_state *state)
                        }
                        else {
                                struct occurance *ptr;
+                               int inline_depth;
                                fprintf(fp, "\t/*\n");
+                               inline_depth = 0;
                                for(ptr = ins->occurance; ptr; ptr = ptr->parent) {
+                                       inline_depth++;
                                        fprintf(fp, "\t * %s,%s:%d.%d\n",
                                                ptr->function,
                                                ptr->filename,
@@ -17030,7 +17657,9 @@ static void print_instructions(struct compile_state *state)
                                                ptr->col);
                                }
                                fprintf(fp, "\t */\n");
-                               
+                               if (inline_depth > max_inline_depth) {
+                                       max_inline_depth = inline_depth;
+                               }
                        }
                        if (last_occurance) {
                                put_occurance(last_occurance);
@@ -17042,8 +17671,12 @@ static void print_instructions(struct compile_state *state)
                print_instruction(state, ins, fp);
                ins = ins->next;
        } while(ins != first);
-       
+       if (print_location) {
+               fprintf(fp, "/* max inline depth %d */\n",
+                       max_inline_depth);
+       }
 }
+
 static void generate_code(struct compile_state *state)
 {
        generate_local_labels(state);