- Update romcc to version 0.37
[coreboot.git] / util / romcc / romcc.c
index 5c784fc5e67c94c2536713a0d26b36585798fb51..648f7cae44a24f2b7eb73fc82a882e6d87750e77 100644 (file)
 #include <string.h>
 #include <limits.h>
 
-#define DEBUG_ERROR_MESSAGES 0
-#define DEBUG_COLOR_GRAPH 0
-#define DEBUG_SCC 0
-#define DEBUG_CONSISTENCY 1
+#define MAX_ALLOCATION_PASSES 100
+
+#define DEBUG_CONSISTENCY 2
+#define DEBUG_SDP_BLOCKS 0
+#define DEBUG_TRIPLE_COLOR 0
 
 #warning "FIXME boundary cases with small types in larger registers"
 #warning "FIXME give clear error messages about unused variables"
+#warning "FIXME properly handle multi dimensional arrays"
 
 /*  Control flow graph of a loop without goto.
  * 
@@ -202,9 +204,32 @@ static char *slurp_file(const char *dirname, const char *filename, off_t *r_size
        return buf;
 }
 
-/* Long on the destination platform */
-typedef unsigned long ulong_t;
-typedef long long_t;
+/* Types on the destination platform */
+#warning "FIXME this assumes 32bit x86 is the destination"
+typedef int8_t   schar_t;
+typedef uint8_t  uchar_t;
+typedef int8_t   char_t;
+typedef int16_t  short_t;
+typedef uint16_t ushort_t;
+typedef int32_t  int_t;
+typedef uint32_t uint_t;
+typedef int32_t  long_t;
+typedef uint32_t ulong_t;
+
+#define SCHAR_T_MIN (-128)
+#define SCHAR_T_MAX 127
+#define UCHAR_T_MAX 255
+#define CHAR_T_MIN  SCHAR_T_MIN
+#define CHAR_T_MAX  SCHAR_T_MAX
+#define SHRT_T_MIN  (-32768)
+#define SHRT_T_MAX  32767
+#define USHRT_T_MAX 65535
+#define INT_T_MIN   (-LONG_T_MAX - 1)
+#define INT_T_MAX   2147483647
+#define UINT_T_MAX  4294967295U
+#define LONG_T_MIN  (-LONG_T_MAX - 1)
+#define LONG_T_MAX  2147483647
+#define ULONG_T_MAX 4294967295U
 
 struct file_state {
        struct file_state *prev;
@@ -259,23 +284,25 @@ struct token {
 /* Operations on general purpose registers.
  */
 
-#define OP_SMUL       0
-#define OP_UMUL       1
-#define OP_SDIV       2
-#define OP_UDIV       3
-#define OP_SMOD       4
-#define OP_UMOD       5
-#define OP_ADD        6
-#define OP_SUB        7
-#define OP_SL         8
-#define OP_USR        9
-#define OP_SSR       10 
-#define OP_AND       11 
-#define OP_XOR       12
-#define OP_OR        13
-#define OP_POS       14 /* Dummy positive operator don't use it */
-#define OP_NEG       15
-#define OP_INVERT    16
+#define OP_SDIVT      0
+#define OP_UDIVT      1
+#define OP_SMUL       2
+#define OP_UMUL       3
+#define OP_SDIV       4
+#define OP_UDIV       5
+#define OP_SMOD       6
+#define OP_UMOD       7
+#define OP_ADD        8
+#define OP_SUB        9
+#define OP_SL        10
+#define OP_USR       11
+#define OP_SSR       12 
+#define OP_AND       13 
+#define OP_XOR       14
+#define OP_OR        15
+#define OP_POS       16 /* Dummy positive operator don't use it */
+#define OP_NEG       17
+#define OP_INVERT    18
                     
 #define OP_EQ        20
 #define OP_NOTEQ     21
@@ -293,6 +320,10 @@ struct token {
 
 #define OP_LOAD      32
 #define OP_STORE     33
+/* For OP_STORE ->type holds the type
+ * RHS(0) holds the destination address
+ * RHS(1) holds the value to store.
+ */
 
 #define OP_NOOP      34
 
@@ -300,6 +331,9 @@ struct token {
 #define OP_MAX_CONST 59
 #define IS_CONST_OP(X) (((X) >= OP_MIN_CONST) && ((X) <= OP_MAX_CONST))
 #define OP_INTCONST  50
+/* For OP_INTCONST ->type holds the type.
+ * ->u.cval holds the constant value.
+ */
 #define OP_BLOBCONST 51
 /* For OP_BLOBCONST ->type holds the layout and size
  * information.  u.blob holds a pointer to the raw binary
@@ -313,8 +347,8 @@ struct token {
 
 #define OP_WRITE     60 
 /* OP_WRITE moves one pseudo register to another.
- * LHS(0) holds the destination pseudo register, which must be an OP_DECL.
- * RHS(0) holds the psuedo to move.
+ * RHS(0) holds the destination pseudo register, which must be an OP_DECL.
+ * RHS(1) holds the pseudo register to move.
  */
 
 #define OP_READ      61
@@ -383,8 +417,8 @@ struct token {
  * Not seen outside of expressions.
  */
 
-#define OP_CALL      72
-/* OP_CALL performs a procedure call. 
+#define OP_FCALL      72
+/* OP_FCALL performs a procedure call. 
  * MISC(0) holds a pointer to the OP_LIST of a function
  * RHS(x) holds argument x of a function
  * 
@@ -399,30 +433,52 @@ struct token {
 
 /* statements */
 #define OP_LIST      80
-/* OP_LIST Holds a list of statements, and a result value.
+/* OP_LIST Holds a list of statements that compose a function, and a result value.
  * RHS(0) holds the list of statements.
  * MISC(0) holds the value of the statements.
+ * A list of all functions is maintained.
  */
 
-#define OP_BRANCH    81 /* branch */
+#define OP_BRANCH    81 /* an unconditional branch */
 /* For branch instructions
  * TARG(0) holds the branch target.
- * RHS(0) if present holds the branch condition.
  * ->next holds where to branch to if the branch is not taken.
- * The branch target can only be a decl...
+ * The branch target can only be a label
+ */
+
+#define OP_CBRANCH   82 /* a conditional branch */
+/* For conditional branch instructions
+ * RHS(0) holds the branch condition.
+ * TARG(0) holds the branch target.
+ * ->next holds where to branch to if the branch is not taken.
+ * The branch target can only be a label
+ */
+
+#define OP_CALL      83 /* an unconditional branch that will return */
+/* For call instructions
+ * MISC(0) holds the OP_RET that returns from the branch
+ * TARG(0) holds the branch target.
+ * ->next holds where to branch to if the branch is not taken.
+ * The branch target can only be a label
+ */
+
+#define OP_RET       84 /* an unconditional branch through a variable back to an OP_CALL */
+/* For return instructions
+ * RHS(0) holds the variable with the return address
+ * The branch target can only be a label
  */
 
-#define OP_LABEL     83
+#define OP_LABEL     86
 /* OP_LABEL is a triple that establishes an target for branches.
  * ->use is the list of all branches that use this label.
  */
 
-#define OP_ADECL     84 
-/* OP_DECL is a triple that establishes an lvalue for assignments.
+#define OP_ADECL     87 
+/* OP_ADECL is a triple that establishes an lvalue for assignments.
  * ->use is a list of statements that use the variable.
  */
 
-#define OP_SDECL     85
+#define OP_SDECL     88
 /* OP_SDECL is a triple that establishes a variable of static
  * storage duration.
  * ->use is a list of statements that use the variable.
@@ -430,12 +486,12 @@ struct token {
  */
 
 
-#define OP_PHI       86
+#define OP_PHI       89
 /* OP_PHI is a triple used in SSA form code.  
  * It is used when multiple code paths merge and a variable needs
  * a single assignment from any of those code paths.
  * The operation is a cross between OP_DECL and OP_WRITE, which
- * is what OP_PHI is geneared from.
+ * is what OP_PHI is generated from.
  * 
  * RHS(x) points to the value from code path x
  * The number of RHS entries is the number of control paths into the block
@@ -487,11 +543,14 @@ struct token {
 struct op_info {
        const char *name;
        unsigned flags;
-#define PURE   1
-#define IMPURE 2
+#define PURE   1 /* Triple has no side effects */
+#define IMPURE 2 /* Triple has side effects */
 #define PURE_BITS(FLAGS) ((FLAGS) & 0x3)
-#define DEF    4
+#define DEF    4 /* Triple is a variable definition */
 #define BLOCK  8 /* Triple stores the current block */
+#define STRUCTURAL 16 /* Triple does not generate a machine instruction */
+#define BRANCH     32 /* Triple is a branch instruction */
+#define CBRANCH    64 /* Triple is a conditional branch instruction */
        unsigned char lhs, rhs, misc, targ;
 };
 
@@ -504,6 +563,8 @@ struct op_info {
        .targ = (TARG), \
         }
 static const struct op_info table_ops[] = {
+[OP_SDIVT      ] = OP( 2,  2, 0, 0, PURE | BLOCK , "sdivt"),
+[OP_UDIVT      ] = OP( 2,  2, 0, 0, PURE | BLOCK , "udivt"),
 [OP_SMUL       ] = OP( 0,  2, 0, 0, PURE | DEF | BLOCK , "smul"),
 [OP_UMUL       ] = OP( 0,  2, 0, 0, PURE | DEF | BLOCK , "umul"),
 [OP_SDIV       ] = OP( 0,  2, 0, 0, PURE | DEF | BLOCK , "sdiv"),
@@ -536,18 +597,18 @@ static const struct op_info table_ops[] = {
 [OP_LTRUE      ] = OP( 0,  1, 0, 0, PURE | DEF | BLOCK , "ltrue"),
 
 [OP_LOAD       ] = OP( 0,  1, 0, 0, IMPURE | DEF | BLOCK, "load"),
-[OP_STORE      ] = OP( 1,  1, 0, 0, IMPURE | BLOCK , "store"),
+[OP_STORE      ] = OP( 0,  2, 0, 0, IMPURE | BLOCK , "store"),
 
-[OP_NOOP       ] = OP( 0,  0, 0, 0, PURE | BLOCK, "noop"),
+[OP_NOOP       ] = OP( 0,  0, 0, 0, PURE | BLOCK | STRUCTURAL, "noop"),
 
 [OP_INTCONST   ] = OP( 0,  0, 0, 0, PURE | DEF, "intconst"),
-[OP_BLOBCONST  ] = OP( 0,  0, 0, 0, PURE, "blobconst"),
+[OP_BLOBCONST  ] = OP( 0,  0, 0, 0, PURE , "blobconst"),
 [OP_ADDRCONST  ] = OP( 0,  0, 1, 0, PURE | DEF, "addrconst"),
 
-[OP_WRITE      ] = OP( 1,  1, 0, 0, PURE | BLOCK, "write"),
+[OP_WRITE      ] = OP( 0,  2, 0, 0, PURE | BLOCK, "write"),
 [OP_READ       ] = OP( 0,  1, 0, 0, PURE | DEF | BLOCK, "read"),
 [OP_COPY       ] = OP( 0,  1, 0, 0, PURE | DEF | BLOCK, "copy"),
-[OP_PIECE      ] = OP( 0,  0, 1, 0, PURE | DEF, "piece"),
+[OP_PIECE      ] = OP( 0,  0, 1, 0, PURE | DEF | STRUCTURAL, "piece"),
 [OP_ASM        ] = OP(-1, -1, 0, 0, IMPURE, "asm"),
 [OP_DEREF      ] = OP( 0,  1, 0, 0, 0 | DEF | BLOCK, "deref"), 
 [OP_DOT        ] = OP( 0,  1, 0, 0, 0 | DEF | BLOCK, "dot"),
@@ -558,16 +619,18 @@ static const struct op_info table_ops[] = {
 [OP_COND       ] = OP( 0,  3, 0, 0, 0 | DEF | BLOCK, "cond"),
 [OP_COMMA      ] = OP( 0,  2, 0, 0, 0 | DEF | BLOCK, "comma"),
 /* Call is special most it can stand in for anything so it depends on context */
-[OP_CALL       ] = OP(-1, -1, 1, 0, 0 | BLOCK, "call"),
-/* The sizes of OP_CALL and OP_VAL_VEC depend upon context */
-[OP_VAL_VEC    ] = OP( 0, -1, 0, 0, 0 | BLOCK, "valvec"),
-
-[OP_LIST       ] = OP( 0,  1, 1, 0, 0 | DEF, "list"),
-/* The number of targets for OP_BRANCH depends on context */
-[OP_BRANCH     ] = OP( 0, -1, 0, 1, PURE | BLOCK, "branch"),
-[OP_LABEL      ] = OP( 0,  0, 0, 0, PURE | BLOCK, "label"),
-[OP_ADECL      ] = OP( 0,  0, 0, 0, PURE | BLOCK, "adecl"),
-[OP_SDECL      ] = OP( 0,  0, 1, 0, PURE | BLOCK, "sdecl"),
+[OP_FCALL       ] = OP(-1, -1, 1, 0, 0 | BLOCK, "fcall"),
+/* The sizes of OP_FCALL and OP_VAL_VEC depend upon context */
+[OP_VAL_VEC    ] = OP( 0, -1, 0, 0, 0 | BLOCK | STRUCTURAL, "valvec"),
+
+[OP_LIST       ] = OP( 0,  1, 1, 0, 0 | DEF | STRUCTURAL, "list"),
+[OP_BRANCH     ] = OP( 0,  0, 0, 1, PURE | BLOCK | BRANCH, "branch"),
+[OP_CBRANCH    ] = OP( 0,  1, 0, 1, PURE | BLOCK | BRANCH | CBRANCH, "cbranch"),
+[OP_CALL       ] = OP( 0,  0, 1, 1, PURE | BLOCK | BRANCH, "call"),
+[OP_RET        ] = OP( 0,  1, 0, 0, PURE | BLOCK | BRANCH, "ret"),
+[OP_LABEL      ] = OP( 0,  0, 0, 0, PURE | BLOCK | STRUCTURAL, "label"),
+[OP_ADECL      ] = OP( 0,  0, 0, 0, PURE | BLOCK | STRUCTURAL, "adecl"),
+[OP_SDECL      ] = OP( 0,  0, 1, 0, PURE | BLOCK | STRUCTURAL, "sdecl"),
 /* The number of RHS elements of OP_PHI depend upon context */
 [OP_PHI        ] = OP( 0, -1, 1, 0, PURE | DEF | BLOCK, "phi"),
 
@@ -583,17 +646,17 @@ static const struct op_info table_ops[] = {
 [OP_SET_ULESSEQ] = OP( 0,  1, 0, 0, PURE | DEF | BLOCK, "set_ulesseq"),
 [OP_SET_SMOREEQ] = OP( 0,  1, 0, 0, PURE | DEF | BLOCK, "set_smoreq"),
 [OP_SET_UMOREEQ] = OP( 0,  1, 0, 0, PURE | DEF | BLOCK, "set_umoreq"),
-[OP_JMP        ] = OP( 0,  0, 0, 1, PURE | BLOCK, "jmp"),
-[OP_JMP_EQ     ] = OP( 0,  1, 0, 1, PURE | BLOCK, "jmp_eq"),
-[OP_JMP_NOTEQ  ] = OP( 0,  1, 0, 1, PURE | BLOCK, "jmp_noteq"),
-[OP_JMP_SLESS  ] = OP( 0,  1, 0, 1, PURE | BLOCK, "jmp_sless"),
-[OP_JMP_ULESS  ] = OP( 0,  1, 0, 1, PURE | BLOCK, "jmp_uless"),
-[OP_JMP_SMORE  ] = OP( 0,  1, 0, 1, PURE | BLOCK, "jmp_smore"),
-[OP_JMP_UMORE  ] = OP( 0,  1, 0, 1, PURE | BLOCK, "jmp_umore"),
-[OP_JMP_SLESSEQ] = OP( 0,  1, 0, 1, PURE | BLOCK, "jmp_slesseq"),
-[OP_JMP_ULESSEQ] = OP( 0,  1, 0, 1, PURE | BLOCK, "jmp_ulesseq"),
-[OP_JMP_SMOREEQ] = OP( 0,  1, 0, 1, PURE | BLOCK, "jmp_smoreq"),
-[OP_JMP_UMOREEQ] = OP( 0,  1, 0, 1, PURE | BLOCK, "jmp_umoreq"),
+[OP_JMP        ] = OP( 0,  0, 0, 1, PURE | BLOCK | BRANCH, "jmp"),
+[OP_JMP_EQ     ] = OP( 0,  1, 0, 1, PURE | BLOCK | BRANCH | CBRANCH, "jmp_eq"),
+[OP_JMP_NOTEQ  ] = OP( 0,  1, 0, 1, PURE | BLOCK | BRANCH | CBRANCH, "jmp_noteq"),
+[OP_JMP_SLESS  ] = OP( 0,  1, 0, 1, PURE | BLOCK | BRANCH | CBRANCH, "jmp_sless"),
+[OP_JMP_ULESS  ] = OP( 0,  1, 0, 1, PURE | BLOCK | BRANCH | CBRANCH, "jmp_uless"),
+[OP_JMP_SMORE  ] = OP( 0,  1, 0, 1, PURE | BLOCK | BRANCH | CBRANCH, "jmp_smore"),
+[OP_JMP_UMORE  ] = OP( 0,  1, 0, 1, PURE | BLOCK | BRANCH | CBRANCH, "jmp_umore"),
+[OP_JMP_SLESSEQ] = OP( 0,  1, 0, 1, PURE | BLOCK | BRANCH | CBRANCH, "jmp_slesseq"),
+[OP_JMP_ULESSEQ] = OP( 0,  1, 0, 1, PURE | BLOCK | BRANCH | CBRANCH, "jmp_ulesseq"),
+[OP_JMP_SMOREEQ] = OP( 0,  1, 0, 1, PURE | BLOCK | BRANCH | CBRANCH, "jmp_smoreq"),
+[OP_JMP_UMOREEQ] = OP( 0,  1, 0, 1, PURE | BLOCK | BRANCH | CBRANCH, "jmp_umoreq"),
 
 [OP_INB        ] = OP( 0,  1, 0, 0, IMPURE | DEF | BLOCK, "__inb"),
 [OP_INW        ] = OP( 0,  1, 0, 0, IMPURE | DEF | BLOCK, "__inw"),
@@ -631,9 +694,9 @@ struct triple_set {
 };
 
 #define MAX_LHS  15
-#define MAX_RHS  15
-#define MAX_MISC 15
-#define MAX_TARG 15
+#define MAX_RHS  250
+#define MAX_MISC 3
+#define MAX_TARG 3
 
 struct occurance {
        int count;
@@ -651,19 +714,19 @@ struct triple {
        unsigned char template_id;
        unsigned short sizes;
 #define TRIPLE_LHS(SIZES)  (((SIZES) >>  0) & 0x0f)
-#define TRIPLE_RHS(SIZES)  (((SIZES) >>  4) & 0x0f)
-#define TRIPLE_MISC(SIZES) (((SIZES) >>  8) & 0x0f)
-#define TRIPLE_TARG(SIZES) (((SIZES) >> 12) & 0x0f)
+#define TRIPLE_RHS(SIZES)  (((SIZES) >>  4) & 0xff)
+#define TRIPLE_MISC(SIZES) (((SIZES) >> 12) & 0x03)
+#define TRIPLE_TARG(SIZES) (((SIZES) >> 14) & 0x03)
 #define TRIPLE_SIZE(SIZES) \
-       ((((SIZES) >> 0) & 0x0f) + \
-       (((SIZES) >>  4) & 0x0f) + \
-       (((SIZES) >>  8) & 0x0f) + \
-       (((SIZES) >> 12) & 0x0f))
+       (TRIPLE_LHS(SIZES)  + \
+        TRIPLE_RHS(SIZES)  + \
+        TRIPLE_MISC(SIZES) + \
+        TRIPLE_TARG(SIZES))
 #define TRIPLE_SIZES(LHS, RHS, MISC, TARG) \
        ((((LHS) & 0x0f) <<  0) | \
-       (((RHS) & 0x0f)  <<  4) | \
-       (((MISC) & 0x0f) <<  8) | \
-       (((TARG) & 0x0f) << 12))
+       (((RHS)  & 0xff) <<  4) | \
+       (((MISC) & 0x03) << 12) | \
+       (((TARG) & 0x03) << 14))
 #define TRIPLE_LHS_OFF(SIZES)  (0)
 #define TRIPLE_RHS_OFF(SIZES)  (TRIPLE_LHS_OFF(SIZES) + TRIPLE_LHS(SIZES))
 #define TRIPLE_MISC_OFF(SIZES) (TRIPLE_RHS_OFF(SIZES) + TRIPLE_RHS(SIZES))
@@ -676,6 +739,8 @@ struct triple {
 #define TRIPLE_FLAG_FLATTENED   (1 << 31)
 #define TRIPLE_FLAG_PRE_SPLIT   (1 << 30)
 #define TRIPLE_FLAG_POST_SPLIT  (1 << 29)
+#define TRIPLE_FLAG_VOLATILE    (1 << 28)
+#define TRIPLE_FLAG_LOCAL      (1 << 27)
        struct occurance *occurance;
        union {
                ulong_t cval;
@@ -706,8 +771,9 @@ struct block_set {
 };
 struct block {
        struct block *work_next;
-       struct block *left, *right;
        struct triple *first, *last;
+       int edge_count;
+       struct block_set *edges;
        int users;
        struct block_set *use;
        struct block_set *idominates;
@@ -741,68 +807,84 @@ struct hash_entry {
        int tok;
        struct macro *sym_define;
        struct symbol *sym_label;
-       struct symbol *sym_struct;
+       struct symbol *sym_tag;
        struct symbol *sym_ident;
 };
 
 #define HASH_TABLE_SIZE 2048
 
-struct compile_state {
+struct compiler_state {
        const char *label_prefix;
        const char *ofilename;
+       unsigned long flags;
+       unsigned long debug;
+       unsigned long max_allocation_passes;
+};
+struct arch_state {
+       unsigned long features;
+};
+struct compile_state {
+       struct compiler_state *compiler;
+       struct arch_state *arch;
        FILE *output;
-       struct triple *vars;
        struct file_state *file;
        struct occurance *last_occurance;
        const char *function;
        struct token token[4];
        struct hash_entry *hash_table[HASH_TABLE_SIZE];
+       struct hash_entry *i_switch;
+       struct hash_entry *i_case;
        struct hash_entry *i_continue;
        struct hash_entry *i_break;
+       struct hash_entry *i_default;
+       struct hash_entry *i_return;
        int scope_depth;
        int if_depth, if_value;
        int macro_line;
        struct file_state *macro_file;
+       struct triple *functions;
        struct triple *main_function;
+       struct triple *first;
+       struct triple *global_pool;
        struct block *first_block, *last_block;
        int last_vertex;
-       int cpu;
-       int debug;
-       int optimize;
 };
 
 /* visibility global/local */
 /* static/auto duration */
 /* typedef, register, inline */
 #define STOR_SHIFT         0
-#define STOR_MASK     0x000f
+#define STOR_MASK     0x001f
 /* Visibility */
 #define STOR_GLOBAL   0x0001
 /* Duration */
 #define STOR_PERM     0x0002
+/* Definition locality */
+#define STOR_NONLOCAL 0x0004  /* The definition is not in this translation unit */
 /* Storage specifiers */
 #define STOR_AUTO     0x0000
 #define STOR_STATIC   0x0002
-#define STOR_EXTERN   0x0003
-#define STOR_REGISTER 0x0004
-#define STOR_TYPEDEF  0x0008
-#define STOR_INLINE   0x000c
-
-#define QUAL_SHIFT         4
-#define QUAL_MASK     0x0070
+#define STOR_LOCAL    0x0003
+#define STOR_EXTERN   0x0007
+#define STOR_INLINE   0x0008
+#define STOR_REGISTER 0x0010
+#define STOR_TYPEDEF  0x0018
+
+#define QUAL_SHIFT         5
+#define QUAL_MASK     0x00e0
 #define QUAL_NONE     0x0000
-#define QUAL_CONST    0x0010
-#define QUAL_VOLATILE 0x0020
-#define QUAL_RESTRICT 0x0040
+#define QUAL_CONST    0x0020
+#define QUAL_VOLATILE 0x0040
+#define QUAL_RESTRICT 0x0080
 
 #define TYPE_SHIFT         8
 #define TYPE_MASK     0x1f00
-#define TYPE_INTEGER(TYPE)    (((TYPE) >= TYPE_CHAR) && ((TYPE) <= TYPE_ULLONG))
-#define TYPE_ARITHMETIC(TYPE) (((TYPE) >= TYPE_CHAR) && ((TYPE) <= TYPE_LDOUBLE))
+#define TYPE_INTEGER(TYPE)    ((((TYPE) >= TYPE_CHAR) && ((TYPE) <= TYPE_ULLONG)) || ((TYPE) == TYPE_ENUM))
+#define TYPE_ARITHMETIC(TYPE) ((((TYPE) >= TYPE_CHAR) && ((TYPE) <= TYPE_LDOUBLE)) || ((TYPE) == TYPE_ENUM))
 #define TYPE_UNSIGNED(TYPE)   ((TYPE) & 0x0100)
 #define TYPE_SIGNED(TYPE)     (!TYPE_UNSIGNED(TYPE))
-#define TYPE_MKUNSIGNED(TYPE) ((TYPE) | 0x0100)
-#define TYPE_RANK(TYPE)       ((TYPE) & ~0x0100)
+#define TYPE_MKUNSIGNED(TYPE) (((TYPE) & ~0xF000) | 0x0100)
+#define TYPE_RANK(TYPE)       ((TYPE) & ~0xF1FF)
 #define TYPE_PTR(TYPE)        (((TYPE) & TYPE_MASK) == TYPE_POINTER)
 #define TYPE_DEFAULT  0x0000
 #define TYPE_VOID     0x0100
@@ -819,8 +901,17 @@ struct compile_state {
 #define TYPE_FLOAT    0x0c00
 #define TYPE_DOUBLE   0x0d00
 #define TYPE_LDOUBLE  0x0e00 /* long double */
+
+/* Note: TYPE_ENUM is chosen very carefully so TYPE_RANK works */
+#define TYPE_ENUM     0x1600
+#define TYPE_LIST     0x1700
+/* TYPE_LIST is a basic building block when defining enumerations
+ * type->field_ident holds the name of this enumeration entry.
+ * type->right holds the entry in the list.
+ */
+
 #define TYPE_STRUCT   0x1000
-#define TYPE_ENUM     0x1100
+#define TYPE_UNION    0x1100
 #define TYPE_POINTER  0x1200 
 /* For TYPE_POINTER:
  * type->left holds the type pointed to.
@@ -840,13 +931,13 @@ struct compile_state {
  * type->left and type->right holds to types that overlap
  * each other in memory.
  */
-#define TYPE_ARRAY    0x1600
+#define TYPE_ARRAY    0x1800
 /* TYPE_ARRAY is a basic building block when definitng arrays.
  * type->left holds the type we are an array of.
  * type-> holds the number of elements.
  */
 
-#define ELEMENT_COUNT_UNSPECIFIED (~0UL)
+#define ELEMENT_COUNT_UNSPECIFIED ULONG_T_MAX
 
 struct type {
        unsigned int type;
@@ -856,17 +947,13 @@ struct type {
        struct hash_entry *type_ident;
 };
 
-#define MAX_REGISTERS      75
+#define TEMPLATE_BITS      7
+#define MAX_TEMPLATES      (1<<TEMPLATE_BITS)
 #define MAX_REG_EQUIVS     16
-#if 1
-#define REGISTER_BITS      16
-#else
-#define REGISTER_BITS      28
-#endif
+#define MAX_REGC           14
+#define MAX_REGISTERS      75
+#define REGISTER_BITS      7
 #define MAX_VIRT_REGISTERS (1<<REGISTER_BITS)
-#define TEMPLATE_BITS      6
-#define MAX_TEMPLATES      (1<<TEMPLATE_BITS)
-#define MAX_REGC           12
 #define REG_UNSET          0
 #define REG_UNNEEDED       1
 #define REG_VIRT0          (MAX_REGISTERS + 0)
@@ -875,13 +962,19 @@ struct type {
 #define REG_VIRT3          (MAX_REGISTERS + 3)
 #define REG_VIRT4          (MAX_REGISTERS + 4)
 #define REG_VIRT5          (MAX_REGISTERS + 5)
-#define REG_VIRT6          (MAX_REGISTERS + 5)
-#define REG_VIRT7          (MAX_REGISTERS + 5)
-#define REG_VIRT8          (MAX_REGISTERS + 5)
-#define REG_VIRT9          (MAX_REGISTERS + 5)
+#define REG_VIRT6          (MAX_REGISTERS + 6)
+#define REG_VIRT7          (MAX_REGISTERS + 7)
+#define REG_VIRT8          (MAX_REGISTERS + 8)
+#define REG_VIRT9          (MAX_REGISTERS + 9)
+
+/* The register-class field starts at bit REGISTER_BITS, so every register
+ * number, including the largest one (REG_VIRT9 == MAX_REGISTERS + 9), has
+ * to be strictly less than MAX_VIRT_REGISTERS.  Equality must be rejected
+ * too: that value no longer fits in the low REGISTER_BITS bits.
+ */
+#if (MAX_REGISTERS + 9) >= MAX_VIRT_REGISTERS
+#error "MAX_VIRT_REGISTERS to small"
+#endif
+#if (MAX_REGC + REGISTER_BITS) > 27
+#error "Too many id bits used"
+#endif
 
 /* Provision for 8 register classes */
-#if 1
 #define REG_SHIFT  0
 #define REGC_SHIFT REGISTER_BITS
 #define REGC_MASK (((1 << MAX_REGC) - 1) << REGISTER_BITS)
@@ -892,14 +985,10 @@ struct type {
 #define SET_REGCM(ID, REGCM)   ((ID) = (((ID) & ~REGC_MASK) | (((REGCM) << REGC_SHIFT) & REGC_MASK)))
 #define SET_INFO(ID, INFO)     ((ID) = (((ID) & ~(REG_MASK | REGC_MASK)) | \
                (((INFO).reg) & REG_MASK) | ((((INFO).regcm) << REGC_SHIFT) & REGC_MASK)))
-#else
-#define REG_MASK (MAX_VIRT_REGISTERS -1)
-#define ID_REG(ID)              ((ID) & REG_MASK)
-#define SET_REG(ID, REG)        ((ID) = (((ID) & ~REG_MASK) | ((REG) & REG_MASK)))
-#endif
 
 static unsigned arch_reg_regcm(struct compile_state *state, int reg);
 static unsigned arch_regcm_normalize(struct compile_state *state, unsigned regcm);
+static unsigned arch_regcm_reg_normalize(struct compile_state *state, unsigned regcm);
 static void arch_reg_equivs(
        struct compile_state *state, unsigned *equiv, int reg);
 static int arch_select_free_register(
@@ -921,27 +1010,191 @@ static struct triple *transform_to_arch_instruction(
 
 
 
-#define DEBUG_ABORT_ON_ERROR    0x0001
-#define DEBUG_INTERMEDIATE_CODE 0x0002
-#define DEBUG_CONTROL_FLOW      0x0004
-#define DEBUG_BASIC_BLOCKS      0x0008
-#define DEBUG_FDOMINATORS       0x0010
-#define DEBUG_RDOMINATORS       0x0020
-#define DEBUG_TRIPLES           0x0040
-#define DEBUG_INTERFERENCE      0x0080
-#define DEBUG_ARCH_CODE         0x0100
-#define DEBUG_CODE_ELIMINATION  0x0200
-#define DEBUG_INSERTED_COPIES   0x0400
-
-#define GLOBAL_SCOPE_DEPTH 1
+#define DEBUG_ABORT_ON_ERROR    0x00000001
+#define DEBUG_BASIC_BLOCKS      0x00000002
+#define DEBUG_FDOMINATORS       0x00000004
+#define DEBUG_RDOMINATORS       0x00000008
+#define DEBUG_TRIPLES           0x00000010
+#define DEBUG_INTERFERENCE      0x00000020
+#define DEBUG_SCC_TRANSFORM     0x00000040
+#define DEBUG_SCC_TRANSFORM2    0x00000080
+#define DEBUG_REBUILD_SSA_FORM  0x00000100
+#define DEBUG_INLINE            0x00000200
+#define DEBUG_RANGE_CONFLICTS   0x00000400
+#define DEBUG_RANGE_CONFLICTS2  0x00000800
+#define DEBUG_COLOR_GRAPH       0x00001000
+#define DEBUG_COLOR_GRAPH2      0x00002000
+#define DEBUG_COALESCING        0x00004000
+#define DEBUG_COALESCING2       0x00008000
+
+#define DEBUG_DEFAULT ( \
+       DEBUG_ABORT_ON_ERROR | \
+       DEBUG_BASIC_BLOCKS | \
+       DEBUG_FDOMINATORS | \
+       DEBUG_RDOMINATORS | \
+       DEBUG_TRIPLES | \
+       0 )
+
+#define COMPILER_ELIMINATE_INEFECTUAL_CODE 0x00000001
+#define COMPILER_SIMPLIFY                  0x00000002
+#define COMPILER_SCC_TRANSFORM             0x00000004
+#define COMPILER_INLINE                    0x00000008
+#define COMPILER_ALWAYS_INLINE             0x00000010
+#define COMPILER_SIMPLIFY_OP               0x00000020
+#define COMPILER_SIMPLIFY_PHI              0x00000040
+#define COMPILER_SIMPLIFY_LABEL            0x00000080
+#define COMPILER_SIMPLIFY_BRANCH           0x00000100
+#define COMPILER_SIMPLIFY_COPY             0x00000200
+#define COMPILER_SIMPLIFY_ARITH            0x00000400
+#define COMPILER_SIMPLIFY_SHIFT            0x00000800
+#define COMPILER_SIMPLIFY_BITWISE          0x00001000
+#define COMPILER_SIMPLIFY_LOGICAL          0x00002000
+
+#define COMPILER_DEFAULT_FLAGS ( \
+       COMPILER_ELIMINATE_INEFECTUAL_CODE | \
+       COMPILER_INLINE | \
+       COMPILER_ALWAYS_INLINE | \
+       COMPILER_SIMPLIFY_OP | \
+       COMPILER_SIMPLIFY_PHI | \
+       COMPILER_SIMPLIFY_LABEL | \
+       COMPILER_SIMPLIFY_BRANCH | \
+       COMPILER_SIMPLIFY_COPY | \
+       COMPILER_SIMPLIFY_ARITH | \
+       COMPILER_SIMPLIFY_SHIFT | \
+       COMPILER_SIMPLIFY_BITWISE | \
+       COMPILER_SIMPLIFY_LOGICAL | \
+       0 )
+
+#define GLOBAL_SCOPE_DEPTH   1
+#define FUNCTION_SCOPE_DEPTH (GLOBAL_SCOPE_DEPTH + 1)
 
 static void compile_file(struct compile_state *old_state, const char *filename, int local);
 
+
+
+/* Put *compiler into its default configuration: empty label prefix,
+ * output file "auto.inc", the default optimization flags, no debug flags
+ * and the built-in limit on register allocation passes.
+ */
+static void init_compiler_state(struct compiler_state *compiler)
+{
+       static const struct compiler_state defaults = {
+               .label_prefix          = "",
+               .ofilename             = "auto.inc",
+               .flags                 = COMPILER_DEFAULT_FLAGS,
+               .debug                 = 0,
+               .max_allocation_passes = MAX_ALLOCATION_PASSES,
+       };
+
+       *compiler = defaults;
+}
+
+struct compiler_flag { /* one row of an option table; a row with name == 0 ends the table */
+       const char *name; /* option spelling, compared with strcmp() */
+       unsigned long flag; /* bit(s) cleared or set in the target word on a match */
+};
+/* Find `flag` by exact name in the table `ptr`, which ends at the first
+ * entry whose name is 0.  On a match the entry's bits are turned on in
+ * *flags when `act` is nonzero and turned off otherwise.
+ * Returns 0 if the name was found, -1 (leaving *flags alone) if not.
+ */
+static int set_flag(
+       const struct compiler_flag *ptr, unsigned long *flags,
+       int act, const char *flag)
+{
+       while (ptr->name && (strcmp(ptr->name, flag) != 0)) {
+               ptr++;
+       }
+       if (!ptr->name) {
+               return -1;
+       }
+       if (act) {
+               *flags |= ptr->flag;
+       }
+       else {
+               *flags &= ~(ptr->flag);
+       }
+       return 0;
+}
+
+/* Apply one option string to *compiler.
+ *
+ * A leading "no-" asks for the option to be turned off; it is honoured
+ * for the -O presets, for "debug-<name>" and for plain flag names.  The
+ * recognized forms are:
+ *   -O, -O2                    optimization presets
+ *   label-prefix=<text>        <text> is stored by pointer, not copied
+ *   max-allocation-passes=<n>  decimal limit, at least one digit required
+ *   debug                      turn on DEBUG_DEFAULT
+ *   debug-<name>               a single debug flag
+ *   <name>                     a single compiler flag
+ *
+ * Returns 0 when the option was recognized and applied, -1 otherwise.
+ */
+static int compiler_encode_flag(
+       struct compiler_state *compiler, const char *flag)
+{
+       static const struct compiler_flag flags[] = {
+               { "eliminate-inefectual-code", COMPILER_ELIMINATE_INEFECTUAL_CODE },
+               { "simplify",                  COMPILER_SIMPLIFY },
+               { "scc-transform",             COMPILER_SCC_TRANSFORM },
+               { "inline",                    COMPILER_INLINE },
+               { "always-inline",             COMPILER_ALWAYS_INLINE },
+               { "simplify-op",               COMPILER_SIMPLIFY_OP },
+               { "simplify-phi",              COMPILER_SIMPLIFY_PHI },
+               { "simplify-label",            COMPILER_SIMPLIFY_LABEL },
+               { "simplify-branch",           COMPILER_SIMPLIFY_BRANCH },
+               { "simplify-copy",             COMPILER_SIMPLIFY_COPY },
+               { "simplify-arith",            COMPILER_SIMPLIFY_ARITH },
+               { "simplify-shift",            COMPILER_SIMPLIFY_SHIFT },
+               { "simplify-bitwise",          COMPILER_SIMPLIFY_BITWISE },
+               { "simplify-logical",          COMPILER_SIMPLIFY_LOGICAL },
+               { 0, 0 },
+       };
+       static const struct compiler_flag opt_flags[] = {
+               { "-O",  COMPILER_SIMPLIFY },
+               { "-O2", COMPILER_SIMPLIFY | COMPILER_SCC_TRANSFORM },
+               { 0, 0, },
+       };
+       static const struct compiler_flag debug_flags[] = {
+               { "abort-on-error",        DEBUG_ABORT_ON_ERROR },
+               { "basic-blocks",          DEBUG_BASIC_BLOCKS },
+               { "fdominators",           DEBUG_FDOMINATORS },
+               { "rdominators",           DEBUG_RDOMINATORS },
+               { "triples",               DEBUG_TRIPLES },
+               { "interference",          DEBUG_INTERFERENCE },
+               { "scc-transform",         DEBUG_SCC_TRANSFORM },
+               { "scc-transform2",        DEBUG_SCC_TRANSFORM2 },
+               { "rebuild-ssa-form",      DEBUG_REBUILD_SSA_FORM },
+               { "inline",                DEBUG_INLINE },
+               { "live-range-conflicts",  DEBUG_RANGE_CONFLICTS },
+               { "live-range-conflicts2", DEBUG_RANGE_CONFLICTS2 },
+               { "color-graph",           DEBUG_COLOR_GRAPH },
+               { "color-graph2",          DEBUG_COLOR_GRAPH2 },
+               { "coalescing",            DEBUG_COALESCING },
+               { "coalescing2",           DEBUG_COALESCING2 },
+               { 0, 0 },
+       };
+       int act;
+       int result;
+
+       act = 1;
+       result = -1;
+       if (strncmp(flag, "no-", 3) == 0) {
+               flag += 3;
+               act = 0;
+       }
+       if (strncmp(flag, "-O", 2) == 0) {
+               result = set_flag(opt_flags, &compiler->flags, act, flag);
+       }
+       else if (act && strncmp(flag, "label-prefix=", 13) == 0) {
+               result = 0;
+               compiler->label_prefix = flag + 13;
+       }
+       else if (act && strncmp(flag, "max-allocation-passes=", 22) == 0) {
+               const char *value = flag + 22;
+               unsigned long max_passes;
+               char *end;
+               max_passes = strtoul(value, &end, 10);
+               /* strtoul() leaves end == value when it converted nothing,
+                * so a bare "max-allocation-passes=" is rejected instead of
+                * being read as a limit of 0.
+                */
+               if ((end != value) && (end[0] == '\0')) {
+                       result = 0;
+                       compiler->max_allocation_passes = max_passes;
+               }
+       }
+       else if (act && strcmp(flag, "debug") == 0) {
+               result = 0;
+               compiler->debug |= DEBUG_DEFAULT;
+       }
+       else if (strncmp(flag, "debug-", 6) == 0) {
+               flag += 6;
+               result = set_flag(debug_flags, &compiler->debug, act, flag);
+       }
+       else {
+               /* Anything else is looked up as a plain compiler flag name */
+               result = set_flag(flags, &compiler->flags, act, flag);
+       }
+       return result;
+}
+
 static void do_cleanup(struct compile_state *state)
 {
        if (state->output) {
                fclose(state->output);
-               unlink(state->ofilename);
+               unlink(state->compiler->ofilename); /* delete the output file; its name is now kept in struct compiler_state */
        }
 }
 
@@ -965,11 +1218,12 @@ static int get_col(struct file_state *file)
 static void loc(FILE *fp, struct compile_state *state, struct triple *triple)
 {
        int col;
-       if (triple) {
-               fprintf(fp, "%s:%d.%d: ", 
-                       triple->occurance->filename, 
-                       triple->occurance->line, 
-                       triple->occurance->col);
+       if (triple && triple->occurance) {
+               struct occurance *spot;
+               for(spot = triple->occurance; spot; spot = spot->parent) {
+                       fprintf(fp, "%s:%d.%d: ", 
+                               spot->filename, spot->line, spot->col);
+               }
                return;
        }
        if (!state->file) {
@@ -980,12 +1234,13 @@ static void loc(FILE *fp, struct compile_state *state, struct triple *triple)
                state->file->report_name, state->file->report_line, col);
 }
 
-static void __internal_error(struct compile_state *state, struct triple *ptr, 
+static void internal_error(struct compile_state *state, struct triple *ptr, 
        char *fmt, ...)
 {
        va_list args;
        va_start(args, fmt);
        loc(stderr, state, ptr);
+       fputc('\n', stderr);
        if (ptr) {
                fprintf(stderr, "%p %s ", ptr, tops(ptr->op));
        }
@@ -998,12 +1253,15 @@ static void __internal_error(struct compile_state *state, struct triple *ptr,
 }
 
 
-static void __internal_warning(struct compile_state *state, struct triple *ptr, 
+static void internal_warning(struct compile_state *state, struct triple *ptr, 
        char *fmt, ...)
 {
        va_list args;
        va_start(args, fmt);
        loc(stderr, state, ptr);
+       if (ptr) {
+               fprintf(stderr, "%p %s ", ptr, tops(ptr->op));
+       }
        fprintf(stderr, "Internal compiler warning: ");
        vfprintf(stderr, fmt, args);
        fprintf(stderr, "\n");
@@ -1012,23 +1270,27 @@ static void __internal_warning(struct compile_state *state, struct triple *ptr,
 
 
 
-static void __error(struct compile_state *state, struct triple *ptr, 
+/* Report a fatal compile error and terminate the compiler.
+ * Output on stderr: the location from loc(), a newline, then (only when
+ * ptr is set and DEBUG_ABORT_ON_ERROR is enabled) the triple's address
+ * and op name, then the printf-style message built from fmt.
+ * Afterwards do_cleanup() runs and the process ends with abort() when
+ * DEBUG_ABORT_ON_ERROR is set in state->compiler->debug, otherwise
+ * with exit(1).
+ */
+static void error(struct compile_state *state, struct triple *ptr, 
        char *fmt, ...)
 {
        va_list args;
        va_start(args, fmt);
        loc(stderr, state, ptr);
+       fputc('\n', stderr);
+       if (ptr && (state->compiler->debug & DEBUG_ABORT_ON_ERROR)) {
+               fprintf(stderr, "%p %s ", ptr, tops(ptr->op));
+       }
        vfprintf(stderr, fmt, args);
        va_end(args);
        fprintf(stderr, "\n");
        do_cleanup(state);
-       if (state->debug & DEBUG_ABORT_ON_ERROR) {
+       if (state->compiler->debug & DEBUG_ABORT_ON_ERROR) {
                abort();
        }
        exit(1);
 }
 
-static void __warning(struct compile_state *state, struct triple *ptr, 
+static void warning(struct compile_state *state, struct triple *ptr, 
        char *fmt, ...)
 {
        va_list args;
@@ -1040,17 +1302,6 @@ static void __warning(struct compile_state *state, struct triple *ptr,
        va_end(args);
 }
 
-#if DEBUG_ERROR_MESSAGES 
-#  define internal_error fprintf(stderr,  "@ %s.%s:%d \t", __FILE__, __func__, __LINE__),__internal_error
-#  define internal_warning fprintf(stderr,  "@ %s.%s:%d \t", __FILE__, __func__, __LINE__),__internal_warning
-#  define error fprintf(stderr, "@ %s.%s:%d \t", __FILE__, __func__, __LINE__),__error
-#  define warning fprintf(stderr, "@ %s.%s:%d \t", __FILE__, __func__, __LINE__),__warning
-#else
-#  define internal_error __internal_error
-#  define internal_warning __internal_warning
-#  define error __error
-#  define warning __warning
-#endif
 #define FINISHME() warning(state, 0, "FINISHME @ %s.%s:%d", __FILE__, __func__, __LINE__)
 
 static void valid_op(struct compile_state *state, int op)
@@ -1173,54 +1424,24 @@ static void unuse_triple(struct triple *used, struct triple *unuser)
        }
 }
 
-static void push_triple(struct triple *used, struct triple *user)
-{
-       struct triple_set *new;
-       if (!used)
-               return;
-       if (!user)
-               return;
-       /* Append new to the head of the list,
-        * it's the only sensible behavoir for a stack.
-        */
-       new = xcmalloc(sizeof(*new), "triple_set");
-       new->member = user;
-       new->next   = used->use;
-       used->use   = new;
-}
-
-static void pop_triple(struct triple *used, struct triple *unuser)
-{
-       struct triple_set *use, **ptr;
-       ptr = &used->use;
-       while(*ptr) {
-               use = *ptr;
-               if (use->member == unuser) {
-                       *ptr = use->next;
-                       xfree(use);
-                       /* Only free one occurance from the stack */
-                       return;
-               }
-               else {
-                       ptr = &use->next;
-               }
-       }
-}
-
+/* Drop one reference to occurance; a null pointer is ignored.
+ * When the count falls to zero or below, the reference held on the
+ * parent (if any) is dropped recursively and the structure is freed.
+ */
 static void put_occurance(struct occurance *occurance)
 {
-       occurance->count -= 1;
-       if (occurance->count <= 0) {
-               if (occurance->parent) {
-                       put_occurance(occurance->parent);
+       if (occurance) {
+               occurance->count -= 1;
+               if (occurance->count <= 0) {
+                       if (occurance->parent) {
+                               put_occurance(occurance->parent);
+                       }
+                       xfree(occurance);
                }
-               xfree(occurance);
        }
 }
 
+/* Take one more reference on occurance; a null pointer is ignored. */
 static void get_occurance(struct occurance *occurance)
 {
-       occurance->count += 1;
+       if (occurance) {
+               occurance->count += 1;
+       }
 }
 
 
@@ -1248,7 +1469,9 @@ static struct occurance *new_occurance(struct compile_state *state)
                (last->col == col) &&
                (last->line == line) &&
                (last->function == function) &&
-               (strcmp(last->filename, filename) == 0)) {
+               ((last->filename == filename) ||
+                       (strcmp(last->filename, filename) == 0))) 
+       {
                get_occurance(last);
                return last;
        }
@@ -1268,35 +1491,48 @@ static struct occurance *new_occurance(struct compile_state *state)
 }
 
+/* Build the occurance for code located at `top` once it is inlined at
+ * `base`: the result carries top's filename, function, line and col,
+ * with base as its parent.
+ * top must not already have a parent (internal error otherwise).
+ * A base with no parent, zero line/col and empty names is treated as
+ * "no parent" (base becomes 0).
+ * state->last_occurance is reused when it already matches (filename and
+ * function are compared by pointer here); otherwise it is released and
+ * replaced by a freshly allocated structure whose count starts at 2
+ * (presumably one reference for the caller and one for the
+ * state->last_occurance cache -- confirm).
+ */
 static struct occurance *inline_occurance(struct compile_state *state,
-       struct occurance *new, struct occurance *orig)
+       struct occurance *base, struct occurance *top)
 {
        struct occurance *result, *last;
+       if (top->parent) {
+               internal_error(state, 0, "inlining an already inlined function?");
+       }
+       /* If I have a null base treat it that way */
+       if ((base->parent == 0) &&
+               (base->col == 0) &&
+               (base->line == 0) &&
+               (base->function[0] == '\0') &&
+               (base->filename[0] == '\0')) {
+               base = 0;
+       }
+       /* See if I can reuse the last occurance I had */
        last = state->last_occurance;
        if (last &&
-               (last->parent   == orig) &&
-               (last->col      == new->col) &&
-               (last->line     == new->line) &&
-               (last->function == new->function) &&
-               (last->filename == new->filename)) {
+               (last->parent   == base) &&
+               (last->col      == top->col) &&
+               (last->line     == top->line) &&
+               (last->function == top->function) &&
+               (last->filename == top->filename)) {
                get_occurance(last);
                return last;
        }
+       /* I can't reuse the last occurance so free it */
        if (last) {
                state->last_occurance = 0;
                put_occurance(last);
        }
-       get_occurance(orig);
+       /* Generate a new occurance structure */
+       get_occurance(base);
        result = xmalloc(sizeof(*result), "occurance");
        result->count    = 2;
-       result->filename = new->filename;
-       result->function = new->function;
-       result->line     = new->line;
-       result->col      = new->col;
-       result->parent   = orig;
+       result->filename = top->filename;
+       result->function = top->function;
+       result->line     = top->line;
+       result->col      = top->col;
+       result->parent   = base;
        state->last_occurance = result;
        return result;
 }
-       
 
 static struct occurance dummy_occurance = {
        .count    = 2,
@@ -1318,16 +1554,20 @@ static struct triple zero_triple = {
        .op        = OP_INTCONST,
        .sizes     = TRIPLE_SIZES(0, 0, 0, 0),
        .id        = -1, /* An invalid id */
-       .u = { .cval   = 0, },
+       .u = { .cval = 0, },
        .occurance = &dummy_occurance,
-       .param { [0] = 0, [1] = 0, },
+       .param { [0] = 0, [1] = 0, },
 };
 
 
 static unsigned short triple_sizes(struct compile_state *state,
-       int op, struct type *type, int lhs_wanted, int rhs_wanted)
+       int op, struct type *type, int lhs_wanted, int rhs_wanted,
+       struct occurance *occurance)
 {
        int lhs, rhs, misc, targ;
+       struct triple dummy;
+       dummy.op = op;
+       dummy.occurance = occurance;
        valid_op(state, op);
        lhs = table_ops[op].lhs;
        rhs = table_ops[op].rhs;
@@ -1335,26 +1575,17 @@ static unsigned short triple_sizes(struct compile_state *state,
        targ = table_ops[op].targ;
        
        
-       if (op == OP_CALL) {
-               struct type *param;
-               rhs = 0;
-               param = type->right;
-               while((param->type & TYPE_MASK) == TYPE_PRODUCT) {
-                       rhs++;
-                       param = param->right;
-               }
-               if ((param->type & TYPE_MASK) != TYPE_VOID) {
-                       rhs++;
-               }
+       if (op == OP_FCALL) {
+               rhs = rhs_wanted;
                lhs = 0;
-               if ((type->left->type & TYPE_MASK) == TYPE_STRUCT) {
+               if ((type->type & TYPE_MASK) == TYPE_STRUCT) {
                        lhs = type->left->elements;
                }
        }
        else if (op == OP_VAL_VEC) {
                rhs = type->elements;
        }
-       else if ((op == OP_BRANCH) || (op == OP_PHI)) {
+       else if (op == OP_PHI) {
                rhs = rhs_wanted;
        }
        else if (op == OP_ASM) {
@@ -1362,16 +1593,16 @@ static unsigned short triple_sizes(struct compile_state *state,
                lhs = lhs_wanted;
        }
        if ((rhs < 0) || (rhs > MAX_RHS)) {
-               internal_error(state, 0, "bad rhs");
+               internal_error(state, &dummy, "bad rhs %d", rhs);
        }
        if ((lhs < 0) || (lhs > MAX_LHS)) {
-               internal_error(state, 0, "bad lhs");
+               internal_error(state, &dummy, "bad lhs");
        }
        if ((misc < 0) || (misc > MAX_MISC)) {
-               internal_error(state, 0, "bad misc");
+               internal_error(state, &dummy, "bad misc");
        }
        if ((targ < 0) || (targ > MAX_TARG)) {
-               internal_error(state, 0, "bad targs");
+               internal_error(state, &dummy, "bad targs");
        }
        return TRIPLE_SIZES(lhs, rhs, misc, targ);
 }
@@ -1382,7 +1613,7 @@ static struct triple *alloc_triple(struct compile_state *state,
 {
        size_t size, sizes, extra_count, min_count;
        struct triple *ret;
-       sizes = triple_sizes(state, op, type, lhs, rhs);
+       sizes = triple_sizes(state, op, type, lhs, rhs, occurance);
 
        min_count = sizeof(ret->param)/sizeof(ret->param[0]);
        extra_count = TRIPLE_SIZE(sizes);
@@ -1461,20 +1692,20 @@ static struct triple *branch(struct compile_state *state,
        struct triple *targ, struct triple *test)
 {
        struct triple *ret;
-       ret = new_triple(state, OP_BRANCH, &void_type, -1, test?1:0);
        if (test) {
+               ret = new_triple(state, OP_CBRANCH, &void_type, -1, 1);
                RHS(ret, 0) = test;
+       } else {
+               ret = new_triple(state, OP_BRANCH, &void_type, -1, 0);
        }
        TARG(ret, 0) = targ;
        /* record the branch target was used */
        if (!targ || (targ->op != OP_LABEL)) {
                internal_error(state, 0, "branch not to label");
-               use_triple(targ, ret);
        }
        return ret;
 }
 
-
 static void insert_triple(struct compile_state *state,
        struct triple *first, struct triple *ptr)
 {
@@ -1486,8 +1717,8 @@ static void insert_triple(struct compile_state *state,
                ptr->prev       = first->prev;
                ptr->prev->next = ptr;
                ptr->next->prev = ptr;
-               if ((ptr->prev->op == OP_BRANCH) && 
-                       TRIPLE_RHS(ptr->prev->sizes)) {
+               
+               if ((ptr->prev->op == OP_CBRANCH) || (ptr->prev->op == OP_CALL)) {
                        unuse_triple(first, ptr->prev);
                        use_triple(ptr, ptr->prev);
                }
@@ -1509,10 +1740,13 @@ static struct block *block_of_triple(struct compile_state *state,
        struct triple *ins)
 {
        struct triple *first;
-       first = RHS(state->main_function, 0);
+       if (!ins || ins == &zero_triple) {
+               return 0;
+       }
+       first = state->first;
        while(ins != first && !triple_stores_block(state, ins)) {
                if (ins == ins->prev) {
-                       internal_error(state, 0, "ins == ins->prev?");
+                       internal_error(state, ins, "ins == ins->prev?");
                }
                ins = ins->prev;
        }
@@ -1558,7 +1792,7 @@ static struct triple *post_triple(struct compile_state *state,
        }
        /* If I have a left hand side skip over it */
        zlhs = TRIPLE_LHS(base->sizes);
-       if (zlhs && (base->op != OP_WRITE) && (base->op != OP_STORE)) {
+       if (zlhs) {
                base = LHS(base, zlhs - 1);
        }
 
@@ -1599,12 +1833,12 @@ static void display_triple(FILE *fp, struct triple *ins)
        if (ins->op == OP_INTCONST) {
                fprintf(fp, "(%p) %c%c %-7s %-2d %-10s <0x%08lx>         ",
                        ins, pre, post, reg, ins->template_id, tops(ins->op), 
-                       ins->u.cval);
+                       (unsigned long)(ins->u.cval));
        }
        else if (ins->op == OP_ADDRCONST) {
                fprintf(fp, "(%p) %c%c %-7s %-2d %-10s %-10p <0x%08lx>",
                        ins, pre, post, reg, ins->template_id, tops(ins->op), 
-                       MISC(ins, 0), ins->u.cval);
+                       MISC(ins, 0), (unsigned long)(ins->u.cval));
        }
        else {
                int i, count;
@@ -1627,10 +1861,98 @@ static void display_triple(FILE *fp, struct triple *ins)
                        ptr->col);
        }
        fprintf(fp, "\n");
+#if 0
+       {
+               struct triple_set *user;
+               for(user = ptr->use; user; user = user->next) {
+                       fprintf(fp, "use: %p\n", user->member);
+               }
+       }
+#endif
        fflush(fp);
 }
 
-static int triple_is_pure(struct compile_state *state, struct triple *ins)
+/* Print one line to fp describing how triple `new` differs from `orig`.
+ * Nothing is printed when the op, the total parameter count, the
+ * parameter pointers and the u union all compare equal.
+ * Otherwise the line shows orig's address, the op (or "[new orig]" when
+ * it changed), each shared parameter (again "[new orig]" when changed),
+ * any parameters that exist on only one side in brackets, the constant
+ * value for OP_INTCONST/OP_ADDRCONST, and finally orig's occurance
+ * chain after an '@'.  `indent` tracks the printed width so the '@'
+ * column is padded out to at least 36 characters.
+ */
+static void display_triple_changes(
+       FILE *fp, const struct triple *new, const struct triple *orig)
+{
+
+       int new_count, orig_count;
+       new_count = TRIPLE_SIZE(new->sizes);
+       orig_count = TRIPLE_SIZE(orig->sizes);
+       if ((new->op != orig->op) ||
+               (new_count != orig_count) ||
+               (memcmp(orig->param, new->param,        
+                       orig_count * sizeof(orig->param[0])) != 0) ||
+               (memcmp(&orig->u, &new->u, sizeof(orig->u)) != 0)) 
+       {
+               struct occurance *ptr;
+               int i, min_count, indent;
+               fprintf(fp, "(%p)", orig);
+               if (orig->op == new->op) {
+                       fprintf(fp, " %-11s", tops(orig->op));
+               } else {
+                       fprintf(fp, " [%-10s %-10s]", 
+                               tops(new->op), tops(orig->op));
+               }
+               /* Compare only the parameters both triples have */
+               min_count = new_count;
+               if (min_count > orig_count) {
+                       min_count = orig_count;
+               }
+               for(indent = i = 0; i < min_count; i++) {
+                       if (orig->param[i] == new->param[i]) {
+                               fprintf(fp, " %-11p", 
+                                       orig->param[i]);
+                               indent += 12;
+                       } else {
+                               fprintf(fp, " [%-10p %-10p]",
+                                       new->param[i], 
+                                       orig->param[i]);
+                               indent += 24;
+                       }
+               }
+               /* At most one of the next two loops runs: the extra
+                * parameters belong to whichever triple is longer.
+                */
+               for(; i < orig_count; i++) {
+                       fprintf(fp, " [%-9p]", orig->param[i]);
+                       indent += 12;
+               }
+               for(; i < new_count; i++) {
+                       fprintf(fp, " [%-9p]", new->param[i]);
+                       indent += 12;
+               }
+               if ((new->op == OP_INTCONST)||
+                       (new->op == OP_ADDRCONST)) {
+                       fprintf(fp, " <0x%08lx>", 
+                               (unsigned long)(new->u.cval));
+                       indent += 13;
+               }
+               for(;indent < 36; indent++) {
+                       putc(' ', fp);
+               }
+               fprintf(fp, " @");
+               for(ptr = orig->occurance; ptr; ptr = ptr->parent) {
+                       fprintf(fp, " %s,%s:%d.%d",
+                               ptr->function, 
+                               ptr->filename,
+                               ptr->line, 
+                               ptr->col);
+                       
+               }
+               fprintf(fp, "\n");
+               fflush(fp);
+       }
+}
+
+static void display_func(FILE *fp, struct triple *func)
+{
+       /* Print the function's name, then display every triple reached
+        * by following ->next from RHS(func, 0) until the walk arrives
+        * back at that starting triple.
+        */
+       struct triple *head, *cur;
+       fprintf(fp, "display_func %s\n", func->type->type_ident->name);
+       head = RHS(func, 0);
+       cur = head;
+       do {
+               display_triple(fp, cur);
+               cur = cur->next;
+       } while(cur != head);
+}
+
+static int triple_is_pure(struct compile_state *state, struct triple *ins, unsigned id)
 {
        /* Does the triple have no side effects.
         * I.e. Rexecuting the triple with the same arguments 
@@ -1643,18 +1965,28 @@ static int triple_is_pure(struct compile_state *state, struct triple *ins)
                internal_error(state, 0, "Purity of %s not known\n",
                        tops(ins->op));
        }
-       return pure == PURE;
+       return (pure == PURE) && !(id & TRIPLE_FLAG_VOLATILE);
 }
 
 static int triple_is_branch(struct compile_state *state, struct triple *ins)
 {
-       /* This function is used to determine which triples need
-        * a register.
-        */
-       int is_branch;
+       /* Is this triple a branch instruction?
+        * Decided purely by the BRANCH flag in the table_ops entry for
+        * ins->op, after valid_ins() has checked the triple.
+        */
+       valid_ins(state, ins);
+       return (table_ops[ins->op].flags & BRANCH) != 0;
+}
+
+static int triple_is_cond_branch(struct compile_state *state, struct triple *ins)
+{
+       /* Nonzero exactly when the table_ops entry for ins->op carries
+        * the CBRANCH flag, i.e. the triple is a conditional branch.
+        */
+       int is_cond;
+       valid_ins(state, ins);
+       is_cond = ((table_ops[ins->op].flags & CBRANCH) != 0);
+       return is_cond;
+}
+
+static int triple_is_uncond_branch(struct compile_state *state, struct triple *ins)
+{
+       /* Is this triple an unconditional branch instruction?
+        * It has to be a branch in the first place (BRANCH set) and must
+        * not be a conditional one (CBRANCH clear).  Testing CBRANCH
+        * alone would report every non-branch triple as an unconditional
+        * branch.
+        */
        valid_ins(state, ins);
-       is_branch = (table_ops[ins->op].targ != 0);
-       return is_branch;
+       return ((table_ops[ins->op].flags & BRANCH) != 0) &&
+               ((table_ops[ins->op].flags & CBRANCH) == 0);
 }
 
 static int triple_is_def(struct compile_state *state, struct triple *ins)
@@ -1668,6 +2000,14 @@ static int triple_is_def(struct compile_state *state, struct triple *ins)
        return is_def;
 }
 
+static int triple_is_structural(struct compile_state *state, struct triple *ins)
+{
+       /* True when every bit of STRUCTURAL is set in the table_ops
+        * flags for ins->op.
+        */
+       valid_ins(state, ins);
+       return (table_ops[ins->op].flags & STRUCTURAL) == STRUCTURAL;
+}
+
 static struct triple **triple_iter(struct compile_state *state,
        size_t count, struct triple **vector,
        struct triple *ins, struct triple **last)
@@ -1715,22 +2055,43 @@ static struct triple **triple_targ(struct compile_state *state,
        ret = 0;
        count = TRIPLE_TARG(ins->sizes);
        vector = &TARG(ins, 0);
-       if (count) {
+       if (!ret && 
+               ((ins->op == OP_CALL) || (table_ops[ins->op].flags & CBRANCH))) {
+               if (!last) {
+                       ret = &ins->next;
+               } else if (last == &ins->next) {
+                       last = 0;
+               }
+       }
+       if (!ret && count) {
                if (!last) {
                        ret = vector;
                }
                else if ((last >= vector) && (last < (vector + count - 1))) {
                        ret = last + 1;
                }
-               else if ((last == (vector + count - 1)) && 
-                       TRIPLE_RHS(ins->sizes)) {
-                       ret = &ins->next;
+               else if (last == vector + count - 1) {
+                       last = 0;
+               }
+       }
+       if (!ret && (ins->op == OP_RET)) {
+               struct triple_set *use;
+               for(use = ins->use; use; use = use->next) {
+                       if (use->member->op != OP_CALL) {
+                               continue;
+                       }
+                       if (!last) {
+                               ret = &use->member->next;
+                               break;
+                       }
+                       else if (last == &use->member->next) {
+                               last = 0;
+                       }
                }
        }
        return ret;
 }
 
-
 static void verify_use(struct compile_state *state,
        struct triple *user, struct triple *used)
 {
@@ -1787,6 +2148,21 @@ static void release_triple(struct compile_state *state, struct triple *ptr)
 {
        struct triple_set *set, *next;
        struct triple **expr;
+       struct block *block;
+       valid_ins(state, ptr);
+       /* Make certain that we are not the first or last element of a block */
+       block = block_of_triple(state, ptr);
+       if (block) {
+               if ((block->last == ptr) && (block->first == ptr)) {
+                       block->last = block->first = 0;
+               }
+               else if (block->last == ptr) {
+                       block->last = ptr->prev;
+               }
+               else if (block->first == ptr) {
+                       block->first = ptr->next;
+               }
+       }
        /* Remove ptr from use chains where it is the user */
        expr = triple_rhs(state, ptr, 0);
        for(; expr; expr = triple_rhs(state, ptr, expr)) {
@@ -1808,13 +2184,14 @@ static void release_triple(struct compile_state *state, struct triple *ptr)
        }
        expr = triple_targ(state, ptr, 0);
        for(; expr; expr = triple_targ(state, ptr, expr)) {
-               if (*expr) {
+               if (*expr){
                        unuse_triple(*expr, ptr);
                }
        }
        /* Reomve ptr from use chains where it is used */
        for(set = ptr->use; set; set = next) {
                next = set->next;
+               valid_ins(state, set->member);
                expr = triple_rhs(state, set->member, 0);
                for(; expr; expr = triple_rhs(state, set->member, expr)) {
                        if (*expr == ptr) {
@@ -1844,7 +2221,8 @@ static void release_triple(struct compile_state *state, struct triple *ptr)
        free_triple(state, ptr);
 }
 
-static void print_triple(struct compile_state *state, struct triple *ptr);
+static void print_triples(struct compile_state *state);
+static void print_blocks(struct compile_state *state, const char *func, FILE *fp);
 
 #define TOK_UNKNOWN     0
 #define TOK_SPACE       1
@@ -2169,6 +2547,22 @@ static void symbol(
        *chain    = sym;
 }
 
+static void label_symbol(struct compile_state *state,
+       struct hash_entry *ident, struct triple *label)
+{
+       /* Record `label` as the definition of the label named by ident.
+        * A label that is already defined is reported through error().
+        * The new symbol becomes ident's only sym_label entry and is
+        * tagged with FUNCTION_SCOPE_DEPTH.
+        */
+       struct symbol *entry;
+       if (ident->sym_label != 0) {
+               error(state, 0, "label %s already defined", ident->name);
+       }
+       entry = xcmalloc(sizeof(*entry), "label");
+       entry->next  = 0;
+       entry->scope_depth = FUNCTION_SCOPE_DEPTH;
+       entry->type  = &void_type;
+       entry->def   = label;
+       entry->ident = ident;
+       ident->sym_label = entry;
+}
+
 static void start_scope(struct compile_state *state)
 {
        state->scope_depth++;
@@ -2198,9 +2592,9 @@ static void end_scope(struct compile_state *state)
                struct hash_entry *entry;
                entry = state->hash_table[i];
                while(entry) {
-                       end_scope_syms(&entry->sym_label,  depth);
-                       end_scope_syms(&entry->sym_struct, depth);
-                       end_scope_syms(&entry->sym_ident,  depth);
+                       end_scope_syms(&entry->sym_label, depth);
+                       end_scope_syms(&entry->sym_tag,   depth);
+                       end_scope_syms(&entry->sym_ident, depth);
                        entry = entry->next;
                }
        }
@@ -2915,14 +3309,17 @@ static long_t mprimary_expr(struct compile_state *state, int index)
                break;
        case TOK_LIT_INT:
        {
+               long lval;
                char *end;
                meat(state, index, TOK_LIT_INT);
                errno = 0;
-               val = strtol(state->token[index].val.str, &end, 0);
-               if (((val == LONG_MIN) || (val == LONG_MAX)) &&
-                       (errno == ERANGE)) {
+               lval = strtol(state->token[index].val.str, &end, 0);
+               if ((lval > LONG_T_MAX) || (lval < LONG_T_MIN) ||
+                       (((lval == LONG_MIN) || (lval == LONG_MAX)) &&
+                               (errno == ERANGE))) {
                        error(state, 0, "Integer constant to large");
                }
+               val = lval;
                break;
        }
        default:
@@ -3250,6 +3647,7 @@ static void preprocess(struct compile_state *state, int index)
                        meat(state, index, TOK_LIT_STRING);
                        name = xmalloc(tk->str_len, "report_name");
                        token = tk->val.str + 1;
+                       base = strrchr(token, '/');
                        name_len = tk->str_len - 2;
                        if (base != 0) {
                                dir_len = base - token;
@@ -3732,6 +4130,17 @@ static struct type uint_type   = { .type  = TYPE_UINT };
 static struct type long_type   = { .type  = TYPE_LONG };
 static struct type ulong_type  = { .type  = TYPE_ULONG };
 
+/* Shared type node: a TYPE_POINTER whose left type is void_type
+ * (presumably the pointed-to type, i.e. `void *` -- confirm).
+ */
+static struct type void_ptr_type  = {
+       .type = TYPE_POINTER,
+       .left = &void_type,
+};
+
+/* Shared type node: a TYPE_FUNCTION with void_type on both sides
+ * (presumably left is the return type and right the parameter list,
+ * i.e. `void f(void)` -- confirm).
+ */
+static struct type void_func_type = { 
+       .type  = TYPE_FUNCTION,
+       .left  = &void_type,
+       .right = &void_type,
+};
+
 static struct triple *variable(struct compile_state *state, struct type *type)
 {
        struct triple *result;
@@ -3770,6 +4179,9 @@ static void stor_of(FILE *fp, struct type *type)
        case STOR_STATIC:
                fprintf(fp, "static ");
                break;
+       case STOR_LOCAL:
+               fprintf(fp, "local ");
+               break;
        case STOR_EXTERN:
                fprintf(fp, "extern ");
                break;
@@ -3779,9 +4191,18 @@ static void stor_of(FILE *fp, struct type *type)
        case STOR_TYPEDEF:
                fprintf(fp, "typedef ");
                break;
-       case STOR_INLINE:
+       case STOR_INLINE | STOR_LOCAL:
                fprintf(fp, "inline ");
                break;
+       case STOR_INLINE | STOR_STATIC:
+               fprintf(fp, "static inline");
+               break;
+       case STOR_INLINE | STOR_EXTERN:
+               fprintf(fp, "extern inline");
+               break;
+       default:
+               fprintf(fp, "stor:%x", type->type & STOR_MASK);
+               break;
        }
 }
 static void qual_of(FILE *fp, struct type *type)
@@ -3866,7 +4287,7 @@ static void name_of(FILE *fp, struct type *type)
        }
        case TYPE_ARRAY:
                name_of(fp, type->left);
-               fprintf(fp, " [%ld]", type->elements);
+               fprintf(fp, " [%ld]", (long)(type->elements));
                break;
        default:
                fprintf(fp, "????: %x", type->type & TYPE_MASK);
@@ -3922,6 +4343,15 @@ static size_t align_of(struct compile_state *state, struct type *type)
        return align;
 }
 
+static size_t needed_padding(size_t offset, size_t align)
+{
+       /* Bytes required to advance offset to the next multiple of
+        * align; zero when offset is already a multiple of align.
+        */
+       size_t excess;
+       excess = offset % align;
+       if (excess == 0) {
+               return 0;
+       }
+       return align - excess;
+}
 static size_t size_of(struct compile_state *state, struct type *type)
 {
        size_t size;
@@ -3951,16 +4381,16 @@ static size_t size_of(struct compile_state *state, struct type *type)
        case TYPE_PRODUCT:
        {
                size_t align, pad;
-               size = size_of(state, type->left);
-               while((type->right->type & TYPE_MASK) == TYPE_PRODUCT) {
-                       type = type->right;
+               size = 0;
+               while((type->type & TYPE_MASK) == TYPE_PRODUCT) {
                        align = align_of(state, type->left);
-                       pad = align - (size % align);
+                       pad = needed_padding(size, align);
                        size = size + pad + size_of(state, type->left);
+                       type = type->right;
                }
-               align = align_of(state, type->right);
-               pad = align - (size % align);
-               size = size + pad + sizeof(type->right);
+               align = align_of(state, type);
+               pad = needed_padding(size, align);
+               size = size + pad + size_of(state, type);
                break;
        }
        case TYPE_OVERLAP:
@@ -3979,10 +4409,17 @@ static size_t size_of(struct compile_state *state, struct type *type)
                }
                break;
        case TYPE_STRUCT:
+       {
+               size_t align, pad;
                size = size_of(state, type->left);
+               /* Pad structures so their size is a multiple of their alignment */
+               align = align_of(state, type);
+               pad = needed_padding(size, align);
+               size = size + pad;
                break;
+       }
        default:
-               error(state, 0, "sizeof not yet defined for type\n");
+               internal_error(state, 0, "sizeof not yet defined for type\n");
                break;
        }
        return size;
@@ -3991,25 +4428,27 @@ static size_t size_of(struct compile_state *state, struct type *type)
+/* Return the offset of member `field` inside structure `type`.
+ * Walks the TYPE_PRODUCT chain hanging off type->left: before each
+ * member is examined the running offset is padded up to that member's
+ * alignment, and each member that is skipped adds its size_of().
+ * internal_error() is raised when type is not a TYPE_STRUCT, and
+ * error() when no member carries the requested field_ident.
+ */
 static size_t field_offset(struct compile_state *state, 
        struct type *type, struct hash_entry *field)
 {
-       size_t size, align, pad;
+       struct type *member;
+       size_t size, align;
        if ((type->type & TYPE_MASK) != TYPE_STRUCT) {
                internal_error(state, 0, "field_offset only works on structures");
        }
        size = 0;
-       type = type->left;
-       while((type->type & TYPE_MASK) == TYPE_PRODUCT) {
-               if (type->left->field_ident == field) {
-                       type = type->left;
+       member = type->left;
+       while((member->type & TYPE_MASK) == TYPE_PRODUCT) {
+               align = align_of(state, member->left);
+               size += needed_padding(size, align);
+               if (member->left->field_ident == field) {
+                       member = member->left;
+                       break;
                }
-               size += size_of(state, type->left);
-               type = type->right;
-               align = align_of(state, type->left);
-               pad = align - (size % align);
-               size += pad;
+               size += size_of(state, member->left);
+               member = member->right;
        }
-       if (type->field_ident != field) {
-               internal_error(state, 0, "field_offset: member %s not present",
-                       field->name);
+       /* member is now either the match found in the loop (already
+        * aligned, so this adds nothing) or the final non-product
+        * member, which still needs its own alignment padding.
+        */
+       align = align_of(state, member);
+       size += needed_padding(size, align);
+       if (member->field_ident != field) {
+               error(state, 0, "member %s not present", field->name);
        }
        return size;
 }
@@ -4017,39 +4456,67 @@ static size_t field_offset(struct compile_state *state,
 static struct type *field_type(struct compile_state *state, 
        struct type *type, struct hash_entry *field)
 {
+       struct type *member;
        if ((type->type & TYPE_MASK) != TYPE_STRUCT) {
                internal_error(state, 0, "field_type only works on structures");
        }
-       type = type->left;
-       while((type->type & TYPE_MASK) == TYPE_PRODUCT) {
-               if (type->left->field_ident == field) {
-                       type = type->left;
+       member = type->left;
+       while((member->type & TYPE_MASK) == TYPE_PRODUCT) {
+               if (member->left->field_ident == field) {
+                       member = member->left;
                        break;
                }
-               type = type->right;
+               member = member->right;
        }
-       if (type->field_ident != field) {
-               internal_error(state, 0, "field_type: member %s not present", 
-                       field->name);
+       if (member->field_ident != field) {
+               error(state, 0, "member %s not present", field->name);
        }
-       return type;
+       return member;
 }
 
-static struct triple *struct_field(struct compile_state *state,
-       struct triple *decl, struct hash_entry *field)
+static struct type *next_field(struct compile_state *state,
+       struct type *type, struct type *prev_member) 
 {
-       struct triple **vector;
-       struct type *type;
-       ulong_t index;
-       type = decl->type;
+       struct type *member;
        if ((type->type & TYPE_MASK) != TYPE_STRUCT) {
-               return decl;
-       }
-       if (decl->op != OP_VAL_VEC) {
-               internal_error(state, 0, "Invalid struct variable");
+               internal_error(state, 0, "next_field only works on structures");
        }
-       if (!field) {
-               internal_error(state, 0, "Missing structure field");
+       member = type->left;
+       while((member->type & TYPE_MASK) == TYPE_PRODUCT) {
+               if (!prev_member) {
+                       member = member->left;
+                       break;
+               }
+               if (member->left == prev_member) {
+                       prev_member = 0;
+               }
+               member = member->right;
+       }
+       if (member == prev_member) {
+               prev_member = 0;
+       }
+       if (prev_member) {
+               internal_error(state, 0, "prev_member %s not present", 
+                       prev_member->field_ident->name);
+       }
+       return member;
+}
+
+static struct triple *struct_field(struct compile_state *state,
+       struct triple *decl, struct hash_entry *field)
+{
+       struct triple **vector;
+       struct type *type;
+       ulong_t index;
+       type = decl->type;
+       if ((type->type & TYPE_MASK) != TYPE_STRUCT) {
+               return decl;
+       }
+       if (decl->op != OP_VAL_VEC) {
+               internal_error(state, 0, "Invalid struct variable");
+       }
+       if (!field) {
+               internal_error(state, 0, "Missing structure field");
        }
        type = type->left;
        vector = &RHS(decl, 0);
@@ -4081,8 +4548,8 @@ static void arrays_complete(struct compile_state *state, struct type *type)
 static unsigned int do_integral_promotion(unsigned int type)
 {
        type &= TYPE_MASK;
-       if (TYPE_INTEGER(type) && 
-               TYPE_RANK(type) < TYPE_RANK(TYPE_INT)) {
+       if (type == TYPE_ENUM) type = TYPE_INT;
+       if (TYPE_INTEGER(type) && (TYPE_RANK(type) < TYPE_RANK(TYPE_INT))) {
                type = TYPE_INT;
        }
        return type;
@@ -4093,6 +4560,9 @@ static unsigned int do_arithmetic_conversion(
 {
        left &= TYPE_MASK;
        right &= TYPE_MASK;
+       /* Convert enums to ints */
+       if (left == TYPE_ENUM) left = TYPE_INT;
+       if (right == TYPE_ENUM) right = TYPE_INT;
        if ((left == TYPE_LDOUBLE) || (right == TYPE_LDOUBLE)) {
                return TYPE_LDOUBLE;
        }
@@ -4137,6 +4607,10 @@ static int equiv_types(struct type *left, struct type *right)
                return 0;
        }
        type = left->type & TYPE_MASK;
+       /* If the basic types match and it is a void type we are done */
+       if (type == TYPE_VOID) {
+               return 1;
+       }
        /* if the basic types match and it is an arithmetic type we are done */
        if (TYPE_ARITHMETIC(type)) {
                return 1;
@@ -4260,7 +4734,14 @@ static struct triple *integral_promotion(
                int_type = type->type & ~TYPE_MASK;
                int_type |= do_integral_promotion(type->type);
                if (int_type != type->type) {
-                       def->type = new_type(int_type, 0, 0);
+                       if (def->op != OP_LOAD) {
+                               def->type = new_type(int_type, 0, 0);
+                       }
+                       else {
+#warning "FIXME can I just cast all operands like this?"
+                               def = triple(state, OP_COPY, 
+                                       new_type(int_type, 0, 0), def, 0);
+                       }
                }
        }
        return def;
@@ -4372,16 +4853,13 @@ static int is_lvalue(struct compile_state *state, struct triple *def)
        if (!is_stable(state, def)) {
                return 0;
        }
-       if (def->type->type & QUAL_CONST) {
-               ret = 0;
-       }
-       else if (def->op == OP_DOT) {
+       if (def->op == OP_DOT) {
                ret = is_lvalue(state, RHS(def, 0));
        }
        return ret;
 }
 
-static void lvalue(struct compile_state *state, struct triple *def)
+static void clvalue(struct compile_state *state, struct triple *def)
 {
        if (!def) {
                internal_error(state, def, "nothing where lvalue expected?");
@@ -4390,6 +4868,13 @@ static void lvalue(struct compile_state *state, struct triple *def)
                error(state, def, "lvalue expected");
        }
 }
+static void lvalue(struct compile_state *state, struct triple *def)
+{
+       clvalue(state, def); /* the checks shared with const lvalues come first */
+       if (def->type->type & QUAL_CONST) { /* a const object may not be written */
+               error(state, def, "modifiable lvalue expected");
+       }
+}
 
 static int is_pointer(struct triple *def)
 {
@@ -4421,11 +4906,15 @@ static struct triple *int_const(
 }
 
 
+static struct triple *read_expr(struct compile_state *state, struct triple *def);
+
 static struct triple *do_mk_addr_expr(struct compile_state *state, 
        struct triple *expr, struct type *type, ulong_t offset)
 {
        struct triple *result;
-       lvalue(state, expr);
+       clvalue(state, expr);
+
+       type = new_type(TYPE_POINTER | (type->type & QUAL_MASK), type, 0);
 
        result = 0;
        if (expr->op == OP_ADECL) {
@@ -4441,19 +4930,16 @@ static struct triple *do_mk_addr_expr(struct compile_state *state,
                        RHS(expr, 0),
                        int_const(state, &ulong_type, offset));
        }
+       if (!result) {
+               internal_error(state, expr, "cannot take address of expression");
+       }
        return result;
 }
 
 static struct triple *mk_addr_expr(
        struct compile_state *state, struct triple *expr, ulong_t offset)
 {
-       struct type *type;
-       
-       type = new_type(
-               TYPE_POINTER | (expr->type->type & QUAL_MASK),
-               expr->type, 0);
-
-       return do_mk_addr_expr(state, expr, type, offset);
+       return do_mk_addr_expr(state, expr, expr->type, offset);
 }
 
 static struct triple *mk_deref_expr(
@@ -4462,13 +4948,32 @@ static struct triple *mk_deref_expr(
        struct type *base_type;
        pointer(state, expr);
        base_type = expr->type->left;
-       if (!TYPE_PTR(base_type->type) && !TYPE_ARITHMETIC(base_type->type)) {
-               error(state, 0, 
-                       "Only pointer and arithmetic values can be dereferenced");
-       }
        return triple(state, OP_DEREF, base_type, expr, 0);
 }
 
+/* Decay an array valued expression into a pointer to its first element.
+ * Anything that is not of array type is handed back untouched.
+ */
+static struct triple *array_to_pointer(struct compile_state *state, struct triple *def)
+{
+       struct type *ptr_type;
+       struct triple *addr;
+       if ((def->type->type & TYPE_MASK) != TYPE_ARRAY) {
+               return def;
+       }
+       ptr_type = new_type(TYPE_POINTER | (def->type->type & QUAL_MASK),
+               def->type->left, 0);
+       if ((def->op != OP_SDECL) && !IS_CONST_OP(def->op)) {
+               return triple(state, OP_COPY, ptr_type, def, 0);
+       }
+       if ((def->op != OP_SDECL) && (def->op != OP_BLOBCONST)) {
+               internal_error(state, def, "bad array constant");
+       }
+       addr = triple(state, OP_ADDRCONST, ptr_type, 0, 0);
+       MISC(addr, 0) = def; /* remember what the address constant points at */
+       return addr;
+}
+
 static struct triple *deref_field(
        struct compile_state *state, struct triple *expr, struct hash_entry *field)
 {
@@ -4483,17 +4988,7 @@ static struct triple *deref_field(
                error(state, 0, "request for member %s in something not a struct or union",
                        field->name);
        }
-       member = type->left;
-       while((member->type & TYPE_MASK) == TYPE_PRODUCT) {
-               if (member->left->field_ident == field) {
-                       member = member->left;
-                       break;
-               }
-               member = member->right;
-       }
-       if (member->field_ident != field) {
-               error(state, 0, "%s is not a member", field->name);
-       }
+       member = field_type(state, type, field);
        if ((type->type & STOR_MASK) == STOR_PERM) {
                /* Do the pointer arithmetic to get a deref the field */
                ulong_t offset;
@@ -4503,8 +4998,7 @@ static struct triple *deref_field(
        }
        else {
                /* Find the variable for the field I want. */
-               result = triple(state, OP_DOT, 
-                       field_type(state, type, field), expr, 0);
+               result = triple(state, OP_DOT, member, expr, 0);
                result->u.field = field;
        }
        return result;
@@ -4520,26 +5014,24 @@ static struct triple *read_expr(struct compile_state *state, struct triple *def)
                return def;
        }
        /* Tranform an array to a pointer to the first element */
+       
 #warning "CHECK_ME is this the right place to transform arrays to pointers?"
        if ((def->type->type & TYPE_MASK) == TYPE_ARRAY) {
-               struct type *type;
-               struct triple *result;
-               type = new_type(
-                       TYPE_POINTER | (def->type->type & QUAL_MASK),
-                       def->type->left, 0);
-               result = triple(state, OP_ADDRCONST, type, 0, 0);
-               MISC(result, 0) = def;
-               return result;
+               return array_to_pointer(state, def);
        }
        if (is_in_reg(state, def)) {
                op = OP_READ;
        } else {
+               if (def->op == OP_SDECL) {
+                       def = mk_addr_expr(state, def, 0);
+                       def = mk_deref_expr(state, def);
+               }
                op = OP_LOAD;
        }
        return triple(state, op, def->type, def, 0);
 }
 
-static void write_compatible(struct compile_state *state,
+int is_write_compatible(struct compile_state *state, 
        struct type *dest, struct type *rval)
 {
        int compatible = 0;
@@ -4564,11 +5056,31 @@ static void write_compatible(struct compile_state *state,
                (dest->type_ident == rval->type_ident)) {
                compatible = 1;
        }
-       if (!compatible) {
+       return compatible;
+}
+
+
+static void write_compatible(struct compile_state *state,
+       struct type *dest, struct type *rval)
+{
+       if (!is_write_compatible(state, dest, rval)) {
                error(state, 0, "Incompatible types in assignment");
        }
 }
 
+/* Decide whether a value of type rval may initialize an object of type dest.
+ * Everything is_write_compatible() accepts is allowed, and in addition
+ * any pair of types that equiv_types() considers equivalent.
+ * Returns 1 when the initialization is acceptable and 0 otherwise.
+ */
+static int is_init_compatible(struct compile_state *state,
+       struct type *dest, struct type *rval)
+{
+       /* equiv_types() is only consulted when the assignment rules reject the pair */
+       return is_write_compatible(state, dest, rval) ||
+               equiv_types(dest, rval);
+}
+
 static struct triple *write_expr(
        struct compile_state *state, struct triple *dest, struct triple *rval)
 {
@@ -4586,6 +5098,9 @@ static struct triple *write_expr(
        if (!is_lvalue(state, dest)) {
                internal_error(state, 0, "writing to a non lvalue?");
        }
+       if (dest->type->type & QUAL_CONST) {
+               internal_error(state, 0, "modifable lvalue expexted");
+       }
 
        write_compatible(state, dest->type, rval->type);
 
@@ -4760,7 +5275,7 @@ static int expr_depth(struct compile_state *state, struct triple *ins)
                rdepth = expr_depth(state, RHS(ins, 1));
                count = (ldepth >= rdepth)? ldepth : rdepth;
        }
-       else if (ins->op == OP_CALL) {
+       else if (ins->op == OP_FCALL) {
                /* Don't figure the depth of a call just guess it is huge */
                count = 1000;
        }
@@ -4784,17 +5299,18 @@ static struct triple *flatten(
        struct compile_state *state, struct triple *first, struct triple *ptr);
 
 static struct triple *flatten_generic(
-       struct compile_state *state, struct triple *first, struct triple *ptr)
+       struct compile_state *state, struct triple *first, struct triple *ptr,
+       int ignored)
 {
        struct rhs_vector {
                int depth;
                struct triple **ins;
        } vector[MAX_RHS];
        int i, rhs, lhs;
-       /* Only operations with just a rhs should come here */
+       /* Only operations with just a rhs and a lhs should come here */
        rhs = TRIPLE_RHS(ptr->sizes);
        lhs = TRIPLE_LHS(ptr->sizes);
-       if (TRIPLE_SIZE(ptr->sizes) != lhs + rhs) {
+       if (TRIPLE_SIZE(ptr->sizes) != lhs + rhs + ignored) {
                internal_error(state, ptr, "unexpected args for: %d %s",
                        ptr->op, tops(ptr->op));
        }
@@ -4918,154 +5434,10 @@ static struct triple *flatten_cond(
        return read_expr(state, val);
 }
 
-struct triple *copy_func(struct compile_state *state, struct triple *ofunc, 
-       struct occurance *base_occurance)
-{
-       struct triple *nfunc;
-       struct triple *nfirst, *ofirst;
-       struct triple *new, *old;
-
-#if 0
-       fprintf(stdout, "\n");
-       loc(stdout, state, 0);
-       fprintf(stdout, "\n__________ copy_func _________\n");
-       print_triple(state, ofunc);
-       fprintf(stdout, "__________ copy_func _________ done\n\n");
-#endif
-
-       /* Make a new copy of the old function */
-       nfunc = triple(state, OP_LIST, ofunc->type, 0, 0);
-       nfirst = 0;
-       ofirst = old = RHS(ofunc, 0);
-       do {
-               struct triple *new;
-               struct occurance *occurance;
-               int old_lhs, old_rhs;
-               old_lhs = TRIPLE_LHS(old->sizes);
-               old_rhs = TRIPLE_RHS(old->sizes);
-               occurance = inline_occurance(state, base_occurance, old->occurance);
-               new = alloc_triple(state, old->op, old->type, old_lhs, old_rhs,
-                       occurance);
-               if (!triple_stores_block(state, new)) {
-                       memcpy(&new->u, &old->u, sizeof(new->u));
-               }
-               if (!nfirst) {
-                       RHS(nfunc, 0) = nfirst = new;
-               }
-               else {
-                       insert_triple(state, nfirst, new);
-               }
-               new->id |= TRIPLE_FLAG_FLATTENED;
-               
-               /* During the copy remember new as user of old */
-               use_triple(old, new);
-
-               /* Populate the return type if present */
-               if (old == MISC(ofunc, 0)) {
-                       MISC(nfunc, 0) = new;
-               }
-               old = old->next;
-       } while(old != ofirst);
-
-       /* Make a second pass to fix up any unresolved references */
-       old = ofirst;
-       new = nfirst;
-       do {
-               struct triple **oexpr, **nexpr;
-               int count, i;
-               /* Lookup where the copy is, to join pointers */
-               count = TRIPLE_SIZE(old->sizes);
-               for(i = 0; i < count; i++) {
-                       oexpr = &old->param[i];
-                       nexpr = &new->param[i];
-                       if (!*nexpr && *oexpr && (*oexpr)->use) {
-                               *nexpr = (*oexpr)->use->member;
-                               if (*nexpr == old) {
-                                       internal_error(state, 0, "new == old?");
-                               }
-                               use_triple(*nexpr, new);
-                       }
-                       if (!*nexpr && *oexpr) {
-                               internal_error(state, 0, "Could not copy %d\n", i);
-                       }
-               }
-               old = old->next;
-               new = new->next;
-       } while((old != ofirst) && (new != nfirst));
-       
-       /* Make a third pass to cleanup the extra useses */
-       old = ofirst;
-       new = nfirst;
-       do {
-               unuse_triple(old, new);
-               old = old->next;
-               new = new->next;
-       } while ((old != ofirst) && (new != nfirst));
-       return nfunc;
-}
-
-static struct triple *flatten_call(
+static struct triple *flatten_fcall(
        struct compile_state *state, struct triple *first, struct triple *ptr)
 {
-       /* Inline the function call */
-       struct type *ptype;
-       struct triple *ofunc, *nfunc, *nfirst, *param, *result;
-       struct triple *end, *nend;
-       int pvals, i;
-
-       /* Find the triples */
-       ofunc = MISC(ptr, 0);
-       if (ofunc->op != OP_LIST) {
-               internal_error(state, 0, "improper function");
-       }
-       nfunc = copy_func(state, ofunc, ptr->occurance);
-       nfirst = RHS(nfunc, 0)->next;
-       /* Prepend the parameter reading into the new function list */
-       ptype = nfunc->type->right;
-       param = RHS(nfunc, 0)->next;
-       pvals = TRIPLE_RHS(ptr->sizes);
-       for(i = 0; i < pvals; i++) {
-               struct type *atype;
-               struct triple *arg;
-               atype = ptype;
-               if ((ptype->type & TYPE_MASK) == TYPE_PRODUCT) {
-                       atype = ptype->left;
-               }
-               while((param->type->type & TYPE_MASK) != (atype->type & TYPE_MASK)) {
-                       param = param->next;
-               }
-               arg = RHS(ptr, i);
-               flatten(state, nfirst, write_expr(state, param, arg));
-               ptype = ptype->right;
-               param = param->next;
-       }
-       result = 0;
-       if ((nfunc->type->left->type & TYPE_MASK) != TYPE_VOID) {
-               result = read_expr(state, MISC(nfunc,0));
-       }
-#if 0
-       fprintf(stdout, "\n");
-       loc(stdout, state, 0);
-       fprintf(stdout, "\n__________ flatten_call _________\n");
-       print_triple(state, nfunc);
-       fprintf(stdout, "__________ flatten_call _________ done\n\n");
-#endif
-
-       /* Get rid of the extra triples */
-       nfirst = RHS(nfunc, 0)->next;
-       free_triple(state, RHS(nfunc, 0));
-       RHS(nfunc, 0) = 0;
-       free_triple(state, nfunc);
-
-       /* Append the new function list onto the return list */
-       end = first->prev;
-       nend = nfirst->prev;
-       end->next    = nfirst;
-       nfirst->prev = end;
-       nend->next   = first;
-       first->prev  = nend;
-
-       return result;
+       return flatten_generic(state, first, ptr, 1);
 }
 
 static struct triple *flatten(
@@ -5081,13 +5453,6 @@ static struct triple *flatten(
                        return ptr;
                }
                switch(ptr->op) {
-               case OP_WRITE:
-               case OP_STORE:
-                       RHS(ptr, 0) = flatten(state, first, RHS(ptr, 0));
-                       LHS(ptr, 0) = flatten(state, first, LHS(ptr, 0));
-                       use_triple(LHS(ptr, 0), ptr);
-                       use_triple(RHS(ptr, 0), ptr);
-                       break;
                case OP_COMMA:
                        RHS(ptr, 0) = flatten(state, first, RHS(ptr, 0));
                        ptr = RHS(ptr, 1);
@@ -5105,8 +5470,8 @@ static struct triple *flatten(
                case OP_COND:
                        ptr = flatten_cond(state, first, ptr);
                        break;
-               case OP_CALL:
-                       ptr = flatten_call(state, first, ptr);
+               case OP_FCALL:
+                       ptr = flatten_fcall(state, first, ptr);
                        break;
                case OP_READ:
                case OP_LOAD:
@@ -5115,16 +5480,31 @@ static struct triple *flatten(
                        break;
                case OP_BRANCH:
                        use_triple(TARG(ptr, 0), ptr);
-                       if (TRIPLE_RHS(ptr->sizes)) {
-                               use_triple(RHS(ptr, 0), ptr);
-                               if (ptr->next != ptr) {
-                                       use_triple(ptr->next, ptr);
-                               }
+                       break;
+               case OP_CBRANCH:
+                       RHS(ptr, 0) = flatten(state, first, RHS(ptr, 0));
+                       use_triple(RHS(ptr, 0), ptr);
+                       use_triple(TARG(ptr, 0), ptr);
+                       if (ptr->next != ptr) {
+                               use_triple(ptr->next, ptr);
+                       }
+                       break;
+               case OP_CALL:
+                       MISC(ptr, 0) = flatten(state, first, MISC(ptr, 0));
+                       use_triple(MISC(ptr, 0), ptr);
+                       use_triple(TARG(ptr, 0), ptr);
+                       if (ptr->next != ptr) {
+                               use_triple(ptr->next, ptr);
                        }
                        break;
+               case OP_RET:
+                       RHS(ptr, 0) = flatten(state, first, RHS(ptr, 0));
+                       use_triple(RHS(ptr, 0), ptr);
+                       break;
                case OP_BLOBCONST:
-                       insert_triple(state, first, ptr);
+                       insert_triple(state, state->global_pool, ptr);
                        ptr->id |= TRIPLE_FLAG_FLATTENED;
+                       ptr->id &= ~TRIPLE_FLAG_LOCAL;
                        ptr = triple(state, OP_SDECL, ptr->type, ptr, 0);
                        use_triple(MISC(ptr, 0), ptr);
                        break;
@@ -5138,8 +5518,18 @@ static struct triple *flatten(
                {
                        struct triple *base;
                        base = RHS(ptr, 0);
-                       base = flatten(state, first, base);
-                       if (base->op == OP_VAL_VEC) {
+                       if (base->op == OP_DEREF) {
+                               struct triple *left;
+                               ulong_t offset;
+                               offset = field_offset(state, base->type, ptr->u.field);
+                               left = RHS(base, 0);
+                               ptr = triple(state, OP_ADD, left->type, 
+                                       read_expr(state, left),
+                                       int_const(state, &ulong_type, offset));
+                               free_triple(state, base);
+                       }
+                       else if (base->op == OP_VAL_VEC) {
+                               base = flatten(state, first, base);
                                ptr = struct_field(state, base, ptr->u.field);
                        }
                        break;
@@ -5150,21 +5540,29 @@ static struct triple *flatten(
                        use_triple(ptr, MISC(ptr, 0));
                        break;
                case OP_ADDRCONST:
-               case OP_SDECL:
                        MISC(ptr, 0) = flatten(state, first, MISC(ptr, 0));
                        use_triple(MISC(ptr, 0), ptr);
                        break;
+               case OP_SDECL:
+                       first = state->global_pool;
+                       MISC(ptr, 0) = flatten(state, first, MISC(ptr, 0));
+                       use_triple(MISC(ptr, 0), ptr);
+                       insert_triple(state, first, ptr);
+                       ptr->id |= TRIPLE_FLAG_FLATTENED;
+                       ptr->id &= ~TRIPLE_FLAG_LOCAL;
+                       return ptr;
                case OP_ADECL:
                        break;
                default:
                        /* Flatten the easy cases we don't override */
-                       ptr = flatten_generic(state, first, ptr);
+                       ptr = flatten_generic(state, first, ptr, 0);
                        break;
                }
        } while(ptr && (ptr != orig_ptr));
        if (ptr) {
                insert_triple(state, first, ptr);
                ptr->id |= TRIPLE_FLAG_FLATTENED;
+               ptr->id &= ~TRIPLE_FLAG_LOCAL;
        }
        return ptr;
 }
@@ -5346,8 +5744,17 @@ static struct triple *mk_subscript_expr(
        return mk_deref_expr(state, mk_add_expr(state, left, right));
 }
 
-/*
- * Compile time evaluation
+/* Build the triple for a cast of expr to type: the operand is read with
+ * read_expr() and the result is wrapped in an OP_COPY of the target type.
+ */
+static struct triple *mk_cast_expr(
+       struct compile_state *state, struct type *type, struct triple *expr)
+{
+       return triple(state, OP_COPY, type, read_expr(state, expr), 0);
+}
+
+/*
+ * Compile time evaluation
  * ===========================
  */
 static int is_const(struct triple *ins)
@@ -5355,6 +5762,11 @@ static int is_const(struct triple *ins)
        return IS_CONST_OP(ins->op);
 }
 
+static int is_simple_const(struct triple *ins)
+{
+       return (ins->op != OP_ADDRCONST) && IS_CONST_OP(ins->op); /* constant, but not an address */
+}
+
 static int constants_equal(struct compile_state *state, 
        struct triple *left, struct triple *right)
 {
@@ -5405,14 +5817,30 @@ static int constants_equal(struct compile_state *state,
 
 static int is_zero(struct triple *ins)
 {
-       return is_const(ins) && (ins->u.cval == 0);
+       return is_simple_const(ins) && (ins->u.cval == 0);
 }
 
 static int is_one(struct triple *ins)
 {
-       return is_const(ins) && (ins->u.cval == 1);
+       return is_simple_const(ins) && (ins->u.cval == 1);
 }
 
+/* Count the bits that are set in value.
+ * Clearing the lowest set bit with value &= value - 1 once per iteration
+ * visits every set bit exactly once, so no assumption about the width of
+ * ulong_t is needed.
+ * Returns the number of set bits, which is 0 for a value of 0.
+ */
+static long_t bit_count(ulong_t value)
+{
+       long_t bits;
+       bits = 0;
+       while(value) {
+               value &= value - 1;
+               bits++;
+       }
+       return bits;
+}
 static long_t bsr(ulong_t value)
 {
        int i;
@@ -5469,10 +5897,8 @@ static int is_pow2(struct triple *ins)
 }
 
 static ulong_t read_const(struct compile_state *state,
-       struct triple *ins, struct triple **expr)
+       struct triple *ins, struct triple *rhs)
 {
-       struct triple *rhs;
-       rhs = *expr;
        switch(rhs->type->type &TYPE_MASK) {
        case TYPE_CHAR:   
        case TYPE_SHORT:
@@ -5488,16 +5914,124 @@ static ulong_t read_const(struct compile_state *state,
                internal_error(state, rhs, "bad type to read_const\n");
                break;
        }
+       if (!is_simple_const(rhs)) {
+               internal_error(state, rhs, "bad op to read_const\n");
+       }
        return rhs->u.cval;
 }
 
-static long_t read_sconst(struct triple *ins, struct triple **expr)
+static long_t read_sconst(struct compile_state *state,
+       struct triple *ins, struct triple *rhs)
 {
-       struct triple *rhs;
-       rhs = *expr;
        return (long_t)(rhs->u.cval);
 }
 
+int const_ltrue(struct compile_state *state, struct triple *ins, struct triple *rhs)
+{
+       if (!is_const(rhs)) { /* report against ins like const_eq and friends */
+               internal_error(state, ins, "non const passed to const_ltrue\n");
+       }
+       return !is_zero(rhs); /* logical truth: anything but a zero constant */
+}
+
+/* Compare two constants for equality; returns 1 if equal and 0 if not.
+ * Simple constants are compared by value, address constants by base
+ * (MISC slot 0) and u.cval.  Any other pairing is an internal error.
+ */
+int const_eq(struct compile_state *state, struct triple *ins,
+       struct triple *left, struct triple *right)
+{
+       if (!is_const(left) || !is_const(right)) {
+               internal_error(state, ins, "non const passed to const_eq\n");
+               return 0;
+       }
+       if (left == right) {
+               return 1;
+       }
+       if (is_simple_const(left) && is_simple_const(right)) {
+               ulong_t lhs_val, rhs_val;
+               lhs_val = read_const(state, ins, left);
+               rhs_val = read_const(state, ins, right);
+               return lhs_val == rhs_val;
+       }
+       if ((left->op == OP_ADDRCONST) && (right->op == OP_ADDRCONST)) {
+               if (MISC(left, 0) != MISC(right, 0)) {
+                       return 0;
+               }
+               return left->u.cval == right->u.cval;
+       }
+       internal_error(state, ins, "incomparable constants passed to const_eq\n");
+       return 0;
+}
+
+/* Three way unsigned comparison of two constants.
+ * Returns 1 when left > right, -1 when left < right and 0 when equal.
+ * Simple constants are compared through read_const(); two address
+ * constants are only comparable when they share the same base
+ * (MISC slot 0), in which case their u.cval fields are compared.
+ * After an internal error has been reported -2 is returned.
+ */
+int const_ucmp(struct compile_state *state, struct triple *ins,
+       struct triple *left, struct triple *right)
+{
+       int result;
+       if (!is_const(left) || !is_const(right)) {
+               internal_error(state, ins, "non const passed to const_ucmp\n");
+               result = -2;
+       }
+       else if (left == right) {
+               result = 0;
+       }
+       else if (is_simple_const(left) && is_simple_const(right)) {
+               ulong_t lval, rval;
+               lval = read_const(state, ins, left);
+               rval = read_const(state, ins, right);
+               /* (a > b) - (a < b) is 1, 0 or -1 without any overflow */
+               result = (lval > rval) - (lval < rval);
+       }
+       else if ((left->op == OP_ADDRCONST) &&
+               (right->op == OP_ADDRCONST) &&
+               (MISC(left, 0) == MISC(right, 0))) {
+               /* Same base, so u.cval alone decides the order */
+               result = (left->u.cval > right->u.cval) -
+                       (left->u.cval < right->u.cval);
+       }
+       else {
+               internal_error(state, ins, "incomparable constants passed to const_ucmp\n");
+               result = -2;
+       }
+       return result;
+}
+
+/* Three way signed comparison of two simple constants.
+ * Returns 1 when left > right, -1 when left < right and 0 when equal;
+ * -2 is returned after an internal error has been reported.
+ * Address constants are only accepted when both operands are the same triple.
+ */
+int const_scmp(struct compile_state *state, struct triple *ins,
+       struct triple *left, struct triple *right)
+{
+       int result;
+       if (!is_const(left) || !is_const(right)) {
+               internal_error(state, ins, "non const passed to const_scmp\n");
+               result = -2;
+       }
+       else if (left == right) {
+               result = 0;
+       }
+       else if (is_simple_const(left) && is_simple_const(right)) {
+               long_t lval, rval;
+               lval = read_sconst(state, ins, left);
+               rval = read_sconst(state, ins, right);
+               result = (lval > rval) - (lval < rval);
+       }
+       else {
+               internal_error(state, ins, "incomparable constants passed to const_scmp\n");
+               result = -2;
+       }
+       return result;
+}
+
 static void unuse_rhs(struct compile_state *state, struct triple *ins)
 {
        struct triple **expr;
@@ -5552,9 +6086,12 @@ static void wipe_ins(struct compile_state *state, struct triple *ins)
+/* Turn ins, in place, into an OP_COPY whose single rhs is rhs.
+ * The owning block is fetched with block_of_triple() before
+ * wipe_ins() runs and is stored back into ins->u.block afterwards
+ * (presumably because wipe_ins() discards it -- confirm).
+ * The new operand is registered as used by ins.
+ */
 static void mkcopy(struct compile_state *state, 
        struct triple *ins, struct triple *rhs)
 {
+       struct block *block;
+       block = block_of_triple(state, ins);
        wipe_ins(state, ins);
        ins->op = OP_COPY;
        ins->sizes = TRIPLE_SIZES(0, 1, 0, 0);
+       ins->u.block = block;
        RHS(ins, 0) = rhs;
        use_triple(RHS(ins, 0), ins);
 }
@@ -5574,6 +6111,9 @@ static void mkconst(struct compile_state *state,
 static void mkaddr_const(struct compile_state *state,
        struct triple *ins, struct triple *sdecl, ulong_t value)
 {
+       if (sdecl->op != OP_SDECL) {
+               internal_error(state, ins, "bad base for addrconst");
+       }
        wipe_ins(state, ins);
        ins->op = OP_ADDRCONST;
        ins->sizes = TRIPLE_SIZES(0, 0, 1, 0);
@@ -5586,7 +6126,7 @@ static void mkaddr_const(struct compile_state *state,
 static void flatten_structures(struct compile_state *state)
 {
        struct triple *ins, *first;
-       first = RHS(state->main_function, 0);
+       first = state->first;
        ins = first;
        /* Pass one expand structure values into valvecs.
         */
@@ -5594,6 +6134,7 @@ static void flatten_structures(struct compile_state *state)
        do {
                struct triple *next;
                next = ins->next;
+               valid_ins(state, ins);
                if ((ins->type->type & TYPE_MASK) == TYPE_STRUCT) {
                        if (ins->op == OP_VAL_VEC) {
                                /* Do nothing */
@@ -5630,7 +6171,7 @@ static void flatten_structures(struct compile_state *state)
                                }
                                propogate_use(state, ins, next);
                                flatten(state, ins, next);
-                               free_triple(state, ins);
+                               release_triple(state, ins);
                        }
                        else if ((ins->op == OP_STORE) || (ins->op == OP_WRITE)) {
                                struct triple *src, *dst, **vector;
@@ -5639,8 +6180,8 @@ static void flatten_structures(struct compile_state *state)
                                ulong_t i;
 
                                op = ins->op;
-                               src = RHS(ins, 0);
-                               dst = LHS(ins, 0);
+                               src = RHS(ins, 1);
+                               dst = RHS(ins, 0);
                                get_occurance(ins->occurance);
                                next = alloc_triple(state, OP_VAL_VEC, ins->type, -1, -1,
                                        ins->occurance);
@@ -5665,7 +6206,7 @@ static void flatten_structures(struct compile_state *state)
                                }
                                propogate_use(state, ins, next);
                                flatten(state, ins, next);
-                               free_triple(state, ins);
+                               release_triple(state, ins);
                        }
                }
                ins = next;
@@ -5677,6 +6218,9 @@ static void flatten_structures(struct compile_state *state)
                struct triple *next;
                next = ins->next;
                if (ins->op == OP_VAL_VEC) {
+                       if (ins->use) {
+                               internal_error(state, ins, "valvec used\n");
+                       }
                        release_triple(state, ins);
                } 
                ins = next;
@@ -5686,14 +6230,15 @@ static void flatten_structures(struct compile_state *state)
        ins = first;
        do {
                ins->id &= ~TRIPLE_FLAG_FLATTENED;
-               if ((ins->type->type & TYPE_MASK) == TYPE_STRUCT) {
-                       internal_error(state, 0, "STRUCT_TYPE remains?");
+               if ((ins->op != OP_BLOBCONST) && (ins->op != OP_SDECL) &&
+                       ((ins->type->type & TYPE_MASK) == TYPE_STRUCT)) {
+                       internal_error(state, ins, "STRUCT_TYPE remains?");
                }
                if (ins->op == OP_DOT) {
-                       internal_error(state, 0, "OP_DOT remains?");
+                       internal_error(state, ins, "OP_DOT remains?");
                }
                if (ins->op == OP_VAL_VEC) {
-                       internal_error(state, 0, "OP_VAL_VEC remains?");
+                       internal_error(state, ins, "OP_VAL_VEC remains?");
                }
                ins = ins->next;
        } while(ins != first);
@@ -5715,8 +6260,8 @@ static void simplify_smul(struct compile_state *state, struct triple *ins)
        }
        if (is_const(RHS(ins, 0)) && is_const(RHS(ins, 1))) {
                long_t left, right;
-               left  = read_sconst(ins, &RHS(ins, 0));
-               right = read_sconst(ins, &RHS(ins, 1));
+               left  = read_sconst(state, ins, RHS(ins, 0));
+               right = read_sconst(state, ins, RHS(ins, 1));
                mkconst(state, ins, left * right);
        }
        else if (is_zero(RHS(ins, 1))) {
@@ -5729,7 +6274,7 @@ static void simplify_smul(struct compile_state *state, struct triple *ins)
                struct triple *val;
                val = int_const(state, ins->type, tlog2(RHS(ins, 1)));
                ins->op = OP_SL;
-               insert_triple(state, ins, val);
+               insert_triple(state, state->global_pool, val);
                unuse_triple(RHS(ins, 1), ins);
                use_triple(val, ins);
                RHS(ins, 1) = val;
@@ -5746,8 +6291,8 @@ static void simplify_umul(struct compile_state *state, struct triple *ins)
        }
        if (is_const(RHS(ins, 0)) && is_const(RHS(ins, 1))) {
                ulong_t left, right;
-               left  = read_const(state, ins, &RHS(ins, 0));
-               right = read_const(state, ins, &RHS(ins, 1));
+               left  = read_const(state, ins, RHS(ins, 0));
+               right = read_const(state, ins, RHS(ins, 1));
                mkconst(state, ins, left * right);
        }
        else if (is_zero(RHS(ins, 1))) {
@@ -5760,7 +6305,7 @@ static void simplify_umul(struct compile_state *state, struct triple *ins)
                struct triple *val;
                val = int_const(state, ins->type, tlog2(RHS(ins, 1)));
                ins->op = OP_SL;
-               insert_triple(state, ins, val);
+               insert_triple(state, state->global_pool, val);
                unuse_triple(RHS(ins, 1), ins);
                use_triple(val, ins);
                RHS(ins, 1) = val;
@@ -5771,8 +6316,8 @@ static void simplify_sdiv(struct compile_state *state, struct triple *ins)
 {
        if (is_const(RHS(ins, 0)) && is_const(RHS(ins, 1))) {
                long_t left, right;
-               left  = read_sconst(ins, &RHS(ins, 0));
-               right = read_sconst(ins, &RHS(ins, 1));
+               left  = read_sconst(state, ins, RHS(ins, 0));
+               right = read_sconst(state, ins, RHS(ins, 1));
                mkconst(state, ins, left / right);
        }
        else if (is_zero(RHS(ins, 0))) {
@@ -5788,7 +6333,7 @@ static void simplify_sdiv(struct compile_state *state, struct triple *ins)
                struct triple *val;
                val = int_const(state, ins->type, tlog2(RHS(ins, 1)));
                ins->op = OP_SSR;
-               insert_triple(state, ins, val);
+               insert_triple(state, state->global_pool, val);
                unuse_triple(RHS(ins, 1), ins);
                use_triple(val, ins);
                RHS(ins, 1) = val;
@@ -5799,8 +6344,8 @@ static void simplify_udiv(struct compile_state *state, struct triple *ins)
 {
        if (is_const(RHS(ins, 0)) && is_const(RHS(ins, 1))) {
                ulong_t left, right;
-               left  = read_const(state, ins, &RHS(ins, 0));
-               right = read_const(state, ins, &RHS(ins, 1));
+               left  = read_const(state, ins, RHS(ins, 0));
+               right = read_const(state, ins, RHS(ins, 1));
                mkconst(state, ins, left / right);
        }
        else if (is_zero(RHS(ins, 0))) {
@@ -5816,7 +6361,7 @@ static void simplify_udiv(struct compile_state *state, struct triple *ins)
                struct triple *val;
                val = int_const(state, ins->type, tlog2(RHS(ins, 1)));
                ins->op = OP_USR;
-               insert_triple(state, ins, val);
+               insert_triple(state, state->global_pool, val);
                unuse_triple(RHS(ins, 1), ins);
                use_triple(val, ins);
                RHS(ins, 1) = val;
@@ -5827,8 +6372,8 @@ static void simplify_smod(struct compile_state *state, struct triple *ins)
 {
        if (is_const(RHS(ins, 0)) && is_const(RHS(ins, 1))) {
                long_t left, right;
-               left  = read_const(state, ins, &RHS(ins, 0));
-               right = read_const(state, ins, &RHS(ins, 1));
+               left  = read_const(state, ins, RHS(ins, 0));
+               right = read_const(state, ins, RHS(ins, 1));
                mkconst(state, ins, left % right);
        }
        else if (is_zero(RHS(ins, 0))) {
@@ -5844,18 +6389,19 @@ static void simplify_smod(struct compile_state *state, struct triple *ins)
                struct triple *val;
                val = int_const(state, ins->type, RHS(ins, 1)->u.cval - 1);
                ins->op = OP_AND;
-               insert_triple(state, ins, val);
+               insert_triple(state, state->global_pool, val);
                unuse_triple(RHS(ins, 1), ins);
                use_triple(val, ins);
                RHS(ins, 1) = val;
        }
 }
+
 static void simplify_umod(struct compile_state *state, struct triple *ins)
 {
        if (is_const(RHS(ins, 0)) && is_const(RHS(ins, 1))) {
                ulong_t left, right;
-               left  = read_const(state, ins, &RHS(ins, 0));
-               right = read_const(state, ins, &RHS(ins, 1));
+               left  = read_const(state, ins, RHS(ins, 0));
+               right = read_const(state, ins, RHS(ins, 1));
                mkconst(state, ins, left % right);
        }
        else if (is_zero(RHS(ins, 0))) {
@@ -5871,7 +6417,7 @@ static void simplify_umod(struct compile_state *state, struct triple *ins)
                struct triple *val;
                val = int_const(state, ins->type, RHS(ins, 1)->u.cval - 1);
                ins->op = OP_AND;
-               insert_triple(state, ins, val);
+               insert_triple(state, state->global_pool, val);
                unuse_triple(RHS(ins, 1), ins);
                use_triple(val, ins);
                RHS(ins, 1) = val;
@@ -5888,13 +6434,13 @@ static void simplify_add(struct compile_state *state, struct triple *ins)
                RHS(ins, 1) = tmp;
        }
        if (is_const(RHS(ins, 0)) && is_const(RHS(ins, 1))) {
-               if (!is_pointer(RHS(ins, 0))) {
+               if (RHS(ins, 0)->op == OP_INTCONST) {
                        ulong_t left, right;
-                       left  = read_const(state, ins, &RHS(ins, 0));
-                       right = read_const(state, ins, &RHS(ins, 1));
+                       left  = read_const(state, ins, RHS(ins, 0));
+                       right = read_const(state, ins, RHS(ins, 1));
                        mkconst(state, ins, left + right);
                }
-               else /* op == OP_ADDRCONST */ {
+               else if (RHS(ins, 0)->op == OP_ADDRCONST) {
                        struct triple *sdecl;
                        ulong_t left, right;
                        sdecl = MISC(RHS(ins, 0), 0);
@@ -5902,6 +6448,9 @@ static void simplify_add(struct compile_state *state, struct triple *ins)
                        right = RHS(ins, 1)->u.cval;
                        mkaddr_const(state, ins, sdecl, left + right);
                }
+               else {
+                       internal_warning(state, ins, "Optimize me!");
+               }
        }
        else if (is_const(RHS(ins, 0)) && !is_const(RHS(ins, 1))) {
                struct triple *tmp;
@@ -5914,13 +6463,13 @@ static void simplify_add(struct compile_state *state, struct triple *ins)
 static void simplify_sub(struct compile_state *state, struct triple *ins)
 {
        if (is_const(RHS(ins, 0)) && is_const(RHS(ins, 1))) {
-               if (!is_pointer(RHS(ins, 0))) {
+               if (RHS(ins, 0)->op == OP_INTCONST) {
                        ulong_t left, right;
-                       left  = read_const(state, ins, &RHS(ins, 0));
-                       right = read_const(state, ins, &RHS(ins, 1));
+                       left  = read_const(state, ins, RHS(ins, 0));
+                       right = read_const(state, ins, RHS(ins, 1));
                        mkconst(state, ins, left - right);
                }
-               else /* op == OP_ADDRCONST */ {
+               else if (RHS(ins, 0)->op == OP_ADDRCONST) {
                        struct triple *sdecl;
                        ulong_t left, right;
                        sdecl = MISC(RHS(ins, 0), 0);
@@ -5928,6 +6477,9 @@ static void simplify_sub(struct compile_state *state, struct triple *ins)
                        right = RHS(ins, 1)->u.cval;
                        mkaddr_const(state, ins, sdecl, left - right);
                }
+               else {
+                       internal_warning(state, ins, "Optimize me!");
+               }
        }
 }
 
@@ -5935,15 +6487,15 @@ static void simplify_sl(struct compile_state *state, struct triple *ins)
 {
        if (is_const(RHS(ins, 1))) {
                ulong_t right;
-               right = read_const(state, ins, &RHS(ins, 1));
+               right = read_const(state, ins, RHS(ins, 1));
                if (right >= (size_of(state, ins->type)*8)) {
                        warning(state, ins, "left shift count >= width of type");
                }
        }
        if (is_const(RHS(ins, 0)) && is_const(RHS(ins, 1))) {
                ulong_t left, right;
-               left  = read_const(state, ins, &RHS(ins, 0));
-               right = read_const(state, ins, &RHS(ins, 1));
+               left  = read_const(state, ins, RHS(ins, 0));
+               right = read_const(state, ins, RHS(ins, 1));
                mkconst(state, ins,  left << right);
        }
 }
@@ -5952,15 +6504,15 @@ static void simplify_usr(struct compile_state *state, struct triple *ins)
 {
        if (is_const(RHS(ins, 1))) {
                ulong_t right;
-               right = read_const(state, ins, &RHS(ins, 1));
+               right = read_const(state, ins, RHS(ins, 1));
                if (right >= (size_of(state, ins->type)*8)) {
                        warning(state, ins, "right shift count >= width of type");
                }
        }
        if (is_const(RHS(ins, 0)) && is_const(RHS(ins, 1))) {
                ulong_t left, right;
-               left  = read_const(state, ins, &RHS(ins, 0));
-               right = read_const(state, ins, &RHS(ins, 1));
+               left  = read_const(state, ins, RHS(ins, 0));
+               right = read_const(state, ins, RHS(ins, 1));
                mkconst(state, ins, left >> right);
        }
 }
@@ -5969,15 +6521,15 @@ static void simplify_ssr(struct compile_state *state, struct triple *ins)
 {
        if (is_const(RHS(ins, 1))) {
                ulong_t right;
-               right = read_const(state, ins, &RHS(ins, 1));
+               right = read_const(state, ins, RHS(ins, 1));
                if (right >= (size_of(state, ins->type)*8)) {
                        warning(state, ins, "right shift count >= width of type");
                }
        }
        if (is_const(RHS(ins, 0)) && is_const(RHS(ins, 1))) {
                long_t left, right;
-               left  = read_sconst(ins, &RHS(ins, 0));
-               right = read_sconst(ins, &RHS(ins, 1));
+               left  = read_sconst(state, ins, RHS(ins, 0));
+               right = read_sconst(state, ins, RHS(ins, 1));
                mkconst(state, ins, left >> right);
        }
 }
@@ -5986,8 +6538,8 @@ static void simplify_and(struct compile_state *state, struct triple *ins)
 {
        if (is_const(RHS(ins, 0)) && is_const(RHS(ins, 1))) {
                ulong_t left, right;
-               left  = read_const(state, ins, &RHS(ins, 0));
-               right = read_const(state, ins, &RHS(ins, 1));
+               left  = read_const(state, ins, RHS(ins, 0));
+               right = read_const(state, ins, RHS(ins, 1));
                mkconst(state, ins, left & right);
        }
 }
@@ -5996,8 +6548,8 @@ static void simplify_or(struct compile_state *state, struct triple *ins)
 {
        if (is_const(RHS(ins, 0)) && is_const(RHS(ins, 1))) {
                ulong_t left, right;
-               left  = read_const(state, ins, &RHS(ins, 0));
-               right = read_const(state, ins, &RHS(ins, 1));
+               left  = read_const(state, ins, RHS(ins, 0));
+               right = read_const(state, ins, RHS(ins, 1));
                mkconst(state, ins, left | right);
        }
 }
@@ -6006,8 +6558,8 @@ static void simplify_xor(struct compile_state *state, struct triple *ins)
 {
        if (is_const(RHS(ins, 0)) && is_const(RHS(ins, 1))) {
                ulong_t left, right;
-               left  = read_const(state, ins, &RHS(ins, 0));
-               right = read_const(state, ins, &RHS(ins, 1));
+               left  = read_const(state, ins, RHS(ins, 0));
+               right = read_const(state, ins, RHS(ins, 1));
                mkconst(state, ins, left ^ right);
        }
 }
@@ -6026,7 +6578,7 @@ static void simplify_neg(struct compile_state *state, struct triple *ins)
 {
        if (is_const(RHS(ins, 0))) {
                ulong_t left;
-               left = read_const(state, ins, &RHS(ins, 0));
+               left = read_const(state, ins, RHS(ins, 0));
                mkconst(state, ins, -left);
        }
        else if (RHS(ins, 0)->op == OP_NEG) {
@@ -6038,91 +6590,97 @@ static void simplify_invert(struct compile_state *state, struct triple *ins)
 {
        if (is_const(RHS(ins, 0))) {
                ulong_t left;
-               left = read_const(state, ins, &RHS(ins, 0));
+               left = read_const(state, ins, RHS(ins, 0));
                mkconst(state, ins, ~left);
        }
 }
 
+/* Fold an equality test.
+ * When both operands are constants ins becomes the constant
+ * (const_eq(...) == 1); otherwise, when both operands are the same
+ * triple, ins becomes the constant 1.
+ */
 static void simplify_eq(struct compile_state *state, struct triple *ins)
 {
-       if (is_const(RHS(ins, 0)) && is_const(RHS(ins, 1))) {
-               ulong_t left, right;
-               left  = read_const(state, ins, &RHS(ins, 0));
-               right = read_const(state, ins, &RHS(ins, 1));
-               mkconst(state, ins, left == right);
+       struct triple *left, *right;
+       left = RHS(ins, 0);
+       right = RHS(ins, 1);
+
+       if (is_const(left) && is_const(right)) {
+               mkconst(state, ins, const_eq(state, ins, left, right) == 1);
        }
-       else if (RHS(ins, 0) == RHS(ins, 1)) {
+       else if (left == right) {
                mkconst(state, ins, 1);
        }
 }
 
+/* Fold an inequality test.
+ * When both operands are constants ins becomes the constant
+ * (const_eq(...) != 1); otherwise, when both operands are the same
+ * triple, ins becomes the constant 0.  The two cases are exclusive,
+ * as in the other comparison simplifiers, so an already folded
+ * triple is not folded a second time.
+ */
 static void simplify_noteq(struct compile_state *state, struct triple *ins)
 {
-       if (is_const(RHS(ins, 0)) && is_const(RHS(ins, 1))) {
-               ulong_t left, right;
-               left  = read_const(state, ins, &RHS(ins, 0));
-               right = read_const(state, ins, &RHS(ins, 1));
-               mkconst(state, ins, left != right);
+       struct triple *left, *right;
+       left = RHS(ins, 0);
+       right = RHS(ins, 1);
+
+       if (is_const(left) && is_const(right)) {
+               mkconst(state, ins, const_eq(state, ins, left, right) != 1);
        }
-       else if (RHS(ins, 0) == RHS(ins, 1)) {
+       else if (left == right) {
                mkconst(state, ins, 0);
        }
 }
 
+/* Fold a signed less-than test.
+ * When both operands are constants ins becomes the constant
+ * (const_scmp(...) < 0); otherwise, when both operands are the same
+ * triple, ins becomes the constant 0.
+ */
 static void simplify_sless(struct compile_state *state, struct triple *ins)
 {
-       if (is_const(RHS(ins, 0)) && is_const(RHS(ins, 1))) {
-               long_t left, right;
-               left  = read_sconst(ins, &RHS(ins, 0));
-               right = read_sconst(ins, &RHS(ins, 1));
-               mkconst(state, ins, left < right);
+       struct triple *left, *right;
+       left = RHS(ins, 0);
+       right = RHS(ins, 1);
+
+       if (is_const(left) && is_const(right)) {
+               mkconst(state, ins, const_scmp(state, ins, left, right) < 0);
        }
-       else if (RHS(ins, 0) == RHS(ins, 1)) {
+       else if (left == right) {
                mkconst(state, ins, 0);
        }
 }
 
+/* Fold an unsigned less-than test.
+ * When both operands are constants ins becomes the constant
+ * (const_ucmp(...) < 0).  Otherwise a zero right operand, or two
+ * operands that are the same triple, make ins the constant 0.
+ */
 static void simplify_uless(struct compile_state *state, struct triple *ins)
 {
-       if (is_const(RHS(ins, 0)) && is_const(RHS(ins, 1))) {
-               ulong_t left, right;
-               left  = read_const(state, ins, &RHS(ins, 0));
-               right = read_const(state, ins, &RHS(ins, 1));
-               mkconst(state, ins, left < right);
+       struct triple *left, *right;
+       left = RHS(ins, 0);
+       right = RHS(ins, 1);
+
+       if (is_const(left) && is_const(right)) {
+               mkconst(state, ins, const_ucmp(state, ins, left, right) < 0);
        }
-       else if (is_zero(RHS(ins, 0))) {
-               mkconst(state, ins, 1);
+       else if (is_zero(right)) {
+               mkconst(state, ins, 0);
        }
-       else if (RHS(ins, 0) == RHS(ins, 1)) {
+       else if (left == right) {
                mkconst(state, ins, 0);
        }
 }
 
+/* Fold a signed greater-than test.
+ * When both operands are constants ins becomes the constant
+ * (const_scmp(...) > 0); otherwise, when both operands are the same
+ * triple, ins becomes the constant 0.
+ */
 static void simplify_smore(struct compile_state *state, struct triple *ins)
 {
-       if (is_const(RHS(ins, 0)) && is_const(RHS(ins, 1))) {
-               long_t left, right;
-               left  = read_sconst(ins, &RHS(ins, 0));
-               right = read_sconst(ins, &RHS(ins, 1));
-               mkconst(state, ins, left > right);
+       struct triple *left, *right;
+       left = RHS(ins, 0);
+       right = RHS(ins, 1);
+
+       if (is_const(left) && is_const(right)) {
+               mkconst(state, ins, const_scmp(state, ins, left, right) > 0);
        }
-       else if (RHS(ins, 0) == RHS(ins, 1)) {
+       else if (left == right) {
                mkconst(state, ins, 0);
        }
 }
 
+/* Fold an unsigned greater-than test.
+ * When both operands are constants ins becomes the constant
+ * (const_ucmp(...) > 0).  Otherwise a zero left operand, or two
+ * operands that are the same triple, make ins the constant 0.
+ */
 static void simplify_umore(struct compile_state *state, struct triple *ins)
 {
-       if (is_const(RHS(ins, 0)) && is_const(RHS(ins, 1))) {
-               ulong_t left, right;
-               left  = read_const(state, ins, &RHS(ins, 0));
-               right = read_const(state, ins, &RHS(ins, 1));
-               mkconst(state, ins, left > right);
+       struct triple *left, *right;
+       left = RHS(ins, 0);
+       right = RHS(ins, 1);
+
+       if (is_const(left) && is_const(right)) {
+               mkconst(state, ins, const_ucmp(state, ins, left, right) > 0);
        }
-       else if (is_zero(RHS(ins, 1))) {
-               mkconst(state, ins, 1);
+       else if (is_zero(left)) {
+               mkconst(state, ins, 0);
        }
-       else if (RHS(ins, 0) == RHS(ins, 1)) {
+       else if (left == right) {
                mkconst(state, ins, 0);
        }
 }
@@ -6130,108 +6688,115 @@ static void simplify_umore(struct compile_state *state, struct triple *ins)
 
+/* Fold a signed less-than-or-equal test.
+ * When both operands are constants ins becomes the constant
+ * (const_scmp(...) <= 0); otherwise, when both operands are the
+ * same triple, ins becomes the constant 1.
+ */
 static void simplify_slesseq(struct compile_state *state, struct triple *ins)
 {
-       if (is_const(RHS(ins, 0)) && is_const(RHS(ins, 1))) {
-               long_t left, right;
-               left  = read_sconst(ins, &RHS(ins, 0));
-               right = read_sconst(ins, &RHS(ins, 1));
-               mkconst(state, ins, left <= right);
+       struct triple *left, *right;
+       left = RHS(ins, 0);
+       right = RHS(ins, 1);
+
+       if (is_const(left) && is_const(right)) {
+               mkconst(state, ins, const_scmp(state, ins, left, right) <= 0);
        }
-       else if (RHS(ins, 0) == RHS(ins, 1)) {
+       else if (left == right) {
                mkconst(state, ins, 1);
        }
 }
 
+/* Fold an unsigned less-than-or-equal test.
+ * When both operands are constants ins becomes the constant
+ * (const_ucmp(...) <= 0).  Otherwise a zero left operand, or two
+ * operands that are the same triple, make ins the constant 1.
+ */
 static void simplify_ulesseq(struct compile_state *state, struct triple *ins)
 {
-       if (is_const(RHS(ins, 0)) && is_const(RHS(ins, 1))) {
-               ulong_t left, right;
-               left  = read_const(state, ins, &RHS(ins, 0));
-               right = read_const(state, ins, &RHS(ins, 1));
-               mkconst(state, ins, left <= right);
+       struct triple *left, *right;
+       left = RHS(ins, 0);
+       right = RHS(ins, 1);
+
+       if (is_const(left) && is_const(right)) {
+               mkconst(state, ins, const_ucmp(state, ins, left, right) <= 0);
        }
-       else if (is_zero(RHS(ins, 0))) {
+       else if (is_zero(left)) {
                mkconst(state, ins, 1);
        }
-       else if (RHS(ins, 0) == RHS(ins, 1)) {
+       else if (left == right) {
                mkconst(state, ins, 1);
        }
 }
 
+/* Fold a signed greater-than-or-equal test.
+ * When both operands are constants ins becomes the constant
+ * (const_scmp(...) >= 0); otherwise, when both operands are the
+ * same triple, ins becomes the constant 1.
+ */
 static void simplify_smoreeq(struct compile_state *state, struct triple *ins)
 {
-       if (is_const(RHS(ins, 0)) && is_const(RHS(ins, 0))) {
-               long_t left, right;
-               left  = read_sconst(ins, &RHS(ins, 0));
-               right = read_sconst(ins, &RHS(ins, 1));
-               mkconst(state, ins, left >= right);
+       struct triple *left, *right;
+       left = RHS(ins, 0);
+       right = RHS(ins, 1);
+
+       if (is_const(left) && is_const(right)) {
+               mkconst(state, ins, const_scmp(state, ins, left, right) >= 0);
        }
-       else if (RHS(ins, 0) == RHS(ins, 1)) {
+       else if (left == right) {
                mkconst(state, ins, 1);
        }
 }
 
+/* Fold an unsigned greater-than-or-equal test.
+ * When both operands are constants ins becomes the constant
+ * (const_ucmp(...) >= 0).  Otherwise a zero right operand, or two
+ * operands that are the same triple, make ins the constant 1.
+ */
 static void simplify_umoreeq(struct compile_state *state, struct triple *ins)
 {
-       if (is_const(RHS(ins, 0)) && is_const(RHS(ins, 1))) {
-               ulong_t left, right;
-               left  = read_const(state, ins, &RHS(ins, 0));
-               right = read_const(state, ins, &RHS(ins, 1));
-               mkconst(state, ins, left >= right);
+       struct triple *left, *right;
+       left = RHS(ins, 0);
+       right = RHS(ins, 1);
+
+       if (is_const(left) && is_const(right)) {
+               mkconst(state, ins, const_ucmp(state, ins, left, right) >= 0);
        }
-       else if (is_zero(RHS(ins, 1))) {
+       else if (is_zero(right)) {
                mkconst(state, ins, 1);
        }
-       else if (RHS(ins, 0) == RHS(ins, 1)) {
+       else if (left == right) {
                mkconst(state, ins, 1);
        }
 }
 
+/* Fold a logical-false (logical not) test.
+ * A constant operand makes ins the constant !const_ltrue(...).
+ * Otherwise, when ins is the only recorded user of its operand and
+ * that operand is one of the boolean or comparison operations listed
+ * below, the operand's opcode is switched to the opposite test and
+ * ins is turned into a copy of the operand.  Any other operand
+ * leaves ins untouched.
+ */
 static void simplify_lfalse(struct compile_state *state, struct triple *ins)
 {
-       if (is_const(RHS(ins, 0))) {
-               ulong_t left;
-               left = read_const(state, ins, &RHS(ins, 0));
-               mkconst(state, ins, left == 0);
+       struct triple *rhs;
+       rhs = RHS(ins, 0);
+
+       if (is_const(rhs)) {
+               mkconst(state, ins, !const_ltrue(state, ins, rhs));
        }
        /* Otherwise if I am the only user... */
-       else if ((RHS(ins, 0)->use->member == ins) && (RHS(ins, 0)->use->next == 0)) {
+       else if ((rhs->use) &&
+               (rhs->use->member == ins) && (rhs->use->next == 0)) {
                int need_copy = 1;
                /* Invert a boolean operation */
-               switch(RHS(ins, 0)->op) {
-               case OP_LTRUE:   RHS(ins, 0)->op = OP_LFALSE;  break;
-               case OP_LFALSE:  RHS(ins, 0)->op = OP_LTRUE;   break;
-               case OP_EQ:      RHS(ins, 0)->op = OP_NOTEQ;   break;
-               case OP_NOTEQ:   RHS(ins, 0)->op = OP_EQ;      break;
-               case OP_SLESS:   RHS(ins, 0)->op = OP_SMOREEQ; break;
-               case OP_ULESS:   RHS(ins, 0)->op = OP_UMOREEQ; break;
-               case OP_SMORE:   RHS(ins, 0)->op = OP_SLESSEQ; break;
-               case OP_UMORE:   RHS(ins, 0)->op = OP_ULESSEQ; break;
-               case OP_SLESSEQ: RHS(ins, 0)->op = OP_SMORE;   break;
-               case OP_ULESSEQ: RHS(ins, 0)->op = OP_UMORE;   break;
-               case OP_SMOREEQ: RHS(ins, 0)->op = OP_SLESS;   break;
-               case OP_UMOREEQ: RHS(ins, 0)->op = OP_ULESS;   break;
+               switch(rhs->op) {
+               case OP_LTRUE:   rhs->op = OP_LFALSE;  break;
+               case OP_LFALSE:  rhs->op = OP_LTRUE;   break;
+               case OP_EQ:      rhs->op = OP_NOTEQ;   break;
+               case OP_NOTEQ:   rhs->op = OP_EQ;      break;
+               case OP_SLESS:   rhs->op = OP_SMOREEQ; break;
+               case OP_ULESS:   rhs->op = OP_UMOREEQ; break;
+               case OP_SMORE:   rhs->op = OP_SLESSEQ; break;
+               case OP_UMORE:   rhs->op = OP_ULESSEQ; break;
+               case OP_SLESSEQ: rhs->op = OP_SMORE;   break;
+               case OP_ULESSEQ: rhs->op = OP_UMORE;   break;
+               case OP_SMOREEQ: rhs->op = OP_SLESS;   break;
+               case OP_UMOREEQ: rhs->op = OP_ULESS;   break;
                default:
                        need_copy = 0;
                        break;
                }
                if (need_copy) {
-                       mkcopy(state, ins, RHS(ins, 0));
+                       mkcopy(state, ins, rhs);
                }
        }
 }
 
+/* Fold a logical-true test.
+ * A constant operand makes ins the constant const_ltrue(...).
+ * Otherwise, when the operand is itself one of the boolean or
+ * comparison operations listed below, ins is turned into a plain
+ * copy of that operand.  Any other operand leaves ins untouched.
+ */
 static void simplify_ltrue (struct compile_state *state, struct triple *ins)
 {
-       if (is_const(RHS(ins, 0))) {
-               ulong_t left;
-               left = read_const(state, ins, &RHS(ins, 0));
-               mkconst(state, ins, left != 0);
+       struct triple *rhs;
+       rhs = RHS(ins, 0);
+
+       if (is_const(rhs)) {
+               mkconst(state, ins, const_ltrue(state, ins, rhs));
        }
-       else switch(RHS(ins, 0)->op) {
+       else switch(rhs->op) {
        case OP_LTRUE:   case OP_LFALSE:  case OP_EQ:      case OP_NOTEQ:
        case OP_SLESS:   case OP_ULESS:   case OP_SMORE:   case OP_UMORE:
        case OP_SLESSEQ: case OP_ULESSEQ: case OP_SMOREEQ: case OP_UMOREEQ:
-               mkcopy(state, ins, RHS(ins, 0));
+               mkcopy(state, ins, rhs);
        }
 
 }
@@ -6243,7 +6808,7 @@ static void simplify_copy(struct compile_state *state, struct triple *ins)
                case OP_INTCONST:
                {
                        ulong_t left;
-                       left = read_const(state, ins, &RHS(ins, 0));
+                       left = read_const(state, ins, RHS(ins, 0));
                        mkconst(state, ins, left);
                        break;
                }
@@ -6263,29 +6828,99 @@ static void simplify_copy(struct compile_state *state, struct triple *ins)
        }
 }
 
+/* Report whether a basic block contains a phi function.
+ *
+ * Walks the triples from block->first through block->last inclusive
+ * and returns 1 as soon as an OP_PHI is seen.  Returns 0 when none
+ * is found or when block is null.
+ */
+static int phi_present(struct block *block)
+{
+       struct triple *ptr;
+       if (!block) {
+               return 0;
+       }
+       /* Test for the end of the block before advancing, so that
+        * block->last itself is examined and a block whose first and
+        * last triple coincide does not walk on into later triples.
+        */
+       for(ptr = block->first; ; ptr = ptr->next) {
+               if (ptr->op == OP_PHI) {
+                       return 1;
+               }
+               if (ptr == block->last) {
+                       break;
+               }
+       }
+       return 0;
+}
+
+/* Tell whether some phi function relies on this block existing.
+ *
+ * A block that otherwise looks useless is unsafe to remove when any
+ * block on its edge list holds a phi function.  Returns 1 in that
+ * case, 0 otherwise (including for a null block).
+ */
+static int phi_dependency(struct block *block)
+{
+       struct block_set *link;
+       if (!block) {
+               return 0;
+       }
+       link = block->edges;
+       while(link) {
+               if (phi_present(link->member)) {
+                       return 1;
+               }
+               link = link->next;
+       }
+       return 0;
+}
+
+/* Find the triple a branch really lands on.
+ *
+ * Starts at the branch's first target and steps forward past
+ * structural triples.  The walk stops early on a triple that links
+ * back to itself (scc_transform allocates such temporaries) and
+ * before wrapping around to state->first.
+ */
+static struct triple *branch_target(struct compile_state *state, struct triple *ins)
+{
+       struct triple *dest;
+       dest = TARG(ins, 0);
+       for(;;) {
+               if (!triple_is_structural(state, dest)) {
+                       break;
+               }
+               /* A self-looping temporary: do not advance past it. */
+               if (dest->next == dest) {
+                       break;
+               }
+               if (dest->next == state->first) {
+                       break;
+               }
+               dest = dest->next;
+       }
+       return dest;
+}
+
+
 static void simplify_branch(struct compile_state *state, struct triple *ins)
 {
-       struct block *block;
-       if (ins->op != OP_BRANCH) {
+       int simplified;
+       if ((ins->op != OP_BRANCH) && (ins->op != OP_CBRANCH)) {
                internal_error(state, ins, "not branch");
        }
        if (ins->use != 0) {
                internal_error(state, ins, "branch use");
        }
-#warning "FIXME implement simplify branch."
        /* The challenge here with simplify branch is that I need to 
         * make modifications to the control flow graph as well
-        * as to the branch instruction itself.
+        * as to the branch instruction itself.  That is handled
+        * by rebuilding the basic blocks after simplify all is called.
         */
-       block = ins->u.block;
-       
-       if (TRIPLE_RHS(ins->sizes) && is_const(RHS(ins, 0))) {
+
+       /* If we have a branch to an unconditional branch update
+        * our target.  But watch out for dependencies from phi
+        * functions. 
+        */
+       do {
+               struct triple *targ;
+               simplified = 0;
+               targ = branch_target(state, ins);
+               if ((targ != ins) && (targ->op == OP_BRANCH) && 
+                       !phi_dependency(targ->u.block))
+               {
+                       unuse_triple(TARG(ins, 0), ins);
+                       TARG(ins, 0) = TARG(targ, 0);
+                       use_triple(TARG(ins, 0), ins);
+                       simplified = 1;
+               }
+       } while(simplified);
+
+       /* If we have a conditional branch with a constant condition
+        * make it an unconditional branch.
+        */
+       if ((ins->op == OP_CBRANCH) && is_const(RHS(ins, 0))) {
                struct triple *targ;
                ulong_t value;
-               value = read_const(state, ins, &RHS(ins, 0));
+               value = read_const(state, ins, RHS(ins, 0));
                unuse_triple(RHS(ins, 0), ins);
                targ = TARG(ins, 0);
                ins->sizes = TRIPLE_SIZES(0, 0, 0, 1);
+               ins->op = OP_BRANCH;
                if (value) {
                        unuse_triple(ins->next, ins);
                        TARG(ins, 0) = targ;
@@ -6294,11 +6929,14 @@ static void simplify_branch(struct compile_state *state, struct triple *ins)
                        unuse_triple(targ, ins);
                        TARG(ins, 0) = ins->next;
                }
-#warning "FIXME handle the case of making a branch unconditional"
        }
+       
+       /* If we have a branch to the next instruction
+        * make it a noop.
+        */
        if (TARG(ins, 0) == ins->next) {
                unuse_triple(ins->next, ins);
-               if (TRIPLE_RHS(ins->sizes)) {
+               if (ins->op == OP_CBRANCH) {
                        unuse_triple(RHS(ins, 0), ins);
                        unuse_triple(ins->next, ins);
                }
@@ -6307,28 +6945,86 @@ static void simplify_branch(struct compile_state *state, struct triple *ins)
                if (ins->use) {
                        internal_error(state, ins, "noop use != 0");
                }
-#warning "FIXME handle the case of killing a branch"
+       }
+}
+
+static void simplify_label(struct compile_state *state, struct triple *ins)
+{
+       /* Ignore volatile labels */
+       if (!triple_is_pure(state, ins, ins->id)) {
+               return;
+       }
+       if (ins->use == 0) {
+               ins->op = OP_NOOP;
+       }
+       else if (ins->prev->op == OP_LABEL) {
+               /* In general it is not safe to merge one label that
+                * immediately follows another.  The problem is that the empty
+                * looking block may have phi functions that depend on it.
+                */
+               if (!phi_dependency(ins->prev->u.block)) {
+                       struct triple_set *user, *next;
+                       ins->op = OP_NOOP;
+                       for(user = ins->use; user; user = next) {
+                               struct triple *use, **expr;
+                               next = user->next;
+                               use = user->member;
+                               expr = triple_targ(state, use, 0);
+                               for(;expr; expr = triple_targ(state, use, expr)) {
+                                       if (*expr == ins) {
+                                               *expr = ins->prev;
+                                               unuse_triple(ins, use);
+                                               use_triple(ins->prev, use);
+                                       }
+                                       
+                               }
+                       }
+                       if (ins->use) {
+                               internal_error(state, ins, "noop use != 0");
+                       }
+               }
        }
 }
 
 static void simplify_phi(struct compile_state *state, struct triple *ins)
 {
-       struct triple **expr;
-       ulong_t value;
-       expr = triple_rhs(state, ins, 0);
-       if (!*expr || !is_const(*expr)) {
+       struct triple **slot;
+       struct triple *value;
+       int zrhs, i;
+       ulong_t cvalue;
+       slot = &RHS(ins, 0);
+       zrhs = TRIPLE_RHS(ins->sizes);
+       if (zrhs == 0) {
                return;
        }
-       value = read_const(state, ins, expr);
-       for(;expr;expr = triple_rhs(state, ins, expr)) {
-               if (!*expr || !is_const(*expr)) {
-                       return;
+       /* See if all of the rhs members of a phi have the same value */
+       if (slot[0] && is_simple_const(slot[0])) {
+               cvalue = read_const(state, ins, slot[0]);
+               for(i = 1; i < zrhs; i++) {
+                       if (    !slot[i] ||
+                               !is_simple_const(slot[i]) ||
+                               (cvalue != read_const(state, ins, slot[i]))) {
+                               break;
+                       }
                }
-               if (value != read_const(state, ins, expr)) {
+               if (i == zrhs) {
+                       mkconst(state, ins, cvalue);
                        return;
                }
        }
-       mkconst(state, ins, value);
+       
+       /* See if all of rhs members of a phi are the same */
+       value = slot[0];
+       for(i = 1; i < zrhs; i++) {
+               if (slot[i] != value) {
+                       break;
+               }
+       }
+       if (i == zrhs) {
+               /* If the phi has a single value just copy it */
+               mkcopy(state, ins, value);
+               return;
+       }
 }
 
 
@@ -6336,7 +7032,7 @@ static void simplify_bsf(struct compile_state *state, struct triple *ins)
 {
        if (is_const(RHS(ins, 0))) {
                ulong_t left;
-               left = read_const(state, ins, &RHS(ins, 0));
+               left = read_const(state, ins, RHS(ins, 0));
                mkconst(state, ins, bsf(left));
        }
 }
@@ -6345,149 +7041,93 @@ static void simplify_bsr(struct compile_state *state, struct triple *ins)
 {
        if (is_const(RHS(ins, 0))) {
                ulong_t left;
-               left = read_const(state, ins, &RHS(ins, 0));
+               left = read_const(state, ins, RHS(ins, 0));
                mkconst(state, ins, bsr(left));
        }
 }
 
 
 typedef void (*simplify_t)(struct compile_state *state, struct triple *ins);
-static const simplify_t table_simplify[] = {
-#if 0
-#define simplify_smul     simplify_noop
-#define simplify_umul    simplify_noop
-#define simplify_sdiv    simplify_noop
-#define simplify_udiv    simplify_noop
-#define simplify_smod    simplify_noop
-#define simplify_umod    simplify_noop
-#endif
-#if 0
-#define simplify_add     simplify_noop
-#define simplify_sub     simplify_noop
-#endif
-#if 0
-#define simplify_sl      simplify_noop
-#define simplify_usr     simplify_noop
-#define simplify_ssr     simplify_noop
-#endif
-#if 0
-#define simplify_and     simplify_noop
-#define simplify_xor     simplify_noop
-#define simplify_or      simplify_noop
-#endif
-#if 0
-#define simplify_pos     simplify_noop
-#define simplify_neg     simplify_noop
-#define simplify_invert          simplify_noop
-#endif
-
-#if 0
-#define simplify_eq      simplify_noop
-#define simplify_noteq   simplify_noop
-#endif
-#if 0
-#define simplify_sless   simplify_noop
-#define simplify_uless   simplify_noop
-#define simplify_smore   simplify_noop
-#define simplify_umore   simplify_noop
-#endif
-#if 0
-#define simplify_slesseq  simplify_noop
-#define simplify_ulesseq  simplify_noop
-#define simplify_smoreeq  simplify_noop
-#define simplify_umoreeq  simplify_noop
-#endif
-#if 0
-#define simplify_lfalse          simplify_noop
-#endif
-#if 0
-#define simplify_ltrue   simplify_noop
-#endif
-
-#if 0
-#define simplify_copy    simplify_noop
-#endif
-
-#if 0
-#define simplify_branch          simplify_noop
-#endif
-
-#if 0
-#define simplify_phi     simplify_noop
-#endif
-
-#if 0
-#define simplify_bsf     simplify_noop
-#define simplify_bsr      simplify_noop
-#endif
-
-[OP_SMUL       ] = simplify_smul,
-[OP_UMUL       ] = simplify_umul,
-[OP_SDIV       ] = simplify_sdiv,
-[OP_UDIV       ] = simplify_udiv,
-[OP_SMOD       ] = simplify_smod,
-[OP_UMOD       ] = simplify_umod,
-[OP_ADD        ] = simplify_add,
-[OP_SUB        ] = simplify_sub,
-[OP_SL         ] = simplify_sl,
-[OP_USR        ] = simplify_usr,
-[OP_SSR        ] = simplify_ssr,
-[OP_AND        ] = simplify_and,
-[OP_XOR        ] = simplify_xor,
-[OP_OR         ] = simplify_or,
-[OP_POS        ] = simplify_pos,
-[OP_NEG        ] = simplify_neg,
-[OP_INVERT     ] = simplify_invert,
-
-[OP_EQ         ] = simplify_eq,
-[OP_NOTEQ      ] = simplify_noteq,
-[OP_SLESS      ] = simplify_sless,
-[OP_ULESS      ] = simplify_uless,
-[OP_SMORE      ] = simplify_smore,
-[OP_UMORE      ] = simplify_umore,
-[OP_SLESSEQ    ] = simplify_slesseq,
-[OP_ULESSEQ    ] = simplify_ulesseq,
-[OP_SMOREEQ    ] = simplify_smoreeq,
-[OP_UMOREEQ    ] = simplify_umoreeq,
-[OP_LFALSE     ] = simplify_lfalse,
-[OP_LTRUE      ] = simplify_ltrue,
-
-[OP_LOAD       ] = simplify_noop,
-[OP_STORE      ] = simplify_noop,
-
-[OP_NOOP       ] = simplify_noop,
-
-[OP_INTCONST   ] = simplify_noop,
-[OP_BLOBCONST  ] = simplify_noop,
-[OP_ADDRCONST  ] = simplify_noop,
-
-[OP_WRITE      ] = simplify_noop,
-[OP_READ       ] = simplify_noop,
-[OP_COPY       ] = simplify_copy,
-[OP_PIECE      ] = simplify_noop,
-[OP_ASM        ] = simplify_noop,
-
-[OP_DOT        ] = simplify_noop,
-[OP_VAL_VEC    ] = simplify_noop,
-
-[OP_LIST       ] = simplify_noop,
-[OP_BRANCH     ] = simplify_branch,
-[OP_LABEL      ] = simplify_noop,
-[OP_ADECL      ] = simplify_noop,
-[OP_SDECL      ] = simplify_noop,
-[OP_PHI        ] = simplify_phi,
-
-[OP_INB        ] = simplify_noop,
-[OP_INW        ] = simplify_noop,
-[OP_INL        ] = simplify_noop,
-[OP_OUTB       ] = simplify_noop,
-[OP_OUTW       ] = simplify_noop,
-[OP_OUTL       ] = simplify_noop,
-[OP_BSF        ] = simplify_bsf,
-[OP_BSR        ] = simplify_bsr,
-[OP_RDMSR      ] = simplify_noop,
-[OP_WRMSR      ] = simplify_noop,                    
-[OP_HLT        ] = simplify_noop,
+static const struct simplify_table {
+       simplify_t func;
+       unsigned long flag;
+} table_simplify[] = {
+#define simplify_sdivt    simplify_noop
+#define simplify_udivt    simplify_noop
+#define simplify_piece    simplify_noop
+
+[OP_SDIVT      ] = { simplify_sdivt,    COMPILER_SIMPLIFY_ARITH },
+[OP_UDIVT      ] = { simplify_udivt,   COMPILER_SIMPLIFY_ARITH },
+[OP_SMUL       ] = { simplify_smul,    COMPILER_SIMPLIFY_ARITH },
+[OP_UMUL       ] = { simplify_umul,    COMPILER_SIMPLIFY_ARITH },
+[OP_SDIV       ] = { simplify_sdiv,    COMPILER_SIMPLIFY_ARITH },
+[OP_UDIV       ] = { simplify_udiv,    COMPILER_SIMPLIFY_ARITH },
+[OP_SMOD       ] = { simplify_smod,    COMPILER_SIMPLIFY_ARITH },
+[OP_UMOD       ] = { simplify_umod,    COMPILER_SIMPLIFY_ARITH },
+[OP_ADD        ] = { simplify_add,     COMPILER_SIMPLIFY_ARITH },
+[OP_SUB        ] = { simplify_sub,     COMPILER_SIMPLIFY_ARITH },
+[OP_SL         ] = { simplify_sl,      COMPILER_SIMPLIFY_SHIFT },
+[OP_USR        ] = { simplify_usr,     COMPILER_SIMPLIFY_SHIFT },
+[OP_SSR        ] = { simplify_ssr,     COMPILER_SIMPLIFY_SHIFT },
+[OP_AND        ] = { simplify_and,     COMPILER_SIMPLIFY_BITWISE },
+[OP_XOR        ] = { simplify_xor,     COMPILER_SIMPLIFY_BITWISE },
+[OP_OR         ] = { simplify_or,      COMPILER_SIMPLIFY_BITWISE },
+[OP_POS        ] = { simplify_pos,     COMPILER_SIMPLIFY_ARITH },
+[OP_NEG        ] = { simplify_neg,     COMPILER_SIMPLIFY_ARITH },
+[OP_INVERT     ] = { simplify_invert,  COMPILER_SIMPLIFY_BITWISE },
+
+[OP_EQ         ] = { simplify_eq,      COMPILER_SIMPLIFY_LOGICAL },
+[OP_NOTEQ      ] = { simplify_noteq,   COMPILER_SIMPLIFY_LOGICAL },
+[OP_SLESS      ] = { simplify_sless,   COMPILER_SIMPLIFY_LOGICAL },
+[OP_ULESS      ] = { simplify_uless,   COMPILER_SIMPLIFY_LOGICAL },
+[OP_SMORE      ] = { simplify_smore,   COMPILER_SIMPLIFY_LOGICAL },
+[OP_UMORE      ] = { simplify_umore,   COMPILER_SIMPLIFY_LOGICAL },
+[OP_SLESSEQ    ] = { simplify_slesseq,         COMPILER_SIMPLIFY_LOGICAL },
+[OP_ULESSEQ    ] = { simplify_ulesseq, COMPILER_SIMPLIFY_LOGICAL },
+[OP_SMOREEQ    ] = { simplify_smoreeq, COMPILER_SIMPLIFY_LOGICAL },
+[OP_UMOREEQ    ] = { simplify_umoreeq, COMPILER_SIMPLIFY_LOGICAL },
+[OP_LFALSE     ] = { simplify_lfalse,  COMPILER_SIMPLIFY_LOGICAL },
+[OP_LTRUE      ] = { simplify_ltrue,   COMPILER_SIMPLIFY_LOGICAL },
+
+[OP_LOAD       ] = { simplify_noop,    COMPILER_SIMPLIFY_OP },
+[OP_STORE      ] = { simplify_noop,    COMPILER_SIMPLIFY_OP },
+
+[OP_NOOP       ] = { simplify_noop,    COMPILER_SIMPLIFY_OP },
+
+[OP_INTCONST   ] = { simplify_noop,    COMPILER_SIMPLIFY_OP },
+[OP_BLOBCONST  ] = { simplify_noop,    COMPILER_SIMPLIFY_OP },
+[OP_ADDRCONST  ] = { simplify_noop,    COMPILER_SIMPLIFY_OP },
+
+[OP_WRITE      ] = { simplify_noop,    COMPILER_SIMPLIFY_OP },
+[OP_READ       ] = { simplify_noop,    COMPILER_SIMPLIFY_OP },
+[OP_COPY       ] = { simplify_copy,    COMPILER_SIMPLIFY_COPY },
+[OP_PIECE      ] = { simplify_piece,   COMPILER_SIMPLIFY_OP },
+[OP_ASM        ] = { simplify_noop,    COMPILER_SIMPLIFY_OP },
+
+[OP_DOT        ] = { simplify_noop,    COMPILER_SIMPLIFY_OP },
+[OP_VAL_VEC    ] = { simplify_noop,    COMPILER_SIMPLIFY_OP },
+
+[OP_LIST       ] = { simplify_noop,    COMPILER_SIMPLIFY_OP },
+[OP_BRANCH     ] = { simplify_branch,  COMPILER_SIMPLIFY_BRANCH },
+[OP_CBRANCH    ] = { simplify_branch,  COMPILER_SIMPLIFY_BRANCH },
+[OP_CALL       ] = { simplify_noop,    COMPILER_SIMPLIFY_BRANCH },
+[OP_RET        ] = { simplify_noop,    COMPILER_SIMPLIFY_BRANCH },
+[OP_LABEL      ] = { simplify_label,   COMPILER_SIMPLIFY_LABEL },
+[OP_ADECL      ] = { simplify_noop,    COMPILER_SIMPLIFY_OP },
+[OP_SDECL      ] = { simplify_noop,    COMPILER_SIMPLIFY_OP },
+[OP_PHI        ] = { simplify_phi,     COMPILER_SIMPLIFY_PHI },
+
+[OP_INB        ] = { simplify_noop,    COMPILER_SIMPLIFY_OP },
+[OP_INW        ] = { simplify_noop,    COMPILER_SIMPLIFY_OP },
+[OP_INL        ] = { simplify_noop,    COMPILER_SIMPLIFY_OP },
+[OP_OUTB       ] = { simplify_noop,    COMPILER_SIMPLIFY_OP },
+[OP_OUTW       ] = { simplify_noop,    COMPILER_SIMPLIFY_OP },
+[OP_OUTL       ] = { simplify_noop,    COMPILER_SIMPLIFY_OP },
+[OP_BSF        ] = { simplify_bsf,     COMPILER_SIMPLIFY_OP },
+[OP_BSR        ] = { simplify_bsr,     COMPILER_SIMPLIFY_OP },
+[OP_RDMSR      ] = { simplify_noop,    COMPILER_SIMPLIFY_OP },
+[OP_WRMSR      ] = { simplify_noop,     COMPILER_SIMPLIFY_OP },               
+[OP_HLT        ] = { simplify_noop,     COMPILER_SIMPLIFY_OP },
 };
 
 static void simplify(struct compile_state *state, struct triple *ins)
@@ -6500,9 +7140,13 @@ static void simplify(struct compile_state *state, struct triple *ins)
-               if ((op < 0) || (op > sizeof(table_simplify)/sizeof(table_simplify[0]))) {
-                       do_simplify = 0;
+               if ((op < 0) || (op >= sizeof(table_simplify)/sizeof(table_simplify[0]))) {
+                       do_simplify = 0; /* op == element count is already past the end of the table */
                }
+               else if (state->compiler->flags & table_simplify[op].flag) {
+                       do_simplify = table_simplify[op].func;
+               }
                else {
-                       do_simplify = table_simplify[op];
+                       do_simplify = simplify_noop;
                }
+       
                if (!do_simplify) {
                        internal_error(state, ins, "cannot simplify op: %d %s\n",
                                op, tops(op));
@@ -6512,15 +7156,28 @@ static void simplify(struct compile_state *state, struct triple *ins)
        } while(ins->op != op);
 }
 
+static void rebuild_ssa_form(struct compile_state *state);
+
 static void simplify_all(struct compile_state *state)
 {
        struct triple *ins, *first;
-       first = RHS(state->main_function, 0);
+       if (!(state->compiler->flags & COMPILER_SIMPLIFY)) {
+               return;
+       }
+       first = state->first;
+       ins = first->prev;
+       do {
+               simplify(state, ins);
+               ins = ins->prev;
+       } while(ins != first->prev);
        ins = first;
        do {
                simplify(state, ins);
                ins = ins->next;
-       } while(ins != first);
+       }while(ins != first);
+       rebuild_ssa_form(state);
+
+       print_blocks(state, __func__, stdout);
 }
 
 /*
@@ -6532,7 +7189,7 @@ static void register_builtin_function(struct compile_state *state,
        const char *name, int op, struct type *rtype, ...)
 {
        struct type *ftype, *atype, *param, **next;
-       struct triple *def, *arg, *result, *work, *last, *first;
+       struct triple *def, *arg, *result, *work, *last, *first, *retvar, *ret;
        struct hash_entry *ident;
        struct file_state file;
        int parameters;
@@ -6558,7 +7215,7 @@ static void register_builtin_function(struct compile_state *state,
        }
 
        /* Find the function type */
-       ftype = new_type(TYPE_FUNCTION, rtype, 0);
+       ftype = new_type(TYPE_FUNCTION | STOR_INLINE | STOR_STATIC, rtype, 0);
        next = &ftype->right;
        va_start(args, rtype);
        for(i = 0; i < parameters; i++) {
@@ -6579,6 +7236,9 @@ static void register_builtin_function(struct compile_state *state,
        def = triple(state, OP_LIST, ftype, 0, 0);
        first = label(state);
        RHS(def, 0) = first;
+       retvar = variable(state, &void_ptr_type);
+       retvar = flatten(state, first, retvar);
+       ret = triple(state, OP_RET, &void_type, read_expr(state, retvar), 0);
 
        /* Now string them together */
        param = ftype->right;
@@ -6597,7 +7257,7 @@ static void register_builtin_function(struct compile_state *state,
        }
        MISC(def, 0) = result;
        work = new_triple(state, op, rtype, -1, parameters);
-       for(i = 0, arg = first->next; i < parameters; i++, arg = arg->next) {
+       for(i = 0, arg = first->next->next; i < parameters; i++, arg = arg->next) {
                RHS(work, i) = read_expr(state, arg);
        }
        if (result && ((rtype->type & TYPE_MASK) == TYPE_STRUCT)) {
@@ -6632,19 +7292,27 @@ static void register_builtin_function(struct compile_state *state,
        }
        work = flatten(state, first, work);
        last = flatten(state, first, label(state));
+       ret  = flatten(state, first, ret);
        name_len = strlen(name);
        ident = lookup(state, name, name_len);
+       ftype->type_ident = ident;
        symbol(state, ident, &ident->sym_ident, def, ftype);
        
        state->file = file.prev;
        state->function = 0;
-#if 0
-       fprintf(stdout, "\n");
-       loc(stdout, state, 0);
-       fprintf(stdout, "\n__________ builtin_function _________\n");
-       print_triple(state, def);
-       fprintf(stdout, "__________ builtin_function _________ done\n\n");
-#endif
+       
+       if (!state->functions) {
+               state->functions = def;
+       } else {
+               insert_triple(state, state->functions, def);
+       }
+       if (state->compiler->debug & DEBUG_INLINE) {
+               fprintf(stdout, "\n");
+               loc(stdout, state, 0);
+               fprintf(stdout, "\n__________ %s _________\n", __FUNCTION__);
+               display_func(stdout, def);
+               fprintf(stdout, "__________ %s _________ done\n\n", __FUNCTION__);
+       }
 }
 
 static struct type *partial_struct(struct compile_state *state,
@@ -6685,7 +7353,7 @@ static struct type *register_builtin_type(struct compile_state *state,
                        field = field->right;
                }
                elements++;
-               symbol(state, ident, &ident->sym_struct, 0, type);
+               symbol(state, ident, &ident->sym_tag, 0, type);
                type->type_ident = ident;
                type->elements = elements;
        }
@@ -6697,8 +7365,32 @@ static struct type *register_builtin_type(struct compile_state *state,
 
 static void register_builtins(struct compile_state *state)
 {
+       struct type *div_type, *ldiv_type;
+       struct type *udiv_type, *uldiv_type;
        struct type *msr_type;
 
+       div_type = register_builtin_type(state, "__builtin_div_t",
+               partial_struct(state, "quot", &int_type,
+               partial_struct(state, "rem",  &int_type, 0)));
+       ldiv_type = register_builtin_type(state, "__builtin_ldiv_t",
+               partial_struct(state, "quot", &long_type,
+               partial_struct(state, "rem",  &long_type, 0)));
+       udiv_type = register_builtin_type(state, "__builtin_udiv_t",
+               partial_struct(state, "quot", &uint_type,
+               partial_struct(state, "rem",  &uint_type, 0)));
+       uldiv_type = register_builtin_type(state, "__builtin_uldiv_t",
+               partial_struct(state, "quot", &ulong_type,
+               partial_struct(state, "rem",  &ulong_type, 0)));
+
+       register_builtin_function(state, "__builtin_div",   OP_SDIVT, div_type,
+               &int_type, &int_type);
+       register_builtin_function(state, "__builtin_ldiv",  OP_SDIVT, ldiv_type,
+               &long_type, &long_type);
+       register_builtin_function(state, "__builtin_udiv",  OP_UDIVT, udiv_type,
+               &uint_type, &uint_type);
+       register_builtin_function(state, "__builtin_uldiv", OP_UDIVT, uldiv_type,
+               &ulong_type, &ulong_type);
+
        register_builtin_function(state, "__builtin_inb", OP_INB, &uchar_type, 
                &ushort_type);
        register_builtin_function(state, "__builtin_inw", OP_INW, &ushort_type,
@@ -6760,10 +7452,17 @@ static struct triple *call_expr(
        eat(state, TOK_LPAREN);
        /* Find the return type without any specifiers */
        type = clone_type(0, func->type->left);
-       def = new_triple(state, OP_CALL, func->type, -1, -1);
-       def->type = type;
-
-       pvals = TRIPLE_RHS(def->sizes);
+       /* Count the number of rhs entries for OP_FCALL */
+       param = func->type->right;
+       pvals = 0;
+       while((param->type & TYPE_MASK) == TYPE_PRODUCT) {
+               pvals++;
+               param = param->right;
+       }
+       if ((param->type & TYPE_MASK) != TYPE_VOID) {
+               pvals++;
+       }
+       def = new_triple(state, OP_FCALL, type, -1, pvals);
        MISC(def, 0) = func;
 
        param = func->type->right;
@@ -6863,7 +7562,7 @@ static struct triple *integer_constant(struct compile_state *state)
        errno = 0;
        decimal = (tk->val.str[0] != '0');
        val = strtoul(tk->val.str, &end, 0);
-       if ((val == ULONG_MAX) && (errno == ERANGE)) {
+       if ((val > ULONG_T_MAX) || ((val == ULONG_MAX) && (errno == ERANGE))) {
                error(state, 0, "Integer constant to large");
        }
        u = l = 0;
@@ -6887,25 +7586,25 @@ static struct triple *integer_constant(struct compile_state *state)
        }
        else if (l) {
                type = &long_type;
-               if (!decimal && (val > LONG_MAX)) {
+               if (!decimal && (val > LONG_T_MAX)) {
                        type = &ulong_type;
                }
        }
        else if (u) {
                type = &uint_type;
-               if (val > UINT_MAX) {
+               if (val > UINT_T_MAX) {
                        type = &ulong_type;
                }
        }
        else {
                type = &int_type;
-               if (!decimal && (val > INT_MAX) && (val <= UINT_MAX)) {
+               if (!decimal && (val > INT_T_MAX) && (val <= UINT_T_MAX)) {
                        type = &uint_type;
                }
-               else if (!decimal && (val > LONG_MAX)) {
+               else if (!decimal && (val > LONG_T_MAX)) {
                        type = &ulong_type;
                }
-               else if (val > INT_MAX) {
+               else if (val > INT_T_MAX) {
                        type = &long_type;
                }
        }
@@ -6925,7 +7624,6 @@ static struct triple *primary_expr(struct compile_state *state)
                /* Here ident is either:
-                * a varable name
+                * a variable name
                 * a function name
-                * an enumeration constant.
                 */
                eat(state, TOK_IDENT);
                ident = state->token[0].ident;
@@ -6936,11 +7634,17 @@ static struct triple *primary_expr(struct compile_state *state)
                break;
        }
        case TOK_ENUM_CONST:
+       {
+               struct hash_entry *ident;
                /* Here ident is an enumeration constant */
                eat(state, TOK_ENUM_CONST);
-               def = 0;
-               FINISHME();
+               ident = state->token[0].ident;
+               if (!ident->sym_ident) {
+                       error(state, 0, "%s undeclared", ident->name);
+               }
+               def = ident->sym_ident->def;
                break;
+       }
        case TOK_LPAREN:
                eat(state, TOK_LPAREN);
                def = expr(state);
@@ -7131,8 +7835,7 @@ static struct triple *cast_expr(struct compile_state *state)
                eat(state, TOK_LPAREN);
                type = type_name(state);
                eat(state, TOK_RPAREN);
-               def = read_expr(state, cast_expr(state));
-               def = triple(state, OP_COPY, type, def, 0);
+               def = mk_cast_expr(state, type, cast_expr(state));
        }
        else {
                def = unary_expr(state);
@@ -7425,25 +8128,31 @@ static struct triple *eval_const_expr(
        struct compile_state *state, struct triple *expr)
 {
        struct triple *def;
-       struct triple *head, *ptr;
-       head = label(state); /* dummy initial triple */
-       flatten(state, head, expr);
-       for(ptr = head->next; ptr != head; ptr = ptr->next) {
-               simplify(state, ptr);
-       }
-       /* Remove the constant value the tail of the list */
-       def = head->prev;
-       def->prev->next = def->next;
-       def->next->prev = def->prev;
-       def->next = def->prev = def;
-       if (!is_const(def)) {
-               internal_error(state, 0, "Not a constant expression");
-       }
-       /* Free the intermediate expressions */
-       while(head->next != head) {
-               release_triple(state, head->next);
+       if (is_const(expr)) {
+               def = expr;
+       } 
+       else {
+               /* If we don't start out as a constant simplify into one */
+               struct triple *head, *ptr;
+               head = label(state); /* dummy initial triple */
+               flatten(state, head, expr);
+               for(ptr = head->next; ptr != head; ptr = ptr->next) {
+                       simplify(state, ptr);
+               }
+               /* Remove the constant value from the tail of the list */
+               def = head->prev;
+               def->prev->next = def->next;
+               def->next->prev = def->prev;
+               def->next = def->prev = def;
+               if (!is_const(def)) {
+                       error(state, 0, "Not a constant expression");
+               }
+               /* Free the intermediate expressions */
+               while(head->next != head) {
+                       release_triple(state, head->next);
+               }
+               free_triple(state, head);
        }
-       free_triple(state, head);
        return def;
 }
 
@@ -7747,7 +8456,7 @@ static void return_statement(struct compile_state *state, struct triple *first)
        /* Find the return variable */
        var = MISC(state->main_function, 0);
        /* Find the return destination */
-       dest = RHS(state->main_function, 0)->prev;
+       dest = state->i_return->sym_ident->def;
        mv = jmp = 0;
        /* If needed generate a jump instruction */
        if (!last) {
@@ -7793,55 +8502,157 @@ static void continue_statement(struct compile_state *state, struct triple *first
 
 static void goto_statement(struct compile_state *state, struct triple *first)
 {
-       FINISHME();
+       struct hash_entry *ident;
        eat(state, TOK_GOTO);
        eat(state, TOK_IDENT);
+       ident = state->token[0].ident;
+       if (!ident->sym_label) {
+               /* If this is a forward branch allocate the label now,
+                * it will be flattened in the appropriate location later.
+                */
+               struct triple *ins;
+               ins = label(state);
+               label_symbol(state, ident, ins);
+       }
        eat(state, TOK_SEMI);
-       error(state, 0, "goto is not implemeted");
-       FINISHME();
+
+       flatten(state, first, branch(state, ident->sym_label->def, 0));
 }
 
 static void labeled_statement(struct compile_state *state, struct triple *first)
 {
-       FINISHME();
+       struct triple *ins;
+       struct hash_entry *ident;
        eat(state, TOK_IDENT);
+
+       ident = state->token[0].ident;
+       if (ident->sym_label && ident->sym_label->def) {
+               ins = ident->sym_label->def;
+               put_occurance(ins->occurance);
+               ins->occurance = new_occurance(state);
+       }
+       else {
+               ins = label(state);
+               label_symbol(state, ident, ins);
+       }
+       if (ins->id & TRIPLE_FLAG_FLATTENED) {
+               error(state, 0, "label %s already defined", ident->name);
+       }
+       flatten(state, first, ins);
+
        eat(state, TOK_COLON);
        statement(state, first);
-       error(state, 0, "labeled statements are not implemented");
-       FINISHME();
 }
 
 static void switch_statement(struct compile_state *state, struct triple *first)
 {
-       FINISHME();
+       struct triple *value, *top, *end, *dbranch;
+       struct hash_entry *ident;
+
+       /* See if we have a valid switch statement */
        eat(state, TOK_SWITCH);
        eat(state, TOK_LPAREN);
-       expr(state);
+       value = expr(state);
+       integral(state, value);
+       value = read_expr(state, value);
        eat(state, TOK_RPAREN);
+       /* Generate the needed pieces */
+       top = label(state);
+       end = label(state);
+       dbranch = branch(state, end, 0);
+       /* Remember where case branches and break go */
+       start_scope(state);
+       ident = state->i_switch;
+       symbol(state, ident, &ident->sym_ident, value, value->type);
+       ident = state->i_case;
+       symbol(state, ident, &ident->sym_ident, top, top->type);
+       ident = state->i_break;
+       symbol(state, ident, &ident->sym_ident, end, end->type);
+       ident = state->i_default;
+       symbol(state, ident, &ident->sym_ident, dbranch, dbranch->type);
+       /* Thread them together */
+       flatten(state, first, value);
+       flatten(state, first, top);
+       flatten(state, first, dbranch);
        statement(state, first);
-       error(state, 0, "switch statements are not implemented");
-       FINISHME();
+       flatten(state, first, end);
+       /* Cleanup the switch scope */
+       end_scope(state);
 }
 
 static void case_statement(struct compile_state *state, struct triple *first)
 {
-       FINISHME();
+       struct triple *cvalue, *dest, *test, *jmp;
+       struct triple *ptr, *value, *top, *dbranch;
+
+       /* See if we have a valid case statement */
        eat(state, TOK_CASE);
-       constant_expr(state);
+       cvalue = constant_expr(state);
+       integral(state, cvalue);
+       if (cvalue->op != OP_INTCONST) {
+               error(state, 0, "integer constant expected");
+       }
        eat(state, TOK_COLON);
+       if (!state->i_case->sym_ident) {
+               error(state, 0, "case statement not within a switch");
+       }
+
+       /* Lookup the interesting pieces */
+       top = state->i_case->sym_ident->def;
+       value = state->i_switch->sym_ident->def;
+       dbranch = state->i_default->sym_ident->def;
+
+       /* See if this case label has already been used */
+       for(ptr = top; ptr != dbranch; ptr = ptr->next) {
+               if (ptr->op != OP_EQ) {
+                       continue;
+               }
+               if (RHS(ptr, 1)->u.cval == cvalue->u.cval) {
+                       error(state, 0, "duplicate case %d statement",
+                               cvalue->u.cval);
+               }
+       }
+       /* Generate the needed pieces */
+       dest = label(state);
+       test = triple(state, OP_EQ, &int_type, value, cvalue);
+       jmp = branch(state, dest, test);
+       /* Thread the pieces together */
+       flatten(state, dbranch, test);
+       flatten(state, dbranch, jmp);
+       flatten(state, dbranch, label(state));
+       flatten(state, first, dest);
        statement(state, first);
-       error(state, 0, "case statements are not implemented");
-       FINISHME();
 }
 
 static void default_statement(struct compile_state *state, struct triple *first)
 {
-       FINISHME();
+       struct triple *dest;
+       struct triple *dbranch, *end;
+
+       /* See if we have a valid default statement */
        eat(state, TOK_DEFAULT);
        eat(state, TOK_COLON);
+
+       if (!state->i_case->sym_ident) {
+               error(state, 0, "default statement not within a switch");
+       }
+
+       /* Lookup the interesting pieces */
+       dbranch = state->i_default->sym_ident->def;
+       end = state->i_break->sym_ident->def;
+
+       /* See if a default statement has already happened */
+       if (TARG(dbranch, 0) != end) {
+               error(state, 0, "duplicate default statement");
+       }
+
+       /* Generate the needed pieces */
+       dest = label(state);
+
+       /* Thread the pieces together */
+       TARG(dbranch, 0) = dest;
+       flatten(state, first, dest);
        statement(state, first);
-       error(state, 0, "default statements are not implemented");
-       FINISHME();
 }
 
 static void asm_statement(struct compile_state *state, struct triple *first)
@@ -7950,7 +8761,6 @@ static void asm_statement(struct compile_state *state, struct triple *first)
                                error(state, 0, "Maximum clobber limit exceeded.");
                        }
                        clobber = string_constant(state);
-                       eat(state, TOK_RPAREN);
 
                        clob_param[clobbers].constraint = clobber;
                        if (peek(state) == TOK_COMMA) {
@@ -8018,20 +8828,21 @@ static void asm_statement(struct compile_state *state, struct triple *first)
                RHS(def, i) = read_expr(state,in_param[i].expr);
        }
        flatten(state, first, def);
-       for(i = 0; i < out; i++) {
+       for(i = 0; i < (out + clobbers); i++) {
+               struct type *type;
                struct triple *piece;
-               piece = triple(state, OP_PIECE, out_param[i].expr->type, def, 0);
+               type = (i < out)? out_param[i].expr->type : &void_type;
+               piece = triple(state, OP_PIECE, type, def, 0);
                piece->u.cval = i;
                LHS(def, i) = piece;
-               flatten(state, first,
-                       write_expr(state, out_param[i].expr, piece));
+               flatten(state, first, piece);
        }
-       for(; i - out < clobbers; i++) {
+       /* And write the helpers to their destinations */
+       for(i = 0; i < out; i++) {
                struct triple *piece;
-               piece = triple(state, OP_PIECE, &void_type, def, 0);
-               piece->u.cval = i;
-               LHS(def, i) = piece;
-               flatten(state, first, piece);
+               piece = LHS(def, i);
+               flatten(state, first,
+                       write_expr(state, out_param[i].expr, piece));
        }
 }
 
@@ -8151,7 +8962,7 @@ static struct type *param_decl(struct compile_state *state)
 static struct type *param_type_list(struct compile_state *state, struct type *type)
 {
        struct type *ftype, **next;
-       ftype = new_type(TYPE_FUNCTION, type, param_decl(state));
+       ftype = new_type(TYPE_FUNCTION | (type->type & STOR_MASK), type, param_decl(state));
        next = &ftype->right;
        while(peek(state) == TOK_COMMA) {
                eat(state, TOK_COMMA);
@@ -8284,56 +9095,93 @@ static struct type *typedef_name(
 }
 
 static struct type *enum_specifier(
-       struct compile_state *state, unsigned int specifiers)
+       struct compile_state *state, unsigned int spec)
 {
+       struct hash_entry *ident; /* the enum tag, or 0 when no tag was given */
+       ulong_t base;             /* value handed to the next enumerator */
        int tok;
-       struct type *type;
-       type = 0;
-       FINISHME();
+       struct type *enum_type;
+       enum_type = 0; /* stays 0 until a body is parsed below or a declared tag is found */
+       ident = 0;
        eat(state, TOK_ENUM);
        tok = peek(state);
-       if (tok == TOK_IDENT) {
-               eat(state, TOK_IDENT);
+       if ((tok == TOK_IDENT) || (tok == TOK_ENUM_CONST) || (tok == TOK_TYPE_NAME)) {
+               eat(state, tok); /* any of the three identifier-like token kinds may serve as the tag */
+               ident = state->token[0].ident;
+               
        }
-       if ((tok != TOK_IDENT) || (peek(state) == TOK_LBRACE)) {
+       base = 0; /* numbering starts at zero */
+       if (!ident || (peek(state) == TOK_LBRACE)) { /* an untagged enum must have a body */
+               struct type **next; /* tail pointer of the enumerator list */
                eat(state, TOK_LBRACE);
+               enum_type = new_type(TYPE_ENUM | spec, 0, 0);
+               enum_type->type_ident = ident;
+               next = &enum_type->right;
                do {
+                       struct hash_entry *eident; /* name of the enumerator being declared */
+                       struct triple *value;      /* int constant bound to that name */
+                       struct type *entry;        /* TYPE_LIST node recording the enumerator */
                        eat(state, TOK_IDENT);
+                       eident = state->token[0].ident;
+                       if (eident->sym_ident) {
+                               error(state, 0, "%s already declared", 
+                                       eident->name);
+                       }
+                       eident->tok = TOK_ENUM_CONST; /* record the enum-constant token kind on the identifier */
                        if (peek(state) == TOK_EQ) {
+                               struct triple *val;
                                eat(state, TOK_EQ);
-                               constant_expr(state);
-                       }
+                               val = constant_expr(state);
+                               integral(state, val);
+                               base = val->u.cval; /* an explicit `= expr` restarts the numbering */
+                       }
+                       value = int_const(state, &int_type, base);
+                       symbol(state, eident, &eident->sym_ident, value, &int_type); /* bind name -> constant */
+                       entry = new_type(TYPE_LIST, 0, 0);
+                       entry->field_ident = eident;
+                       *next = entry;        /* append to the list hanging off enum_type->right */
+                       next = &entry->right;
+                       base += 1;
                        if (peek(state) == TOK_COMMA) {
                                eat(state, TOK_COMMA);
                        }
                } while(peek(state) != TOK_RBRACE);
                eat(state, TOK_RBRACE);
+               if (ident) {
+                       symbol(state, ident, &ident->sym_tag, 0, enum_type); /* register the tag */
+               }
        }
-       FINISHME();
-       return type;
+       if (ident && ident->sym_tag &&
+               ident->sym_tag->type &&
+               ((ident->sym_tag->type->type & TYPE_MASK) == TYPE_ENUM)) {
+               enum_type = clone_type(spec, ident->sym_tag->type); /* tagged enums return a clone built with spec, even right after the body */
+       }
+       else if (ident && !enum_type) {
+               error(state, 0, "enum %s undeclared", ident->name); /* tag used without a body and not bound to an enum */
+       }
+       return enum_type;
 }
 
-#if 0
 static struct type *struct_declarator(
        struct compile_state *state, struct type *type, struct hash_entry **ident)
 {
        int tok;
-#warning "struct_declarator is complicated because of bitfields, kill them?"
        tok = peek(state);
        if (tok != TOK_COLON) {
                type = declarator(state, type, ident, 1);
        }
        if ((tok == TOK_COLON) || (peek(state) == TOK_COLON)) {
+               struct triple *value; /* bitfield width expression; assigned below but never read */
                eat(state, TOK_COLON);
-               constant_expr(state);
+               value = constant_expr(state); /* the width is parsed, then every bitfield is rejected by the error below */
+#warning "FIXME implement bitfields to reduce register usage"
+               error(state, 0, "bitfields not yet implemented"); /* reached for both `: width` and `name : width` */
        }
-       FINISHME();
        return type;
 }
-#endif
 
 static struct type *struct_or_union_specifier(
-       struct compile_state *state, unsigned int specifiers)
+       struct compile_state *state, unsigned int spec)
 {
        struct type *struct_type;
        struct hash_entry *ident;
@@ -8357,25 +9205,25 @@ static struct type *struct_or_union_specifier(
                break;
        }
        tok = peek(state);
-       if ((tok == TOK_IDENT) || (tok == TOK_TYPE_NAME)) {
+       if ((tok == TOK_IDENT) || (tok == TOK_ENUM_CONST) || (tok == TOK_TYPE_NAME)) {
                eat(state, tok);
                ident = state->token[0].ident;
        }
        if (!ident || (peek(state) == TOK_LBRACE)) {
                ulong_t elements;
+               struct type **next;
                elements = 0;
                eat(state, TOK_LBRACE);
+               next = &struct_type;
                do {
                        struct type *base_type;
-                       struct type **next;
                        int done;
                        base_type = specifier_qualifier_list(state);
-                       next = &struct_type;
                        do {
                                struct type *type;
                                struct hash_entry *fident;
                                done = 1;
-                               type = declarator(state, base_type, &fident, 1);
+                               type = struct_declarator(state, base_type, &fident);
                                elements++;
                                if (peek(state) == TOK_COMMA) {
                                        done = 0;
@@ -8393,15 +9241,19 @@ static struct type *struct_or_union_specifier(
                        eat(state, TOK_SEMI);
                } while(peek(state) != TOK_RBRACE);
                eat(state, TOK_RBRACE);
-               struct_type = new_type(TYPE_STRUCT, struct_type, 0);
+               struct_type = new_type(TYPE_STRUCT | spec, struct_type, 0);
                struct_type->type_ident = ident;
                struct_type->elements = elements;
-               symbol(state, ident, &ident->sym_struct, 0, struct_type);
+               if (ident) {
+                       symbol(state, ident, &ident->sym_tag, 0, struct_type);
+               }
        }
-       if (ident && ident->sym_struct) {
-               struct_type = ident->sym_struct->type;
+       if (ident && ident->sym_tag && 
+               ident->sym_tag->type && 
+               ((ident->sym_tag->type->type & TYPE_MASK) == TYPE_STRUCT)) {
+               struct_type = clone_type(spec, ident->sym_tag->type);
        }
-       else if (ident && !ident->sym_struct) {
+       else if (ident && !struct_type) {
                error(state, 0, "struct %s undeclared", ident->name);
        }
        return struct_type;
@@ -8433,7 +9285,7 @@ static unsigned int storage_class_specifier_opt(struct compile_state *state)
                break;
        default:
                if (state->scope_depth <= GLOBAL_SCOPE_DEPTH) {
-                       specifiers = STOR_STATIC;
+                       specifiers = STOR_LOCAL;
                }
                else {
                        specifiers = STOR_AUTO;
@@ -8731,113 +9583,165 @@ static struct type *decl_specifiers(struct compile_state *state)
        return type;
 }
 
-static unsigned designator(struct compile_state *state)
+struct field_info { /* what designator() returns and initializer() tracks for the current element */
+       struct type *type; /* type of the element or field being initialized */
+       size_t offset;     /* its offset in the initializer buffer; initializer() adds it to a char pointer */
+};
+
+static struct field_info designator(struct compile_state *state, struct type *type)
 {
        int tok;
-       unsigned index;
-       index = -1U;
+       struct field_info info; /* type and offset selected by the `[expr]` / `.field` designator parsed here */
+       info.offset = ~0U;      /* placeholder; both accepted designator forms below overwrite it */
+       info.type = 0;
        do {
                switch(peek(state)) {
                case TOK_LBRACKET:
                {
                        struct triple *value;
+                       if ((type->type & TYPE_MASK) != TYPE_ARRAY) {
+                               error(state, 0, "Array designator not in array initializer");
+                       }
                        eat(state, TOK_LBRACKET);
                        value = constant_expr(state);
                        eat(state, TOK_RBRACKET);
-                       index = value->u.cval;
+
+                       info.type = type->left; /* element type of the array */
+                       info.offset = value->u.cval * size_of(state, info.type); /* index times element size */
                        break;
                }
                case TOK_DOT:
+               {
+                       struct hash_entry *field; /* identifier following the `.` */
+                       if ((type->type & TYPE_MASK) != TYPE_STRUCT) {
+                               error(state, 0, "Struct designator not in struct initializer");
+                       }
                        eat(state, TOK_DOT);
                        eat(state, TOK_IDENT);
-                       error(state, 0, "Struct Designators not currently supported");
+                       field = state->token[0].ident;
+                       info.offset = field_offset(state, type, field);
+                       info.type   = field_type(state, type, field);
                        break;
+               }
                default:
                        error(state, 0, "Invalid designator");
                }
                tok = peek(state);
        } while((tok == TOK_LBRACKET) || (tok == TOK_DOT));
        eat(state, TOK_EQ);
-       return index;
+       return info; /* NOTE(review): every loop iteration recomputes info from the original `type`, so in a chained designator such as `[1][2]` or `.a.b` only the last component takes effect -- confirm nested designators are meant to be unsupported */
 }
 
 static struct triple *initializer(
        struct compile_state *state, struct type *type)
 {
        struct triple *result;
+#warning "FIXME more consistent initializer handling (where should eval_const_expr go?"
        if (peek(state) != TOK_LBRACE) {
                result = assignment_expr(state);
+               if (((type->type & TYPE_MASK) == TYPE_ARRAY) &&
+                       (type->elements == ELEMENT_COUNT_UNSPECIFIED) &&
+                       ((result->type->type & TYPE_MASK) == TYPE_ARRAY) &&
+                       (result->type->elements != ELEMENT_COUNT_UNSPECIFIED) &&
+                       (equiv_types(type->left, result->type->left))) {
+                       type->elements = result->type->elements;
+               }
+               if (is_stable(state, result) && 
+                       ((result->type->type & TYPE_MASK) == TYPE_ARRAY) &&
+                       (type->type & TYPE_MASK) != TYPE_ARRAY)
+               {
+                       result = array_to_pointer(state, result);
+               }
+               if (!is_init_compatible(state, type, result->type)) {
+                       error(state, 0, "Incompatible types in initializer");
+               }
+               if (!equiv_types(type, result->type)) {
+                       result = mk_cast_expr(state, type, result);
+               }
        }
        else {
                int comma;
-               unsigned index, max_index;
+               size_t max_offset;
+               struct field_info info;
                void *buf;
-               max_index = index = 0;
-               if ((type->type & TYPE_MASK) == TYPE_ARRAY) {
-                       max_index = type->elements;
-                       if (type->elements == ELEMENT_COUNT_UNSPECIFIED) {
-                               type->elements = 0;
-                       }
+               if (((type->type & TYPE_MASK) != TYPE_ARRAY) &&
+                       ((type->type & TYPE_MASK) != TYPE_STRUCT)) {
+                       internal_error(state, 0, "unknown initializer type");
+               }
+               info.offset = 0;
+               info.type = type->left;
+               if ((type->type & TYPE_MASK) == TYPE_STRUCT) {
+                       info.type = next_field(state, type, 0);
+               }
+               if (type->elements == ELEMENT_COUNT_UNSPECIFIED) {
+                       max_offset = 0;
                } else {
-                       error(state, 0, "Struct initializers not currently supported");
+                       max_offset = size_of(state, type);
                }
-               buf = xcmalloc(size_of(state, type), "initializer");
+               buf = xcmalloc(max_offset, "initializer");
                eat(state, TOK_LBRACE);
                do {
                        struct triple *value;
                        struct type *value_type;
                        size_t value_size;
+                       void *dest;
                        int tok;
                        comma = 0;
                        tok = peek(state);
                        if ((tok == TOK_LBRACKET) || (tok == TOK_DOT)) {
-                               index = designator(state);
+                               info = designator(state, type);
                        }
-                       if ((max_index != ELEMENT_COUNT_UNSPECIFIED) &&
-                               (index > max_index)) {
+                       if ((type->elements != ELEMENT_COUNT_UNSPECIFIED) &&
+                               (info.offset >= max_offset)) {
                                error(state, 0, "element beyond bounds");
                        }
-                       value_type = 0;
-                       if ((type->type & TYPE_MASK) == TYPE_ARRAY) {
-                               value_type = type->left;
-                       }
+                       value_type = info.type;
                        value = eval_const_expr(state, initializer(state, value_type));
                        value_size = size_of(state, value_type);
                        if (((type->type & TYPE_MASK) == TYPE_ARRAY) &&
-                               (max_index == ELEMENT_COUNT_UNSPECIFIED) &&
-                               (type->elements <= index)) {
+                               (type->elements == ELEMENT_COUNT_UNSPECIFIED) &&
+                               (max_offset <= info.offset)) {
                                void *old_buf;
                                size_t old_size;
                                old_buf = buf;
-                               old_size = size_of(state, type);
-                               type->elements = index + 1;
-                               buf = xmalloc(size_of(state, type), "initializer");
+                               old_size = max_offset;
+                               max_offset = info.offset + value_size;
+                               buf = xmalloc(max_offset, "initializer");
                                memcpy(buf, old_buf, old_size);
                                xfree(old_buf);
                        }
+                       dest = ((char *)buf) + info.offset;
                        if (value->op == OP_BLOBCONST) {
-                               memcpy((char *)buf + index * value_size, value->u.blob, value_size);
+                               memcpy(dest, value->u.blob, value_size);
                        }
                        else if ((value->op == OP_INTCONST) && (value_size == 1)) {
-                               *(((uint8_t *)buf) + index) = value->u.cval & 0xff;
+                               *((uint8_t *)dest) = value->u.cval & 0xff;
                        }
                        else if ((value->op == OP_INTCONST) && (value_size == 2)) {
-                               *(((uint16_t *)buf) + index) = value->u.cval & 0xffff;
+                               *((uint16_t *)dest) = value->u.cval & 0xffff;
                        }
                        else if ((value->op == OP_INTCONST) && (value_size == 4)) {
-                               *(((uint32_t *)buf) + index) = value->u.cval & 0xffffffff;
+                               *((uint32_t *)dest) = value->u.cval & 0xffffffff;
                        }
                        else {
-                               fprintf(stderr, "%d %d\n",
-                                       value->op, value_size);
                                internal_error(state, 0, "unhandled constant initializer");
                        }
+                       free_triple(state, value);
                        if (peek(state) == TOK_COMMA) {
                                eat(state, TOK_COMMA);
                                comma = 1;
                        }
-                       index += 1;
+                       info.offset += value_size;
+                       if ((type->type & TYPE_MASK) == TYPE_STRUCT) {
+                               info.type = next_field(state, type, info.type);
+                               info.offset = field_offset(state, type, 
+                                       info.type->field_ident);
+                       }
                } while(comma && (peek(state) != TOK_RBRACE));
+               if ((type->elements == ELEMENT_COUNT_UNSPECIFIED) &&
+                       ((type->type & TYPE_MASK) == TYPE_ARRAY)) {
+                       type->elements = max_offset / size_of(state, type->left);
+               }
                eat(state, TOK_RBRACE);
                result = triple(state, OP_BLOBCONST, type, 0, 0);
                result->u.blob = buf;
@@ -8845,10 +9749,34 @@ static struct triple *initializer(
        return result;
 }
 
+static void resolve_branches(struct compile_state *state)
+{
+       /* Second pass over the identifier hash table once a function
+        * body has been parsed.  Every entry that has a label symbol
+        * is checked: a label whose defining triple was never marked
+        * flattened was the target of a goto but was never defined,
+        * and that is reported as an error.
+        */
+       int bucket;
+       for(bucket = 0; bucket < HASH_TABLE_SIZE; bucket++) {
+               struct hash_entry *ent;
+               ent = state->hash_table[bucket];
+               while(ent) {
+                       if (ent->sym_label) {
+                               struct triple *label_ins;
+                               label_ins = ent->sym_label->def;
+                               if ((label_ins->id & TRIPLE_FLAG_FLATTENED) == 0) {
+                                       error(state, label_ins, "label `%s' used but not defined",
+                                               ent->name);
+                               }
+                       }
+                       ent = ent->next;
+               }
+       }
+}
+
 static struct triple *function_definition(
        struct compile_state *state, struct type *type)
 {
-       struct triple *def, *tmp, *first, *end;
+       struct triple *def, *tmp, *first, *end, *retvar, *ret;
        struct hash_entry *ident;
        struct type *param;
        int i;
@@ -8889,6 +9817,17 @@ static struct triple *function_definition(
        /* Put a label at the very end of a function */
        end = label(state);
        flatten(state, first, end);
+       /* Remember where return goes */
+       ident = state->i_return;
+       symbol(state, ident, &ident->sym_ident, end, end->type);
+
+       /* Allocate a variable for the return address */
+       retvar = variable(state, &void_ptr_type);
+       retvar = flatten(state, end, retvar);
+
+       /* Add in the return instruction */
+       ret = triple(state, OP_RET, &void_type, read_expr(state, retvar), 0);
+       ret = flatten(state, first, ret);
 
        /* Walk through the parameters and create symbol table entries
         * for them.
@@ -8926,15 +9865,26 @@ static struct triple *function_definition(
        /* Now get the actual function definition */
        compound_statement(state, end);
 
+       /* Finish anything unfinished with branches */
+       resolve_branches(state);
+
        /* Remove the parameter scope */
        end_scope(state);
-#if 0
-       fprintf(stdout, "\n");
-       loc(stdout, state, 0);
-       fprintf(stdout, "\n__________ function_definition _________\n");
-       print_triple(state, def);
-       fprintf(stdout, "__________ function_definition _________ done\n\n");
-#endif
+
+
+       /* Remember I have defined a function */
+       if (!state->functions) {
+               state->functions = def;
+       } else {
+               insert_triple(state, state->functions, def);
+       }
+       if (state->compiler->debug & DEBUG_INLINE) {
+               fprintf(stdout, "\n");
+               loc(stdout, state, 0);
+               fprintf(stdout, "\n__________ %s _________\n", __FUNCTION__);
+               display_func(stdout, def);
+               fprintf(stdout, "__________ %s _________ done\n\n", __FUNCTION__);
+       }
 
        return def;
 }
@@ -8954,6 +9904,7 @@ static struct triple *do_decl(struct compile_state *state,
                type->type &= ~STOR_MASK;
                type->type |= STOR_AUTO;
                break;
+       case STOR_LOCAL:
        case STOR_EXTERN:
                type->type &= ~STOR_MASK;
                type->type |= STOR_STATIC;
@@ -8969,7 +9920,11 @@ static struct triple *do_decl(struct compile_state *state,
        default:
                internal_error(state, 0, "Undefined storage class");
        }
-       if (((type->type & STOR_MASK) == STOR_STATIC) &&
+       if ((type->type & TYPE_MASK) == TYPE_FUNCTION) {
+               error(state, 0, "Function prototypes not supported");
+       }
+       if (ident && 
+               ((type->type & STOR_MASK) == STOR_STATIC) &&
                ((type->type & QUAL_CONST) == 0)) {
                error(state, 0, "non const static variables not supported");
        }
@@ -8992,6 +9947,7 @@ static void decl(struct compile_state *state, struct triple *first)
        type = declarator(state, base_type, &ident, 0);
        if (global && ident && (peek(state) == TOK_LBRACE)) {
                /* function */
+               type->type_ident = ident;
                state->function = ident->name;
                def = function_definition(state, type);
                symbol(state, ident, &ident->sym_ident, def, type);
@@ -9046,151 +10002,597 @@ static void decls(struct compile_state *state)
        }
 }
 
-/*
- * Data structurs for optimation.
+/* 
+ * Function inlining
  */
 
-static void do_use_block(
-       struct block *used, struct block_set **head, struct block *user, 
-       int front)
+static struct triple *call(struct compile_state *state,
+       struct triple *retvar, struct triple *ret_addr, 
+       struct triple *targ, struct triple *ret)
 {
-       struct block_set **ptr, *new;
-       if (!used)
-               return;
-       if (!user)
-               return;
-       ptr = head;
-       while(*ptr) {
-               if ((*ptr)->member == user) {
-                       return;
-               }
-               ptr = &(*ptr)->next;
+       struct triple *call; /* the OP_CALL triple built and returned here; ret_addr is not referenced in this body */
+
+       if (!retvar || !is_lvalue(state, retvar)) {
+               internal_error(state, 0, "writing to a non lvalue?");
        }
-       new = xcmalloc(sizeof(*new), "block_set");
-       new->member = user;
-       if (front) {
-               new->next = *head;
-               *head = new;
+       write_compatible(state, retvar->type, &void_ptr_type); /* retvar must be able to accept a void pointer */
+
+       call = new_triple(state, OP_CALL, &void_type, 1, 0);
+       TARG(call, 0) = targ; /* the label being called */
+       MISC(call, 0) = ret;  /* the OP_RET paired with this call */
+       if (!targ || (targ->op != OP_LABEL)) { /* validated after the stores above; failure is an internal error */
+               internal_error(state, 0, "call not to a label");
        }
-       else {
-               new->next = 0;
-               *ptr = new;
+       if (!ret || (ret->op != OP_RET)) {
+               internal_error(state, 0, "call not matched with return");
        }
+       return call;
 }
-static void do_unuse_block(
-       struct block *used, struct block_set **head, struct block *unuser)
+
+static void mark_live_functions(struct compile_state *state, struct triple *first)
 {
-       struct block_set *use, **ptr;
-       ptr = head;
-       while(*ptr) {
-               use = *ptr;
-               if (use->member == unuser) {
-                       *ptr = use->next;
-                       memset(use, -1, sizeof(*use));
-                       xfree(use);
-               }
-               else {
-                       ptr = &use->next;
+       struct triple *ptr; /* cursor over the circular triple list that starts at first */
+       ptr = first;
+       do {
+               if (ptr->op == OP_FCALL) {
+                       struct triple *func; /* the function triple stored in the call's MISC(…, 0) slot */
+                       func = MISC(ptr, 0);
+                       if (func->u.cval++ == 0) { /* u.cval counts the calls seen; only the first one descends */
+                               mark_live_functions(state, RHS(func, 0)); /* walk the callee's own triple list */
+                       }
                }
-       }
+               ptr = ptr->next;
+       } while(ptr != first); /* stop once the walk wraps around to its starting triple */
 }
 
-static void use_block(struct block *used, struct block *user)
-{
-       /* Append new to the head of the list, print_block
-        * depends on this.
-        */
-       do_use_block(used, &used->use, user, 1); 
-       used->users++;
-}
-static void unuse_block(struct block *used, struct block *unuser)
+static void walk_functions(struct compile_state *state,
+       void (*cb)(struct compile_state *state, struct triple *func, void *arg),
+       void *arg)
 {
-       do_unuse_block(used, &used->use, unuser); 
-       used->users--;
+       /* Call cb(state, func, arg) once for each triple on the circular
+        * list headed by state->functions, starting with the head itself.
+        */
+       struct triple *func, *first;
+       func = first = state->functions;
+       /* state->functions stays null until a function definition has been
+        * recorded; without this guard the loop below would pass a null
+        * function to cb and then dereference it.
+        */
+       if (!first) {
+               return;
+       }
+       do {
+               cb(state, func, arg);
+               func = func->next;
+       } while(func != first);
 }
 
-static void idom_block(struct block *idom, struct block *user)
-{
-       do_use_block(idom, &idom->idominates, user, 0);
-}
 
-static void unidom_block(struct block *idom, struct block *unuser)
+static int local_triple(struct compile_state *state, 
+       struct triple *func, struct triple *ins)
 {
-       do_unuse_block(idom, &idom->idominates, unuser);
+       int local = (ins->id & TRIPLE_FLAG_LOCAL); /* nonzero when TRIPLE_FLAG_LOCAL is set on ins; state and func are not consulted */
+#if 0
+       if (!local) {
+               fprintf(stderr, "global: ");
+               display_triple(stderr, ins);
+       }
+#endif
+       return local; /* the masked flag bit itself, not normalized to 0/1 */
 }
 
-static void domf_block(struct block *block, struct block *domf)
+struct triple *copy_func(struct compile_state *state, struct triple *ofunc, 
+       struct occurance *base_occurance)
 {
-       do_use_block(block, &block->domfrontier, domf, 0);
-}
+       struct triple *nfunc;           /* the OP_LIST triple heading the copy; this is the return value */
+       struct triple *nfirst, *ofirst; /* first triple of the new and of the old body */
+       struct triple *new, *old;       /* cursors used by the second and third pass */
 
-static void undomf_block(struct block *block, struct block *undomf)
-{
-       do_unuse_block(block, &block->domfrontier, undomf);
-}
+       if (state->compiler->debug & DEBUG_INLINE) {
+               fprintf(stdout, "\n");
+               loc(stdout, state, 0);
+               fprintf(stdout, "\n__________ %s _________\n", __FUNCTION__);
+               display_func(stdout, ofunc);
+               fprintf(stdout, "__________ %s _________ done\n\n", __FUNCTION__);
+       }
 
-static void ipdom_block(struct block *ipdom, struct block *user)
-{
-       do_use_block(ipdom, &ipdom->ipdominates, user, 0);
+       /* Make a new copy of the old function.
+        * First pass: allocate one new triple per old triple, with the
+        * same op, type and lhs/rhs counts, and chain the new triples
+        * into their own list.  Operand pointers are filled in by the
+        * second pass.
+        */
+       nfunc = triple(state, OP_LIST, ofunc->type, 0, 0);
+       nfirst = 0;
+       ofirst = old = RHS(ofunc, 0);
+       do {
+               struct triple *new; /* shadows the outer `new` for the duration of this pass */
+               struct occurance *occurance;
+               int old_lhs, old_rhs;
+               old_lhs = TRIPLE_LHS(old->sizes);
+               old_rhs = TRIPLE_RHS(old->sizes);
+               occurance = inline_occurance(state, base_occurance, old->occurance);
+               if (ofunc->u.cval && (old->op == OP_FCALL)) {
+                       MISC(old, 0)->u.cval += 1; /* a copied call bumps the callee's counter, but only when ofunc's own counter is nonzero */
+               }
+               new = alloc_triple(state, old->op, old->type, old_lhs, old_rhs,
+                       occurance);
+               if (!triple_stores_block(state, new)) {
+                       memcpy(&new->u, &old->u, sizeof(new->u)); /* the u payload is copied verbatim unless the triple stores a block */
+               }
+               if (!nfirst) {
+                       RHS(nfunc, 0) = nfirst = new;
+               }
+               else {
+                       insert_triple(state, nfirst, new);
+               }
+               new->id |= TRIPLE_FLAG_FLATTENED;
+               
+               /* During the copy remember new as user of old.
+                * The second pass reads (*oexpr)->use->member to find the
+                * copy of an operand.
+                * NOTE(review): that relies on use_triple() leaving the
+                * newest user at the head of the use list -- confirm.
+                */
+               use_triple(old, new);
+
+               /* Populate the return type if present.
+                * The triple in MISC(ofunc, 0) is mapped to its copy in
+                * MISC(nfunc, 0).
+                */
+               if (old == MISC(ofunc, 0)) {
+                       MISC(nfunc, 0) = new;
+               }
+               /* Remember which instructions are local */
+               old->id |= TRIPLE_FLAG_LOCAL;
+               old = old->next;
+       } while(old != ofirst);
+
+       /* Make a second pass to fix up any unresolved references.
+        * Old and new lists are walked in lockstep; every operand slot
+        * that is set in the old triple but still empty in the new one
+        * is filled in.
+        */
+       old = ofirst;
+       new = nfirst;
+       do {
+               struct triple **oexpr, **nexpr;
+               int count, i;
+               /* Lookup where the copy is, to join pointers */
+               count = TRIPLE_SIZE(old->sizes);
+               for(i = 0; i < count; i++) {
+                       oexpr = &old->param[i];
+                       nexpr = &new->param[i];
+                       if (*oexpr && !*nexpr) {
+                               if (!local_triple(state, ofunc, *oexpr)) {
+                                       *nexpr = *oexpr; /* operand lies outside the copied function: share it */
+                               }
+                               else if ((*oexpr)->use) {
+                                       *nexpr = (*oexpr)->use->member; /* operand was copied: point at its copy */
+                               }
+                               if (*nexpr == old) {
+                                       internal_error(state, 0, "new == old?");
+                               }
+                               use_triple(*nexpr, new);
+                       }
+                       if (!*nexpr && *oexpr) {
+                               internal_error(state, 0, "Could not copy %d\n", i);
+                       }
+               }
+               old = old->next;
+               new = new->next;
+       } while((old != ofirst) && (new != nfirst));
+       
+       /* Make a third pass to clean up the extra uses recorded by the
+        * first pass and to clear the temporary local marks.
+        */
+       old = ofirst;
+       new = nfirst;
+       do {
+               unuse_triple(old, new);
+               /* Forget which instructions are local */
+               old->id &= ~TRIPLE_FLAG_LOCAL;
+               old = old->next;
+               new = new->next;
+       } while ((old != ofirst) && (new != nfirst));
+       return nfunc;
 }
 
-static void unipdom_block(struct block *ipdom, struct block *unuser)
+static struct triple *flatten_inline_call(
+       struct compile_state *state, struct triple *first, struct triple *ptr)
 {
-       do_unuse_block(ipdom, &ipdom->ipdominates, unuser);
+       /* Inline the function call.
+        *
+        * ptr is the call being replaced and MISC(ptr, 0) must be the
+        * OP_LIST of the called function.  The function is duplicated
+        * with copy_func(), a write_expr() of every RHS argument of ptr
+        * is flattened against the head of the copy, and the copied
+        * list is then spliced in between first->prev and first.
+        *
+        * Returns a read_expr() of MISC(nfunc, 0), or 0 when
+        * nfunc->type->left is TYPE_VOID.
+        */
+       struct type *ptype;
+       struct triple *ofunc, *nfunc, *nfirst, *param, *result;
+       struct triple *end, *nend;
+       int pvals, i;
+
+       /* Find the triples */
+       ofunc = MISC(ptr, 0);
+       if (ofunc->op != OP_LIST) {
+               internal_error(state, 0, "improper function");
+       }
+       nfunc = copy_func(state, ofunc, ptr->occurance);
+       nfirst = RHS(nfunc, 0)->next->next;
+       /* Prepend the parameter reading into the new function list */
+       ptype = nfunc->type->right;
+       param = RHS(nfunc, 0)->next->next;
+       pvals = TRIPLE_RHS(ptr->sizes);
+       for(i = 0; i < pvals; i++) {
+               struct type *atype;
+               struct triple *arg;
+               atype = ptype;
+               if ((ptype->type & TYPE_MASK) == TYPE_PRODUCT) { /* a product type carries the current parameter on its left */
+                       atype = ptype->left;
+               }
+               while((param->type->type & TYPE_MASK) != (atype->type & TYPE_MASK)) { /* skip triples whose type class differs from the argument's */
+                       param = param->next;
+               }
+               arg = RHS(ptr, i);
+               flatten(state, nfirst, write_expr(state, param, arg));
+               ptype = ptype->right;
+               param = param->next;
+       }
+       result = 0;
+       if ((nfunc->type->left->type & TYPE_MASK) != TYPE_VOID) {
+               result = read_expr(state, MISC(nfunc,0));
+       }
+       if (state->compiler->debug & DEBUG_INLINE) {
+               fprintf(stdout, "\n");
+               loc(stdout, state, 0);
+               fprintf(stdout, "\n__________ %s _________\n", __FUNCTION__);
+               display_func(stdout, nfunc);
+               fprintf(stdout, "__________ %s _________ done\n\n", __FUNCTION__);
+       }
+
+       /* Get rid of the extra triples */
+       nfirst = RHS(nfunc, 0)->next->next; /* looked up again now that the argument writes have been flattened */
+       release_triple(state, RHS(nfunc, 0)->prev->prev);
+       release_triple(state, RHS(nfunc, 0)->prev);
+       release_triple(state, RHS(nfunc, 0)->next);
+       free_triple(state, RHS(nfunc, 0));
+       RHS(nfunc, 0) = 0; /* freed on the line above, so clear the slot before nfunc itself is freed */
+       free_triple(state, nfunc);
+
+       /* Append the new function list onto the return list */
+       end = first->prev;
+       nend = nfirst->prev;
+       end->next    = nfirst;
+       nfirst->prev = end;
+       nend->next   = first;
+       first->prev  = nend;
+
+       return result;
 }
 
-static void ipdomf_block(struct block *block, struct block *ipdomf)
+static struct triple *flatten_function_call(
+       struct compile_state *state, struct triple *first, struct triple *ptr)
 {
-       do_use_block(block, &block->ipdomfrontier, ipdomf, 0);
+       /* Generate an ordinary function call.
+        *
+        * ptr is the call being replaced and MISC(ptr, 0) is the called
+        * function.  Each RHS argument of ptr is written to a parameter
+        * triple of matching type class, then a return label, the
+        * address of that label, a write of that address to retvar and
+        * the call itself are flattened.
+        *
+        * Returns a read_expr() of MISC(func, 0), or 0 when
+        * func->type->left is TYPE_VOID.
+        */
+       struct triple *func, *func_first, *func_last, *retvar;
+       struct type *ptype;
+       struct triple *param;
+       struct triple *jmp;
+       struct triple *ret_addr, *ret_loc, *ret_set;
+       struct triple *result;
+       int pvals, i;
+
+       FINISHME();
+       /* Find the triples */
+       func = MISC(ptr, 0);
+       func_first = RHS(func, 0);
+       retvar = func_first->next;
+       func_last  = func_first->prev;
+
+       /* Generate some needed triples */
+       ret_loc = label(state);
+       ret_addr = triple(state, OP_ADDRCONST, &void_ptr_type, ret_loc, 0);
+
+       /* Pass the parameters to the new function */
+       ptype = func->type->right;
+       param = func_first->next->next;
+       pvals = TRIPLE_RHS(ptr->sizes);
+       for(i = 0; i < pvals; i++) {
+               struct type *atype;
+               struct triple *arg;
+               atype = ptype;
+               if ((ptype->type & TYPE_MASK) == TYPE_PRODUCT) { /* a product type carries the current parameter on its left */
+                       atype = ptype->left;
+               }
+               while((param->type->type & TYPE_MASK) != (atype->type & TYPE_MASK)) { /* skip triples whose type class differs from the argument's */
+                       param = param->next;
+               }
+               arg = RHS(ptr, i);
+               flatten(state, first, write_expr(state, param, arg));
+               ptype = ptype->right;
+               param = param->next;
+       }
+       
+       /* Thread the triples together */
+       ret_loc       = flatten(state, first, ret_loc);
+       ret_addr      = flatten(state, ret_loc, ret_addr);
+       ret_set       = flatten(state, ret_loc, write_expr(state, retvar, ret_addr)); /* ret_set is not read again in this function */
+       jmp           = flatten(state, ret_loc, 
+               call(state, retvar, ret_addr, func_first, func_last)); /* jmp is not read again in this function */
+
+       /* Find the result */
+       result = 0;
+       if ((func->type->left->type & TYPE_MASK) != TYPE_VOID) {
+               result = read_expr(state, MISC(func, 0));
+       }
+
+       if (state->compiler->debug & DEBUG_INLINE) {
+               fprintf(stdout, "\n");
+               loc(stdout, state, 0);
+               fprintf(stdout, "\n__________ %s _________\n", __FUNCTION__);
+               display_func(stdout, func);
+               fprintf(stdout, "__________ %s _________ done\n\n", __FUNCTION__);
+       }
+
+       return result;
 }
 
-static void unipdomf_block(struct block *block, struct block *unipdomf)
+static void inline_functions(struct compile_state *state, struct triple *first)
 {
-       do_unuse_block(block, &block->ipdomfrontier, unipdomf);
+       struct triple *ptr, *next;
+       ptr = next = first; /* pass 1: expand the OP_FCALL triples that qualify for inlining */
+       do {
+               int do_inline;
+               struct triple *func, *prev, *new;
+               ptr = next;
+               prev = ptr->prev;
+               next = ptr->next;
+               if (ptr->op != OP_FCALL) {
+                       continue;
+               }
+               func = MISC(ptr, 0);
+               /* See if the function should be inlined.
+                * A storage class that includes STOR_INLINE always
+                * qualifies; any other function qualifies only when
+                * func->u.cval is exactly 1.
+                * COMPILER_ALWAYS_INLINE then forces inlining, and a
+                * missing COMPILER_INLINE flag vetoes it last of all.
+                */
+               switch(func->type->type & STOR_MASK) {
+               case STOR_STATIC | STOR_INLINE:
+               case STOR_LOCAL  | STOR_INLINE:
+               case STOR_EXTERN | STOR_INLINE:
+                       do_inline = 1;
+                       break;
+               default:
+                       do_inline = (func->u.cval == 1);
+                       break;
+               }
+               if (state->compiler->flags & COMPILER_ALWAYS_INLINE) {
+                       do_inline = 1;
+               }
+               if (!(state->compiler->flags & COMPILER_INLINE)) {
+                       do_inline = 0;
+               }
+               if (!do_inline) {
+                       continue;
+               }
+               if (state->compiler->debug & DEBUG_INLINE) {
+                       fprintf(stderr, "inlining %s\n",
+                               func->type->type_ident->name);
+               }
+
+               /* Update the function use counts */
+               func->u.cval -= 1;
+               /* Unhook the call and really inline it */
+               next->prev = prev;
+               prev->next = next;
+               ptr->next = ptr->prev = ptr;
+               
+               new = flatten(state, next, 
+                       flatten_inline_call(state, next, ptr));
+               if (new) {
+                       propogate_use(state, ptr, new);
+               }
+               release_triple(state, ptr);
+               next = prev->next; /* resume at whatever now follows prev */
+       } while (next != first);
+       ptr = next = first; /* pass 2: every OP_FCALL still present goes through flatten_function_call() */
+       do {
+               struct triple *func, *prev, *new;
+               ptr = next;
+               prev = ptr->prev;
+               next = ptr->next;
+               if (ptr->op != OP_FCALL) {
+                       continue;
+               }
+               func = MISC(ptr, 0);
+               inline_functions(state, RHS(func, 0)); /* recurse into the called function's own triple list first */
+               /* Unhook the call and really flatten it */
+               next->prev = prev;
+               prev->next = next;
+               ptr->next = ptr->prev = ptr;
+               new = flatten(state, next, 
+                       flatten_function_call(state, next, ptr));
+               if (new) {
+                       propogate_use(state, ptr, new);
+               }
+               release_triple(state, ptr);
+               next = prev->next; /* resume at whatever now follows prev */
+       } while(next != first);
 }
+       
+static void insert_function(struct compile_state *state,
+       struct triple *func, void *arg)
+{
+       struct triple *first, *end, *ffirst, *fend;
 
+       if (state->compiler->debug & DEBUG_INLINE) {
+               fprintf(stderr, "%s func count: %d\n", 
+                       func->type->type_ident->name, func->u.cval);
+       }
+       if (func->u.cval == 0) { /* a count of zero: this function is left out of the list */
+               return;
+       }
+       if (state->compiler->flags & COMPILER_ALWAYS_INLINE) {
+               internal_error(state, func, "always inline failed\n");
+       }
 
+       /* Find the end points of the lists */
+       first  = arg; /* arg is the triple the function's list is inserted in front of */
+       end    = first->prev;
+       ffirst = RHS(func, 0);
+       fend   = ffirst->prev;
 
-static int do_walk_triple(struct compile_state *state,
-       struct triple *ptr, int depth,
-       int (*cb)(struct compile_state *state, struct triple *ptr, int depth)) 
+       /* splice the lists together:
+        * ffirst .. fend ends up between end and first.
+        */
+       end->next    = ffirst;
+       ffirst->prev = end;
+       fend->next   = first;
+       first->prev  = fend;
+}
+
+static void join_functions(struct compile_state *state)
 {
-       int result;
-       result = cb(state, ptr, depth);
-       if ((result == 0) && (ptr->op == OP_LIST)) {
-               struct triple *list;
-               list = ptr;
-               ptr = RHS(list, 0);
-               do {
-                       result = do_walk_triple(state, ptr, depth + 1, cb);
-                       if (ptr->next->prev != ptr) {
-                               internal_error(state, ptr->next, "bad prev");
-                       }
-                       ptr = ptr->next;
-                       
-               } while((result == 0) && (ptr != RHS(list, 0)));
+       struct triple *jmp, *start, *end, *call;
+       struct file_state file;
+
+       /* Dummy file state to get debug handling right */
+       memset(&file, 0, sizeof(file));
+       file.basename = "";
+       file.line = 0;
+       file.report_line = 0;
+       file.report_name = file.basename;
+       file.prev = state->file;
+       state->file = &file;
+       state->function = "";
+       
+       /* Lay down the basic program structure:
+        * a start label, an end label and an OP_FCALL of
+        * state->main_function, each flattened against state->first.
+        */
+       end = label(state);
+       start = label(state);
+       start = flatten(state, state->first, start);
+       end = flatten(state, state->first, end);
+       call = new_triple(state, OP_FCALL, &void_type, -1, 0);
+       MISC(call, 0) = state->main_function;
+       flatten(state, state->first, call);
+       
+       /* See which functions are called, and how often */
+       mark_live_functions(state, state->first);
+       inline_functions(state, state->first);
+       walk_functions(state, insert_function, end); /* end is handed to insert_function() as its arg */
+
+       if (start->next != end) { /* start is no longer directly followed by end */
+               jmp = flatten(state, start, branch(state, end, 0)); /* jmp is not read again in this function */
        }
-       return result;
+
+       /* Done now cleanup */
+       state->file = file.prev;
+       state->function = 0;
 }
 
-static int walk_triple(
-       struct compile_state *state, 
-       struct triple *ptr, 
-       int (*cb)(struct compile_state *state, struct triple *ptr, int depth))
+/*
+ * Data structures for optimization.
+ */
+
+
+static int do_use_block(
+       struct block *used, struct block_set **head, struct block *user, 
+       int front) /* front nonzero: insert at *head; zero: append at the tail */
 {
-       return do_walk_triple(state, ptr, 0, cb);
+       struct block_set **ptr, *new;
+       if (!used)
+               return 0;
+       if (!user)
+               return 0;
+       ptr = head;
+       while(*ptr) {
+               if ((*ptr)->member == user) {
+                       return 0; /* user is already on the list, nothing is added */
+               }
+               ptr = &(*ptr)->next;
+       }
+       new = xcmalloc(sizeof(*new), "block_set"); /* ptr now addresses the terminating null link */
+       new->member = user;
+       if (front) {
+               new->next = *head;
+               *head = new;
+       }
+       else {
+               new->next = 0;
+               *ptr = new;
+       }
+       return 1; /* exactly one entry was added */
 }
-
-static void do_print_prefix(int depth)
+static int do_unuse_block(
+       struct block *used, struct block_set **head, struct block *unuser)
 {
-       int i;
-       for(i = 0; i < depth; i++) {
-               printf("  ");
+       struct block_set *use, **ptr;
+       int count;
+       count = 0;
+       ptr = head;
+       while(*ptr) {
+               use = *ptr;
+               if (use->member == unuser) {
+                       *ptr = use->next; /* unlink; ptr stays put so the successor is examined next */
+                       memset(use, -1, sizeof(*use)); /* overwrite the entry with 0xff bytes before it is freed */
+                       xfree(use);
+                       count += 1;
+               }
+               else {
+                       ptr = &use->next;
+               }
        }
+       return count; /* number of entries removed from the list */
+}
+
+static void use_block(struct block *used, struct block *user)
+{
+       int count;
+       /* Append new to the head of the list, print_block
+        * depends on this.
+        * do_use_block() returns 1 only when an entry was really
+        * added, so used->users grows only in that case.
+        */
+       count = do_use_block(used, &used->use, user, 1); 
+       used->users += count;
+}
+static void unuse_block(struct block *used, struct block *unuser)
+{
+       int count;
+       count = do_unuse_block(used, &used->use, unuser); /* removes every entry for unuser from used->use */
+       used->users -= count; /* count is the number of entries that were removed */
+}
+
+static void add_block_edge(struct block *block, struct block *edge, int front)
+{
+       int count;
+       count = do_use_block(block, &block->edges, edge, front); /* front nonzero puts edge at the head of block->edges, zero at the tail */
+       block->edge_count += count; /* count is 1 when edge was added and 0 when it was already listed */
+}
+
+static void remove_block_edge(struct block *block, struct block *edge)
+{
+       int count;
+       count = do_unuse_block(block, &block->edges, edge); /* removes every entry for edge from block->edges */
+       block->edge_count -= count; /* count is the number of entries that were removed */
+}
+
+static void idom_block(struct block *idom, struct block *user)
+{
+       do_use_block(idom, &idom->idominates, user, 0); /* append user to the tail of idom->idominates unless already listed */
+}
+
+static void unidom_block(struct block *idom, struct block *unuser)
+{
+       do_unuse_block(idom, &idom->idominates, unuser); /* remove every entry for unuser from idom->idominates */
+}
+
+static void domf_block(struct block *block, struct block *domf)
+{
+       do_use_block(block, &block->domfrontier, domf, 0); /* append domf to the tail of block->domfrontier unless already listed */
+}
+
+static void undomf_block(struct block *block, struct block *undomf)
+{
+       do_unuse_block(block, &block->domfrontier, undomf); /* remove every entry for undomf from block->domfrontier */
+}
+
+static void ipdom_block(struct block *ipdom, struct block *user)
+{
+       do_use_block(ipdom, &ipdom->ipdominates, user, 0); /* append user to the tail of ipdom->ipdominates unless already listed */
+}
+
+static void unipdom_block(struct block *ipdom, struct block *unuser)
+{
+       do_unuse_block(ipdom, &ipdom->ipdominates, unuser); /* remove every entry for unuser from ipdom->ipdominates */
+}
+
+static void ipdomf_block(struct block *block, struct block *ipdomf)
+{
+       do_use_block(block, &block->ipdomfrontier, ipdomf, 0); /* append ipdomf to the tail of block->ipdomfrontier unless already listed */
+}
+
+static void unipdomf_block(struct block *block, struct block *unipdomf)
+{
+       do_unuse_block(block, &block->ipdomfrontier, unipdomf); /* remove every entry for unipdomf from block->ipdomfrontier */
+}
+
+static int walk_triples(
+       struct compile_state *state, 
+       int (*cb)(struct compile_state *state, struct triple *ptr))
+{
+       struct triple *ptr;
+       int result;
+       ptr = state->first; /* the walk starts at state->first and follows ->next */
+       do {
+               result = cb(state, ptr);
+               if (ptr->next->prev != ptr) { /* the forward and backward links must agree */
+                       internal_error(state, ptr->next, "bad prev");
+               }
+               ptr = ptr->next;
+       } while((result == 0) && (ptr != state->first)); /* stop on a nonzero callback result or after one full lap */
+       return result; /* result of the last callback that was invoked */
 }
 
 #define PRINT_LIST 1
-static int do_print_triple(struct compile_state *state, struct triple *ins, int depth)
+static int do_print_triple(struct compile_state *state, struct triple *ins)
 {
        int op;
        op = ins->op;
@@ -9202,34 +10604,22 @@ static int do_print_triple(struct compile_state *state, struct triple *ins, int
        if ((op == OP_LABEL) && (ins->use)) {
                printf("\n%p:\n", ins);
        }
-       do_print_prefix(depth);
        display_triple(stdout, ins);
 
-       if ((ins->op == OP_BRANCH) && ins->use) {
+       if (triple_is_branch(state, ins) && ins->use && (ins->op != OP_RET)) {
                internal_error(state, ins, "branch used?");
        }
-#if 0
-       {
-               struct triple_set *user;
-               for(user = ins->use; user; user = user->next) {
-                       printf("use: %p\n", user->member);
-               }
-       }
-#endif
        if (triple_is_branch(state, ins)) {
                printf("\n");
        }
        return 0;
 }
 
-static void print_triple(struct compile_state *state, struct triple *ins)
-{
-       walk_triple(state, ins, do_print_triple);
-}
-
 static void print_triples(struct compile_state *state)
 {
-       print_triple(state, state->main_function);
+       if (state->compiler->debug & DEBUG_TRIPLES) { /* nothing is printed unless DEBUG_TRIPLES is set */
+               walk_triples(state, do_print_triple); /* do_print_triple() is applied to each triple of the walk */
+       }
 }
 
 struct cf_block {
@@ -9237,12 +10627,14 @@ struct cf_block {
 };
 static void find_cf_blocks(struct cf_block *cf, struct block *block)
 {
+       struct block_set *edge;
        if (!block || (cf[block->vertex].block == block)) {
                return;
        }
        cf[block->vertex].block = block;
-       find_cf_blocks(cf, block->left);
-       find_cf_blocks(cf, block->right);
+       for(edge = block->edges; edge; edge = edge->next) { /* recurse through every entry of block->edges */
+               find_cf_blocks(cf, edge->member);
+       }
 }
 
 static void print_control_flow(struct compile_state *state)
@@ -9255,15 +10647,13 @@ static void print_control_flow(struct compile_state *state)
 
        for(i = 1; i <= state->last_vertex; i++) {
                struct block *block;
+               struct block_set *edge;
                block = cf[i].block;
                if (!block)
                        continue;
                printf("(%p) %d:", block, block->vertex);
-               if (block->left) {
-                       printf(" %d", block->left->vertex);
-               }
-               if (block->right && (block->right != block->left)) {
-                       printf(" %d", block->right->vertex);
+               for(edge = block->edges; edge; edge = edge->next) {
+                       printf(" %d", edge->member->vertex);
                }
                printf("\n");
        }
@@ -9272,12 +10662,10 @@ static void print_control_flow(struct compile_state *state)
 }
 
 
-static struct block *basic_block(struct compile_state *state,
-       struct triple *first)
+static struct block *basic_block(struct compile_state *state, struct triple *first)
 {
        struct block *block;
        struct triple *ptr;
-       int op;
        if (first->op != OP_LABEL) {
                internal_error(state, 0, "block does not start with a label");
        }
@@ -9292,7 +10680,7 @@ static struct block *basic_block(struct compile_state *state,
        block->vertex = state->last_vertex;
        ptr = first;
        do {
-               if ((ptr != first) && (ptr->op == OP_LABEL) && ptr->use) {
+               if ((ptr != first) && (ptr->op == OP_LABEL) && (ptr->use)) { 
                        break;
                }
                block->last = ptr;
@@ -9300,33 +10688,62 @@ static struct block *basic_block(struct compile_state *state,
                if (triple_stores_block(state, ptr)) {
                        ptr->u.block = block;
                }
-               if (ptr->op == OP_BRANCH) {
+               if (triple_is_branch(state, ptr)) {
                        break;
                }
                ptr = ptr->next;
-       } while (ptr != RHS(state->main_function, 0));
-       if (ptr == RHS(state->main_function, 0))
-               return block;
-       op = ptr->op;
-       if (op == OP_LABEL) {
-               block->left = basic_block(state, ptr);
-               block->right = 0;
-               use_block(block->left, block);
-       }
-       else if (op == OP_BRANCH) {
-               block->left = 0;
-               /* Trace the branch target */
-               block->right = basic_block(state, TARG(ptr, 0));
-               use_block(block->right, block);
-               /* If there is a test trace the branch as well */
-               if (TRIPLE_RHS(ptr->sizes)) {
-                       block->left = basic_block(state, ptr->next);
-                       use_block(block->left, block);
+       } while (ptr != state->first);
+       if (ptr == state->first) {
+               /* The block has no outflowing edges */
+       }
+       else if (ptr->op == OP_LABEL) {
+               struct block *next;
+               next = basic_block(state, ptr);
+               add_block_edge(block, next, 0);
+               use_block(next, block);
+       }
+       else if (triple_is_branch(state, ptr)) {
+               struct triple **expr, *first;
+               struct block *child;
+               /* Find the branch targets.
+                * I special case the first branch as that magically
+                * avoids some difficult cases for the register allocator.
+                */
+               expr = triple_targ(state, ptr, 0);
+               if (!expr) {
+                       internal_error(state, ptr, "branch without targets");
+               }
+               first = *expr;
+               expr = triple_targ(state, ptr, expr);
+               for(; expr; expr = triple_targ(state, ptr, expr)) {
+                       if (!*expr) continue;
+                       child = basic_block(state, *expr);
+                       use_block(child, block);
+                       add_block_edge(block, child, 0);
+               }
+               if (first) {
+                       child = basic_block(state, first);
+                       use_block(child, block);
+                       add_block_edge(block, child, 1);
                }
        }
        else {
                internal_error(state, 0, "Bad basic block split");
        }
+#if 0
+{
+       struct block_set *edge;
+       fprintf(stderr, "basic_block: %10p [%2d] ( %10p - %10p )",
+               block, block->vertex, 
+               block->first, block->last);
+       for(edge = block->edges; edge; edge = edge->next) {
+               fprintf(stderr, " %10p [%2d]",
+                       edge->member ? edge->member->first : 0,
+                       edge->member ? edge->member->vertex : -1);
+       }
+       fprintf(stderr, "\n");
+}
+#endif
        return block;
 }
 
@@ -9338,11 +10755,11 @@ static void walk_blocks(struct compile_state *state,
        struct triple *ptr, *first;
        struct block *last_block;
        last_block = 0;
-       first = RHS(state->main_function, 0);
+       first = state->first;
        ptr = first;
        do {
-               struct block *block;
-               if (ptr->op == OP_LABEL) {
+               if (triple_stores_block(state, ptr)) {
+                       struct block *block;
                        block = ptr->u.block;
                        if (block && (block != last_block)) {
                                cb(state, block, arg);
@@ -9356,80 +10773,59 @@ static void walk_blocks(struct compile_state *state,
 static void print_block(
        struct compile_state *state, struct block *block, void *arg)
 {
+       struct block_set *user, *edge;
        struct triple *ptr;
        FILE *fp = arg;
 
-       fprintf(fp, "\nblock: %p (%d), %p<-%p %p<-%p\n", 
+       fprintf(fp, "\nblock: %p (%d) ",
                block, 
-               block->vertex,
-               block->left, 
-               block->left && block->left->use?block->left->use->member : 0,
-               block->right, 
-               block->right && block->right->use?block->right->use->member : 0);
+               block->vertex);
+
+       for(edge = block->edges; edge; edge = edge->next) { /* one pair per entry of block->edges */
+               fprintf(fp, " %p<-%p",
+                       edge->member,
+                       (edge->member && edge->member->use)?
+                       edge->member->use->member : 0); /* head of the target's use list, or 0 when there is none */
+       }
+       fprintf(fp, "\n");
        if (block->first->op == OP_LABEL) {
                fprintf(fp, "%p:\n", block->first);
        }
        for(ptr = block->first; ; ptr = ptr->next) {
-               struct triple_set *user;
-               int op = ptr->op;
-               
-               if (triple_stores_block(state, ptr)) {
-                       if (ptr->u.block != block) {
-                               internal_error(state, ptr, 
-                                       "Wrong block pointer: %p\n",
-                                       ptr->u.block);
-                       }
-               }
-               if (op == OP_ADECL) {
-                       for(user = ptr->use; user; user = user->next) {
-                               if (!user->member->u.block) {
-                                       internal_error(state, user->member, 
-                                               "Use %p not in a block?\n",
-                                               user->member);
-                               }
-                       }
-               }
                display_triple(fp, ptr);
-
-#if 0
-               for(user = ptr->use; user; user = user->next) {
-                       fprintf(fp, "use: %p\n", user->member);
-               }
-#endif
-
-               /* Sanity checks... */
-               valid_ins(state, ptr);
-               for(user = ptr->use; user; user = user->next) {
-                       struct triple *use;
-                       use = user->member;
-                       valid_ins(state, use);
-                       if (triple_stores_block(state, user->member) &&
-                               !user->member->u.block) {
-                               internal_error(state, user->member,
-                                       "Use %p not in a block?",
-                                       user->member);
-                       }
-               }
-
                if (ptr == block->last)
                        break;
        }
-       fprintf(fp,"\n");
+       fprintf(fp, "users %d: ", block->users); /* followed by every block recorded in block->use */
+       for(user = block->use; user; user = user->next) {
+               fprintf(fp, "%p (%d) ", 
+                       user->member,
+                       user->member->vertex);
+       }
+       fprintf(fp,"\n\n");
 }
 
 
-static void print_blocks(struct compile_state *state, FILE *fp)
+static void romcc_print_blocks(struct compile_state *state, FILE *fp) /* writes a banner to fp, then print_block() output for each block visited by walk_blocks() */
 {
        fprintf(fp, "--------------- blocks ---------------\n");
        walk_blocks(state, print_block, fp);
 }
+static void print_blocks(struct compile_state *state, const char *func, FILE *fp)
+{
+       if (state->compiler->debug & DEBUG_BASIC_BLOCKS) { /* nothing is printed unless DEBUG_BASIC_BLOCKS is set */
+               fprintf(fp, "After %s\n", func); /* func is only used as a label in this heading */
+               romcc_print_blocks(state, fp);
+               print_control_flow(state); /* NOTE(review): fp is not passed on here - confirm where this output goes */
+       }
+}
 
 static void prune_nonblock_triples(struct compile_state *state)
 {
        struct block *block;
        struct triple *first, *ins, *next;
        /* Delete the triples not in a basic block */
-       first = RHS(state->main_function, 0);
+       first = state->first;
        block = 0;
        ins = first;
        do {
@@ -9440,40 +10836,69 @@ static void prune_nonblock_triples(struct compile_state *state)
                if (!block) {
                        release_triple(state, ins);
                }
+               if (block && block->last == ins) {
+                       block = 0;
+               }
                ins = next;
        } while(ins != first);
 }
 
 static void setup_basic_blocks(struct compile_state *state)
 {
-       if (!triple_stores_block(state, RHS(state->main_function, 0)) ||
-               !triple_stores_block(state, RHS(state->main_function,0)->prev)) {
+       if (!triple_stores_block(state, state->first)) {
                internal_error(state, 0, "ins will not store block?");
        }
        /* Find the basic blocks */
        state->last_vertex = 0;
-       state->first_block = basic_block(state, RHS(state->main_function,0));
+       state->first_block = basic_block(state, state->first);
        /* Delete the triples not in a basic block */
        prune_nonblock_triples(state);
-       /* Find the last basic block */
-       state->last_block = RHS(state->main_function, 0)->prev->u.block;
-       if (!state->last_block) {
-               internal_error(state, 0, "end not used?");
-       }
-       /* Insert an extra unused edge from start to the end 
-        * This helps with reverse control flow calculations.
+
+       /* Find the last basic block.
+        *
+        * For purposes of reverse flow computation it is
+        * important that the last basic block is empty.
+        * This allows the control flow graph to be modified to
+        * have one unique starting block and one unique final block.
+        * With the insertion of a few extra edges.
+        *
+        * If the final block contained instructions it could contain
+        * phi functions from edges that would never contribute a
+        * value.  Which for now at least I consider a compile error.
+        *
+        * When the block found is not a single OP_LABEL triple, a
+        * fresh label is flattened, given its own block, and linked
+        * after the old last block with an edge and a use.
         */
-       use_block(state->first_block, state->last_block);
+       state->last_block = block_of_triple(state, state->first->prev);
+       if ((state->last_block->first != state->last_block->last) ||
+               (state->last_block->last->op != OP_LABEL))
+       {
+               struct block *block, *prev_block;
+               struct triple *final;
+
+               prev_block = state->last_block;
+               
+               final = label(state);
+               flatten(state, state->first, final);
+               final->id |= TRIPLE_FLAG_VOLATILE;
+               use_triple(final, final); /* the label is recorded as a user of itself; NOTE(review): presumably so it counts as used - confirm */
+               block = basic_block(state, final);
+
+               state->last_block = block;
+
+               add_block_edge(prev_block, block, 0); /* edge goes on the tail of prev_block->edges */
+               use_block(block, prev_block);
+       }
+
+#if 0
        /* If we are debugging print what I have just done */
-       if (state->debug & DEBUG_BASIC_BLOCKS) {
+       if (state->compiler->debug & DEBUG_BASIC_BLOCKS) {
                print_blocks(state, stdout);
                print_control_flow(state);
        }
+#endif
 }
 
 static void free_basic_block(struct compile_state *state, struct block *block)
 {
-       struct block_set *entry, *next;
+       struct block_set *edge, *entry;
        struct block *child;
        if (!block) {
                return;
@@ -9482,11 +10907,10 @@ static void free_basic_block(struct compile_state *state, struct block *block)
                return;
        }
        block->vertex = -1;
-       if (block->left) {
-               unuse_block(block->left, block);
-       }
-       if (block->right) {
-               unuse_block(block->right, block);
+       for(edge = block->edges; edge; edge = edge->next) {
+               if (edge->member) {
+                       unuse_block(edge->member, block);
+               }
        }
        if (block->idom) {
                unidom_block(block->idom, block);
@@ -9496,46 +10920,48 @@ static void free_basic_block(struct compile_state *state, struct block *block)
                unipdom_block(block->ipdom, block);
        }
        block->ipdom = 0;
-       for(entry = block->use; entry; entry = next) {
-               next = entry->next;
+       while((entry = block->use)) {
                child = entry->member;
                unuse_block(block, child);
-               if (child->left == block) {
-                       child->left = 0;
-               }
-               if (child->right == block) {
-                       child->right = 0;
+               if (child && (child->vertex != -1)) {
+                       for(edge = child->edges; edge; edge = edge->next) {
+                               edge->member = 0;
+                       }
                }
        }
-       for(entry = block->idominates; entry; entry = next) {
-               next = entry->next;
+       while((entry = block->idominates)) {
                child = entry->member;
                unidom_block(block, child);
-               child->idom = 0;
+               if (child && (child->vertex != -1)) {
+                       child->idom = 0;
+               }
        }
-       for(entry = block->domfrontier; entry; entry = next) {
-               next = entry->next;
+       while((entry = block->domfrontier)) {
                child = entry->member;
                undomf_block(block, child);
        }
-       for(entry = block->ipdominates; entry; entry = next) {
-               next = entry->next;
+       while((entry = block->ipdominates)) {
                child = entry->member;
                unipdom_block(block, child);
-               child->ipdom = 0;
+               if (child && (child->vertex != -1)) {
+                       child->ipdom = 0;
+               }
        }
-       for(entry = block->ipdomfrontier; entry; entry = next) {
-               next = entry->next;
+       while((entry = block->ipdomfrontier)) {
                child = entry->member;
                unipdomf_block(block, child);
        }
        if (block->users != 0) {
                internal_error(state, 0, "block still has users");
        }
-       free_basic_block(state, block->left);
-       block->left = 0;
-       free_basic_block(state, block->right);
-       block->right = 0;
+       while((edge = block->edges)) {
+               child = edge->member;
+               remove_block_edge(block, child);
+               
+               if (child && (child->vertex != -1)) {
+                       free_basic_block(state, child);
+               }
+       }
        memset(block, -1, sizeof(*block));
        xfree(block);
 }
@@ -9546,7 +10972,7 @@ static void free_basic_blocks(struct compile_state *state)
        free_basic_block(state, state->first_block);
        state->last_vertex = 0;
        state->first_block = state->last_block = 0;
-       first = RHS(state->main_function, 0);
+       first = state->first;
        ins = first;
        do {
                if (triple_stores_block(state, ins)) {
@@ -9598,6 +11024,7 @@ static void sdom_block(struct sdom_block *sdom, struct sdom_block *block)
 static int initialize_sdblock(struct sdom_block *sd,
        struct block *parent, struct block *block, int vertex)
 {
+       struct block_set *edge;
        if (!block || (sd[block->vertex].block == block)) {
                return vertex;
        }
@@ -9610,12 +11037,14 @@ static int initialize_sdblock(struct sdom_block *sd,
        sd[vertex].parent   = parent? &sd[parent->vertex] : 0;
        sd[vertex].ancestor = 0;
        sd[vertex].vertex   = vertex;
-       vertex = initialize_sdblock(sd, block, block->left, vertex);
-       vertex = initialize_sdblock(sd, block, block->right, vertex);
+       for(edge = block->edges; edge; edge = edge->next) {
+               vertex = initialize_sdblock(sd, block, edge->member, vertex);
+       }
        return vertex;
 }
 
-static int initialize_sdpblock(struct sdom_block *sd,
+static int initialize_spdblock(
+       struct compile_state *state, struct sdom_block *sd,
        struct block *parent, struct block *block, int vertex)
 {
        struct block_set *user;
@@ -9632,7 +11061,34 @@ static int initialize_sdpblock(struct sdom_block *sd,
        sd[vertex].ancestor = 0;
        sd[vertex].vertex   = vertex;
        for(user = block->use; user; user = user->next) {
-               vertex = initialize_sdpblock(sd, block, user->member, vertex);
+               vertex = initialize_spdblock(state, sd, block, user->member, vertex);
+       }
+       return vertex;
+}
+
+/* Fill in the sd[] entries used by the post dominator computation.
+ *
+ * initialize_spdblock is first run from state->last_block.  The blocks
+ * are then visited by following block->last->next->u.block from
+ * state->first_block, and every block that does not yet own its sd[]
+ * entry gets an edge to state->last_block before it is passed to
+ * initialize_spdblock with state->last_block as its parent.
+ *
+ * Returns the vertex value handed back by the last
+ * initialize_spdblock call.
+ */
+static int setup_spdblocks(struct compile_state *state, struct sdom_block *sd)
+{
+       struct block *block;
+       int vertex;
+       /* Setup as many sdpblocks as possible without using fake edges */
+       vertex = initialize_spdblock(state, sd, 0, state->last_block, 0);
+
+       /* Walk through the graph and find unconnected blocks.  Add a
+        * fake edge from the unconnected blocks to the end of the
+        * graph.
+        */
+       block = state->first_block->last->next->u.block;
+       for(; block && block != state->first_block; block = block->last->next->u.block) {
+               /* Blocks that already own their sd[] entry need no fake edge */
+               if (sd[block->vertex].block == block) {
+                       continue;
+               }
+#if DEBUG_SDP_BLOCKS
+               fprintf(stderr, "Adding %d\n", vertex +1);
+#endif
+               /* Record the fake edge in both directions */
+               add_block_edge(block, state->last_block, 0);
+               use_block(state->last_block, block);
+
+               vertex = initialize_spdblock(state, sd, state->last_block, block, vertex);
        }
        return vertex;
 }
@@ -9731,19 +11187,13 @@ static void compute_spdom(struct compile_state *state, struct sdom_block *sd)
         */
        for(i = state->last_vertex; i >= 2; i--) {
                struct sdom_block *u, *v, *parent, *next;
+               struct block_set *edge;
                struct block *block;
                block = sd[i].block;
                parent = sd[i].parent;
                /* Step 2 */
-               if (block->left) {
-                       v = &sd[block->left->vertex];
-                       u = !(v->ancestor)? v : (compress_ancestors(v), v->label);
-                       if (u->sdom->vertex < sd[i].sdom->vertex) {
-                               sd[i].sdom = u->sdom;
-                       }
-               }
-               if (block->right && (block->right != block->left)) {
-                       v = &sd[block->right->vertex];
+               for(edge = block->edges; edge; edge = edge->next) {
+                       v = &sd[edge->member->vertex];
                        u = !(v->ancestor)? v : (compress_ancestors(v), v->label);
                        if (u->sdom->vertex < sd[i].sdom->vertex) {
                                sd[i].sdom = u->sdom;
@@ -9872,10 +11322,15 @@ static void find_immediate_dominators(struct compile_state *state)
 static void find_post_dominators(struct compile_state *state)
 {
        struct sdom_block *sd;
+       int vertex;
        /* Step 1 initialize the basic block information */
        sd = xcmalloc(sizeof(*sd) * (state->last_vertex + 1), "sdom_state");
 
-       initialize_sdpblock(sd, 0, state->last_block, 0);
+       vertex = setup_spdblocks(state, sd);
+       if (vertex != state->last_vertex) {
+               internal_error(state, 0, "missing %d blocks\n",
+                       state->last_vertex - vertex);
+       }
 
        /* Step 2 compute the semidominators */
        /* Step 3 implicitly define the immediate dominator of each vertex */
@@ -9890,7 +11345,7 @@ static void find_post_dominators(struct compile_state *state)
 static void find_block_domf(struct compile_state *state, struct block *block)
 {
        struct block *child;
-       struct block_set *user;
+       struct block_set *user, *edge;
        if (block->domfrontier != 0) {
                internal_error(state, block->first, "domfrontier present?");
        }
@@ -9901,11 +11356,10 @@ static void find_block_domf(struct compile_state *state, struct block *block)
                }
                find_block_domf(state, child);
        }
-       if (block->left && block->left->idom != block) {
-               domf_block(block, block->left);
-       }
-       if (block->right && block->right->idom != block) {
-               domf_block(block, block->right);
+       for(edge = block->edges; edge; edge = edge->next) {
+               if (edge->member->idom != block) {
+                       domf_block(block, edge->member);
+               }
        }
        for(user = block->idominates; user; user = user->next) {
                struct block_set *frontier;
@@ -9932,13 +11386,12 @@ static void find_block_ipdomf(struct compile_state *state, struct block *block)
                }
                find_block_ipdomf(state, child);
        }
-       if (block->left && block->left->ipdom != block) {
-               ipdomf_block(block, block->left);
-       }
-       if (block->right && block->right->ipdom != block) {
-               ipdomf_block(block, block->right);
+       for(user = block->use; user; user = user->next) {
+               if (user->member->ipdom != block) {
+                       ipdomf_block(block, user->member);
+               }
        }
-       for(user = block->idominates; user; user = user->next) {
+       for(user = block->ipdominates; user; user = user->next) {
                struct block_set *frontier;
                child = user->member;
                for(frontier = child->ipdomfrontier; frontier; frontier = frontier->next) {
@@ -9965,17 +11418,58 @@ static void print_dominated(
        fprintf(fp,"\n");
 }
 
+/* Print one line for block and then, indented one level deeper, one
+ * line for every block reachable through block->idominates.
+ *
+ * Each line shows the vertex number, the block pointer, its first and
+ * last triples and the source location(s) taken from the occurance of
+ * the first triple with a non zero line and of the last triple.
+ *
+ * depth is the number of indentation steps to print before the line.
+ */
+static void print_dominated2(
+       struct compile_state *state, FILE *fp, int depth, struct block *block)
+{
+       struct block_set *user;
+       struct triple *ins;
+       struct occurance *ptr, *ptr2;
+       const char *filename1, *filename2;
+       int equal_filenames;
+       int i;
+       for(i = 0; i < depth; i++) {
+               fprintf(fp, "   ");
+       }
+       /* %p requires a void pointer argument */
+       fprintf(fp, "%3d: %p (%p - %p) @",
+               block->vertex, (void *)block,
+               (void *)block->first, (void *)block->last);
+       /* Skip leading triples that carry no line information */
+       ins = block->first;
+       while(ins != block->last && (ins->occurance->line == 0)) {
+               ins = ins->next;
+       }
+       ptr = ins->occurance;
+       ptr2 = block->last->occurance;
+       /* A missing filename is printed as the empty string; handing a
+        * NULL pointer to %s is undefined.
+        */
+       filename1 = ptr->filename? ptr->filename : "";
+       filename2 = ptr2->filename? ptr2->filename : "";
+       equal_filenames = (strcmp(filename1, filename2) == 0);
+       if ((ptr == ptr2) || (equal_filenames && ptr->line == ptr2->line)) {
+               fprintf(fp, " %s:%d", filename1, ptr->line);
+       } else if (equal_filenames) {
+               fprintf(fp, " %s:(%d - %d)",
+                       filename1, ptr->line, ptr2->line);
+       } else {
+               fprintf(fp, " (%s:%d - %s:%d)",
+                       filename1, ptr->line,
+                       filename2, ptr2->line);
+       }
+       fprintf(fp, "\n");
+       for(user = block->idominates; user; user = user->next) {
+               print_dominated2(state, fp, depth + 1, user->member);
+       }
+}
+
+/* Write the dominator information to fp in two forms: first by
+ * handing print_dominated to walk_blocks, then as an indented listing
+ * produced by print_dominated2 starting at state->first_block.
+ */
 static void print_dominators(struct compile_state *state, FILE *fp)
 {
        fprintf(fp, "\ndominates\n");
        walk_blocks(state, print_dominated, fp);
+       fprintf(fp, "dominates\n");
+       print_dominated2(state, fp, 0, state->first_block);
 }
 
 
 static int print_frontiers(
        struct compile_state *state, struct block *block, int vertex)
 {
-       struct block_set *user;
+       struct block_set *user, *edge;
 
        if (!block || (block->vertex != vertex + 1)) {
                return vertex;
@@ -9987,9 +11481,10 @@ static int print_frontiers(
                printf(" %d", user->member->vertex);
        }
        printf("\n");
-
-       vertex = print_frontiers(state, block->left, vertex);
-       vertex = print_frontiers(state, block->right, vertex);
+       
+       for(edge = block->edges; edge; edge = edge->next) {
+               vertex = print_frontiers(state, edge->member, vertex);
+       }
        return vertex;
 }
 static void print_dominance_frontiers(struct compile_state *state)
@@ -10006,7 +11501,7 @@ static void analyze_idominators(struct compile_state *state)
        /* Find the dominance frontiers */
        find_block_domf(state, state->first_block);
        /* If debuging print the print what I have just found */
-       if (state->debug & DEBUG_FDOMINATORS) {
+       if (state->compiler->debug & DEBUG_FDOMINATORS) {
                print_dominators(state, stdout);
                print_dominance_frontiers(state);
                print_control_flow(state);
@@ -10071,7 +11566,7 @@ static void analyze_ipdominators(struct compile_state *state)
        /* Find the control dependencies (post dominance frontiers) */
        find_block_ipdomf(state, state->last_block);
        /* If debuging print the print what I have just found */
-       if (state->debug & DEBUG_RDOMINATORS) {
+       if (state->compiler->debug & DEBUG_RDOMINATORS) {
                print_ipdominators(state, stdout);
                print_ipdominance_frontiers(state);
                print_control_flow(state);
@@ -10108,6 +11603,13 @@ static int tdominates(struct compile_state *state,
        return result;
 }
 
+/* Run the block level analyses in order: build the basic blocks,
+ * then analyze the immediate dominators, then the immediate post
+ * dominators.
+ */
+static void analyze_basic_blocks(struct compile_state *state)
+{
+       setup_basic_blocks(state);
+       analyze_idominators(state);
+       analyze_ipdominators(state);
+}
+
 static void insert_phi_operations(struct compile_state *state)
 {
        size_t size;
@@ -10115,24 +11617,26 @@ static void insert_phi_operations(struct compile_state *state)
        int *has_already, *work;
        struct block *work_list, **work_list_tail;
        int iter;
-       struct triple *var;
+       struct triple *var, *vnext;
 
        size = sizeof(int) * (state->last_vertex + 1);
        has_already = xcmalloc(size, "has_already");
        work =        xcmalloc(size, "work");
        iter = 0;
 
-       first = RHS(state->main_function, 0);
-       for(var = first->next; var != first ; var = var->next) {
+       first = state->first;
+       for(var = first->next; var != first ; var = vnext) {
                struct block *block;
-               struct triple_set *user;
+               struct triple_set *user, *unext;
+               vnext = var->next;
                if ((var->op != OP_ADECL) || !var->use) {
                        continue;
                }
                iter += 1;
                work_list = 0;
                work_list_tail = &work_list;
-               for(user = var->use; user; user = user->next) {
+               for(user = var->use; user; user = unext) {
+                       unext = user->next;
                        if (user->member->op == OP_READ) {
                                continue;
                        }
@@ -10143,6 +11647,8 @@ static void insert_phi_operations(struct compile_state *state)
                        block = user->member->u.block;
                        if (!block) {
                                warning(state, user->member, "dead code");
+                               release_triple(state, user->member);
+                               continue;
                        }
                        if (work[block->vertex] >= iter) {
                                continue;
@@ -10166,10 +11672,10 @@ static void insert_phi_operations(struct compile_state *state)
                                /* Count how many edges flow into this block */
                                in_edges = front->users;
                                /* Insert a phi function for this variable */
-                               get_occurance(front->first->occurance);
+                               get_occurance(var->occurance);
                                phi = alloc_triple(
                                        state, OP_PHI, var->type, -1, in_edges, 
-                                       front->first->occurance);
+                                       var->occurance);
                                phi->u.block = front;
                                MISC(phi, 0) = var;
                                use_triple(var, phi);
@@ -10179,6 +11685,7 @@ static void insert_phi_operations(struct compile_state *state)
                                        front->last = front->first->next;
                                }
                                has_already[front->vertex] = iter;
+                               transform_to_arch_instruction(state, phi);
 
                                /* If necessary plan to visit the basic block */
                                if (work[front->vertex] >= iter) {
@@ -10195,12 +11702,104 @@ static void insert_phi_operations(struct compile_state *state)
        xfree(work);
 }
 
+
+/* Per OP_ADECL bookkeeping, indexed by the number that number_adecls
+ * stores in the adecl's id.
+ */
+struct stack {
+       /* Head of the list of values; push_triple adds here and peek_triple reads here */
+       struct triple_set *top;
+       /* The id the adecl had before number_adecls replaced it */
+       unsigned orig_id;
+};
+
+/* Return how many OP_ADECL triples are on the circular instruction
+ * list that begins at state->first.
+ */
+static int count_adecls(struct compile_state *state)
+{
+       struct triple *head, *cur;
+       int total;
+       total = 0;
+       head = state->first;
+       /* The head is examined too, so test for the wrap after advancing */
+       for(cur = head;;) {
+               if (cur->op == OP_ADECL) {
+                       total++;
+               }
+               cur = cur->next;
+               if (cur == head) {
+                       break;
+               }
+       }
+       return total;
+}
+
+/* Give every OP_ADECL triple a slot in stacks.  The adecl's current id
+ * is saved in the slot's orig_id and replaced by the slot index.
+ * Indexes are handed out starting at 1.
+ */
+static void number_adecls(struct compile_state *state, struct stack *stacks)
+{
+       struct triple *head, *cur;
+       int index;
+       index = 0;
+       head = state->first;
+       for(cur = head;;) {
+               if (cur->op == OP_ADECL) {
+                       struct stack *slot;
+                       index++;
+                       slot = &stacks[index];
+                       slot->orig_id = cur->id;
+                       cur->id = index;
+               }
+               cur = cur->next;
+               if (cur == head) {
+                       break;
+               }
+       }
+}
+
+/* Put back the id of every OP_ADECL triple, reading it from the
+ * orig_id of the slot the adecl's current id selects.
+ */
+static void restore_adecls(struct compile_state *state, struct stack *stacks)
+{
+       struct triple *head, *cur;
+       head = state->first;
+       for(cur = head;;) {
+               if (cur->op == OP_ADECL) {
+                       struct stack *slot;
+                       slot = &stacks[cur->id];
+                       cur->id = slot->orig_id;
+               }
+               cur = cur->next;
+               if (cur == head) {
+                       break;
+               }
+       }
+}
+
+/* Return the value on top of var's stack, or 0 when the stack is
+ * empty.  The stack is left unchanged.
+ */
+static struct triple *peek_triple(struct stack *stacks, struct triple *var)
+{
+       struct triple_set *node;
+       node = stacks[var->id].top;
+       if (!node) {
+               return 0;
+       }
+       return node->member;
+}
+
+/* Push val onto var's stack. */
+static void push_triple(struct stack *stacks, struct triple *var, struct triple *val)
+{
+       struct stack *stack;
+       struct triple_set *node;
+       stack = &stacks[var->id];
+       /* The new entry becomes the head of the list, so the most
+        * recently pushed value is the first one found.
+        */
+       node = xcmalloc(sizeof(*node), "triple_set");
+       node->member = val;
+       node->next   = stack->top;
+       stack->top   = node;
+}
+
+/* Remove oldval from var's stack.  The list is searched from the top
+ * and only the first matching entry is unlinked and freed; nothing
+ * happens when oldval is not on the stack.
+ */
+static void pop_triple(struct stack *stacks, struct triple *var, struct triple *oldval)
+{
+       struct triple_set **link, *node;
+       for(link = &stacks[var->id].top; (node = *link) != 0; link = &node->next) {
+               if (node->member != oldval) {
+                       continue;
+               }
+               *link = node->next;
+               xfree(node);
+               return;
+       }
+}
+
 /*
  * C(V)
  * S(V)
  */
 static void fixup_block_phi_variables(
-       struct compile_state *state, struct block *parent, struct block *block)
+       struct compile_state *state, struct stack *stacks, struct block *parent, struct block *block)
 {
        struct block_set *set;
        struct triple *ptr;
@@ -10225,8 +11824,8 @@ static void fixup_block_phi_variables(
                                internal_error(state, ptr, "no var???");
                        }
                        /* Find the current value of the variable */
-                       val = var->use->member;
-                       if ((val->op == OP_WRITE) || (val->op == OP_READ)) {
+                       val = peek_triple(stacks, var);
+                       if (val && ((val->op == OP_WRITE) || (val->op == OP_READ))) {
                                internal_error(state, val, "bad value in phi");
                        }
                        if (edge >= TRIPLE_RHS(ptr->sizes)) {
@@ -10247,9 +11846,9 @@ static void fixup_block_phi_variables(
 
 
 static void rename_block_variables(
-       struct compile_state *state, struct block *block)
+       struct compile_state *state, struct stack *stacks, struct block *block)
 {
-       struct block_set *user;
+       struct block_set *user, *edge;
        struct triple *ptr, *next, *last;
        int done;
        if (!block)
@@ -10266,11 +11865,11 @@ static void rename_block_variables(
                        struct triple *var, *val;
                        var = RHS(ptr, 0);
                        unuse_triple(var, ptr);
-                       if (!var->use) {
+                       /* Find the current value of the variable */
+                       val = peek_triple(stacks, var);
+                       if (!val) {
                                error(state, ptr, "variable used without being set");
                        }
-                       /* Find the current value of the variable */
-                       val = var->use->member;
                        if ((val->op == OP_WRITE) || (val->op == OP_READ)) {
                                internal_error(state, val, "bad value in read");
                        }
@@ -10280,33 +11879,49 @@ static void rename_block_variables(
                }
                /* LHS(A) */
                if (ptr->op == OP_WRITE) {
-                       struct triple *var, *val;
-                       var = LHS(ptr, 0);
-                       val = RHS(ptr, 0);
+                       struct triple *var, *val, *tval;
+                       var = RHS(ptr, 0);
+                       tval = val = RHS(ptr, 1);
                        if ((val->op == OP_WRITE) || (val->op == OP_READ)) {
-                               internal_error(state, val, "bad value in write");
+                               internal_error(state, ptr, "bad value in write");
                        }
-                       propogate_use(state, ptr, val);
+                       /* Insert a copy if the types differ */
+                       if (!equiv_types(ptr->type, val->type)) {
+                               if (val->op == OP_INTCONST) {
+                                       tval = pre_triple(state, ptr, OP_INTCONST, ptr->type, 0, 0);
+                                       tval->u.cval = val->u.cval;
+                               }
+                               else {
+                                       tval = pre_triple(state, ptr, OP_COPY, ptr->type, val, 0);
+                                       use_triple(val, tval);
+                               }
+                               transform_to_arch_instruction(state, tval);
+                               unuse_triple(val, ptr);
+                               RHS(ptr, 1) = tval;
+                               use_triple(tval, ptr);
+                       }
+                       propogate_use(state, ptr, tval);
                        unuse_triple(var, ptr);
                        /* Push OP_WRITE ptr->right onto a stack of variable uses */
-                       push_triple(var, val);
+                       push_triple(stacks, var, tval);
                }
                if (ptr->op == OP_PHI) {
                        struct triple *var;
                        var = MISC(ptr, 0);
                        /* Push OP_PHI onto a stack of variable uses */
-                       push_triple(var, ptr);
+                       push_triple(stacks, var, ptr);
                }
                last = ptr;
        }
        block->last = last;
 
        /* Fixup PHI functions in the cf successors */
-       fixup_block_phi_variables(state, block, block->left);
-       fixup_block_phi_variables(state, block, block->right);
+       for(edge = block->edges; edge; edge = edge->next) {
+               fixup_block_phi_variables(state, stacks, block, edge->member);
+       }
        /* rename variables in the dominated nodes */
        for(user = block->idominates; user; user = user->next) {
-               rename_block_variables(state, user->member);
+               rename_block_variables(state, stacks, user->member);
        }
        /* pop the renamed variable stack */
        last = block->first;
@@ -10318,9 +11933,9 @@ static void rename_block_variables(
                }
                if (ptr->op == OP_WRITE) {
                        struct triple *var;
-                       var = LHS(ptr, 0);
+                       var = RHS(ptr, 0);
                        /* Pop OP_WRITE ptr->right from the stack of variable uses */
-                       pop_triple(var, RHS(ptr, 0));
+                       pop_triple(stacks, var, RHS(ptr, 1));
                        release_triple(state, ptr);
                        continue;
                }
@@ -10328,13 +11943,33 @@ static void rename_block_variables(
                        struct triple *var;
                        var = MISC(ptr, 0);
                        /* Pop OP_WRITE ptr->right from the stack of variable uses */
-                       pop_triple(var, ptr);
+                       pop_triple(stacks, var, ptr);
                }
                last = ptr;
        }
        block->last = last;
 }
 
+/* Rename the variables of the whole program: give every adecl a
+ * numbered stack, run rename_block_variables from state->first_block,
+ * then restore the adecl ids and free the stacks.
+ */
+static void rename_variables(struct compile_state *state)
+{
+       struct stack *adecl_stacks;
+       int count;
+
+       /* number_adecls hands out indexes starting at 1, hence count + 1 */
+       count = count_adecls(state);
+       adecl_stacks = xcmalloc(sizeof(*adecl_stacks)*(count + 1), "adecl stacks");
+
+       /* Tie each adecl to its stack through the adecl's id */
+       number_adecls(state, adecl_stacks);
+
+       /* Do the renaming proper */
+       rename_block_variables(state, adecl_stacks, state->first_block);
+
+       /* Undo the numbering and release the stacks */
+       restore_adecls(state, adecl_stacks);
+       xfree(adecl_stacks);
+}
+
 static void prune_block_variables(struct compile_state *state,
        struct block *block)
 {
@@ -10374,22 +12009,120 @@ static void prune_block_variables(struct compile_state *state,
        }
 }
 
+/* Per OP_PHI bookkeeping used by prune_unused_phis and keep_phi,
+ * indexed by the number prune_unused_phis stores in the phi's id.
+ */
+struct phi_triple {
+       /* The phi this entry describes */
+       struct triple *phi;
+       /* The id the phi had before prune_unused_phis replaced it */
+       unsigned orig_id;
+       /* Non zero once keep_phi has marked the phi as needed */
+       int alive;
+};
+
+/* Mark phi alive in live[] and, recursively, every OP_PHI found among
+ * its right hand side operands.  A phi that is already marked is left
+ * alone, which also ends the recursion on cycles of phis.
+ */
+static void keep_phi(struct compile_state *state, struct phi_triple *live, struct triple *phi)
+{
+       struct phi_triple *entry;
+       struct triple **operands;
+       int count, idx;
+       entry = &live[phi->id];
+       if (entry->alive) {
+               return;
+       }
+       entry->alive = 1;
+       count = TRIPLE_RHS(phi->sizes);
+       operands = &RHS(phi, 0);
+       for(idx = 0; idx < count; idx++) {
+               struct triple *operand;
+               operand = operands[idx];
+               if (!operand || (operand->op != OP_PHI)) {
+                       continue;
+               }
+               keep_phi(state, live, operand);
+       }
+}
+
+/* Release every OP_PHI that is not needed.
+ *
+ * A phi is needed when one of its users is not itself an OP_PHI, or
+ * when it is an operand of a needed phi (see keep_phi).  For every phi
+ * that is kept, an empty right hand side slot is reported with error().
+ *
+ * While this runs each phi's id holds its index into the live[] array.
+ * NOTE(review): orig_id is recorded but the ids of the surviving phis
+ * are not written back here -- confirm that this is intended.
+ */
+static void prune_unused_phis(struct compile_state *state)
+{
+       struct triple *first, *phi;
+       struct phi_triple *live;
+       int phis, i;
+       
+       /* Find the first instruction */
+       first = state->first;
+
+       /* Count how many phi functions I need to process */
+       phis = 0;
+       for(phi = first->next; phi != first; phi = phi->next) {
+               if (phi->op == OP_PHI) {
+                       phis += 1;
+               }
+       }
+       
+       /* Mark them all dead */
+       live = xcmalloc(sizeof(*live) * (phis + 1), "phi_triple");
+       phis = 0;
+       for(phi = first->next; phi != first; phi = phi->next) {
+               if (phi->op != OP_PHI) {
+                       continue;
+               }
+               live[phis].alive   = 0;
+               live[phis].orig_id = phi->id;
+               live[phis].phi     = phi;
+               /* From here on the id is the index keep_phi uses into live[] */
+               phi->id = phis;
+               phis += 1;
+       }
+       
+       /* Mark phis alive that are used by non phis */
+       for(i = 0; i < phis; i++) {
+               struct triple_set *set;
+               for(set = live[i].phi->use; !live[i].alive && set; set = set->next) {
+                       if (set->member->op != OP_PHI) {
+                               keep_phi(state, live, live[i].phi);
+                               break;
+                       }
+               }
+       }
+
+       /* Delete the extraneous phis */
+       for(i = 0; i < phis; i++) {
+               struct triple **slot;
+               int zrhs, j;
+               if (!live[i].alive) {
+                       release_triple(state, live[i].phi);
+                       continue;
+               }
+               /* A phi that stays must have a value in every slot */
+               phi = live[i].phi;
+               slot = &RHS(phi, 0);
+               zrhs = TRIPLE_RHS(phi->sizes);
+               for(j = 0; j < zrhs; j++) {
+                       if(!slot[j]) {
+                               error(state, phi, "variable not set on all paths to use");
+                       }
+               }
+       }
+       xfree(live);
+}
+
+/* Put the program into SSA form: insert the phi operations, rename
+ * the variables, then run prune_block_variables from
+ * state->first_block followed by prune_unused_phis.  The result is
+ * handed to print_blocks together with this function's name.
+ */
 static void transform_to_ssa_form(struct compile_state *state)
 {
        insert_phi_operations(state);
-#if 0
-       printf("@%s:%d\n", __FILE__, __LINE__);
-       print_blocks(state, stdout);
-#endif
-       rename_block_variables(state, state->first_block);
+       rename_variables(state);
+
        prune_block_variables(state, state->first_block);
+       prune_unused_phis(state);
+
+       print_blocks(state, __func__, stdout);
 }
 
 
 static void clear_vertex(
        struct compile_state *state, struct block *block, void *arg)
 {
+       /* Clear the current block's vertex and the vertex of all
+        * of the current block's neighbors (both the blocks on its
+        * edges and the blocks that use it) in case there are malformed
+        * blocks with no instructions at this point.
+        */
+       struct block_set *user, *edge;
        block->vertex = 0;
+       for(edge = block->edges; edge; edge = edge->next) {
+               edge->member->vertex = 0;
+       }
+       for(user = block->use; user; user = user->next) {
+               user->member->vertex = 0;
+       }
 }
 
 static void mark_live_block(
@@ -10414,7 +12147,7 @@ static void mark_live_block(
                        mark_live_block(state, (*targ)->u.block, next_vertex);
                }
        }
-       else if (block->last->next != RHS(state->main_function, 0)) {
+       else if (block->last->next != state->first) {
                struct triple *ins;
                ins = block->last->next;
                if (!triple_stores_block(state, ins)) {
@@ -10430,7 +12163,7 @@ static void transform_from_ssa_form(struct compile_state *state)
         * edges to blocks containting phi functions.
         */
        struct triple *first;
-       struct triple *phi, *next;
+       struct triple *phi, *var, *next;
        int next_vertex;
 
        /* Walk the control flow to see which blocks remain alive */
@@ -10439,21 +12172,28 @@ static void transform_from_ssa_form(struct compile_state *state)
        mark_live_block(state, state->first_block, &next_vertex);
 
        /* Walk all of the operations to find the phi functions */
-       first = RHS(state->main_function, 0);
+       first = state->first;
        for(phi = first->next; phi != first ; phi = next) {
                struct block_set *set;
                struct block *block;
                struct triple **slot;
-               struct triple *var, *read;
+               struct triple *var;
                struct triple_set *use, *use_next;
                int edge, used;
                next = phi->next;
                if (phi->op != OP_PHI) {
                        continue;
                }
+
                block = phi->u.block;
                slot  = &RHS(phi, 0);
 
+               /* If this phi is in a dead block just forget it */
+               if (block->vertex == 0) {
+                       release_triple(state, phi);
+                       continue;
+               }
+
                /* Forget uses from code in dead blocks */
                for(use = phi->use; use; use = use_next) {
                        struct block *ublock;
@@ -10471,53 +12211,61 @@ static void transform_from_ssa_form(struct compile_state *state)
                        }
                        unuse_triple(phi, use->member);
                }
-
                /* A variable to replace the phi function */
                var = post_triple(state, phi, OP_ADECL, phi->type, 0,0);
-               /* A read of the single value that is set into the variable */
-               read = post_triple(state, var, OP_READ, phi->type, var, 0);
-               use_triple(var, read);
 
-               /* Replaces uses of the phi with variable reads */
-               propogate_use(state, phi, read);
+               /* Replaces use of phi with var */
+               propogate_use(state, phi, var);
 
                /* Walk all of the incoming edges/blocks and insert moves.
                 */
+               used = 0;
                for(edge = 0, set = block->use; set; set = set->next, edge++) {
-                       struct block *eblock;
+                       struct block *eblock, *vblock;
                        struct triple *move;
-                       struct triple *val;
+                       struct triple *val, *base;
                        eblock = set->member;
                        val = slot[edge];
                        slot[edge] = 0;
                        unuse_triple(val, phi);
+                       vblock = block_of_triple(state, val);
+
+                       /* If we don't have a value that belongs in an OP_WRITE
+                        * continue on.
+                        */
+                       if (!val || (val == &zero_triple) || (val == phi) || 
+                               (!vblock) || (vblock->vertex == 0)) {
+                               continue;
+                       }
 
-                       if (!val || (val == &zero_triple) ||
-                               (block->vertex == 0) || (eblock->vertex == 0) ||
-                               (val == phi) || (val == read)) {
+                       /* If the value occurs in a dead block see if a replacement
+                        * block can be found.
+                        */
+                       while(eblock && (eblock->vertex == 0)) {
+                               eblock = eblock->idom;
+                       }
+                       /* If not continue on with the next value. */
+                       if (!eblock || (eblock->vertex == 0)) {
                                continue;
                        }
+
+                       /* If we have an empty incoming block ignore it. */
+                       if (!eblock->first) {
+                               internal_error(state, 0, "empty block?");
+                       }
                        
-                       move = post_triple(state, 
-                               val, OP_WRITE, phi->type, var, val);
+                       /* Make certain the write is placed in the edge block... */
+                       base = eblock->first;
+                       if (block_of_triple(state, val) == eblock) {
+                               base = val;
+                       }
+                       move = post_triple(state, base, OP_WRITE, var->type, var, val);
                        use_triple(val, move);
                        use_triple(var, move);
+                       used = 1;
                }               
-               /* See if there are any writers of var */
-               used = 0;
-               for(use = var->use; use; use = use->next) {
-                       struct triple **expr;
-                       expr = triple_lhs(state, use->member, 0);
-                       for(; expr; expr = triple_lhs(state, use->member, expr)) {
-                               if (*expr == var) {
-                                       used = 1;
-                               }
-                       }
-               }
                /* If var is not used free it */
                if (!used) {
-                       unuse_triple(var, read);
-                       free_triple(state, read);
                        free_triple(state, var);
                }
 
@@ -10525,8 +12273,75 @@ static void transform_from_ssa_form(struct compile_state *state)
                release_triple(state, phi);
        }
        
+       /* Walk all of the operations to find the adecls */
+       for(var = first->next; var != first ; var = var->next) {
+               struct triple_set *use, *use_next;
+               if (var->op != OP_ADECL) {
+                       continue;
+               }
+
+               /* Walk through all of the rhs uses of var and
+                * replace them with read of var.
+                */
+               for(use = var->use; use; use = use_next) {
+                       struct triple *read, *user;
+                       struct triple **slot;
+                       int zrhs, i, used;
+                       use_next = use->next;
+                       user = use->member;
+                       
+                       /* Generate a read of var */
+                       read = pre_triple(state, user, OP_READ, var->type, var, 0);
+                       use_triple(var, read);
+
+                       /* Find the rhs uses and see if they need to be replaced */
+                       used = 0;
+                       zrhs = TRIPLE_RHS(user->sizes);
+                       slot = &RHS(user, 0);
+                       for(i = 0; i < zrhs; i++) {
+                               if ((slot[i] == var) &&
+                                       ((i != 0) || (user->op != OP_WRITE))) 
+                               {
+                                       slot[i] = read;
+                                       used = 1;
+                               }
+                       }
+                       /* If we did use it cleanup the uses */
+                       if (used) {
+                               unuse_triple(var, user);
+                               use_triple(read, user);
+                       } 
+                       /* If we didn't use it release the extra triple */
+                       else {
+                               release_triple(state, read);
+                       }
+               }
+       }
 }
 
+/* Debug checkpoint used only by rebuild_ssa_form(): when the
+ * DEBUG_REBUILD_SSA_FORM bit is set in state->compiler->debug, print the
+ * source location of the checkpoint and dump the blocks to stderr.
+ * The do { } while (0) wrapper makes "HI();" a single statement, so the
+ * macro can not capture an else that happens to follow it.
+ */
+#define HI() do { \
+       if (state->compiler->debug & DEBUG_REBUILD_SSA_FORM) { \
+               fprintf(stderr, "@ %s:%d\n", __FILE__, __LINE__); \
+               romcc_print_blocks(state, stderr); \
+       } \
+} while (0)
+
+/* Rebuild ssa form from scratch.
+ *
+ * The sequence is: transform_from_ssa_form(), free and re-analyze the
+ * basic blocks, insert_phi_operations(), rename_variables(), and finally
+ * the two prune passes (prune_block_variables() starting at
+ * state->first_block, then prune_unused_phis()).
+ * A debug checkpoint is taken before the first step and after each
+ * following stage.
+ */
+static void rebuild_ssa_form(struct compile_state *state)
+{
+       HI();
+       transform_from_ssa_form(state);
+       HI();
+       free_basic_blocks(state);
+       analyze_basic_blocks(state);
+       HI();
+       insert_phi_operations(state);
+       HI();
+       rename_variables(state);
+       HI();
+
+       prune_block_variables(state, state->first_block);
+       HI();
+       prune_unused_phis(state);
+       HI();
+}
+#undef HI
 
 /* 
  * Register conflict resolution
@@ -10619,7 +12434,7 @@ static struct reg_info find_lhs_post_color(
        struct triple_set *set;
        struct reg_info info;
        struct triple *lhs;
-#if 0
+#if DEBUG_TRIPLE_COLOR
        fprintf(stderr, "find_lhs_post_color(%p, %d)\n",
                ins, index);
 #endif
@@ -10663,7 +12478,7 @@ static struct reg_info find_lhs_post_color(
                        info.regcm &= rinfo.regcm;
                }
        }
-#if 0
+#if DEBUG_TRIPLE_COLOR
        fprintf(stderr, "find_lhs_post_color(%p, %d) -> ( %d, %x)\n",
                ins, index, info.reg, info.regcm);
 #endif
@@ -10675,7 +12490,7 @@ static struct reg_info find_rhs_post_color(
 {
        struct reg_info info, rinfo;
        int zlhs, i;
-#if 0
+#if DEBUG_TRIPLE_COLOR
        fprintf(stderr, "find_rhs_post_color(%p, %d)\n",
                ins, index);
 #endif
@@ -10698,7 +12513,7 @@ static struct reg_info find_rhs_post_color(
                        if (tinfo.reg >= MAX_REGISTERS) {
                                tinfo.reg = REG_UNSET;
                        }
-                       info.regcm &= linfo.reg;
+                       info.regcm &= linfo.regcm;
                        info.regcm &= tinfo.regcm;
                        if (info.reg != REG_UNSET) {
                                internal_error(state, ins, "register conflict");
@@ -10709,7 +12524,7 @@ static struct reg_info find_rhs_post_color(
                        info.reg = tinfo.reg;
                }
        }
-#if 0
+#if DEBUG_TRIPLE_COLOR
        fprintf(stderr, "find_rhs_post_color(%p, %d) -> ( %d, %x)\n",
                ins, index, info.reg, info.regcm);
 #endif
@@ -10720,7 +12535,7 @@ static struct reg_info find_lhs_color(
        struct compile_state *state, struct triple *ins, int index)
 {
        struct reg_info pre, post, info;
-#if 0
+#if DEBUG_TRIPLE_COLOR
        fprintf(stderr, "find_lhs_color(%p, %d)\n",
                ins, index);
 #endif
@@ -10736,9 +12551,10 @@ static struct reg_info find_lhs_color(
        if (info.reg == REG_UNSET) {
                info.reg = post.reg;
        }
-#if 0
-       fprintf(stderr, "find_lhs_color(%p, %d) -> ( %d, %x)\n",
-               ins, index, info.reg, info.regcm);
+#if DEBUG_TRIPLE_COLOR
+       fprintf(stderr, "find_lhs_color(%p, %d) -> ( %d, %x) ... (%d, %x) (%d, %x)\n",
+               ins, index, info.reg, info.regcm,
+               pre.reg, pre.regcm, post.reg, post.regcm);
 #endif
        return info;
 }
@@ -10773,22 +12589,38 @@ static struct triple *post_copy(struct compile_state *state, struct triple *ins)
        return out;
 }
 
-static struct triple *pre_copy(
-       struct compile_state *state, struct triple *ins, int index)
+/* Insert an OP_COPY of the given type in front of ins and make it the
+ * value of rhs operand index of ins.
+ *
+ * state: the compiler state handed on to every helper
+ * type:  the type given to the new copy
+ * ins:   the instruction whose operand is replaced; must not be a phi
+ * index: which rhs operand of ins to replace
+ *
+ * Returns the new copy, after it has been passed through
+ * transform_to_arch_instruction().  Reports an internal_error if ins is
+ * an OP_PHI, or if the register classes for type share nothing with the
+ * register classes arch_reg_rhs() reports for that operand.
+ */
+static struct triple *typed_pre_copy(
+       struct compile_state *state, struct type *type, struct triple *ins, int index)
 {
        /* Carefully insert enough operations so that I can
         * enter any operation with a GPR32.
         */
        struct triple *in;
        struct triple **expr;
+       unsigned classes;
+       struct reg_info info;
+       if (ins->op == OP_PHI) {
+               internal_error(state, ins, "pre_copy on a phi?");
+       }
+       classes = arch_type_to_regcm(state, type);
+       info = arch_reg_rhs(state, ins, index);
        expr = &RHS(ins, index);
-       in = pre_triple(state, ins, OP_COPY, (*expr)->type, *expr, 0);
+       /* A copy of this type is only useful if at least one of its
+        * register classes is acceptable to the operand it feeds.
+        */
+       if ((info.regcm & classes) == 0) {
+               internal_error(state, ins, "pre_copy with no register classes");
+       }
+       in = pre_triple(state, ins, OP_COPY, type, *expr, 0);
+       /* Move the use of the old value from ins to the copy,
+        * and make ins a user of the copy instead.
+        */
        unuse_triple(*expr, ins);
        *expr = in;
        use_triple(RHS(in, 0), in);
        use_triple(in, ins);
        transform_to_arch_instruction(state, in);
        return in;
+       
+}
+/* Insert a copy in front of ins for rhs operand index, giving the copy
+ * the type that operand already has.  Returns the new copy.
+ */
+static struct triple *pre_copy(
+       struct compile_state *state, struct triple *ins, int index)
+{
+       struct type *operand_type;
+       operand_type = RHS(ins, index)->type;
+       return typed_pre_copy(state, operand_type, ins, index);
 }
 
 
@@ -10801,11 +12633,11 @@ static void insert_copies_to_phi(struct compile_state *state)
        struct triple *phi;
 
        /* Walk all of the operations to find the phi functions */
-       first = RHS(state->main_function, 0);
+       first = state->first;
        for(phi = first->next; phi != first ; phi = phi->next) {
                struct block_set *set;
                struct block *block;
-               struct triple **slot;
+               struct triple **slot, *copy;
                int edge;
                if (phi->op != OP_PHI) {
                        continue;
@@ -10813,6 +12645,13 @@ static void insert_copies_to_phi(struct compile_state *state)
                phi->id |= TRIPLE_FLAG_POST_SPLIT;
                block = phi->u.block;
                slot  = &RHS(phi, 0);
+               /* Phi's that feed into mandatory live range joins
+                * cause nasty complications.  Insert a copy of
+                * the phi value so I never have to deal with
+                * that in the rest of the code.
+                */
+               copy = post_copy(state, phi);
+               copy->id |= TRIPLE_FLAG_PRE_SPLIT;
                /* Walk all of the incoming edges/blocks and insert moves.
                 */
                for(edge = 0, set = block->use; set; set = set->next, edge++) {
@@ -10838,6 +12677,15 @@ static void insert_copies_to_phi(struct compile_state *state)
                        unuse_triple(val, phi);
                        use_triple(move, phi);
 
+                       /* Walk up the dominator tree until I have found the appropriate block */
+                       while(eblock && !tdominates(state, val, eblock->last)) {
+                               eblock = eblock->idom;
+                       }
+                       if (!eblock) {
+                               internal_error(state, phi, "Cannot find block dominated by %p",
+                                       val);
+                       }
+
                        /* Walk through the block backwards to find
                         * an appropriate location for the OP_COPY.
                         */
@@ -10865,6 +12713,7 @@ static void insert_copies_to_phi(struct compile_state *state)
                        transform_to_arch_instruction(state, move);
                }
        }
+       print_blocks(state, __func__, stdout);
 }
 
 struct triple_reg_set {
@@ -11102,16 +12951,12 @@ static struct reg_block *compute_variable_lifetimes(
                int i;
                change = 0;
                for(i = 1; i <= state->last_vertex; i++) {
+                       struct block_set *edge;
                        struct reg_block *rb;
                        rb = &blocks[i];
-                       /* Add the left successor's input set to in */
-                       if (rb->block->left) {
-                               change |= reg_in(state, blocks, rb, rb->block->left);
-                       }
-                       /* Add the right successor's input set to in */
-                       if ((rb->block->right) && 
-                               (rb->block->right != rb->block->left)) {
-                               change |= reg_in(state, blocks, rb, rb->block->right);
+                       /* Add every successor's input set to in */
+                       for(edge = rb->block->edges; edge; edge = edge->next) {
+                               change |= reg_in(state, blocks, rb, edge->member);
                        }
                        /* Add use to in... */
                        change |= use_in(state, rb);
@@ -11221,7 +13066,7 @@ static int count_triples(struct compile_state *state)
 {
        struct triple *first, *ins;
        int triples = 0;
-       first = RHS(state->main_function, 0);
+       first = state->first;
        ins = first;
        do {
                triples++;
@@ -11229,11 +13074,13 @@ static int count_triples(struct compile_state *state)
        } while (ins != first);
        return triples;
 }
+
+
+/* Bookkeeping kept for each triple by eliminate_inefectual_code.
+ * While that pass runs, triple->id is overwritten with the index of
+ * the triple's entry in the dead_triple array.
+ */
 struct dead_triple {
+       /* The instruction this entry describes */
        struct triple *triple;
+       /* Link used to chain entries together -- presumably the work list
+        * of awakened triples; confirm against awaken().
+        */
        struct dead_triple *work_next;
+       /* The result of block_of_triple() for this instruction */
        struct block *block;
-       int color;
+       /* The value of triple->id on entry, restored when the pass is done */
+       int old_id;
+       /* TRIPLE_FLAG_ALIVE is set on entries that must be kept;
+        * entries without it are released at the end of the pass.
+        */
        int flags;
 #define TRIPLE_FLAG_ALIVE 1
 };
@@ -11258,7 +13105,7 @@ static void awaken(
                        triple->id);
        }
        if (triple->op == OP_NOOP) {
-               internal_warning(state, triple, "awakening noop?");
+               internal_error(state, triple, "awakening noop?");
                return;
        }
        dt = &dtriple[triple->id];
@@ -11276,13 +13123,18 @@ static void eliminate_inefectual_code(struct compile_state *state)
        struct block *block;
        struct dead_triple *dtriple, *work_list, **work_list_tail, *dt;
        int triples, i;
-       struct triple *first, *ins;
+       struct triple *first, *final, *ins;
+
+       if (!(state->compiler->flags & COMPILER_ELIMINATE_INEFECTUAL_CODE)) {
+               return;
+       }
 
        /* Setup the work list */
        work_list = 0;
        work_list_tail = &work_list;
 
-       first = RHS(state->main_function, 0);
+       first = state->first;
+       final = state->first->prev;
 
        /* Count how many triples I have */
        triples = count_triples(state);
@@ -11294,23 +13146,20 @@ static void eliminate_inefectual_code(struct compile_state *state)
        i = 1;
        block = 0;
        do {
-               if (ins->op == OP_LABEL) {
-                       block = ins->u.block;
-               }
                dtriple[i].triple = ins;
-               dtriple[i].block  = block;
+               dtriple[i].block  = block_of_triple(state, ins);
                dtriple[i].flags  = 0;
-               dtriple[i].color  = ins->id;
+               dtriple[i].old_id = ins->id;
                ins->id = i;
                /* See if it is an operation we always keep */
-#warning "FIXME handle the case of killing a branch instruction"
-               if (!triple_is_pure(state, ins) || triple_is_branch(state, ins)) {
+               if (!triple_is_pure(state, ins, dtriple[i].old_id)) {
                        awaken(state, dtriple, &ins, &work_list_tail);
                }
                i++;
                ins = ins->next;
        } while(ins != first);
        while(work_list) {
+               struct block *block;
                struct dead_triple *dt;
                struct block_set *user;
                struct triple **expr;
@@ -11319,6 +13168,13 @@ static void eliminate_inefectual_code(struct compile_state *state)
                if (!work_list) {
                        work_list_tail = &work_list;
                }
+               /* Make certain the block the current instruction is in lives */
+               block = block_of_triple(state, dt->triple);
+               awaken(state, dtriple, &block->first, &work_list_tail);
+               if (triple_is_branch(state, block->last)) {
+                       awaken(state, dtriple, &block->last, &work_list_tail);
+               }
+
                /* Wake up the data depencencies of this triple */
                expr = 0;
                do {
@@ -11340,7 +13196,13 @@ static void eliminate_inefectual_code(struct compile_state *state)
                } while(expr);
                /* Wake up the reverse control dependencies of this triple */
                for(user = dt->block->ipdomfrontier; user; user = user->next) {
-                       awaken(state, dtriple, &user->member->last, &work_list_tail);
+                       struct triple *last;
+                       last = user->member->last;
+                       while((last->op == OP_NOOP) && (last != user->member->first)) {
+                               internal_warning(state, last, "awakening noop?");
+                               last = last->prev;
+                       }
+                       awaken(state, dtriple, &last, &work_list_tail);
                }
        }
        for(dt = &dtriple[1]; dt <= &dtriple[triples]; dt++) {
@@ -11348,19 +13210,16 @@ static void eliminate_inefectual_code(struct compile_state *state)
                        (dt->flags & TRIPLE_FLAG_ALIVE)) {
                        internal_error(state, dt->triple, "noop effective?");
                }
-               dt->triple->id = dt->color;     /* Restore the color */
+               dt->triple->id = dt->old_id;    /* Restore the color */
                if (!(dt->flags & TRIPLE_FLAG_ALIVE)) {
-#warning "FIXME handle the case of killing a basic block"
-                       if (dt->block->first == dt->triple) {
-                               continue;
-                       }
-                       if (dt->block->last == dt->triple) {
-                               dt->block->last = dt->triple->prev;
-                       }
                        release_triple(state, dt->triple);
                }
        }
        xfree(dtriple);
+
+       rebuild_ssa_form(state);
+
+       print_blocks(state, __func__, stdout);
 }
 
 
@@ -11375,7 +13234,7 @@ static void insert_mandatory_copies(struct compile_state *state)
         * are inserting copies before instructions but that
         * case should be rare.
         */
-       first = RHS(state->main_function, 0);
+       first = state->first;
        ins = first;
        do {
                struct triple_set *entry, *next;
@@ -11425,6 +13284,14 @@ static void insert_mandatory_copies(struct compile_state *state)
                        if (regcm == 0) {
                                do_pre_copy = 1;
                        }
+                       /* Always use pre_copies for constants.
+                        * They do not take up any registers until a
+                        * copy places them in one.
+                        */
+                       if ((info.reg == REG_UNNEEDED) && 
+                               (rinfo.reg != REG_UNNEEDED)) {
+                               do_pre_copy = 1;
+                       }
                }
                do_post_copy =
                        !do_pre_copy &&
@@ -11435,7 +13302,7 @@ static void insert_mandatory_copies(struct compile_state *state)
 
                reg = info.reg;
                regcm = info.regcm;
-               /* Walk through the uses of insert and do a pre_copy or see if a post_copy is warranted */
+               /* Walk through the uses of ins and do a pre_copy or see if a post_copy is warranted */
                for(entry = ins->use; entry; entry = next) {
                        struct reg_info rinfo;
                        int i;
@@ -11508,6 +13375,8 @@ static void insert_mandatory_copies(struct compile_state *state)
        next:
                ins = ins->next;
        } while(ins != first);
+
+       print_blocks(state, __func__, stdout);
 }
 
 
@@ -11559,10 +13428,145 @@ struct reg_state {
        unsigned defs;
        unsigned ranges;
        int passes, max_passes;
-#define MAX_ALLOCATION_PASSES 100
 };
 
 
+
+/* Arguments handed to print_interference_block through the void *arg
+ * parameter of the walk_blocks callback.
+ */
+struct print_interference_block_info {
+       /* Register allocator state whose blocks and lrd arrays are printed */
+       struct reg_state *rstate;
+       /* Stream all of the output is written to */
+       FILE *fp;
+       /* When nonzero also print the defs of each live range and its edges */
+       int need_edges;
+};
+/* Print one basic block together with its register allocation state.
+ *
+ * This is a walk_blocks() callback; arg must point to a
+ * struct print_interference_block_info.  Written to info->fp are:
+ *  - the block address and vertex number, then for every outgoing edge
+ *    the successor and the first block on the successor's use list,
+ *  - the members of rb->in ("in:"),
+ *  - when the block holds a phi, one "in(N):" row per block user listing
+ *    operand N of every phi in the block,
+ *  - each instruction, displayed with its original id and the color of
+ *    its live range; when info->need_edges is set this is followed by
+ *    the defs of the live range and the defs of every range it has an
+ *    edge to,
+ *  - the members of rb->out ("out:").
+ *
+ * Each instruction is also sanity checked: valid_ins() is called, and an
+ * internal_error is raised for a definition whose live range has no
+ * defs or for an id outside of 0..rstate->defs.
+ */
+static void print_interference_block(
+       struct compile_state *state, struct block *block, void *arg)
+{
+       struct print_interference_block_info *info = arg;
+       struct reg_state *rstate = info->rstate;
+       struct block_set *edge;
+       FILE *fp = info->fp;
+       struct reg_block *rb;
+       struct triple *ptr;
+       int phi_present;
+       int done;
+       rb = &rstate->blocks[block->vertex];
+
+       fprintf(fp, "\nblock: %p (%d),",
+               block,  block->vertex);
+       for(edge = block->edges; edge; edge = edge->next) {
+               fprintf(fp, " %p<-%p",
+                       edge->member,
+                       edge->member && edge->member->use?edge->member->use->member : 0);
+       }
+       fprintf(fp, "\n");
+       if (rb->in) {
+               struct triple_reg_set *in_set;
+               fprintf(fp, "        in:");
+               for(in_set = rb->in; in_set; in_set = in_set->next) {
+                       fprintf(fp, " %-10p", in_set->member);
+               }
+               fprintf(fp, "\n");
+       }
+       /* See if there is any phi in the block at all */
+       phi_present = 0;
+       for(done = 0, ptr = block->first; !done; ptr = ptr->next) {
+               done = (ptr == block->last);
+               if (ptr->op == OP_PHI) {
+                       phi_present = 1;
+                       break;
+               }
+       }
+       if (phi_present) {
+               /* One row per incoming edge, one column per phi */
+               int slot_index;
+               for(slot_index = 0; slot_index < block->users; slot_index++) {
+                       fprintf(fp, "     in(%d):", slot_index);
+                       for(done = 0, ptr = block->first; !done; ptr = ptr->next) {
+                               struct triple **slot;
+                               done = (ptr == block->last);
+                               if (ptr->op != OP_PHI) {
+                                       continue;
+                               }
+                               slot = &RHS(ptr, 0);
+                               fprintf(fp, " %-10p", slot[slot_index]);
+                       }
+                       fprintf(fp, "\n");
+               }
+       }
+       if (block->first->op == OP_LABEL) {
+               fprintf(fp, "%p:\n", block->first);
+       }
+       for(done = 0, ptr = block->first; !done; ptr = ptr->next) {
+               struct live_range *lr;
+               unsigned id;
+               done = (ptr == block->last);
+               lr = rstate->lrd[ptr->id].lr;
+
+               /* Display the triple with its original id and the color of
+                * its live range, then put the allocator's id back.
+                */
+               id = ptr->id;
+               ptr->id = rstate->lrd[id].orig_id;
+               SET_REG(ptr->id, lr->color);
+               display_triple(fp, ptr);
+               ptr->id = id;
+
+               if (triple_is_def(state, ptr) && (lr->defs == 0)) {
+                       internal_error(state, ptr, "lr has no defs!");
+               }
+               if (info->need_edges) {
+                       if (lr->defs) {
+                               /* The defs form a circular list */
+                               struct live_range_def *lrd;
+                               fprintf(fp, "       range:");
+                               lrd = lr->defs;
+                               do {
+                                       fprintf(fp, " %-10p", lrd->def);
+                                       lrd = lrd->next;
+                               } while(lrd != lr->defs);
+                               fprintf(fp, "\n");
+                       }
+                       /* lr->edges is a list head, test it as a pointer */
+                       if (lr->edges) {
+                               struct live_range_edge *lr_edge;
+                               fprintf(fp, "       edges:");
+                               for(lr_edge = lr->edges; lr_edge; lr_edge = lr_edge->next) {
+                                       struct live_range_def *lrd;
+                                       lrd = lr_edge->node->defs;
+                                       do {
+                                               fprintf(fp, " %-10p", lrd->def);
+                                               lrd = lrd->next;
+                                       } while(lrd != lr_edge->node->defs);
+                                       fprintf(fp, "|");
+                               }
+                               fprintf(fp, "\n");
+                       }
+               }
+               /* Do a bunch of sanity checks */
+               valid_ins(state, ptr);
+               if ((ptr->id < 0) || (ptr->id > rstate->defs)) {
+                       internal_error(state, ptr, "Invalid triple id: %d",
+                               ptr->id);
+               }
+       }
+       if (rb->out) {
+               struct triple_reg_set *out_set;
+               fprintf(fp, "       out:");
+               for(out_set = rb->out; out_set; out_set = out_set->next) {
+                       fprintf(fp, " %-10p", out_set->member);
+               }
+               fprintf(fp, "\n");
+       }
+       fprintf(fp, "\n");
+}
+
+/* Write a "live variables by block" heading to fp and then let
+ * walk_blocks() call print_interference_block for the blocks of the
+ * program.  need_edges is passed along unchanged and selects whether
+ * live range members and edges are printed as well.
+ */
+static void print_interference_blocks(
+       struct compile_state *state, struct reg_state *rstate, FILE *fp, int need_edges)
+{
+       struct print_interference_block_info walk_args;
+
+       fprintf(fp, "\nlive variables by block\n");
+
+       /* Bundle up what the per block callback needs */
+       walk_args.need_edges = need_edges;
+       walk_args.fp = fp;
+       walk_args.rstate = rstate;
+       walk_blocks(state, print_interference_block, &walk_args);
+}
+
 static unsigned regc_max_size(struct compile_state *state, int classes)
 {
        unsigned max_size;
@@ -11772,6 +13776,19 @@ static void remove_live_edges(struct reg_state *rstate, struct live_range *range
        }
 }
 
+/* Move the interference edges of src over to dest: for every range
+ * that src has an edge to, the edge between src and that range is
+ * removed and an edge between dest and that range is added.
+ */
+static void transfer_live_edges(struct reg_state *rstate, 
+       struct live_range *dest, struct live_range *src)
+{
+       struct live_range_edge *cur;
+       cur = src->edges;
+       while(cur) {
+               /* Both fields are fetched up front, before the edge
+                * is removed from src.
+                */
+               struct live_range_edge *following = cur->next;
+               struct live_range *neighbor = cur->node;
+               remove_live_edge(rstate, src, neighbor);
+               add_live_edge(rstate, dest, neighbor);
+               cur = following;
+       }
+}
+
 
 /* Interference graph...
  * 
@@ -11860,6 +13877,21 @@ static struct live_range *coalesce_ranges(
                internal_error(state, lr1->defs->def,
                        "cannot coalesce live ranges with dissimilar register classes");
        }
+       if (state->compiler->debug & DEBUG_COALESCING) {
+               fprintf(stderr, "coalescing:");
+               lrd = lr1->defs;
+               do {
+                       fprintf(stderr, " %p", lrd->def);
+                       lrd = lrd->next;
+               } while(lrd != lr1->defs);
+               fprintf(stderr, " |");
+               lrd = lr2->defs;
+               do {
+                       fprintf(stderr, " %p", lrd->def);
+                       lrd = lrd->next;
+               } while(lrd != lr2->defs);
+               fprintf(stderr, "\n");
+       }
        /* If there is a clear dominate live range put it in lr1,
         * For purposes of this test phi functions are
         * considered dominated by the definitions that feed into
@@ -11887,7 +13919,7 @@ static struct live_range *coalesce_ranges(
                fprintf(stderr, "lr2 pre\n");
        }
 #endif
-#if 1
+#if 0
        fprintf(stderr, "coalesce color1(%p): %3d color2(%p) %3d\n",
                lr1->defs->def,
                lr1->color,
@@ -11895,9 +13927,14 @@ static struct live_range *coalesce_ranges(
                lr2->color);
 #endif
        
-       lr1->classes = classes;
        /* Append lr2 onto lr1 */
 #warning "FIXME should this be a merge instead of a splice?"
+       /* This FIXME item applies to the correctness of live_range_end
+        * and to the necessity of making multiple passes of coalesce_live_ranges.
+        * A failure to find some coalesce opportunities in coalesce_live_ranges
+        * does not impact the correctness of the compiler, just the efficiency
+        * with which registers are allocated.
+        */
        head = lr1->defs;
        mid1 = lr1->defs->prev;
        mid2 = lr2->defs;
@@ -11928,6 +13965,9 @@ static struct live_range *coalesce_ranges(
        lr1->color   = color;
        lr1->classes = classes;
 
+       /* Keep the graph in sync by transfering the edges from lr2 to lr1 */
+       transfer_live_edges(rstate, lr1, lr2);
+
        return lr1;
 }
 
@@ -11969,14 +14009,14 @@ static void initialize_live_ranges(
        size_t count, size;
        int i, j;
 
-       first = RHS(state->main_function, 0);
+       first = state->first;
        /* First count how many instructions I have.
         */
        count = count_triples(state);
        /* Potentially I need one live range definitions for each
-        * instruction, plus an extra for the split routines.
+        * instruction.
         */
-       rstate->defs = count + 1;
+       rstate->defs = count;
        /* Potentially I need one live range for each instruction
         * plus an extra for the dummy live range.
         */
@@ -11998,7 +14038,6 @@ static void initialize_live_ranges(
                        struct reg_info info;
                        /* Find the architecture specific color information */
                        info = find_def_color(state, ins);
-
                        i++;
                        rstate->lr[i].defs    = &rstate->lrd[j];
                        rstate->lr[i].color   = info.reg;
@@ -12022,7 +14061,6 @@ static void initialize_live_ranges(
                ins = ins->next;
        } while(ins != first);
        rstate->ranges = i;
-       rstate->defs -= 1;
 
        /* Make a second pass to handle achitecture specific register
         * constraints.
@@ -12040,7 +14078,12 @@ static void initialize_live_ranges(
                        zlhs = 1;
                }
                zrhs = TRIPLE_RHS(ins->sizes);
-               
+
+               if (state->compiler->debug & DEBUG_COALESCING2) {
+                       fprintf(stderr, "mandatory coalesce: %p %d %d\n",
+                               ins, zlhs, zrhs);
+               }
+
                for(i = 0; i < zlhs; i++) {
                        struct reg_info linfo;
                        struct live_range_def *lhs;
@@ -12053,6 +14096,12 @@ static void initialize_live_ranges(
                        } else {
                                lhs = &rstate->lrd[LHS(ins, i)->id];
                        }
+
+                       if (state->compiler->debug & DEBUG_COALESCING2) {
+                               fprintf(stderr, "coalesce lhs(%d): %p %d\n",
+                                       i, lhs, linfo.reg);
+                       }
+
                        for(j = 0; j < zrhs; j++) {
                                struct reg_info rinfo;
                                struct live_range_def *rhs;
@@ -12060,7 +14109,13 @@ static void initialize_live_ranges(
                                if (rinfo.reg < MAX_REGISTERS) {
                                        continue;
                                }
-                               rhs = &rstate->lrd[RHS(ins, i)->id];
+                               rhs = &rstate->lrd[RHS(ins, j)->id];
+
+                               if (state->compiler->debug & DEBUG_COALESCING2) {
+                                       fprintf(stderr, "coalesce rhs(%d): %p %d\n",
+                                               j, rhs, rinfo.reg);
+                               }
+
                                if (rinfo.reg == linfo.reg) {
                                        coalesce_ranges(state, rstate, 
                                                lhs->lr, rhs->lr);
@@ -12352,6 +14407,7 @@ static void fix_coalesce_conflicts(struct compile_state *state,
        struct reg_block *blocks, struct triple_reg_set *live,
        struct reg_block *rb, struct triple *ins, void *arg)
 {
+       int *conflicts = arg;
        int zlhs, zrhs, i, j;
 
        /* See if we have a mandatory coalesce operation between
@@ -12391,12 +14447,22 @@ static void fix_coalesce_conflicts(struct compile_state *state,
                                struct triple *copy;
                                copy = pre_copy(state, ins, j);
                                copy->id |= TRIPLE_FLAG_PRE_SPLIT;
+                               (*conflicts)++;
                        }
                }
        }
        return;
 }
 
+/* Hand fix_coalesce_conflicts to walk_variable_lifetimes and return the
+ * count that callback accumulated through its arg pointer (it increments
+ * the counter each time it inserts a pre_copy).
+ */
+static int correct_coalesce_conflicts(
+       struct compile_state *state, struct reg_block *blocks)
+{
+       int conflicts = 0;
+
+       walk_variable_lifetimes(state, blocks, fix_coalesce_conflicts, &conflicts);
+       return conflicts;
+}
+
 static void replace_set_use(struct compile_state *state,
        struct triple_reg_set *head, struct triple *orig, struct triple *new)
 {
@@ -12424,7 +14490,7 @@ static void replace_block_use(struct compile_state *state,
 static void color_instructions(struct compile_state *state)
 {
        struct triple *ins, *first;
-       first = RHS(state->main_function, 0);
+       first = state->first;
        ins = first;
        do {
                if (triple_is_def(state, ins)) {
@@ -12501,448 +14567,240 @@ static struct triple *resolve_tangle(
 }
 
 
-static void fix_tangles(struct compile_state *state,
-       struct reg_block *blocks, struct triple_reg_set *live,
-       struct reg_block *rb, struct triple *ins, void *arg)
-{
-       struct triple *tangle;
-       do {
-               char used[MAX_REGISTERS];
-               struct triple_reg_set *set;
-               tangle = 0;
-
-               /* Find out which registers have multiple uses at this point */
-               memset(used, 0, sizeof(used));
-               for(set = live; set; set = set->next) {
-                       struct reg_info info;
-                       info = read_lhs_color(state, set->member, 0);
-                       if (info.reg == REG_UNSET) {
-                               continue;
-                       }
-                       reg_inc_used(state, used, info.reg);
-               }
-               
-               /* Now find the least dominated definition of a register in
-                * conflict I have seen so far.
-                */
-               for(set = live; set; set = set->next) {
-                       struct reg_info info;
-                       info = read_lhs_color(state, set->member, 0);
-                       if (used[info.reg] < 2) {
-                               continue;
-                       }
-                       if (!tangle || tdominates(state, set->member, tangle)) {
-                               tangle = set->member;
-                       }
-               }
-               /* If I have found a tangle resolve it */
-               if (tangle) {
-                       struct triple *post_copy;
-                       post_copy = resolve_tangle(state, tangle);
-                       if (post_copy) {
-                               replace_block_use(state, blocks, tangle, post_copy);
-                       }
-                       if (post_copy && (tangle != ins)) {
-                               replace_set_use(state, live, tangle, post_copy);
-                       }
-               }
-       } while(tangle);
-       return;
-}
-
-static void correct_tangles(
-       struct compile_state *state, struct reg_block *blocks)
-{
-       color_instructions(state);
-       walk_variable_lifetimes(state, blocks, fix_tangles, 0);
-}
-
-struct least_conflict {
-       struct reg_state *rstate;
-       struct live_range *ref_range;
-       struct triple *ins;
-       struct triple_reg_set *live;
-       size_t count;
-       int constraints;
-};
-static void least_conflict(struct compile_state *state,
-       struct reg_block *blocks, struct triple_reg_set *live,
-       struct reg_block *rb, struct triple *ins, void *arg)
-{
-       struct least_conflict *conflict = arg;
-       struct live_range_edge *edge;
-       struct triple_reg_set *set;
-       size_t count;
-       int constraints;
-
-#warning "FIXME handle instructions with left hand sides..."
-       /* Only instructions that introduce a new definition
-        * can be the conflict instruction.
-        */
-       if (!triple_is_def(state, ins)) {
-               return;
-       }
-
-       /* See if live ranges at this instruction are a
-        * strict subset of the live ranges that are in conflict.
-        */
-       count = 0;
-       for(set = live; set; set = set->next) {
-               struct live_range *lr;
-               lr = conflict->rstate->lrd[set->member->id].lr;
-               /* Ignore it if there cannot be an edge between these two nodes */
-               if (!arch_regcm_intersect(conflict->ref_range->classes, lr->classes)) {
-                       continue;
-               }
-               for(edge = conflict->ref_range->edges; edge; edge = edge->next) {
-                       if (edge->node == lr) {
-                               break;
-                       }
-               }
-               if (!edge && (lr != conflict->ref_range)) {
-                       return;
-               }
-               count++;
-       }
-       if (count <= 1) {
-               return;
-       }
-
-#if 0
-       /* See if there is an uncolored member in this subset. 
-        */
-        for(set = live; set; set = set->next) {
-               struct live_range *lr;
-               lr = conflict->rstate->lrd[set->member->id].lr;
-               if (lr->color == REG_UNSET) {
-                       break;
-               }
-       }
-       if (!set && (conflict->ref_range != REG_UNSET)) {
-               return;
-       }
-#endif
-
-       /* See if any of the live registers are constrained,
-        * if not it won't be productive to pick this as
-        * a conflict instruction.
-        */
-       constraints = 0;
-       for(set = live; set; set = set->next) {
-               struct triple_set *uset;
-               struct reg_info info;
-               unsigned classes;
-               unsigned cur_size, size;
-               /* Skip this instruction */
-               if (set->member == ins) {
-                       continue;
-               }
-               /* Find how many registers this value can potentially 
-                * be assigned to.
-                */
-               classes = arch_type_to_regcm(state, set->member->type);
-               size = regc_max_size(state, classes);
-               
-               /* Find how many registers we allow this value to
-                * be assigned to.
-                */
-               info = arch_reg_lhs(state, set->member, 0);
-               
-               /* If the value does not live in a register it
-                * isn't constrained.
-                */
-               if (info.reg == REG_UNNEEDED) {
-                       continue;
-               }
-               
-               if ((info.reg == REG_UNSET) || (info.reg >= MAX_REGISTERS)) {
-                       cur_size = regc_max_size(state, info.regcm);
-               } else {
-                       cur_size = 1;
-               }
-
-               /* If there is no difference between potential and
-                * actual register count there is not a constraint
-                */
-               if (cur_size >= size) {
-                       continue;
-               }
-               
-               /* If this live_range feeds into conflict->inds
-                * it isn't a constraint we can relieve.
-                */
-               for(uset = set->member->use; uset; uset = uset->next) {
-                       if (uset->member == ins) {
-                               break;
-                       }
-               }
-               if (uset) {
-                       continue;
-               }
-               constraints = 1;
-               break;
-       }
-       /* Don't drop canidates with constraints */
-       if (conflict->constraints && !constraints) {
-               return;
-       }
-
-
-#if 0
-       fprintf(stderr, "conflict ins? %p %s count: %d constraints: %d\n",
-               ins, tops(ins->op), count, constraints);
-#endif
-       /* Find the instruction with the largest possible subset of
-        * conflict ranges and that dominates any other instruction
-        * with an equal sized set of conflicting ranges.
-        */
-       if ((count > conflict->count) ||
-               ((count == conflict->count) &&
-                       tdominates(state, ins, conflict->ins))) {
-               struct triple_reg_set *next;
-               /* Remember the canidate instruction */
-               conflict->ins = ins;
-               conflict->count = count;
-               conflict->constraints = constraints;
-               /* Free the old collection of live registers */
-               for(set = conflict->live; set; set = next) {
-                       next = set->next;
-                       do_triple_unset(&conflict->live, set->member);
-               }
-               conflict->live = 0;
-               /* Rember the registers that are alive but do not feed
-                * into or out of conflict->ins.
+/* Callback handed to walk_variable_lifetimes by correct_tangles.
+ * Repeats until no tangle is left: count, via reg_inc_used, the register
+ * each member of live reports through read_lhs_color; among members whose
+ * used[] count is 2 or more (and whose first use is not an OP_PHI) pick
+ * the one that tdominates the candidates seen so far; pass it to
+ * resolve_tangle and substitute any copy it returns into blocks and, when
+ * the tangle is not ins itself, into live.
+ *
+ * arg must point to an int; it is incremented once per tangle handled.
+ * rb is not referenced in this body.
+ */
+static void fix_tangles(struct compile_state *state,
+       struct reg_block *blocks, struct triple_reg_set *live,
+       struct reg_block *rb, struct triple *ins, void *arg)
+{
+       int *tangles = arg;
+       struct triple *tangle;
+       do {
+               char used[MAX_REGISTERS];
+               struct triple_reg_set *set;
+               tangle = 0;
+
+               /* Find out which registers have multiple uses at this point */
+               memset(used, 0, sizeof(used));
+               for(set = live; set; set = set->next) {
+                       struct reg_info info;
+                       info = read_lhs_color(state, set->member, 0);
+                       if (info.reg == REG_UNSET) {
+                               continue;
+                       }
+                       reg_inc_used(state, used, info.reg);
+               }
+               
+               /* Now find the least dominated definition of a register in
+                * conflict I have seen so far.
                 */
                for(set = live; set; set = set->next) {
-                       struct triple **expr;
-                       if (set->member == ins) {
-                               goto next;
+                       struct reg_info info;
+                       info = read_lhs_color(state, set->member, 0);
+                       if (used[info.reg] < 2) {
+                               continue;
                        }
-                       expr = triple_rhs(state, ins, 0);
-                       for(;expr; expr = triple_rhs(state, ins, expr)) {
-                               if (*expr == set->member) {
-                                       goto next;
-                               }
+                       /* Changing copies that feed into phi functions
+                        * is incorrect.
+                        */
+                       /* NOTE(review): only the first entry of the use list
+                        * is examined here; later uses are not checked.
+                        */
+                       if (set->member->use && 
+                               (set->member->use->member->op == OP_PHI)) {
+                               continue;
                        }
-                       expr = triple_lhs(state, ins, 0);
-                       for(; expr; expr = triple_lhs(state, ins, expr)) {
-                               if (*expr == set->member) {
-                                       goto next;
-                               }
+                       if (!tangle || tdominates(state, set->member, tangle)) {
+                               tangle = set->member;
                        }
-                       do_triple_set(&conflict->live, set->member, set->new);
-               next:
-                       ;
                }
-       }
+               /* If I have found a tangle resolve it */
+               if (tangle) {
+                       struct triple *post_copy;
+                       (*tangles)++;
+                       post_copy = resolve_tangle(state, tangle);
+                       if (post_copy) {
+                               replace_block_use(state, blocks, tangle, post_copy);
+                       }
+                       if (post_copy && (tangle != ins)) {
+                               replace_set_use(state, live, tangle, post_copy);
+                       }
+               }
+       } while(tangle);
        return;
 }
 
-static void find_range_conflict(struct compile_state *state,
-       struct reg_state *rstate, char *used, struct live_range *ref_range,
-       struct least_conflict *conflict)
+/* Call color_instructions, then hand fix_tangles to
+ * walk_variable_lifetimes.  Returns the number of tangles fix_tangles
+ * counted through its arg pointer.
+ */
+static int correct_tangles(
+       struct compile_state *state, struct reg_block *blocks)
 {
+       int tangles;
+       tangles = 0;
+       color_instructions(state);
+       walk_variable_lifetimes(state, blocks, fix_tangles, &tangles);
+       return tangles;
+}
 
-       /* there are 3 kinds ways conflicts can occure.
-        * 1) the life time of 2 values simply overlap.
-        * 2) the 2 values feed into the same instruction.
-        * 3) the 2 values feed into a phi function.
-        */
-
-       /* find the instruction where the problematic conflict comes
-        * into existance.  that the instruction where all of
-        * the values are alive, and among such instructions it is
-        * the least dominated one.
-        *
-        * a value is alive an an instruction if either;
-        * 1) the value defintion dominates the instruction and there
-        *    is a use at or after that instrction
-        * 2) the value definition feeds into a phi function in the
-        *    same block as the instruction.  and the phi function
-        *    is at or after the instruction.
-        */
-       memset(conflict, 0, sizeof(*conflict));
-       conflict->rstate      = rstate;
-       conflict->ref_range   = ref_range;
-       conflict->ins         = 0;
-       conflict->live        = 0;
-       conflict->count       = 0;
-       conflict->constraints = 0;
-       walk_variable_lifetimes(state, rstate->blocks, least_conflict, conflict);
 
-       if (!conflict->ins) {
-               internal_error(state, ref_range->defs->def, "No conflict ins?");
-       }
-       if (!conflict->live) {
-               internal_error(state, ref_range->defs->def, "No conflict live?");
-       }
-#if 0
-       fprintf(stderr, "conflict ins: %p %s count: %d constraints: %d\n", 
-               conflict->ins, tops(conflict->ins->op),
-               conflict->count, conflict->constraints);
-#endif
-       return;
-}
+static void ids_from_rstate(struct compile_state *state, struct reg_state *rstate);
+static void cleanup_rstate(struct compile_state *state, struct reg_state *rstate);
 
-static struct triple *split_constrained_range(struct compile_state *state, 
-       struct reg_state *rstate, char *used, struct least_conflict *conflict)
+/* Scan the circular list of definitions starting at range->defs for ones
+ * whose normalized lhs register class mask (find_lhs_color) differs from
+ * the normalized mask for their type (arch_type_to_regcm).  Such a
+ * definition replaces the candidate when there is none yet or when
+ * tdominates(state, def, candidate) holds.
+ *
+ * constrained is the candidate carried in from earlier calls (may be 0).
+ * Returns the resulting candidate; the incoming value is returned
+ * unchanged when no definition in this range qualifies.
+ *
+ * NOTE(review): not declared static, unlike the neighbouring helpers --
+ * confirm whether external linkage is intended.
+ */
+struct triple *find_constrained_def(
+       struct compile_state *state, struct live_range *range, struct triple *constrained)
 {
-       unsigned constrained_size;
-       struct triple *new, *constrained;
-       struct triple_reg_set *cset;
-       /* Find a range that is having problems because it is
-        * artificially constrained.
-        */
-       constrained_size = ~0;
-       constrained = 0;
-       new = 0;
-       for(cset = conflict->live; cset; cset = cset->next) {
-               struct triple_set *set;
+       struct live_range_def *lrd, *lrd_next;
+       lrd_next = range->defs;
+       do {
                struct reg_info info;
-               unsigned classes;
-               unsigned cur_size, size;
-               /* Skip the live range that starts with conflict->ins */
-               if (cset->member == conflict->ins) {
-                       continue;
-               }
-               /* Find how many registers this value can potentially
-                * be assigned to.
-                */
-               classes = arch_type_to_regcm(state, cset->member->type);
-               size = regc_max_size(state, classes);
+               unsigned regcm;
 
-               /* Find how many registers we allow this value to
-                * be assigned to.
-                */
-               info = arch_reg_lhs(state, cset->member, 0);
+               lrd = lrd_next;
+               lrd_next = lrd->next;
 
-               /* If the register doesn't need a register 
-                * splitting it can't help.
+               regcm = arch_type_to_regcm(state, lrd->def->type);
+               info = find_lhs_color(state, lrd->def, 0);
+               regcm      = arch_regcm_reg_normalize(state, regcm);
+               info.regcm = arch_regcm_reg_normalize(state, info.regcm);
+               /* If the 2 register class masks are equal then
+                * the current register class is not constrained.
                 */
-               if (info.reg == REG_UNNEEDED) {
+               if (regcm == info.regcm) {
                        continue;
                }
-#warning "FIXME do I need a call to arch_reg_rhs around here somewhere?"
-               if ((info.reg == REG_UNSET) || (info.reg >= MAX_REGISTERS)) {
-                       cur_size = regc_max_size(state, info.regcm);
-               } else {
-                       cur_size = 1;
-               }
-               /* If this live_range feeds into conflict->ins
-                * splitting it is unlikely to help.
+               
+               /* If there is just one use.
+                * That use cannot accept a larger register class.
+                * There are no intervening definitions except
+                * definitions that feed into that use.
+                * Then a triple is not constrained.
+                * FIXME handle this case!
                 */
-               for(set = cset->member->use; set; set = set->next) {
-                       if (set->member == conflict->ins) {
-                               goto next;
-                       }
-               }
+#warning "FIXME ignore cases that cannot be fixed (a definition followed by a use)"
+               
 
-               /* If there is no difference between potential and
-                * actual register count there is nothing to do.
+               /* Of the constrained live ranges deal with the
+                * least dominated one first.
                 */
-               if (cur_size >= size) {
-                       continue;
+               if (state->compiler->debug & DEBUG_RANGE_CONFLICTS) {
+                       fprintf(stderr, "canidate: %p %-8s regcm: %x %x\n",
+                               lrd->def, tops(lrd->def->op), regcm, info.regcm);
                }
-               /* Of the constrained registers deal with the
-                * most constrained one first.
-                */
-               if (!constrained ||
-                       (size < constrained_size)) {
-                       constrained = cset->member;
-                       constrained_size = size;
+               if (!constrained || 
+                       tdominates(state, lrd->def, constrained))
+               {
+                       constrained = lrd->def;
                }
-       next:
-               ;
+       } while(lrd_next != range->defs);
+       return constrained;
+}
+
+/* Search the live ranges reachable through range->edges, and then range
+ * itself if none of those yields a candidate, for a constrained
+ * definition (see find_constrained_def).  When one is found,
+ * ids_from_rstate and cleanup_rstate are called and the definition is
+ * passed to resolve_tangle.
+ *
+ * Returns 1 if a constrained definition was found, 0 otherwise.
+ */
+static int split_constrained_ranges(
+       struct compile_state *state, struct reg_state *rstate, 
+       struct live_range *range)
+{
+       /* Walk through the edges in conflict and our current live
+        * range, and find definitions that are more severely constrained
+        * than the type of data they contain requires.
+        * 
+        * Then pick one of those ranges and relax the constraints.
+        */
+       struct live_range_edge *edge;
+       struct triple *constrained;
+
+       constrained = 0;
+       for(edge = range->edges; edge; edge = edge->next) {
+               constrained = find_constrained_def(state, edge->node, constrained);
+       }
+#warning "FIXME should I call find_constrained_def here only if no previous constrained def was found?"
+       if (!constrained) {
+               constrained = find_constrained_def(state, range, constrained);
+       }
+
+       if (state->compiler->debug & DEBUG_RANGE_CONFLICTS) {
+               /* constrained may still be 0 here; do not dereference it
+                * just to print its opcode name.
+                */
+               fprintf(stderr, "constrained: %p %-8s\n",
+                       constrained,
+                       constrained ? tops(constrained->op) : "(none)");
+       }
        if (constrained) {
-               new = post_copy(state, constrained);
-               new->id |= TRIPLE_FLAG_POST_SPLIT;
+               ids_from_rstate(state, rstate);
+               cleanup_rstate(state, rstate);
+               resolve_tangle(state, constrained);
        }
-       return new;
+       return !!constrained;
 }
-
+       
+/* Attempt a split on behalf of range.
+ * Returns 0 without splitting when range->color is REG_UNNEEDED or
+ * rstate->passes has reached rstate->max_passes; otherwise returns the
+ * result of split_constrained_ranges (1 when a constrained definition
+ * was found and handed to resolve_tangle, 0 when none was found).
+ * The used argument is not referenced by this body.
+ */
 static int split_ranges(
-       struct compile_state *state, struct reg_state *rstate, 
+       struct compile_state *state, struct reg_state *rstate,
        char *used, struct live_range *range)
 {
-       struct triple *new;
-
-#if 0
-       fprintf(stderr, "split_ranges %d %s %p\n", 
-               rstate->passes, tops(range->defs->def->op), range->defs->def);
-#endif
+       int split;
+       if (state->compiler->debug & DEBUG_RANGE_CONFLICTS) {
+               fprintf(stderr, "split_ranges %d %s %p\n", 
+                       rstate->passes, tops(range->defs->def->op), range->defs->def);
+       }
        if ((range->color == REG_UNNEEDED) ||
                (rstate->passes >= rstate->max_passes)) {
                return 0;
        }
-       new = 0;
-       /* If I can't allocate a register something needs to be split */
-       if (arch_select_free_register(state, used, range->classes) == REG_UNSET) {
-               struct least_conflict conflict;
-
-#if 0
-       fprintf(stderr, "find_range_conflict\n");
-#endif
-               /* Find where in the set of registers the conflict
-                * actually occurs.
-                */
-               find_range_conflict(state, rstate, used, range, &conflict);
+       split = split_constrained_ranges(state, rstate, range);
 
-               /* If a range has been artifically constrained split it */
-               new = split_constrained_range(state, rstate, used, &conflict);
-               
-               if (!new) {
-               /* Ideally I would split the live range that will not be used
-                * for the longest period of time in hopes that this will 
-                * (a) allow me to spill a register or
-                * (b) allow me to place a value in another register.
-                *
-                * So far I don't have a test case for this, the resolving
-                * of mandatory constraints has solved all of my
-                * know issues.  So I have choosen not to write any
-                * code until I cat get a better feel for cases where
-                * it would be useful to have.
-                *
-                */
+       /* Ideally I would split the live range that will not be used
+        * for the longest period of time in hopes that this will 
+        * (a) allow me to spill a register or
+        * (b) allow me to place a value in another register.
+        *
+        * So far I don't have a test case for this, the resolving
+        * of mandatory constraints has solved all of my
+        * known issues.  So I have chosen not to write any
+        * code until I can get a better feel for cases where
+        * it would be useful to have.
+        *
+        */
 #warning "WISHLIST implement live range splitting..."
-#if 0
-                       print_blocks(state, stderr);
-                       print_dominators(state, stderr);
+       
+       /* Nothing was split: dump diagnostics if the second-level
+        * range conflict debug flag is set.
+        */
+       if (!split && (state->compiler->debug & DEBUG_RANGE_CONFLICTS2)) {
+               print_interference_blocks(state, rstate, stderr, 0);
+               print_dominators(state, stderr);
+       }
+       return split;
+}
 
-#endif
-                       return 0;
-               }
+/* Select the stream for color-graph debug output: stderr when
+ * DEBUG_COLOR_GRAPH2 is set in state->compiler->debug, otherwise stdout
+ * when DEBUG_COLOR_GRAPH is set, otherwise 0 (no debug output).
+ */
+static FILE *cgdebug_fp(struct compile_state *state)
+{
+       FILE *fp;
+       fp = 0;
+       if (!fp && (state->compiler->debug & DEBUG_COLOR_GRAPH2)) {
+               fp = stderr;
        }
-       if (new) {
-               rstate->lrd[rstate->defs].orig_id = new->id;
-               new->id = rstate->defs;
-               rstate->defs++;
-#if 0
-               fprintf(stderr, "new: %p old: %s %p\n", 
-                       new, tops(RHS(new, 0)->op), RHS(new, 0));
-#endif
-#if 0
-               print_blocks(state, stderr);
-               print_dominators(state, stderr);
+       if (!fp && (state->compiler->debug & DEBUG_COLOR_GRAPH)) {
+               fp = stdout;
+       }
+       return fp;
+}
 
-#endif
-               return 1;
+/* printf-style output to the stream chosen by cgdebug_fp.
+ * Does nothing when cgdebug_fp returns 0.
+ */
+static void cgdebug_printf(struct compile_state *state, const char *fmt, ...)
+{
+       FILE *fp;
+       fp = cgdebug_fp(state);
+       if (fp) {
+               va_list args;
+               va_start(args, fmt);
+               vfprintf(fp, fmt, args);
+               va_end(args);
        }
-       return 0;
 }
 
-#if DEBUG_COLOR_GRAPH > 1
-#define cgdebug_printf(...) fprintf(stdout, __VA_ARGS__)
-#define cgdebug_flush() fflush(stdout)
-#elif DEBUG_COLOR_GRAPH == 1
-#define cgdebug_printf(...) fprintf(stderr, __VA_ARGS__)
-#define cgdebug_flush() fflush(stderr)
-#else
-#define cgdebug_printf(...)
-#define cgdebug_flush()
-#endif
+/* Flush the stream chosen by cgdebug_fp; a 0 stream means there is
+ * nothing to flush.
+ */
+static void cgdebug_flush(struct compile_state *state)
+{
+       FILE *out = cgdebug_fp(state);
+
+       if (!out) {
+               return;
+       }
+       fflush(out);
+}
+
+/* Pass ins to loc() on the stream chosen by cgdebug_fp; does nothing
+ * when that stream is 0.
+ */
+static void cgdebug_loc(struct compile_state *state, struct triple *ins)
+{
+       FILE *out = cgdebug_fp(state);
+
+       if (!out) {
+               return;
+       }
+       loc(out, state, ins);
+}
 
-       
 static int select_free_color(struct compile_state *state, 
        struct reg_state *rstate, struct live_range *range)
 {
@@ -12966,26 +14824,24 @@ static int select_free_color(struct compile_state *state,
                }
                reg_fill_used(state, used, edge->node->color);
        }
-#if DEBUG_COLOR_GRAPH > 1
-       {
+
+       if (state->compiler->debug & DEBUG_COLOR_GRAPH2) {
                int i;
                i = 0;
                for(edge = range->edges; edge; edge = edge->next) {
                        i++;
                }
-               cgdebug_printf("\n%s edges: %d @%s:%d.%d\n", 
-                       tops(range->def->op), i, 
-                       range->def->filename, range->def->line, range->def->col);
+               cgdebug_printf(state, "\n%s edges: %d", 
+                       tops(range->defs->def->op), i);
+               cgdebug_loc(state, range->defs->def);
+               cgdebug_printf(state, "\n");
                for(i = 0; i < MAX_REGISTERS; i++) {
                        if (used[i]) {
-                               cgdebug_printf("used: %s\n",
+                               cgdebug_printf(state, "used: %s\n",
                                        arch_reg_str(i));
                        }
                }
        }       
-#endif
-
-#warning "FIXME detect conflicts caused by the source and destination being the same register"
 
        /* If a color is already assigned see if it will work */
        if (range->color != REG_UNSET) {
@@ -13027,6 +14883,7 @@ static int select_free_color(struct compile_state *state,
                entry = lrd->def->use;
                for(;(range->color == REG_UNSET) && entry; entry = entry->next) {
                        struct live_range_def *insd;
+                       unsigned regcm;
                        insd = &rstate->lrd[entry->member->id];
                        if (insd->lr->defs == 0) {
                                continue;
@@ -13035,8 +14892,11 @@ static int select_free_color(struct compile_state *state,
                                !interfere(rstate, range, insd->lr)) {
                                phi = insd;
                        }
-                       if ((insd->lr->color == REG_UNSET) ||
-                               ((insd->lr->classes & range->classes) == 0) ||
+                       if (insd->lr->color == REG_UNSET) {
+                               continue;
+                       }
+                       regcm = insd->lr->classes;
+                       if (((regcm & range->classes) == 0) ||
                                (used[insd->lr->color])) {
                                continue;
                        }
@@ -13059,12 +14919,16 @@ static int select_free_color(struct compile_state *state,
                        expr = triple_rhs(state, phi->def, 0);
                        for(; expr; expr = triple_rhs(state, phi->def, expr)) {
                                struct live_range *lr;
+                               unsigned regcm;
                                if (!*expr) {
                                        continue;
                                }
                                lr = rstate->lrd[(*expr)->id].lr;
-                               if ((lr->color == REG_UNSET) || 
-                                       ((lr->classes & range->classes) == 0) ||
+                               if (lr->color == REG_UNSET) {
+                                       continue;
+                               }
+                               regcm = lr->classes;
+                               if (((regcm & range->classes) == 0) ||
                                        (used[lr->color])) {
                                        continue;
                                }
@@ -13081,12 +14945,16 @@ static int select_free_color(struct compile_state *state,
                expr = triple_rhs(state, lrd->def, 0);
                for(; expr; expr = triple_rhs(state, lrd->def, expr)) {
                        struct live_range *lr;
+                       unsigned regcm;
                        if (!*expr) {
                                continue;
                        }
                        lr = rstate->lrd[(*expr)->id].lr;
-                       if ((lr->color == -1) || 
-                               ((lr->classes & range->classes) == 0) ||
+                       if (lr->color == REG_UNSET) {
+                               continue;
+                       }
+                       regcm = lr->classes;
+                       if (((regcm & range->classes) == 0) ||
                                (used[lr->color])) {
                                continue;
                        }
@@ -13115,16 +14983,16 @@ static int select_free_color(struct compile_state *state,
                                arch_reg_str(edge->node->color));
                        lrd = edge->node->defs;
                        do {
-                               warning(state, lrd->def, " %s",
-                                       tops(lrd->def->op));
+                               warning(state, lrd->def, " %s %p",
+                                       tops(lrd->def->op), lrd->def);
                                lrd = lrd->next;
                        } while(lrd != edge->node->defs);
                }
                warning(state, range->defs->def, "range: ");
                lrd = range->defs;
                do {
-                       warning(state, lrd->def, " %s",
-                               tops(lrd->def->op));
+                       warning(state, lrd->def, " %s %p",
+                               tops(lrd->def->op), lrd->def);
                        lrd = lrd->next;
                } while(lrd != range->defs);
                        
@@ -13136,14 +15004,10 @@ static int select_free_color(struct compile_state *state,
                                        arch_reg_str(i));
                        }
                }
-#if DEBUG_COLOR_GRAPH < 2
                error(state, range->defs->def, "too few registers");
-#else
-               internal_error(state, range->defs->def, "too few registers");
-#endif
        }
-       range->classes = arch_reg_regcm(state, range->color);
-       if (range->color == -1) {
+       range->classes &= arch_reg_regcm(state, range->color);
+       if ((range->color == REG_UNSET) || (range->classes == 0)) {
                internal_error(state, range->defs->def, "select_free_color did not?");
        }
        return 1;
@@ -13155,7 +15019,7 @@ static int color_graph(struct compile_state *state, struct reg_state *rstate)
        struct live_range_edge *edge;
        struct live_range *range;
        if (rstate->low) {
-               cgdebug_printf("Lo: ");
+               cgdebug_printf(state, "Lo: ");
                range = rstate->low;
                if (*range->group_prev != range) {
                        internal_error(state, 0, "lo: *prev != range?");
@@ -13172,7 +15036,7 @@ static int color_graph(struct compile_state *state, struct reg_state *rstate)
                }
        }
        else if (rstate->high) {
-               cgdebug_printf("Hi: ");
+               cgdebug_printf(state, "Hi: ");
                range = rstate->high;
                if (*range->group_prev != range) {
                        internal_error(state, 0, "hi: *prev != range?");
@@ -13191,7 +15055,7 @@ static int color_graph(struct compile_state *state, struct reg_state *rstate)
        else {
                return 1;
        }
-       cgdebug_printf(" %d\n", range - rstate->lr);
+       cgdebug_printf(state, " %d\n", range - rstate->lr);
        range->group_prev = 0;
        for(edge = range->edges; edge; edge = edge->next) {
                struct live_range *node;
@@ -13209,7 +15073,7 @@ static int color_graph(struct compile_state *state, struct reg_state *rstate)
                        if (&node->group_next == rstate->high_tail) {
                                rstate->high_tail = node->group_prev;
                        }
-                       cgdebug_printf("Moving...%d to low\n", node - rstate->lr);
+                       cgdebug_printf(state, "Moving...%d to low\n", node - rstate->lr);
                        node->group_prev  = rstate->low_tail;
                        node->group_next  = 0;
                        *rstate->low_tail = node;
@@ -13222,12 +15086,11 @@ static int color_graph(struct compile_state *state, struct reg_state *rstate)
        }
        colored = color_graph(state, rstate);
        if (colored) {
-               cgdebug_printf("Coloring %d @%s:%d.%d:", 
-                       range - rstate->lr,
-                       range->def->filename, range->def->line, range->def->col);
-               cgdebug_flush();
+               cgdebug_printf(state, "Coloring %d @", range - rstate->lr);
+               cgdebug_loc(state, range->defs->def);
+               cgdebug_flush(state);
                colored = select_free_color(state, rstate, range);
-               cgdebug_printf(" %s\n", arch_reg_str(range->color));
+               cgdebug_printf(state, " %s\n", arch_reg_str(range->color));
        }
        return colored;
 }
@@ -13238,7 +15101,7 @@ static void verify_colors(struct compile_state *state, struct reg_state *rstate)
        struct live_range_edge *edge;
        struct triple *ins, *first;
        char used[MAX_REGISTERS];
-       first = RHS(state->main_function, 0);
+       first = state->first;
        ins = first;
        do {
                if (triple_is_def(state, ins)) {
@@ -13246,192 +15109,44 @@ static void verify_colors(struct compile_state *state, struct reg_state *rstate)
                                internal_error(state, ins, 
                                        "triple without a live range def");
                        }
-                       lr = rstate->lrd[ins->id].lr;
-                       if (lr->color == REG_UNSET) {
-                               internal_error(state, ins,
-                                       "triple without a color");
-                       }
-                       /* Find the registers used by the edges */
-                       memset(used, 0, sizeof(used));
-                       for(edge = lr->edges; edge; edge = edge->next) {
-                               if (edge->node->color == REG_UNSET) {
-                                       internal_error(state, 0,
-                                               "live range without a color");
-                       }
-                               reg_fill_used(state, used, edge->node->color);
-                       }
-                       if (used[lr->color]) {
-                               internal_error(state, ins,
-                                       "triple with already used color");
-                       }
-               }
-               ins = ins->next;
-       } while(ins != first);
-}
-
-static void color_triples(struct compile_state *state, struct reg_state *rstate)
-{
-       struct live_range *lr;
-       struct triple *first, *ins;
-       first = RHS(state->main_function, 0);
-       ins = first;
-       do {
-               if ((ins->id < 0) || (ins->id > rstate->defs)) {
-                       internal_error(state, ins, 
-                               "triple without a live range");
-               }
-               lr = rstate->lrd[ins->id].lr;
-               SET_REG(ins->id, lr->color);
-               ins = ins->next;
-       } while (ins != first);
-}
-
-static void print_interference_block(
-       struct compile_state *state, struct block *block, void *arg)
-
-{
-       struct reg_state *rstate = arg;
-       struct reg_block *rb;
-       struct triple *ptr;
-       int phi_present;
-       int done;
-       rb = &rstate->blocks[block->vertex];
-
-       printf("\nblock: %p (%d), %p<-%p %p<-%p\n", 
-               block, 
-               block->vertex,
-               block->left, 
-               block->left && block->left->use?block->left->use->member : 0,
-               block->right, 
-               block->right && block->right->use?block->right->use->member : 0);
-       if (rb->in) {
-               struct triple_reg_set *in_set;
-               printf("        in:");
-               for(in_set = rb->in; in_set; in_set = in_set->next) {
-                       printf(" %-10p", in_set->member);
-               }
-               printf("\n");
-       }
-       phi_present = 0;
-       for(done = 0, ptr = block->first; !done; ptr = ptr->next) {
-               done = (ptr == block->last);
-               if (ptr->op == OP_PHI) {
-                       phi_present = 1;
-                       break;
-               }
-       }
-       if (phi_present) {
-               int edge;
-               for(edge = 0; edge < block->users; edge++) {
-                       printf("     in(%d):", edge);
-                       for(done = 0, ptr = block->first; !done; ptr = ptr->next) {
-                               struct triple **slot;
-                               done = (ptr == block->last);
-                               if (ptr->op != OP_PHI) {
-                                       continue;
-                               }
-                               slot = &RHS(ptr, 0);
-                               printf(" %-10p", slot[edge]);
-                       }
-                       printf("\n");
-               }
-       }
-       if (block->first->op == OP_LABEL) {
-               printf("%p:\n", block->first);
-       }
-       for(done = 0, ptr = block->first; !done; ptr = ptr->next) {
-               struct triple_set *user;
-               struct live_range *lr;
-               unsigned id;
-               int op;
-               op = ptr->op;
-               done = (ptr == block->last);
-               lr = rstate->lrd[ptr->id].lr;
-               
-               if (triple_stores_block(state, ptr)) {
-                       if (ptr->u.block != block) {
-                               internal_error(state, ptr, 
-                                       "Wrong block pointer: %p",
-                                       ptr->u.block);
-                       }
-               }
-               if (op == OP_ADECL) {
-                       for(user = ptr->use; user; user = user->next) {
-                               if (!user->member->u.block) {
-                                       internal_error(state, user->member, 
-                                               "Use %p not in a block?",
-                                               user->member);
-                               }
-                               
-                       }
-               }
-               id = ptr->id;
-               ptr->id = rstate->lrd[id].orig_id;
-               SET_REG(ptr->id, lr->color);
-               display_triple(stdout, ptr);
-               ptr->id = id;
-
-               if (triple_is_def(state, ptr) && (lr->defs == 0)) {
-                       internal_error(state, ptr, "lr has no defs!");
-               }
-
-               if (lr->defs) {
-                       struct live_range_def *lrd;
-                       printf("       range:");
-                       lrd = lr->defs;
-                       do {
-                               printf(" %-10p", lrd->def);
-                               lrd = lrd->next;
-                       } while(lrd != lr->defs);
-                       printf("\n");
-               }
-               if (lr->edges > 0) {
-                       struct live_range_edge *edge;
-                       printf("       edges:");
+                       lr = rstate->lrd[ins->id].lr;
+                       if (lr->color == REG_UNSET) {
+                               internal_error(state, ins,
+                                       "triple without a color");
+                       }
+                       /* Find the registers used by the edges */
+                       memset(used, 0, sizeof(used));
                        for(edge = lr->edges; edge; edge = edge->next) {
-                               struct live_range_def *lrd;
-                               lrd = edge->node->defs;
-                               do {
-                                       printf(" %-10p", lrd->def);
-                                       lrd = lrd->next;
-                               } while(lrd != edge->node->defs);
-                               printf("|");
+                               if (edge->node->color == REG_UNSET) {
+                                       internal_error(state, 0,
+                                               "live range without a color");
                        }
-                       printf("\n");
-               }
-               /* Do a bunch of sanity checks */
-               valid_ins(state, ptr);
-               if ((ptr->id < 0) || (ptr->id > rstate->defs)) {
-                       internal_error(state, ptr, "Invalid triple id: %d",
-                               ptr->id);
-               }
-               for(user = ptr->use; user; user = user->next) {
-                       struct triple *use;
-                       struct live_range *ulr;
-                       use = user->member;
-                       valid_ins(state, use);
-                       if ((use->id < 0) || (use->id > rstate->defs)) {
-                               internal_error(state, use, "Invalid triple id: %d",
-                                       use->id);
+                               reg_fill_used(state, used, edge->node->color);
                        }
-                       ulr = rstate->lrd[user->member->id].lr;
-                       if (triple_stores_block(state, user->member) &&
-                               !user->member->u.block) {
-                               internal_error(state, user->member,
-                                       "Use %p not in a block?",
-                                       user->member);
+                       if (used[lr->color]) {
+                               internal_error(state, ins,
+                                       "triple with already used color");
                        }
                }
-       }
-       if (rb->out) {
-               struct triple_reg_set *out_set;
-               printf("       out:");
-               for(out_set = rb->out; out_set; out_set = out_set->next) {
-                       printf(" %-10p", out_set->member);
+               ins = ins->next;
+       } while(ins != first);
+}
+
+static void color_triples(struct compile_state *state, struct reg_state *rstate)
+{
+       struct live_range *lr;
+       struct triple *first, *ins;
+       first = state->first;
+       ins = first;
+       do {
+               if ((ins->id < 0) || (ins->id > rstate->defs)) {
+                       internal_error(state, ins, 
+                               "triple without a live range");
                }
-               printf("\n");
-       }
-       printf("\n");
+               lr = rstate->lrd[ins->id].lr;
+               SET_REG(ins->id, lr->color);
+               ins = ins->next;
+       } while (ins != first);
 }
 
 static struct live_range *merge_sort_lr(
@@ -13494,11 +15209,12 @@ static void ids_from_rstate(struct compile_state *state,
                return;
        }
        /* Display the graph if desired */
-       if (state->debug & DEBUG_INTERFERENCE) {
-               print_blocks(state, stdout);
+       if (state->compiler->debug & DEBUG_INTERFERENCE) {
+               print_interference_blocks(state, rstate, stdout, 0);
                print_control_flow(state);
+               fflush(stdout);
        }
-       first = RHS(state->main_function, 0);
+       first = state->first;
        ins = first;
        do {
                if (ins->id) {
@@ -13536,6 +15252,7 @@ static void cleanup_rstate(struct compile_state *state, struct reg_state *rstate
        rstate->blocks = 0;
 }
 
+static void verify_consistency(struct compile_state *state);
 static void allocate_registers(struct compile_state *state)
 {
        struct reg_state rstate;
@@ -13543,12 +15260,19 @@ static void allocate_registers(struct compile_state *state)
 
        /* Clear out the reg_state */
        memset(&rstate, 0, sizeof(rstate));
-       rstate.max_passes = MAX_ALLOCATION_PASSES;
+       rstate.max_passes = state->compiler->max_allocation_passes;
 
        do {
                struct live_range **point, **next;
+               int conflicts;
+               int tangles;
                int coalesced;
 
+               if (state->compiler->debug & DEBUG_RANGE_CONFLICTS) {
+                       fprintf(stderr, "pass: %d\n", rstate.passes);
+                       fflush(stderr);
+               }
+
                /* Restore ids */
                ids_from_rstate(state, &rstate);
 
@@ -13559,50 +15283,69 @@ static void allocate_registers(struct compile_state *state)
                rstate.blocks = compute_variable_lifetimes(state);
 
                /* Fix invalid mandatory live range coalesce conflicts */
-               walk_variable_lifetimes(
-                       state, rstate.blocks, fix_coalesce_conflicts, 0);
+               conflicts = correct_coalesce_conflicts(state, rstate.blocks);
 
-               /* Fix two simultaneous uses of the same register */
-               correct_tangles(state, rstate.blocks);
-
-               if (state->debug & DEBUG_INSERTED_COPIES) {
-                       printf("After resolve_tangles\n");
-                       print_blocks(state, stdout);
-                       print_control_flow(state);
-               }
+               /* Fix two simultaneous uses of the same register.
+                * In a few pathological cases a partial untangle moves
+                * the tangle to a part of the graph we won't revisit.
+                * So we keep looping until we have no more tangle fixes
+                * to apply.
+                */
+               do {
+                       tangles = correct_tangles(state, rstate.blocks);
+               } while(tangles);
 
                
+               print_blocks(state, "resolve_tangles", stdout);
+               verify_consistency(state);
+               
                /* Allocate and initialize the live ranges */
                initialize_live_ranges(state, &rstate);
-               
+
+               /* Note: currently, doing coalescing in a loop appears to
+                * buy me nothing.  The code is left this way in case
+                * there is some value in it, or in case a future bugfix
+                * yields some benefit.
+                */
                do {
-                       /* Forget previous live range edge calculations */
+                       if (state->compiler->debug & DEBUG_COALESCING) {
+                               fprintf(stderr, "coalescing\n");
+                       }
+
+                       /* Remove any previous live edge calculations */
                        cleanup_live_edges(&rstate);
 
-#if 0
-                       fprintf(stderr, "coalescing\n");
-#endif                 
                        /* Compute the interference graph */
                        walk_variable_lifetimes(
                                state, rstate.blocks, graph_ins, &rstate);
-               
+                       
                        /* Display the interference graph if desired */
-                       if (state->debug & DEBUG_INTERFERENCE) {
-                               printf("\nlive variables by block\n");
-                               walk_blocks(state, print_interference_block, &rstate);
+                       if (state->compiler->debug & DEBUG_INTERFERENCE) {
+                               print_interference_blocks(state, &rstate, stdout, 1);
                                printf("\nlive variables by instruction\n");
                                walk_variable_lifetimes(
                                        state, rstate.blocks, 
                                        print_interference_ins, &rstate);
                        }
-#if DEBUG_CONSISTENCY
-                       /* Verify the interference graph */
-                       walk_variable_lifetimes(
-                               state, rstate.blocks, verify_graph_ins, &rstate);
-#endif
                        
                        coalesced = coalesce_live_ranges(state, &rstate);
+
+                       if (state->compiler->debug & DEBUG_COALESCING) {
+                               fprintf(stderr, "coalesced: %d\n", coalesced);
+                       }
                } while(coalesced);
+
+#if DEBUG_CONSISTENCY > 1
+# if 0
+               fprintf(stderr, "verify_graph_ins...\n");
+# endif
+               /* Verify the interference graph */
+               walk_variable_lifetimes(
+                       state, rstate.blocks, verify_graph_ins, &rstate);
+# if 0
+               fprintf(stderr, "verify_graph_ins done\n");
+#endif
+#endif
                        
                /* Build the groups low and high.  But with the nodes
                 * first sorted by degree order.
@@ -13629,7 +15372,7 @@ static void allocate_registers(struct compile_state *state)
                         */
                        if ((range->degree < regc_max_size(state, range->classes)) ||
                                (range->color != REG_UNSET)) {
-                               cgdebug_printf("Lo: %5d degree %5d%s\n", 
+                               cgdebug_printf(state, "Lo: %5d degree %5d%s\n", 
                                        range - rstate.lr, range->degree,
                                        (range->color != REG_UNSET) ? " (colored)": "");
                                *range->group_prev = range->group_next;
@@ -13646,7 +15389,7 @@ static void allocate_registers(struct compile_state *state)
                                next = point;
                        }
                        else {
-                               cgdebug_printf("hi: %5d degree %5d%s\n", 
+                               cgdebug_printf(state, "hi: %5d degree %5d%s\n", 
                                        range - rstate.lr, range->degree,
                                        (range->color != REG_UNSET) ? " (colored)": "");
                        }
@@ -13664,6 +15407,9 @@ static void allocate_registers(struct compile_state *state)
 
        /* Cleanup the temporary data structures */
        cleanup_rstate(state, &rstate);
+
+       /* Display the new graph */
+       print_blocks(state, __func__, stdout);
 }
 
 /* Sparce Conditional Constant Propogation
@@ -13698,11 +15444,12 @@ struct flow_edge {
        struct flow_edge *out_next;
        int executable;
 };
+#define MAX_FLOW_BLOCK_EDGES 3
 struct flow_block {
        struct block *block;
        struct flow_edge *in;
        struct flow_edge *out;
-       struct flow_edge left, right;
+       struct flow_edge *edges;
 };
 
 struct scc_state {
@@ -13718,6 +15465,22 @@ struct scc_state {
 static void scc_add_fedge(struct compile_state *state, struct scc_state *scc, 
        struct flow_edge *fedge)
 {
+       if (state->compiler->debug & DEBUG_SCC_TRANSFORM2) {
+               fprintf(stderr, "adding fedge: %p (%4d -> %5d)\n",
+                       fedge,
+                       fedge->src->block?fedge->src->block->last->id: 0,
+                       fedge->dst->block?fedge->dst->block->first->id: 0);
+       }
+       if ((fedge == scc->flow_work_list) ||
+               (fedge->work_next != fedge) ||
+               (fedge->work_prev != fedge)) {
+
+               if (state->compiler->debug & DEBUG_SCC_TRANSFORM2) {
+                       fprintf(stderr, "dupped fedge: %p\n",
+                               fedge);
+               }
+               return;
+       }
        if (!scc->flow_work_list) {
                scc->flow_work_list = fedge;
                fedge->work_next = fedge->work_prev = fedge;
@@ -13745,6 +15508,7 @@ static struct flow_edge *scc_next_fedge(
                } else {
                        scc->flow_work_list = 0;
                }
+               fedge->work_next = fedge->work_prev = fedge;
        }
        return fedge;
 }
@@ -13752,6 +15516,22 @@ static struct flow_edge *scc_next_fedge(
 static void scc_add_sedge(struct compile_state *state, struct scc_state *scc,
        struct ssa_edge *sedge)
 {
+       if (state->compiler->debug & DEBUG_SCC_TRANSFORM2) {
+               fprintf(stderr, "adding sedge: %5d (%4d -> %5d)\n",
+                       sedge - scc->ssa_edges,
+                       sedge->src->def->id,
+                       sedge->dst->def->id);
+       }
+       if ((sedge == scc->ssa_work_list) ||
+               (sedge->work_next != sedge) ||
+               (sedge->work_prev != sedge)) {
+
+               if (state->compiler->debug & DEBUG_SCC_TRANSFORM2) {
+                       fprintf(stderr, "dupped sedge: %5d\n",
+                               sedge - scc->ssa_edges);
+               }
+               return;
+       }
        if (!scc->ssa_work_list) {
                scc->ssa_work_list = sedge;
                sedge->work_next = sedge->work_prev = sedge;
@@ -13779,6 +15559,7 @@ static struct ssa_edge *scc_next_sedge(
                } else {
                        scc->ssa_work_list = 0;
                }
+               sedge->work_next = sedge->work_prev = sedge;
        }
        return sedge;
 }
@@ -13795,7 +15576,7 @@ static void initialize_scc_state(
        memset(scc, 0, sizeof(*scc));
 
        /* Inialize pass zero find out how much memory we need */
-       first = RHS(state->main_function, 0);
+       first = state->first;
        ins = first;
        ins_count = ssa_edge_count = 0;
        do {
@@ -13806,10 +15587,10 @@ static void initialize_scc_state(
                }
                ins = ins->next;
        } while(ins != first);
-#if DEBUG_SCC
-       fprintf(stderr, "ins_count: %d ssa_edge_count: %d vertex_count: %d\n",
-               ins_count, ssa_edge_count, state->last_vertex);
-#endif
+       if (state->compiler->debug & DEBUG_SCC_TRANSFORM) {
+               fprintf(stderr, "ins_count: %d ssa_edge_count: %d vertex_count: %d\n",
+                       ins_count, ssa_edge_count, state->last_vertex);
+       }
        scc->ins_count   = ins_count;
        scc->lattice     = 
                xcmalloc(sizeof(*scc->lattice)*(ins_count + 1), "lattice");
@@ -13834,6 +15615,8 @@ static void initialize_scc_state(
                        block->vertex = fblock_index;
                        fblock = &scc->flow_blocks[fblock_index];
                        fblock->block = block;
+                       fblock->edges = xcmalloc(sizeof(*fblock->edges)*block->edge_count,
+                               "flow_edges");
                }
                {
                        struct lattice_node *lnode;
@@ -13853,6 +15636,25 @@ static void initialize_scc_state(
        fblock = 0;
        ins = first;
        do {
+               {
+                       struct triple_set *edge;
+                       struct ssa_edge **stail;
+                       struct lattice_node *lnode;
+                       lnode = &scc->lattice[ins->id];
+                       lnode->out = 0;
+                       stail = &lnode->out;
+                       for(edge = ins->use; edge; edge = edge->next) {
+                               struct ssa_edge *sedge;
+                               ssa_edge_index += 1;
+                               sedge = &scc->ssa_edges[ssa_edge_index];
+                               *stail = sedge;
+                               stail = &sedge->out_next;
+                               sedge->src = lnode;
+                               sedge->dst = &scc->lattice[edge->member->id];
+                               sedge->work_next = sedge->work_prev = sedge;
+                               sedge->out_next = 0;
+                       }
+               }
                if ((ins->op == OP_LABEL) && (block != ins->u.block)) {
                        struct flow_edge *fedge, **ftail;
                        struct block_set *bedge;
@@ -13861,29 +15663,35 @@ static void initialize_scc_state(
                        fblock->in = 0;
                        fblock->out = 0;
                        ftail = &fblock->out;
-                       if (block->left) {
-                               fblock->left.dst = &scc->flow_blocks[block->left->vertex];
-                               if (fblock->left.dst->block != block->left) {
-                                       internal_error(state, 0, "block mismatch");
-                               }
-                               fblock->left.out_next = 0;
-                               *ftail = &fblock->left;
-                               ftail = &fblock->left.out_next;
-                       }
-                       if (block->right) {
-                               fblock->right.dst = &scc->flow_blocks[block->right->vertex];
-                               if (fblock->right.dst->block != block->right) {
+
+                       fedge = fblock->edges;
+                       bedge = block->edges;
+                       for(; bedge; bedge = bedge->next, fedge++) {
+                               fedge->dst = &scc->flow_blocks[bedge->member->vertex];
+                               if (fedge->dst->block != bedge->member) {
                                        internal_error(state, 0, "block mismatch");
                                }
-                               fblock->right.out_next = 0;
-                               *ftail = &fblock->right;
-                               ftail = &fblock->right.out_next;
+                               *ftail = fedge;
+                               ftail = &fedge->out_next;
+                               fedge->out_next = 0;
                        }
                        for(fedge = fblock->out; fedge; fedge = fedge->out_next) {
                                fedge->src = fblock;
                                fedge->work_next = fedge->work_prev = fedge;
                                fedge->executable = 0;
                        }
+               }
+               ins = ins->next;
+       } while (ins != first);
+       block = 0;
+       fblock = 0;
+       ins = first;
+       do {
+               if ((ins->op  == OP_LABEL) && (block != ins->u.block)) {
+                       struct flow_edge **ftail;
+                       struct block_set *bedge;
+                       block = ins->u.block;
+                       fblock = &scc->flow_blocks[block->vertex];
                        ftail = &fblock->in;
                        for(bedge = block->use; bedge; bedge = bedge->next) {
                                struct block *src_block;
@@ -13891,36 +15699,19 @@ static void initialize_scc_state(
                                struct flow_edge *sfedge;
                                src_block = bedge->member;
                                sfblock = &scc->flow_blocks[src_block->vertex];
-                               sfedge = 0;
-                               if (src_block->left == block) {
-                                       sfedge = &sfblock->left;
-                               } else {
-                                       sfedge = &sfblock->right;
+                               for(sfedge = sfblock->out; sfedge; sfedge = sfedge->out_next) {
+                                       if (sfedge->dst == fblock) {
+                                               break;
+                                       }
+                               }
+                               if (!sfedge) {
+                                       internal_error(state, 0, "edge mismatch");
                                }
                                *ftail = sfedge;
                                ftail = &sfedge->in_next;
                                sfedge->in_next = 0;
                        }
                }
-               {
-                       struct triple_set *edge;
-                       struct ssa_edge **stail;
-                       struct lattice_node *lnode;
-                       lnode = &scc->lattice[ins->id];
-                       lnode->out = 0;
-                       stail = &lnode->out;
-                       for(edge = ins->use; edge; edge = edge->next) {
-                               struct ssa_edge *sedge;
-                               ssa_edge_index += 1;
-                               sedge = &scc->ssa_edges[ssa_edge_index];
-                               *stail = sedge;
-                               stail = &sedge->out_next;
-                               sedge->src = lnode;
-                               sedge->dst = &scc->lattice[edge->member->id];
-                               sedge->work_next = sedge->work_prev = sedge;
-                               sedge->out_next = 0;
-                       }
-               }
                ins = ins->next;
        } while(ins != first);
        /* Setup a dummy block 0 as a node above the start node */
@@ -13929,10 +15720,11 @@ static void initialize_scc_state(
                struct flow_edge *fedge;
                fblock = &scc->flow_blocks[0];
                fblock->block = 0;
+               fblock->edges = xcmalloc(sizeof(*fblock->edges)*1, "flow_edges");
                fblock->in = 0;
-               fblock->out = &fblock->left;
+               fblock->out = fblock->edges;
                dst = &scc->flow_blocks[state->first_block->vertex];
-               fedge = &fblock->left;
+               fedge = fblock->edges;
                fedge->src        = fblock;
                fedge->dst        = dst;
                fedge->work_next  = fedge;
@@ -13947,16 +15739,25 @@ static void initialize_scc_state(
                scc->ssa_work_list  = 0;
                scc_add_fedge(state, scc, fedge);
        }
-#if DEBUG_SCC
-       fprintf(stderr, "ins_index: %d ssa_edge_index: %d fblock_index: %d\n",
-               ins_index, ssa_edge_index, fblock_index);
-#endif
+       if (state->compiler->debug & DEBUG_SCC_TRANSFORM) {
+               fprintf(stderr, "ins_index: %d ssa_edge_index: %d fblock_index: %d\n",
+                       ins_index, ssa_edge_index, fblock_index);
+       }
 }
 
        
 static void free_scc_state(
        struct compile_state *state, struct scc_state *scc)
 {
+       int i;
+       for(i = 0; i < state->last_vertex + 1; i++) {
+               struct flow_block *fblock;
+               fblock = &scc->flow_blocks[i];
+               if (fblock->edges) {
+                       xfree(fblock->edges);
+                       fblock->edges = 0;
+               }
+       }
        xfree(scc->flow_blocks);
        xfree(scc->ssa_edges);
        xfree(scc->lattice);
@@ -14015,12 +15816,41 @@ static int lval_changed(struct compile_state *state,
 
 }
 
+/* Trace one lattice node on stderr when DEBUG_SCC_TRANSFORM is enabled:
+ * def pointer, phi/expr, id, op, rhs ids, any integer constant, the
+ * lattice level (lo/const/hi) and "changed" when `changed` is nonzero. */
+static void scc_debug_lnode(
+       struct compile_state *state, struct lattice_node *lnode, int changed)
+{
+       if (state->compiler->debug & DEBUG_SCC_TRANSFORM) {
+               FILE *fp = stderr;
+               struct triple *val, **expr;
+               val = lnode->val? lnode->val : lnode->def;
+               fprintf(fp, "%p %s %3d %10s (",
+                       (void *)lnode->def, /* %p takes a void pointer */
+                       ((lnode->def->op == OP_PHI)? "phi: ": "expr:"),
+                       lnode->def->id, tops(lnode->def->op));
+               expr = triple_rhs(state, lnode->def, 0);
+               for(;expr;expr = triple_rhs(state, lnode->def, expr)) {
+                       if (*expr) {
+                               fprintf(fp, " %d", (*expr)->id);
+                       }
+               }
+               if (val->op == OP_INTCONST) {
+                       fprintf(fp, " <0x%08lx>", (unsigned long)(val->u.cval));
+               }
+               fprintf(fp, " ) -> %s %s\n",
+                       ((!lnode->val)? "lo": is_const(lnode->val)? "const": "hi"),
+                       changed? "changed" : "");
+       }
+}
+
 static void scc_visit_phi(struct compile_state *state, struct scc_state *scc, 
        struct lattice_node *lnode)
 {
        struct lattice_node *tmp;
        struct triple **slot, *old;
        struct flow_edge *fedge;
+       int changed;
        int index;
        if (lnode->def->op != OP_PHI) {
                internal_error(state, lnode->def, "not phi");
@@ -14033,6 +15863,13 @@ static void scc_visit_phi(struct compile_state *state, struct scc_state *scc,
        slot = &RHS(lnode->def, 0);
        index = 0;
        for(fedge = lnode->fblock->in; fedge; index++, fedge = fedge->in_next) {
+               if (state->compiler->debug & DEBUG_SCC_TRANSFORM) {
+                       fprintf(stderr, "Examining edge: %d vertex: %d executable: %d\n", 
+                               index,
+                               fedge->dst->block->vertex,
+                               fedge->executable
+                               );
+               }
                if (!fedge->executable) {
                        continue;
                }
@@ -14061,13 +15898,11 @@ static void scc_visit_phi(struct compile_state *state, struct scc_state *scc,
                        break;
                }
        }
-#if DEBUG_SCC
-       fprintf(stderr, "phi: %d -> %s\n",
-               lnode->def->id,
-               (!lnode->val)? "lo": is_const(lnode->val)? "const": "hi");
-#endif
+       changed = lval_changed(state, old, lnode);
+       scc_debug_lnode(state, lnode, changed);
+
        /* If the lattice value has changed update the work lists. */
-       if (lval_changed(state, old, lnode)) {
+       if (changed) {
                struct ssa_edge *sedge;
                for(sedge = lnode->out; sedge; sedge = sedge->out_next) {
                        scc_add_sedge(state, scc, sedge);
@@ -14106,7 +15941,7 @@ static int compute_lnode_val(struct compile_state *state, struct scc_state *scc,
                        *vexpr = (tmp->val)? tmp->val : tmp->def;
                }
        }
-       if (scratch->op == OP_BRANCH) {
+       if (triple_is_branch(state, scratch)) {
                scratch->next = lnode->def->next;
        }
        /* Recompute the value */
@@ -14121,7 +15956,7 @@ static int compute_lnode_val(struct compile_state *state, struct scc_state *scc,
        }
        if ((scratch->prev != scratch) ||
                ((scratch->next != scratch) &&
-                       ((lnode->def->op != OP_BRANCH) ||
+                       (!triple_is_branch(state, lnode->def) ||
                                (scratch->next != lnode->def->next)))) {
                internal_error(state, lnode->def, "scratch in list?");
        }
@@ -14157,16 +15992,28 @@ static int compute_lnode_val(struct compile_state *state, struct scc_state *scc,
        /* Find the cases that are always lattice lo */
        if (lnode->val && 
                triple_is_def(state, lnode->val) &&
-               !triple_is_pure(state, lnode->val)) {
+               !triple_is_pure(state, lnode->val, lnode->old_id)) {
                lnode->val = 0;
        }
-       if (lnode->val && 
-               (lnode->val->op == OP_SDECL) && 
-               (lnode->val != lnode->def)) {
-               internal_error(state, lnode->def, "bad sdecl");
-       }
        /* See if the lattice value has changed */
        changed = lval_changed(state, old, lnode);
+       /* See if this value should not change */
+       if (lnode->val && 
+               ((      !triple_is_def(state, lnode->def)  &&
+                       !triple_is_cond_branch(state, lnode->def)) ||
+                       (lnode->def->op == OP_PIECE))) {
+#warning "FIXME constant propogate through expressions with multiple left hand sides"
+               if (changed) {
+                       internal_warning(state, lnode->def, "non def changes value?");
+               }
+               lnode->val = 0;
+       }
+       /* Report what has just happened */
+       if (state->compiler->debug & DEBUG_SCC_TRANSFORM2) {
+               display_triple_changes(stderr, scratch, lnode->def);
+       }
+
+       /* See if we need to free the scratch value */
        if (lnode->val != scratch) {
                xfree(scratch);
        }
@@ -14177,10 +16024,11 @@ static void scc_visit_branch(struct compile_state *state, struct scc_state *scc,
        struct lattice_node *lnode)
 {
        struct lattice_node *cond;
-#if DEBUG_SCC
-       {
+       struct flow_edge *left, *right;
+       if (state->compiler->debug & DEBUG_SCC_TRANSFORM) {
                struct flow_edge *fedge;
-               fprintf(stderr, "branch: %d (",
+               fprintf(stderr, "%s: %d (",
+                       tops(lnode->def->op),
                        lnode->def->id);
                
                for(fedge = lnode->fblock->out; fedge; fedge = fedge->out_next) {
@@ -14193,29 +16041,43 @@ static void scc_visit_branch(struct compile_state *state, struct scc_state *scc,
                }
                fprintf(stderr, "\n");
        }
-#endif
-       if (lnode->def->op != OP_BRANCH) {
+       if (!triple_is_branch(state, lnode->def)) {
                internal_error(state, lnode->def, "not branch");
        }
        /* This only applies to conditional branches */
-       if (TRIPLE_RHS(lnode->def->sizes) == 0) {
+       if (!triple_is_cond_branch(state, lnode->def)) {
                return;
        }
        cond = triple_to_lattice(state, scc, RHS(lnode->def,0));
+       for(left = cond->fblock->out; left; left = left->out_next) {
+               if (left->dst->block->first == lnode->def->next) {
+                       break;
+               }
+       }
+       if (!left) {
+               internal_error(state, lnode->def, "Cannot find left branch edge");
+       }
+       for(right = cond->fblock->out; right; right = right->out_next) {
+               if (right->dst->block->first == TARG(lnode->def, 0)) {
+                       break;
+               }
+       }
+       if (!right) {
+               internal_error(state, lnode->def, "Cannot find right branch edge");
+       }
        if (cond->val && !is_const(cond->val)) {
 #warning "FIXME do I need to do something here?"
                warning(state, cond->def, "condition not constant?");
                return;
        }
        if (cond->val == 0) {
-               scc_add_fedge(state, scc, cond->fblock->out);
-               scc_add_fedge(state, scc, cond->fblock->out->out_next);
+               scc_add_fedge(state, scc, left);
+               scc_add_fedge(state, scc, right);
        }
        else if (cond->val->u.cval) {
-               scc_add_fedge(state, scc, cond->fblock->out->out_next);
-               
+               scc_add_fedge(state, scc, right);
        } else {
-               scc_add_fedge(state, scc, cond->fblock->out);
+               scc_add_fedge(state, scc, left);
        }
 
 }
@@ -14226,24 +16088,10 @@ static void scc_visit_expr(struct compile_state *state, struct scc_state *scc,
        int changed;
 
        changed = compute_lnode_val(state, scc, lnode);
-#if DEBUG_SCC
-       {
-               struct triple **expr;
-               fprintf(stderr, "expr: %3d %10s (",
-                       lnode->def->id, tops(lnode->def->op));
-               expr = triple_rhs(state, lnode->def, 0);
-               for(;expr;expr = triple_rhs(state, lnode->def, expr)) {
-                       if (*expr) {
-                               fprintf(stderr, " %d", (*expr)->id);
-                       }
-               }
-               fprintf(stderr, " ) -> %s\n",
-                       (!lnode->val)? "lo": is_const(lnode->val)? "const": "hi");
-       }
-#endif
-       if (lnode->def->op == OP_BRANCH) {
-               scc_visit_branch(state, scc, lnode);
+       scc_debug_lnode(state, lnode, changed);
 
+       if (triple_is_branch(state, lnode->def)) {
+               scc_visit_branch(state, scc, lnode);
        }
        else if (changed) {
                struct ssa_edge *sedge;
@@ -14257,19 +16105,37 @@ static void scc_writeback_values(
        struct compile_state *state, struct scc_state *scc)
 {
        struct triple *first, *ins;
-       first = RHS(state->main_function, 0);
+       first = state->first;
        ins = first;
        do {
                struct lattice_node *lnode;
                lnode = triple_to_lattice(state, scc, ins);
+
+               if (state->compiler->debug & DEBUG_SCC_TRANSFORM) {
+                       if (lnode->val && 
+                               !is_const(lnode->val) &&
+                               !triple_is_uncond_branch(state, lnode->val) &&
+                               (lnode->val->op != OP_NOOP)) 
+                       {
+                               struct flow_edge *fedge;
+                               int executable;
+                               executable = 0;
+                               for(fedge = lnode->fblock->in; 
+                                   !executable && fedge; fedge = fedge->in_next) {
+                                       executable |= fedge->executable;
+                               }
+                               if (executable) {
+                                       internal_warning(state, lnode->val,
+                                               "lattice node %d %s->%s still high?",
+                                               ins->id, 
+                                               tops(lnode->def->op),
+                                               tops(lnode->val->op));
+                               }
+                       }
+               }
+
                /* Restore id */
                ins->id = lnode->old_id;
-#if DEBUG_SCC
-               if (lnode->val && !is_const(lnode->val)) {
-                       warning(state, lnode->def, 
-                               "lattice node still high?");
-               }
-#endif
                if (lnode->val && (lnode->val != ins)) {
                        /* See if it something I know how to write back */
                        switch(lnode->val->op) {
@@ -14302,6 +16168,9 @@ static void scc_writeback_values(
 static void scc_transform(struct compile_state *state)
 {
        struct scc_state scc;
+       if (!(state->compiler->flags & COMPILER_SCC_TRANSFORM)) {
+               return;
+       }
 
        initialize_scc_state(state, &scc);
 
@@ -14313,7 +16182,7 @@ static void scc_transform(struct compile_state *state)
                        struct block *block;
                        struct triple *ptr;
                        struct flow_block *fblock;
-                       int time;
+                       int reps;
                        int done;
                        if (fedge->executable) {
                                continue;
@@ -14327,17 +16196,18 @@ static void scc_transform(struct compile_state *state)
                        fedge->executable = 1;
                        fblock = fedge->dst;
                        block = fblock->block;
-                       time = 0;
+                       reps = 0;
                        for(fptr = fblock->in; fptr; fptr = fptr->in_next) {
                                if (fptr->executable) {
-                                       time++;
+                                       reps++;
                                }
                        }
-#if DEBUG_SCC
-                       fprintf(stderr, "vertex: %d time: %d\n", 
-                               block->vertex, time);
                        
-#endif
+                       if (state->compiler->debug & DEBUG_SCC_TRANSFORM) {
+                               fprintf(stderr, "vertex: %d reps: %d\n", 
+                                       block->vertex, reps);
+                       }
+
                        done = 0;
                        for(ptr = block->first; !done; ptr = ptr->next) {
                                struct lattice_node *lnode;
@@ -14346,12 +16216,16 @@ static void scc_transform(struct compile_state *state)
                                if (ptr->op == OP_PHI) {
                                        scc_visit_phi(state, &scc, lnode);
                                }
-                               else if (time == 1) {
+                               else if (reps == 1) {
                                        scc_visit_expr(state, &scc, lnode);
                                }
                        }
-                       if (fblock->out && !fblock->out->out_next) {
-                               scc_add_fedge(state, &scc, fblock->out);
+                       /* Add unconditional branch edges */
+                       if (!triple_is_cond_branch(state, fblock->block->last)) {
+                               struct flow_edge *out;
+                               for(out = fblock->out; out; out = out->out_next) {
+                                       scc_add_fedge(state, &scc, out);
+                               }
                        }
                }
                while((sedge = scc_next_sedge(state, &scc))) {
@@ -14359,12 +16233,14 @@ static void scc_transform(struct compile_state *state)
                        struct flow_block *fblock;
                        lnode = sedge->dst;
                        fblock = lnode->fblock;
-#if DEBUG_SCC
-                       fprintf(stderr, "sedge: %5d (%5d -> %5d)\n",
-                               sedge - scc.ssa_edges,
-                               sedge->src->def->id,
-                               sedge->dst->def->id);
-#endif
+
+                       if (state->compiler->debug & DEBUG_SCC_TRANSFORM) {
+                               /* pointer difference is ptrdiff_t; %d wants int */
+                               fprintf(stderr, "sedge: %5d (%5d -> %5d)\n",
+                                       (int)(sedge - scc.ssa_edges),
+                                       sedge->src->def->id, sedge->dst->def->id);
+                       }
+
                        if (lnode->def->op == OP_PHI) {
                                scc_visit_phi(state, &scc, lnode);
                        }
@@ -14383,17 +16259,22 @@ static void scc_transform(struct compile_state *state)
        
        scc_writeback_values(state, &scc);
        free_scc_state(state, &scc);
+       rebuild_ssa_form(state);
+       
+       print_blocks(state, __func__, stdout);
 }
 
 
 static void transform_to_arch_instructions(struct compile_state *state)
 {
        struct triple *ins, *first;
-       first = RHS(state->main_function, 0);
+       first = state->first;
        ins = first;
        do {
                ins = transform_to_arch_instruction(state, ins);
        } while(ins != first);
+       
+       print_blocks(state, __func__, stdout);
 }
 
 #if DEBUG_CONSISTENCY
@@ -14401,7 +16282,7 @@ static void verify_uses(struct compile_state *state)
 {
        struct triple *first, *ins;
        struct triple_set *set;
-       first = RHS(state->main_function, 0);
+       first = state->first;
        ins = first;
        do {
                struct triple **expr;
@@ -14435,22 +16316,113 @@ static void verify_uses(struct compile_state *state)
        } while(ins != first);
        
 }
+/* Walk the instruction list from state->first back around to it and
+ * call valid_ins() on each entry; any instruction for which
+ * triple_stores_block() is true must also have a non-null u.block, else
+ * an internal error is reported.  Returns at once when
+ * state->first_block is not set. */
+static void verify_blocks_present(struct compile_state *state)
+{
+       struct triple *first, *ins;
+       if (!state->first_block) {
+               return;
+       }
+       first = state->first;
+       ins = first;
+       do {
+               valid_ins(state, ins);
+               if (triple_stores_block(state, ins) && !ins->u.block) {
+                       internal_error(state, ins,
+                               "%p not in a block?\n", (void *)ins); /* %p takes void * */
+               }
+               ins = ins->next;
+       } while(ins != first);
+}
+
+/* Return 1 when block->edges holds an entry whose member is the block
+ * that block_of_triple() reports for `edge`, and 0 when no entry on
+ * the list matches. */
+static int edge_present(struct compile_state *state, struct block *block, struct triple *edge)
+{
+       struct block *want = block_of_triple(state, edge);
+       struct block_set *cur = block->edges;
+       while(cur && (cur->member != want)) {
+               cur = cur->next;
+       }
+       return cur != 0;
+}
+
 static void verify_blocks(struct compile_state *state)
 {
        struct triple *ins;
        struct block *block;
+       int blocks;
        block = state->first_block;
        if (!block) {
                return;
        }
+       blocks = 0;
        do {
+               int users;
+               struct block_set *user, *edge;
+               blocks++;
                for(ins = block->first; ins != block->last->next; ins = ins->next) {
-                       if (!triple_stores_block(state, ins)) {
+                       if (triple_stores_block(state, ins) && (ins->u.block != block)) {
+                               internal_error(state, ins, "inconsitent block specified");
+                       }
+                       valid_ins(state, ins);
+               }
+               users = 0;
+               for(user = block->use; user; user = user->next) {
+                       users++;
+                       if (!user->member->first) {
+                               internal_error(state, block->first, "user is empty");
+                       }
+                       if ((block == state->last_block) &&
+                               (user->member == state->first_block)) {
                                continue;
                        }
-                       if (ins->u.block != block) {
-                               internal_error(state, ins, "inconsitent block specified");
+                       for(edge = user->member->edges; edge; edge = edge->next) {
+                               if (edge->member == block) {
+                                       break;
+                               }
+                       }
+                       if (!edge) {
+                               internal_error(state, user->member->first,
+                                       "user does not use block");
+                       }
+               }
+               if (triple_is_branch(state, block->last)) {
+                       struct triple **expr;
+                       expr = triple_targ(state, block->last, 0);
+                       for(;expr; expr = triple_targ(state, block->last, expr)) {
+                               if (*expr && !edge_present(state, block, *expr)) {
+                                       internal_error(state, block->last, "no edge to targ");
+                               }
+                       }
+               }
+               if (!triple_is_uncond_branch(state, block->last) &&
+                       (block != state->last_block) &&
+                       !edge_present(state, block, block->last->next)) {
+                       internal_error(state, block->last, "no edge to block->last->next");
+               }
+               for(edge = block->edges; edge; edge = edge->next) {
+                       for(user = edge->member->use; user; user = user->next) {
+                               if (user->member == block) {
+                                       break;
+                               }
+                       }
+                       if (!user || user->member != block) {
+                               internal_error(state, block->first,
+                                       "block does not use edge");
                        }
+                       if (!edge->member->first) {
+                               internal_error(state, block->first, "edge block is empty");
+                       }
+               }
+               if (block->users != users) {
+                       internal_error(state, block->first, 
+                               "computed users %d != stored users %d\n",
+                               users, block->users);
                }
                if (!triple_stores_block(state, block->last->next)) {
                        internal_error(state, block->last->next, 
@@ -14462,6 +16434,10 @@ static void verify_blocks(struct compile_state *state)
                                "bad next block");
                }
        } while(block != state->first_block);
+       if (blocks != state->last_vertex) { /* walk count vs. recorded count */
+               internal_error(state, 0, "computed blocks %d != stored blocks %d\n",
+                       blocks, state->last_vertex);
+       }
 }
 
 static void verify_domination(struct compile_state *state)
@@ -14472,25 +16448,68 @@ static void verify_domination(struct compile_state *state)
                return;
        }
        
-       first = RHS(state->main_function, 0);
+       first = state->first;
        ins = first;
        do {
                for(set = ins->use; set; set = set->next) {
-                       struct triple **expr;
-                       if (set->member->op == OP_PHI) {
-                               continue;
-                       }
-                       /* See if the use is on the righ hand side */
-                       expr = triple_rhs(state, set->member, 0);
-                       for(; expr ; expr = triple_rhs(state, set->member, expr)) {
-                               if (*expr == ins) {
+                       struct triple **slot;
+                       struct triple *use_point;
+                       int i, zrhs;
+                       use_point = 0;
+                       zrhs = TRIPLE_RHS(set->member->sizes);
+                       slot = &RHS(set->member, 0);
+                       /* See if the use is on the right hand side */
+                       for(i = 0; i < zrhs; i++) {
+                               if (slot[i] == ins) {
                                        break;
                                }
                        }
-                       if (expr &&
-                               !tdominates(state, ins, set->member)) {
-                               internal_error(state, set->member, 
-                                       "non dominated rhs use?");
+                       if (i < zrhs) {
+                               use_point = set->member;
+                               if (set->member->op == OP_PHI) {
+                                       struct block_set *bset;
+                                       int edge;
+                                       bset = set->member->u.block->use;
+                                       for(edge = 0; bset && (edge < i); edge++) {
+                                               bset = bset->next;
+                                       }
+                                       if (!bset) {
+                                               internal_error(state, set->member, 
+                                                       "no edge for phi rhs %d\n", i);
+                                       }
+                                       use_point = bset->member->last;
+                               }
+                       }
+                       if (use_point &&
+                               !tdominates(state, ins, use_point)) {
+                               internal_error(state, use_point, 
+                                       "non dominated rhs use point?");
+                       }
+               }
+               ins = ins->next;
+       } while(ins != first);
+}
+
+static void verify_rhs(struct compile_state *state)
+{
+       struct triple *first, *ins;
+       first = state->first;
+       ins = first;
+       do {
+               struct triple **slot;
+               int zrhs, i;
+               zrhs = TRIPLE_RHS(ins->sizes);
+               slot = &RHS(ins, 0);
+               for(i = 0; i < zrhs; i++) {
+                       if (slot[i] == 0) {
+                               internal_error(state, ins,
+                                       "missing rhs %d on %s",
+                                       i, tops(ins->op));
+                       }
+                       if ((ins->op != OP_PHI) && (slot[i] == ins)) {
+                               internal_error(state, ins,
+                                       "ins == rhs[%d] on %s",
+                                       i, tops(ins->op));
                        }
                }
                ins = ins->next;
@@ -14500,15 +16519,12 @@ static void verify_domination(struct compile_state *state)
 static void verify_piece(struct compile_state *state)
 {
        struct triple *first, *ins;
-       first = RHS(state->main_function, 0);
+       first = state->first;
        ins = first;
        do {
                struct triple *ptr;
                int lhs, i;
                lhs = TRIPLE_LHS(ins->sizes);
-               if ((ins->op == OP_WRITE) || (ins->op == OP_STORE)) {
-                       lhs = 0;
-               }
                for(ptr = ins->next, i = 0; i < lhs; i++, ptr = ptr->next) {
                        if (ptr != LHS(ins, i)) {
                                internal_error(state, ins, "malformed lhs on %s",
@@ -14526,11 +16542,12 @@ static void verify_piece(struct compile_state *state)
                ins = ins->next;
        } while(ins != first);
 }
+
 static void verify_ins_colors(struct compile_state *state)
 {
        struct triple *first, *ins;
        
-       first = RHS(state->main_function, 0);
+       first = state->first;
        ins = first;
        do {
                ins = ins->next;
@@ -14539,53 +16556,51 @@ static void verify_ins_colors(struct compile_state *state)
 static void verify_consistency(struct compile_state *state)
 {
        verify_uses(state);
+       verify_blocks_present(state);
        verify_blocks(state);
        verify_domination(state);
+       verify_rhs(state);
        verify_piece(state);
        verify_ins_colors(state);
 }
 #else 
-#define verify_consistency(state) do {} while(0)
-#endif /* DEBUG_USES */
+static void verify_consistency(struct compile_state *state) {}
+#endif /* DEBUG_CONSISTENCY */
 
 static void optimize(struct compile_state *state)
 {
-       if (state->debug & DEBUG_TRIPLES) {
-               print_triples(state);
-       }
+       /* Dump what the instruction graph initially looks like */
+       print_triples(state);
+
        /* Replace structures with simpler data types */
        flatten_structures(state);
-       if (state->debug & DEBUG_TRIPLES) {
-               print_triples(state);
-       }
+       print_triples(state);
+
        verify_consistency(state);
        /* Analize the intermediate code */
-       setup_basic_blocks(state);
-       analyze_idominators(state);
-       analyze_ipdominators(state);
-       /* Transform the code to ssa form */
+       analyze_basic_blocks(state);
+
+       /* Transform the code to ssa form. */
+       /*
+        * The transformation to ssa form puts a phi function
+        * on each edge of a dominance frontier where that
+        * phi function might be needed.  At -O2 if we don't
+        * eliminate the excess phi functions we can get an
+        * exponential code size growth.  So I kill the extra
+        * phi functions early and I kill them often.
+        */
        transform_to_ssa_form(state);
        verify_consistency(state);
-       if (state->debug & DEBUG_CODE_ELIMINATION) {
-               fprintf(stdout, "After transform_to_ssa_form\n");
-               print_blocks(state, stdout);
-       }
+
+       /* Remove dead code */
+       eliminate_inefectual_code(state);
+       verify_consistency(state);
+
        /* Do strength reduction and simple constant optimizations */
-       if (state->optimize >= 1) {
-               simplify_all(state);
-       }
+       simplify_all(state);
        verify_consistency(state);
        /* Propogate constants throughout the code */
-       if (state->optimize >= 2) {
-#warning "FIXME fix scc_transform"
-               scc_transform(state);
-               transform_from_ssa_form(state);
-               free_basic_blocks(state);
-               setup_basic_blocks(state);
-               analyze_idominators(state);
-               analyze_ipdominators(state);
-               transform_to_ssa_form(state);
-       }
+       scc_transform(state);
        verify_consistency(state);
 #warning "WISHLIST implement single use constants (least possible register pressure)"
 #warning "WISHLIST implement induction variable elimination"
@@ -14594,42 +16609,21 @@ static void optimize(struct compile_state *state)
         */
        transform_to_arch_instructions(state);
        verify_consistency(state);
-       if (state->debug & DEBUG_ARCH_CODE) {
-               printf("After transform_to_arch_instructions\n");
-               print_blocks(state, stdout);
-               print_control_flow(state);
-       }
+
+       /* Remove dead code */
        eliminate_inefectual_code(state);
        verify_consistency(state);
-       if (state->debug & DEBUG_CODE_ELIMINATION) {
-               printf("After eliminate_inefectual_code\n");
-               print_blocks(state, stdout);
-               print_control_flow(state);
-       }
-       verify_consistency(state);
+
        /* Color all of the variables to see if they will fit in registers */
        insert_copies_to_phi(state);
-       if (state->debug & DEBUG_INSERTED_COPIES) {
-               printf("After insert_copies_to_phi\n");
-               print_blocks(state, stdout);
-               print_control_flow(state);
-       }
        verify_consistency(state);
+
        insert_mandatory_copies(state);
-       if (state->debug & DEBUG_INSERTED_COPIES) {
-               printf("After insert_mandatory_copies\n");
-               print_blocks(state, stdout);
-               print_control_flow(state);
-       }
        verify_consistency(state);
+
        allocate_registers(state);
        verify_consistency(state);
-       if (state->debug & DEBUG_INTERMEDIATE_CODE) {
-               print_blocks(state, stdout);
-       }
-       if (state->debug & DEBUG_CONTROL_FLOW) {
-               print_control_flow(state);
-       }
+
        /* Remove the optimization information.
         * This is more to check for memory consistency than to free memory.
         */
@@ -14689,48 +16683,46 @@ static void print_op_asm(struct compile_state *state,
  */
 #define X86_4_8BIT_GPRS 1
 
-/* Recognized x86 cpu variants */
-#define BAD_CPU      0
-#define CPU_I386     1
-#define CPU_P3       2
-#define CPU_P4       3
-#define CPU_K7       4
-#define CPU_K8       5
-
-#define CPU_DEFAULT  CPU_I386
+/* x86 features */
+#define X86_MMX_REGS (1<<0)
+#define X86_XMM_REGS (1<<1)
 
 /* The x86 register classes */
-#define REGC_FLAGS    0
-#define REGC_GPR8     1
-#define REGC_GPR16    2
-#define REGC_GPR32    3
-#define REGC_GPR64    4
-#define REGC_MMX      5
-#define REGC_XMM      6
-#define REGC_GPR32_8  7
-#define REGC_GPR16_8  8
-#define REGC_IMM32    9
-#define REGC_IMM16   10
-#define REGC_IMM8    11
+#define REGC_FLAGS       0
+#define REGC_GPR8        1
+#define REGC_GPR16       2
+#define REGC_GPR32       3
+#define REGC_DIVIDEND64  4
+#define REGC_DIVIDEND32  5
+#define REGC_MMX         6
+#define REGC_XMM         7
+#define REGC_GPR32_8     8
+#define REGC_GPR16_8     9
+#define REGC_GPR8_LO    10
+#define REGC_IMM32      11
+#define REGC_IMM16      12
+#define REGC_IMM8       13
 #define LAST_REGC  REGC_IMM8
 #if LAST_REGC >= MAX_REGC
 #error "MAX_REGC is to low"
 #endif
 
 /* Register class masks */
-#define REGCM_FLAGS   (1 << REGC_FLAGS)
-#define REGCM_GPR8    (1 << REGC_GPR8)
-#define REGCM_GPR16   (1 << REGC_GPR16)
-#define REGCM_GPR32   (1 << REGC_GPR32)
-#define REGCM_GPR64   (1 << REGC_GPR64)
-#define REGCM_MMX     (1 << REGC_MMX)
-#define REGCM_XMM     (1 << REGC_XMM)
-#define REGCM_GPR32_8 (1 << REGC_GPR32_8)
-#define REGCM_GPR16_8 (1 << REGC_GPR16_8)
-#define REGCM_IMM32   (1 << REGC_IMM32)
-#define REGCM_IMM16   (1 << REGC_IMM16)
-#define REGCM_IMM8    (1 << REGC_IMM8)
-#define REGCM_ALL     ((1 << (LAST_REGC + 1)) - 1)
+#define REGCM_FLAGS      (1 << REGC_FLAGS)
+#define REGCM_GPR8       (1 << REGC_GPR8)
+#define REGCM_GPR16      (1 << REGC_GPR16)
+#define REGCM_GPR32      (1 << REGC_GPR32)
+#define REGCM_DIVIDEND64 (1 << REGC_DIVIDEND64)
+#define REGCM_DIVIDEND32 (1 << REGC_DIVIDEND32)
+#define REGCM_MMX        (1 << REGC_MMX)
+#define REGCM_XMM        (1 << REGC_XMM)
+#define REGCM_GPR32_8    (1 << REGC_GPR32_8)
+#define REGCM_GPR16_8    (1 << REGC_GPR16_8)
+#define REGCM_GPR8_LO    (1 << REGC_GPR8_LO)
+#define REGCM_IMM32      (1 << REGC_IMM32)
+#define REGCM_IMM16      (1 << REGC_IMM16)
+#define REGCM_IMM8       (1 << REGC_IMM8)
+#define REGCM_ALL        ((1 << (LAST_REGC + 1)) - 1)
 
 /* The x86 registers */
 #define REG_EFLAGS  2
@@ -14744,12 +16736,10 @@ static void print_op_asm(struct compile_state *state,
 #define REG_BH      8
 #define REG_CH      9
 #define REG_DH      10
+#define REGC_GPR8_LO_FIRST REG_AL
+#define REGC_GPR8_LO_LAST  REG_DL
 #define REGC_GPR8_FIRST  REG_AL
-#if X86_4_8BIT_GPRS
-#define REGC_GPR8_LAST   REG_DL
-#else 
 #define REGC_GPR8_LAST   REG_DH
-#endif
 #define REG_AX     11
 #define REG_BX     12
 #define REG_CX     13
@@ -14771,26 +16761,29 @@ static void print_op_asm(struct compile_state *state,
 #define REGC_GPR32_FIRST REG_EAX
 #define REGC_GPR32_LAST  REG_ESP
 #define REG_EDXEAX 27
-#define REGC_GPR64_FIRST REG_EDXEAX
-#define REGC_GPR64_LAST  REG_EDXEAX
-#define REG_MMX0   28
-#define REG_MMX1   29
-#define REG_MMX2   30
-#define REG_MMX3   31
-#define REG_MMX4   32
-#define REG_MMX5   33
-#define REG_MMX6   34
-#define REG_MMX7   35
+#define REGC_DIVIDEND64_FIRST REG_EDXEAX
+#define REGC_DIVIDEND64_LAST  REG_EDXEAX
+#define REG_DXAX   28
+#define REGC_DIVIDEND32_FIRST REG_DXAX
+#define REGC_DIVIDEND32_LAST  REG_DXAX
+#define REG_MMX0   29
+#define REG_MMX1   30
+#define REG_MMX2   31
+#define REG_MMX3   32
+#define REG_MMX4   33
+#define REG_MMX5   34
+#define REG_MMX6   35
+#define REG_MMX7   36
 #define REGC_MMX_FIRST REG_MMX0
 #define REGC_MMX_LAST  REG_MMX7
-#define REG_XMM0   36
-#define REG_XMM1   37
-#define REG_XMM2   38
-#define REG_XMM3   39
-#define REG_XMM4   40
-#define REG_XMM5   41
-#define REG_XMM6   42
-#define REG_XMM7   43
+#define REG_XMM0   37
+#define REG_XMM1   38
+#define REG_XMM2   39
+#define REG_XMM3   40
+#define REG_XMM4   41
+#define REG_XMM5   42
+#define REG_XMM6   43
+#define REG_XMM7   44
 #define REGC_XMM_FIRST REG_XMM0
 #define REGC_XMM_LAST  REG_XMM7
 #warning "WISHLIST figure out how to use pinsrw and pextrw to better use extended regs"
@@ -14814,57 +16807,82 @@ static void print_op_asm(struct compile_state *state,
 
 
 static unsigned regc_size[LAST_REGC +1] = {
-       [REGC_FLAGS]   = REGC_FLAGS_LAST   - REGC_FLAGS_FIRST + 1,
-       [REGC_GPR8]    = REGC_GPR8_LAST    - REGC_GPR8_FIRST + 1,
-       [REGC_GPR16]   = REGC_GPR16_LAST   - REGC_GPR16_FIRST + 1,
-       [REGC_GPR32]   = REGC_GPR32_LAST   - REGC_GPR32_FIRST + 1,
-       [REGC_GPR64]   = REGC_GPR64_LAST   - REGC_GPR64_FIRST + 1,
-       [REGC_MMX]     = REGC_MMX_LAST     - REGC_MMX_FIRST + 1,
-       [REGC_XMM]     = REGC_XMM_LAST     - REGC_XMM_FIRST + 1,
-       [REGC_GPR32_8] = REGC_GPR32_8_LAST - REGC_GPR32_8_FIRST + 1,
-       [REGC_GPR16_8] = REGC_GPR16_8_LAST - REGC_GPR16_8_FIRST + 1,
-       [REGC_IMM32]   = 0,
-       [REGC_IMM16]   = 0,
-       [REGC_IMM8]    = 0,
+       [REGC_FLAGS]      = REGC_FLAGS_LAST      - REGC_FLAGS_FIRST + 1,
+       [REGC_GPR8]       = REGC_GPR8_LAST       - REGC_GPR8_FIRST + 1,
+       [REGC_GPR16]      = REGC_GPR16_LAST      - REGC_GPR16_FIRST + 1,
+       [REGC_GPR32]      = REGC_GPR32_LAST      - REGC_GPR32_FIRST + 1,
+       [REGC_DIVIDEND64] = REGC_DIVIDEND64_LAST - REGC_DIVIDEND64_FIRST + 1,
+       [REGC_DIVIDEND32] = REGC_DIVIDEND32_LAST - REGC_DIVIDEND32_FIRST + 1,
+       [REGC_MMX]        = REGC_MMX_LAST        - REGC_MMX_FIRST + 1,
+       [REGC_XMM]        = REGC_XMM_LAST        - REGC_XMM_FIRST + 1,
+       [REGC_GPR32_8]    = REGC_GPR32_8_LAST    - REGC_GPR32_8_FIRST + 1,
+       [REGC_GPR16_8]    = REGC_GPR16_8_LAST    - REGC_GPR16_8_FIRST + 1,
+       [REGC_GPR8_LO]    = REGC_GPR8_LO_LAST    - REGC_GPR8_LO_FIRST + 1,
+       [REGC_IMM32]      = 0,
+       [REGC_IMM16]      = 0,
+       [REGC_IMM8]       = 0,
 };
 
 static const struct {
        int first, last;
 } regcm_bound[LAST_REGC + 1] = {
-       [REGC_FLAGS]   = { REGC_FLAGS_FIRST,   REGC_FLAGS_LAST },
-       [REGC_GPR8]    = { REGC_GPR8_FIRST,    REGC_GPR8_LAST },
-       [REGC_GPR16]   = { REGC_GPR16_FIRST,   REGC_GPR16_LAST },
-       [REGC_GPR32]   = { REGC_GPR32_FIRST,   REGC_GPR32_LAST },
-       [REGC_GPR64]   = { REGC_GPR64_FIRST,   REGC_GPR64_LAST },
-       [REGC_MMX]     = { REGC_MMX_FIRST,     REGC_MMX_LAST },
-       [REGC_XMM]     = { REGC_XMM_FIRST,     REGC_XMM_LAST },
-       [REGC_GPR32_8] = { REGC_GPR32_8_FIRST, REGC_GPR32_8_LAST },
-       [REGC_GPR16_8] = { REGC_GPR16_8_FIRST, REGC_GPR16_8_LAST },
-       [REGC_IMM32]   = { REGC_IMM32_FIRST,   REGC_IMM32_LAST },
-       [REGC_IMM16]   = { REGC_IMM16_FIRST,   REGC_IMM16_LAST },
-       [REGC_IMM8]    = { REGC_IMM8_FIRST,    REGC_IMM8_LAST },
+       [REGC_FLAGS]      = { REGC_FLAGS_FIRST,      REGC_FLAGS_LAST },
+       [REGC_GPR8]       = { REGC_GPR8_FIRST,       REGC_GPR8_LAST },
+       [REGC_GPR16]      = { REGC_GPR16_FIRST,      REGC_GPR16_LAST },
+       [REGC_GPR32]      = { REGC_GPR32_FIRST,      REGC_GPR32_LAST },
+       [REGC_DIVIDEND64] = { REGC_DIVIDEND64_FIRST, REGC_DIVIDEND64_LAST },
+       [REGC_DIVIDEND32] = { REGC_DIVIDEND32_FIRST, REGC_DIVIDEND32_LAST },
+       [REGC_MMX]        = { REGC_MMX_FIRST,        REGC_MMX_LAST },
+       [REGC_XMM]        = { REGC_XMM_FIRST,        REGC_XMM_LAST },
+       [REGC_GPR32_8]    = { REGC_GPR32_8_FIRST,    REGC_GPR32_8_LAST },
+       [REGC_GPR16_8]    = { REGC_GPR16_8_FIRST,    REGC_GPR16_8_LAST },
+       [REGC_GPR8_LO]    = { REGC_GPR8_LO_FIRST,    REGC_GPR8_LO_LAST },
+       [REGC_IMM32]      = { REGC_IMM32_FIRST,      REGC_IMM32_LAST },
+       [REGC_IMM16]      = { REGC_IMM16_FIRST,      REGC_IMM16_LAST },
+       [REGC_IMM8]       = { REGC_IMM8_FIRST,       REGC_IMM8_LAST },
 };
 
-static int arch_encode_cpu(const char *cpu)
-{
-       struct cpu {
-               const char *name;
-               int cpu;
-       } cpus[] = {
-               { "i386", CPU_I386 },
-               { "p3",   CPU_P3 },
-               { "p4",   CPU_P4 },
-               { "k7",   CPU_K7 },
-               { "k8",   CPU_K8 },
-               {  0,     BAD_CPU }
+static void init_arch_state(struct arch_state *arch)
+{
+       memset(arch, 0, sizeof(*arch)); /* start from an all-zero arch state */
+       arch->features = 0; /* already zero after the memset; spells out that no X86_* feature bit is set initially */
+}
+
+static int arch_encode_flag(struct arch_state *arch, const char *flag)
+{
+       static const struct compiler_flag flags[] = { /* individual feature names and their X86_* bit */
+               { "mmx", X86_MMX_REGS },
+               { "sse", X86_XMM_REGS },
+               { 0,     0 },
        };
-       struct cpu *ptr;
-       for(ptr = cpus; ptr->name; ptr++) {
-               if (strcmp(ptr->name, cpu) == 0) {
-                       break;
-               }
+       static const struct compiler_flag cpus[] = { /* cpu names and the X86_* bit set listed for each */
+               { "i386", 0 },
+               { "p2",   X86_MMX_REGS },
+               { "p3",   X86_MMX_REGS | X86_XMM_REGS },
+               { "p4",   X86_MMX_REGS | X86_XMM_REGS },
+               { "k7",   X86_MMX_REGS },
+               { "k8",   X86_MMX_REGS | X86_XMM_REGS },
+               { "c3",   X86_MMX_REGS },
+               { "c3-2", X86_MMX_REGS | X86_XMM_REGS }, /* Nehemiah */
+               {  0,     0 }
+       };
+       int result;
+       int act; /* 0 when flag starts with "no-", otherwise 1 */
+
+       act = 1;
+       result = -1; /* placeholder; both branches below overwrite it */
+       if (strncmp(flag, "no-", 3) == 0) {
+               flag += 3; /* skip the "no-" prefix */
+               act = 0;
+       }
+       if (act && strncmp(flag, "cpu=", 4) == 0) { /* "cpu=" is only recognized without a "no-" prefix */
+               flag += 4; /* skip "cpu=" so only the cpu name is looked up */
+               result = set_flag(cpus, &arch->features, 1, flag); /* always passes 1 here, never act */
+       }
+       else {
+               result = set_flag(flags, &arch->features, act, flag); /* act is forwarded to set_flag unchanged */
        }
-       return ptr->cpu;
+       return result; /* whatever set_flag() returned */
 }
 
 static unsigned arch_regc_size(struct compile_state *state, int class)
@@ -14874,11 +16892,13 @@ static unsigned arch_regc_size(struct compile_state *state, int class)
        }
        return regc_size[class];
 }
+
 static int arch_regcm_intersect(unsigned regcm1, unsigned regcm2)
 {
        /* See if two register classes may have overlapping registers */
-       unsigned gpr_mask = REGCM_GPR8 | REGCM_GPR16_8 | REGCM_GPR16 |
-               REGCM_GPR32_8 | REGCM_GPR32 | REGCM_GPR64;
+       unsigned gpr_mask = REGCM_GPR8 | REGCM_GPR8_LO | REGCM_GPR16_8 | REGCM_GPR16 |
+               REGCM_GPR32_8 | REGCM_GPR32 | 
+               REGCM_DIVIDEND32 | REGCM_DIVIDEND64;
 
        /* Special case for the immediates */
        if ((regcm1 & (REGCM_IMM32 | REGCM_IMM16 | REGCM_IMM8)) &&
@@ -14905,6 +16925,7 @@ static void arch_reg_equivs(
 #endif
                *equiv++ = REG_AX;
                *equiv++ = REG_EAX;
+               *equiv++ = REG_DXAX;
                *equiv++ = REG_EDXEAX;
                break;
        case REG_AH:
@@ -14913,6 +16934,7 @@ static void arch_reg_equivs(
 #endif
                *equiv++ = REG_AX;
                *equiv++ = REG_EAX;
+               *equiv++ = REG_DXAX;
                *equiv++ = REG_EDXEAX;
                break;
        case REG_BL:  
@@ -14951,6 +16973,7 @@ static void arch_reg_equivs(
 #endif
                *equiv++ = REG_DX;
                *equiv++ = REG_EDX;
+               *equiv++ = REG_DXAX;
                *equiv++ = REG_EDXEAX;
                break;
        case REG_DH:
@@ -14959,12 +16982,14 @@ static void arch_reg_equivs(
 #endif
                *equiv++ = REG_DX;
                *equiv++ = REG_EDX;
+               *equiv++ = REG_DXAX;
                *equiv++ = REG_EDXEAX;
                break;
        case REG_AX:
                *equiv++ = REG_AL;
                *equiv++ = REG_AH;
                *equiv++ = REG_EAX;
+               *equiv++ = REG_DXAX;
                *equiv++ = REG_EDXEAX;
                break;
        case REG_BX:
@@ -14981,6 +17006,7 @@ static void arch_reg_equivs(
                *equiv++ = REG_DL;
                *equiv++ = REG_DH;
                *equiv++ = REG_EDX;
+               *equiv++ = REG_DXAX;
                *equiv++ = REG_EDXEAX;
                break;
        case REG_SI:  
@@ -14999,6 +17025,7 @@ static void arch_reg_equivs(
                *equiv++ = REG_AL;
                *equiv++ = REG_AH;
                *equiv++ = REG_AX;
+               *equiv++ = REG_DXAX;
                *equiv++ = REG_EDXEAX;
                break;
        case REG_EBX:
@@ -15015,6 +17042,7 @@ static void arch_reg_equivs(
                *equiv++ = REG_DL;
                *equiv++ = REG_DH;
                *equiv++ = REG_DX;
+               *equiv++ = REG_DXAX;
                *equiv++ = REG_EDXEAX;
                break;
        case REG_ESI: 
@@ -15029,6 +17057,17 @@ static void arch_reg_equivs(
        case REG_ESP: 
                *equiv++ = REG_SP;
                break;
+       case REG_DXAX: 
+               *equiv++ = REG_AL;
+               *equiv++ = REG_AH;
+               *equiv++ = REG_DL;
+               *equiv++ = REG_DH;
+               *equiv++ = REG_AX;
+               *equiv++ = REG_DX;
+               *equiv++ = REG_EAX;
+               *equiv++ = REG_EDX;
+               *equiv++ = REG_EDXEAX;
+               break;
        case REG_EDXEAX: 
                *equiv++ = REG_AL;
                *equiv++ = REG_AH;
@@ -15038,6 +17077,7 @@ static void arch_reg_equivs(
                *equiv++ = REG_DX;
                *equiv++ = REG_EAX;
                *equiv++ = REG_EDX;
+               *equiv++ = REG_DXAX;
                break;
        }
        *equiv++ = REG_UNSET; 
@@ -15046,25 +17086,17 @@ static void arch_reg_equivs(
 static unsigned arch_avail_mask(struct compile_state *state)
 {
        unsigned avail_mask;
-       avail_mask = REGCM_GPR8 | REGCM_GPR16_8 | REGCM_GPR16 | 
-               REGCM_GPR32 | REGCM_GPR32_8 | REGCM_GPR64 |
+       /* REGCM_GPR8 is not available; REGCM_GPR8_LO is listed in its place */
+       avail_mask = REGCM_GPR8_LO | REGCM_GPR16_8 | REGCM_GPR16 | 
+               REGCM_GPR32 | REGCM_GPR32_8 | 
+               REGCM_DIVIDEND32 | REGCM_DIVIDEND64 |
                REGCM_IMM32 | REGCM_IMM16 | REGCM_IMM8 | REGCM_FLAGS;
-       switch(state->cpu) {
-       case CPU_P3:
-       case CPU_K7:
+       if (state->arch->features & X86_MMX_REGS) { /* MMX class only when the X86_MMX_REGS feature bit is set */
                avail_mask |= REGCM_MMX;
-               break;
-       case CPU_P4:
-       case CPU_K8:
-               avail_mask |= REGCM_MMX | REGCM_XMM;
-               break;
        }
-#if 0
-       /* Don't enable 8 bit values until I can force both operands
-        * to be 8bits simultaneously.
-        */
-       avail_mask &= ~(REGCM_GPR8 | REGCM_GPR16_8 | REGCM_GPR16);
-#endif
+       if (state->arch->features & X86_XMM_REGS) { /* likewise the XMM class and X86_XMM_REGS */
+               avail_mask |= REGCM_XMM;
+       }
        return avail_mask;
 }
 
@@ -15073,7 +17105,6 @@ static unsigned arch_regcm_normalize(struct compile_state *state, unsigned regcm
        unsigned mask, result;
        int class, class2;
        result = regcm;
-       result &= arch_avail_mask(state);
 
        for(class = 0, mask = 1; mask; mask <<= 1, class++) {
                if ((result & mask) == 0) {
@@ -15089,9 +17120,20 @@ static unsigned arch_regcm_normalize(struct compile_state *state, unsigned regcm
                        }
                }
        }
+       result &= arch_avail_mask(state);
        return result;
 }
 
+static unsigned arch_regcm_reg_normalize(struct compile_state *state, unsigned regcm)
+{
+       /* Normalize regcm with arch_regcm_normalize(), then return it with the immediate classes masked off */
+       regcm = arch_regcm_normalize(state, regcm);
+       /* Clear the REGCM_IMM32, REGCM_IMM16 and REGCM_IMM8 bits */
+       regcm &= ~(REGCM_IMM32 | REGCM_IMM16 | REGCM_IMM8);
+       return regcm;
+       
+}
+
 static unsigned arch_reg_regcm(struct compile_state *state, int reg)
 {
        unsigned mask;
@@ -15117,19 +17159,19 @@ static struct reg_info arch_reg_constraint(
                unsigned int mask;
                unsigned int reg;
        } constraints[] = {
-               { 'r', REGCM_GPR32, REG_UNSET },
-               { 'g', REGCM_GPR32, REG_UNSET },
-               { 'p', REGCM_GPR32, REG_UNSET },
-               { 'q', REGCM_GPR8 REG_UNSET },
+               { 'r', REGCM_GPR32,   REG_UNSET },
+               { 'g', REGCM_GPR32,   REG_UNSET },
+               { 'p', REGCM_GPR32,   REG_UNSET },
+               { 'q', REGCM_GPR8_LO, REG_UNSET },
                { 'Q', REGCM_GPR32_8, REG_UNSET },
-               { 'x', REGCM_XMM,   REG_UNSET },
-               { 'y', REGCM_MMX,   REG_UNSET },
-               { 'a', REGCM_GPR32, REG_EAX },
-               { 'b', REGCM_GPR32, REG_EBX },
-               { 'c', REGCM_GPR32, REG_ECX },
-               { 'd', REGCM_GPR32, REG_EDX },
-               { 'D', REGCM_GPR32, REG_EDI },
-               { 'S', REGCM_GPR32, REG_ESI },
+               { 'x', REGCM_XMM,     REG_UNSET },
+               { 'y', REGCM_MMX,     REG_UNSET },
+               { 'a', REGCM_GPR32,   REG_EAX },
+               { 'b', REGCM_GPR32,   REG_EBX },
+               { 'c', REGCM_GPR32,   REG_ECX },
+               { 'd', REGCM_GPR32,   REG_EDX },
+               { 'D', REGCM_GPR32,   REG_EDI },
+               { 'S', REGCM_GPR32,   REG_ESI },
                { '\0', 0, REG_UNSET },
        };
        unsigned int regcm;
@@ -15246,21 +17288,28 @@ static int do_select_reg(struct compile_state *state,
 static int arch_select_free_register(
        struct compile_state *state, char *used, int classes)
 {
-       /* Preference: flags, 8bit gprs, 32bit gprs, other 32bit reg
-        * other types of registers.
+       /* Live ranges with the most neighbors are colored first.
+        *
+        * Generally it does not matter which colors are given
+        * as the register allocator attempts to color live ranges
+        * in an order where you are guaranteed not to run out of colors.
+        *
+        * Occasionally the register allocator cannot find an order
+        * of register selection that will find a free color.  To
+        * increase the odds the register allocator will work when
+        * it guesses, first give out registers from register classes
+        * least likely to run out of registers.
+        * 
         */
        int i, reg;
        reg = REG_UNSET;
-       for(i = REGC_FLAGS_FIRST; (reg == REG_UNSET) && (i <= REGC_FLAGS_LAST); i++) {
-               reg = do_select_reg(state, used, i, classes);
-       }
-       for(i = REGC_GPR32_FIRST; (reg == REG_UNSET) && (i <= REGC_GPR32_LAST); i++) {
+       for(i = REGC_XMM_FIRST; (reg == REG_UNSET) && (i <= REGC_XMM_LAST); i++) {
                reg = do_select_reg(state, used, i, classes);
        }
        for(i = REGC_MMX_FIRST; (reg == REG_UNSET) && (i <= REGC_MMX_LAST); i++) {
                reg = do_select_reg(state, used, i, classes);
        }
-       for(i = REGC_XMM_FIRST; (reg == REG_UNSET) && (i <= REGC_XMM_LAST); i++) {
+       for(i = REGC_GPR32_LAST; (reg == REG_UNSET) && (i >= REGC_GPR32_FIRST); i--) {
                reg = do_select_reg(state, used, i, classes);
        }
        for(i = REGC_GPR16_FIRST; (reg == REG_UNSET) && (i <= REGC_GPR16_LAST); i++) {
@@ -15269,7 +17318,16 @@ static int arch_select_free_register(
        for(i = REGC_GPR8_FIRST; (reg == REG_UNSET) && (i <= REGC_GPR8_LAST); i++) {
                reg = do_select_reg(state, used, i, classes);
        }
-       for(i = REGC_GPR64_FIRST; (reg == REG_UNSET) && (i <= REGC_GPR64_LAST); i++) {
+       for(i = REGC_GPR8_LO_FIRST; (reg == REG_UNSET) && (i <= REGC_GPR8_LO_LAST); i++) {
+               reg = do_select_reg(state, used, i, classes);
+       }
+       for(i = REGC_DIVIDEND32_FIRST; (reg == REG_UNSET) && (i <= REGC_DIVIDEND32_LAST); i++) {
+               reg = do_select_reg(state, used, i, classes);
+       }
+       for(i = REGC_DIVIDEND64_FIRST; (reg == REG_UNSET) && (i <= REGC_DIVIDEND64_LAST); i++) {
+               reg = do_select_reg(state, used, i, classes);
+       }
+       for(i = REGC_FLAGS_FIRST; (reg == REG_UNSET) && (i <= REGC_FLAGS_LAST); i++) {
                reg = do_select_reg(state, used, i, classes);
        }
        return reg;
@@ -15279,10 +17337,8 @@ static int arch_select_free_register(
 static unsigned arch_type_to_regcm(struct compile_state *state, struct type *type) 
 {
 #warning "FIXME force types smaller (if legal) before I get here"
-       unsigned avail_mask;
        unsigned mask;
        mask = 0;
-       avail_mask = arch_avail_mask(state);
        switch(type->type & TYPE_MASK) {
        case TYPE_ARRAY:
        case TYPE_VOID: 
@@ -15290,10 +17346,10 @@ static unsigned arch_type_to_regcm(struct compile_state *state, struct type *typ
                break;
        case TYPE_CHAR:
        case TYPE_UCHAR:
-               mask = REGCM_GPR8 | 
+               mask = REGCM_GPR8 | REGCM_GPR8_LO |
                        REGCM_GPR16 | REGCM_GPR16_8 | 
                        REGCM_GPR32 | REGCM_GPR32_8 |
-                       REGCM_GPR64 |
+                       REGCM_DIVIDEND32 | REGCM_DIVIDEND64 |
                        REGCM_MMX | REGCM_XMM |
                        REGCM_IMM32 | REGCM_IMM16 | REGCM_IMM8;
                break;
@@ -15301,7 +17357,7 @@ static unsigned arch_type_to_regcm(struct compile_state *state, struct type *typ
        case TYPE_USHORT:
                mask =  REGCM_GPR16 | REGCM_GPR16_8 |
                        REGCM_GPR32 | REGCM_GPR32_8 |
-                       REGCM_GPR64 |
+                       REGCM_DIVIDEND32 | REGCM_DIVIDEND64 |
                        REGCM_MMX | REGCM_XMM |
                        REGCM_IMM32 | REGCM_IMM16;
                break;
@@ -15311,14 +17367,15 @@ static unsigned arch_type_to_regcm(struct compile_state *state, struct type *typ
        case TYPE_ULONG:
        case TYPE_POINTER:
                mask =  REGCM_GPR32 | REGCM_GPR32_8 |
-                       REGCM_GPR64 | REGCM_MMX | REGCM_XMM |
+                       REGCM_DIVIDEND32 | REGCM_DIVIDEND64 |
+                       REGCM_MMX | REGCM_XMM |
                        REGCM_IMM32;
                break;
        default:
                internal_error(state, 0, "no register class for type");
                break;
        }
-       mask &= avail_mask;
+       mask = arch_regcm_normalize(state, mask);
        return mask;
 }
 
@@ -15369,52 +17426,81 @@ static int get_imm8(struct triple *ins, struct triple **expr)
        return 1;
 }
 
-#define TEMPLATE_NOP         0
-#define TEMPLATE_INTCONST8   1
-#define TEMPLATE_INTCONST32  2
-#define TEMPLATE_COPY_REG    3
-#define TEMPLATE_COPY_IMM32  4
-#define TEMPLATE_COPY_IMM16  5
-#define TEMPLATE_COPY_IMM8   6
-#define TEMPLATE_PHI         7
-#define TEMPLATE_STORE8      8
-#define TEMPLATE_STORE16     9
-#define TEMPLATE_STORE32    10
-#define TEMPLATE_LOAD8      11
-#define TEMPLATE_LOAD16     12
-#define TEMPLATE_LOAD32     13
-#define TEMPLATE_BINARY_REG 14
-#define TEMPLATE_BINARY_IMM 15
-#define TEMPLATE_SL_CL      16
-#define TEMPLATE_SL_IMM     17
-#define TEMPLATE_UNARY      18
-#define TEMPLATE_CMP_REG    19
-#define TEMPLATE_CMP_IMM    20
-#define TEMPLATE_TEST       21
-#define TEMPLATE_SET        22
-#define TEMPLATE_JMP        23
-#define TEMPLATE_INB_DX     24
-#define TEMPLATE_INB_IMM    25
-#define TEMPLATE_INW_DX     26
-#define TEMPLATE_INW_IMM    27
-#define TEMPLATE_INL_DX     28
-#define TEMPLATE_INL_IMM    29
-#define TEMPLATE_OUTB_DX    30
-#define TEMPLATE_OUTB_IMM   31
-#define TEMPLATE_OUTW_DX    32
-#define TEMPLATE_OUTW_IMM   33
-#define TEMPLATE_OUTL_DX    34
-#define TEMPLATE_OUTL_IMM   35
-#define TEMPLATE_BSF        36
-#define TEMPLATE_RDMSR      37
-#define TEMPLATE_WRMSR      38
-#define LAST_TEMPLATE       TEMPLATE_WRMSR
+#define TEMPLATE_NOP           0
+#define TEMPLATE_INTCONST8     1
+#define TEMPLATE_INTCONST32    2
+#define TEMPLATE_COPY8_REG     3
+#define TEMPLATE_COPY16_REG    4
+#define TEMPLATE_COPY32_REG    5
+#define TEMPLATE_COPY_IMM8     6
+#define TEMPLATE_COPY_IMM16    7
+#define TEMPLATE_COPY_IMM32    8
+#define TEMPLATE_PHI8          9
+#define TEMPLATE_PHI16        10
+#define TEMPLATE_PHI32        11
+#define TEMPLATE_STORE8       12
+#define TEMPLATE_STORE16      13
+#define TEMPLATE_STORE32      14
+#define TEMPLATE_LOAD8        15
+#define TEMPLATE_LOAD16       16
+#define TEMPLATE_LOAD32       17
+#define TEMPLATE_BINARY8_REG  18
+#define TEMPLATE_BINARY16_REG 19
+#define TEMPLATE_BINARY32_REG 20
+#define TEMPLATE_BINARY8_IMM  21
+#define TEMPLATE_BINARY16_IMM 22
+#define TEMPLATE_BINARY32_IMM 23
+#define TEMPLATE_SL8_CL       24
+#define TEMPLATE_SL16_CL      25
+#define TEMPLATE_SL32_CL      26
+#define TEMPLATE_SL8_IMM      27
+#define TEMPLATE_SL16_IMM     28
+#define TEMPLATE_SL32_IMM     29
+#define TEMPLATE_UNARY8       30
+#define TEMPLATE_UNARY16      31
+#define TEMPLATE_UNARY32      32
+#define TEMPLATE_CMP8_REG     33
+#define TEMPLATE_CMP16_REG    34
+#define TEMPLATE_CMP32_REG    35
+#define TEMPLATE_CMP8_IMM     36
+#define TEMPLATE_CMP16_IMM    37
+#define TEMPLATE_CMP32_IMM    38
+#define TEMPLATE_TEST8        39
+#define TEMPLATE_TEST16       40
+#define TEMPLATE_TEST32       41
+#define TEMPLATE_SET          42
+#define TEMPLATE_JMP          43
+#define TEMPLATE_RET          44
+#define TEMPLATE_INB_DX       45
+#define TEMPLATE_INB_IMM      46
+#define TEMPLATE_INW_DX       47
+#define TEMPLATE_INW_IMM      48
+#define TEMPLATE_INL_DX       49
+#define TEMPLATE_INL_IMM      50
+#define TEMPLATE_OUTB_DX      51
+#define TEMPLATE_OUTB_IMM     52
+#define TEMPLATE_OUTW_DX      53
+#define TEMPLATE_OUTW_IMM     54
+#define TEMPLATE_OUTL_DX      55
+#define TEMPLATE_OUTL_IMM     56
+#define TEMPLATE_BSF          57
+#define TEMPLATE_RDMSR        58
+#define TEMPLATE_WRMSR        59
+#define TEMPLATE_UMUL8        60
+#define TEMPLATE_UMUL16       61
+#define TEMPLATE_UMUL32       62
+#define TEMPLATE_DIV8         63
+#define TEMPLATE_DIV16        64
+#define TEMPLATE_DIV32        65
+#define LAST_TEMPLATE       TEMPLATE_DIV32
 #if LAST_TEMPLATE >= MAX_TEMPLATES
 #error "MAX_TEMPLATES to low"
 #endif
 
-#define COPY_REGCM (REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8 | REGCM_MMX | REGCM_XMM)
-#define COPY32_REGCM (REGCM_GPR32 | REGCM_MMX | REGCM_XMM)
+#define COPY8_REGCM     (REGCM_DIVIDEND64 | REGCM_DIVIDEND32 | REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8_LO | REGCM_MMX | REGCM_XMM)
+#define COPY16_REGCM    (REGCM_DIVIDEND64 | REGCM_DIVIDEND32 | REGCM_GPR32 | REGCM_GPR16 | REGCM_MMX | REGCM_XMM)  
+#define COPY32_REGCM    (REGCM_DIVIDEND64 | REGCM_DIVIDEND32 | REGCM_GPR32 | REGCM_MMX | REGCM_XMM)
+
 
 static struct ins_template templates[] = {
        [TEMPLATE_NOP]      = {},
@@ -15424,56 +17510,110 @@ static struct ins_template templates[] = {
        [TEMPLATE_INTCONST32] = { 
                .lhs = { [0] = { REG_UNNEEDED, REGCM_IMM32 } },
        },
-       [TEMPLATE_COPY_REG] = {
-               .lhs = { [0] = { REG_UNSET, COPY_REGCM } },
-               .rhs = { [0] = { REG_UNSET, COPY_REGCM }  },
+       [TEMPLATE_COPY8_REG] = {
+               .lhs = { [0] = { REG_UNSET, COPY8_REGCM } },
+               .rhs = { [0] = { REG_UNSET, COPY8_REGCM }  },
        },
-       [TEMPLATE_COPY_IMM32] = {
-               .lhs = { [0] = { REG_UNSET, COPY32_REGCM } },
-               .rhs = { [0] = { REG_UNNEEDED, REGCM_IMM32 } },
+       [TEMPLATE_COPY16_REG] = {
+               .lhs = { [0] = { REG_UNSET, COPY16_REGCM } },
+               .rhs = { [0] = { REG_UNSET, COPY16_REGCM }  },
        },
-       [TEMPLATE_COPY_IMM16] = {
-               .lhs = { [0] = { REG_UNSET, COPY32_REGCM | REGCM_GPR16 } },
-               .rhs = { [0] = { REG_UNNEEDED, REGCM_IMM16 } },
+       [TEMPLATE_COPY32_REG] = {
+               .lhs = { [0] = { REG_UNSET, COPY32_REGCM } },
+               .rhs = { [0] = { REG_UNSET, COPY32_REGCM }  },
        },
        [TEMPLATE_COPY_IMM8] = {
-               .lhs = { [0] = { REG_UNSET, COPY_REGCM } },
+               .lhs = { [0] = { REG_UNSET, COPY8_REGCM } },
                .rhs = { [0] = { REG_UNNEEDED, REGCM_IMM8 } },
        },
-       [TEMPLATE_PHI] = { 
-               .lhs = { [0] = { REG_VIRT0, COPY_REGCM } },
+       [TEMPLATE_COPY_IMM16] = {
+               .lhs = { [0] = { REG_UNSET, COPY16_REGCM } },
+               .rhs = { [0] = { REG_UNNEEDED, REGCM_IMM16 | REGCM_IMM8 } },
+       },
+       [TEMPLATE_COPY_IMM32] = {
+               .lhs = { [0] = { REG_UNSET, COPY32_REGCM } },
+               .rhs = { [0] = { REG_UNNEEDED, REGCM_IMM32 | REGCM_IMM16 | REGCM_IMM8 } },
+       },
+       [TEMPLATE_PHI8] = { 
+               .lhs = { [0] = { REG_VIRT0, COPY8_REGCM } },
                .rhs = { 
-                       [ 0] = { REG_VIRT0, COPY_REGCM },
-                       [ 1] = { REG_VIRT0, COPY_REGCM },
-                       [ 2] = { REG_VIRT0, COPY_REGCM },
-                       [ 3] = { REG_VIRT0, COPY_REGCM },
-                       [ 4] = { REG_VIRT0, COPY_REGCM },
-                       [ 5] = { REG_VIRT0, COPY_REGCM },
-                       [ 6] = { REG_VIRT0, COPY_REGCM },
-                       [ 7] = { REG_VIRT0, COPY_REGCM },
-                       [ 8] = { REG_VIRT0, COPY_REGCM },
-                       [ 9] = { REG_VIRT0, COPY_REGCM },
-                       [10] = { REG_VIRT0, COPY_REGCM },
-                       [11] = { REG_VIRT0, COPY_REGCM },
-                       [12] = { REG_VIRT0, COPY_REGCM },
-                       [13] = { REG_VIRT0, COPY_REGCM },
-                       [14] = { REG_VIRT0, COPY_REGCM },
-                       [15] = { REG_VIRT0, COPY_REGCM },
+                       [ 0] = { REG_VIRT0, COPY8_REGCM },
+                       [ 1] = { REG_VIRT0, COPY8_REGCM },
+                       [ 2] = { REG_VIRT0, COPY8_REGCM },
+                       [ 3] = { REG_VIRT0, COPY8_REGCM },
+                       [ 4] = { REG_VIRT0, COPY8_REGCM },
+                       [ 5] = { REG_VIRT0, COPY8_REGCM },
+                       [ 6] = { REG_VIRT0, COPY8_REGCM },
+                       [ 7] = { REG_VIRT0, COPY8_REGCM },
+                       [ 8] = { REG_VIRT0, COPY8_REGCM },
+                       [ 9] = { REG_VIRT0, COPY8_REGCM },
+                       [10] = { REG_VIRT0, COPY8_REGCM },
+                       [11] = { REG_VIRT0, COPY8_REGCM },
+                       [12] = { REG_VIRT0, COPY8_REGCM },
+                       [13] = { REG_VIRT0, COPY8_REGCM },
+                       [14] = { REG_VIRT0, COPY8_REGCM },
+                       [15] = { REG_VIRT0, COPY8_REGCM },
+               }, },
+       [TEMPLATE_PHI16] = { 
+               .lhs = { [0] = { REG_VIRT0, COPY16_REGCM } },
+               .rhs = { 
+                       [ 0] = { REG_VIRT0, COPY16_REGCM },
+                       [ 1] = { REG_VIRT0, COPY16_REGCM },
+                       [ 2] = { REG_VIRT0, COPY16_REGCM },
+                       [ 3] = { REG_VIRT0, COPY16_REGCM },
+                       [ 4] = { REG_VIRT0, COPY16_REGCM },
+                       [ 5] = { REG_VIRT0, COPY16_REGCM },
+                       [ 6] = { REG_VIRT0, COPY16_REGCM },
+                       [ 7] = { REG_VIRT0, COPY16_REGCM },
+                       [ 8] = { REG_VIRT0, COPY16_REGCM },
+                       [ 9] = { REG_VIRT0, COPY16_REGCM },
+                       [10] = { REG_VIRT0, COPY16_REGCM },
+                       [11] = { REG_VIRT0, COPY16_REGCM },
+                       [12] = { REG_VIRT0, COPY16_REGCM },
+                       [13] = { REG_VIRT0, COPY16_REGCM },
+                       [14] = { REG_VIRT0, COPY16_REGCM },
+                       [15] = { REG_VIRT0, COPY16_REGCM },
+               }, },
+       [TEMPLATE_PHI32] = { 
+               .lhs = { [0] = { REG_VIRT0, COPY32_REGCM } },
+               .rhs = { 
+                       [ 0] = { REG_VIRT0, COPY32_REGCM },
+                       [ 1] = { REG_VIRT0, COPY32_REGCM },
+                       [ 2] = { REG_VIRT0, COPY32_REGCM },
+                       [ 3] = { REG_VIRT0, COPY32_REGCM },
+                       [ 4] = { REG_VIRT0, COPY32_REGCM },
+                       [ 5] = { REG_VIRT0, COPY32_REGCM },
+                       [ 6] = { REG_VIRT0, COPY32_REGCM },
+                       [ 7] = { REG_VIRT0, COPY32_REGCM },
+                       [ 8] = { REG_VIRT0, COPY32_REGCM },
+                       [ 9] = { REG_VIRT0, COPY32_REGCM },
+                       [10] = { REG_VIRT0, COPY32_REGCM },
+                       [11] = { REG_VIRT0, COPY32_REGCM },
+                       [12] = { REG_VIRT0, COPY32_REGCM },
+                       [13] = { REG_VIRT0, COPY32_REGCM },
+                       [14] = { REG_VIRT0, COPY32_REGCM },
+                       [15] = { REG_VIRT0, COPY32_REGCM },
                }, },
        [TEMPLATE_STORE8] = {
-               .lhs = { [0] = { REG_UNSET, REGCM_GPR32 } },
-               .rhs = { [0] = { REG_UNSET, REGCM_GPR8 } },
+               .rhs = { 
+                       [0] = { REG_UNSET, REGCM_GPR32 },
+                       [1] = { REG_UNSET, REGCM_GPR8_LO },
+               },
        },
        [TEMPLATE_STORE16] = {
-               .lhs = { [0] = { REG_UNSET, REGCM_GPR32 } },
-               .rhs = { [0] = { REG_UNSET, REGCM_GPR16 } },
+               .rhs = { 
+                       [0] = { REG_UNSET, REGCM_GPR32 },
+                       [1] = { REG_UNSET, REGCM_GPR16 },
+               },
        },
        [TEMPLATE_STORE32] = {
-               .lhs = { [0] = { REG_UNSET, REGCM_GPR32 } },
-               .rhs = { [0] = { REG_UNSET, REGCM_GPR32 } },
+               .rhs = { 
+                       [0] = { REG_UNSET, REGCM_GPR32 },
+                       [1] = { REG_UNSET, REGCM_GPR32 },
+               },
        },
        [TEMPLATE_LOAD8] = {
-               .lhs = { [0] = { REG_UNSET, REGCM_GPR8 } },
+               .lhs = { [0] = { REG_UNSET, REGCM_GPR8_LO } },
                .rhs = { [0] = { REG_UNSET, REGCM_GPR32 } },
        },
        [TEMPLATE_LOAD16] = {
@@ -15484,69 +17624,172 @@ static struct ins_template templates[] = {
                .lhs = { [0] = { REG_UNSET, REGCM_GPR32 } },
                .rhs = { [0] = { REG_UNSET, REGCM_GPR32 } },
        },
-       [TEMPLATE_BINARY_REG] = {
+       [TEMPLATE_BINARY8_REG] = {
+               .lhs = { [0] = { REG_VIRT0, REGCM_GPR8_LO } },
+               .rhs = { 
+                       [0] = { REG_VIRT0, REGCM_GPR8_LO },
+                       [1] = { REG_UNSET, REGCM_GPR8_LO },
+               },
+       },
+       [TEMPLATE_BINARY16_REG] = {
+               .lhs = { [0] = { REG_VIRT0, REGCM_GPR16 } },
+               .rhs = { 
+                       [0] = { REG_VIRT0, REGCM_GPR16 },
+                       [1] = { REG_UNSET, REGCM_GPR16 },
+               },
+       },
+       [TEMPLATE_BINARY32_REG] = {
                .lhs = { [0] = { REG_VIRT0, REGCM_GPR32 } },
                .rhs = { 
                        [0] = { REG_VIRT0, REGCM_GPR32 },
                        [1] = { REG_UNSET, REGCM_GPR32 },
                },
        },
-       [TEMPLATE_BINARY_IMM] = {
+       [TEMPLATE_BINARY8_IMM] = {
+               .lhs = { [0] = { REG_VIRT0, REGCM_GPR8_LO } },
+               .rhs = { 
+                       [0] = { REG_VIRT0,    REGCM_GPR8_LO },
+                       [1] = { REG_UNNEEDED, REGCM_IMM8 },
+               },
+       },
+       [TEMPLATE_BINARY16_IMM] = {
+               .lhs = { [0] = { REG_VIRT0, REGCM_GPR16 } },
+               .rhs = { 
+                       [0] = { REG_VIRT0,    REGCM_GPR16 },
+                       [1] = { REG_UNNEEDED, REGCM_IMM16 },
+               },
+       },
+       [TEMPLATE_BINARY32_IMM] = {
                .lhs = { [0] = { REG_VIRT0, REGCM_GPR32 } },
                .rhs = { 
                        [0] = { REG_VIRT0,    REGCM_GPR32 },
                        [1] = { REG_UNNEEDED, REGCM_IMM32 },
                },
        },
-       [TEMPLATE_SL_CL] = {
+       [TEMPLATE_SL8_CL] = {
+               .lhs = { [0] = { REG_VIRT0, REGCM_GPR8_LO } },
+               .rhs = { 
+                       [0] = { REG_VIRT0, REGCM_GPR8_LO },
+                       [1] = { REG_CL, REGCM_GPR8_LO },
+               },
+       },
+       [TEMPLATE_SL16_CL] = {
+               .lhs = { [0] = { REG_VIRT0, REGCM_GPR16 } },
+               .rhs = { 
+                       [0] = { REG_VIRT0, REGCM_GPR16 },
+                       [1] = { REG_CL, REGCM_GPR8_LO },
+               },
+       },
+       [TEMPLATE_SL32_CL] = {
                .lhs = { [0] = { REG_VIRT0, REGCM_GPR32 } },
                .rhs = { 
                        [0] = { REG_VIRT0, REGCM_GPR32 },
-                       [1] = { REG_CL, REGCM_GPR8 },
+                       [1] = { REG_CL, REGCM_GPR8_LO },
+               },
+       },
+       [TEMPLATE_SL8_IMM] = {
+               .lhs = { [0] = { REG_VIRT0, REGCM_GPR8_LO } },
+               .rhs = { 
+                       [0] = { REG_VIRT0,    REGCM_GPR8_LO },
+                       [1] = { REG_UNNEEDED, REGCM_IMM8 },
+               },
+       },
+       [TEMPLATE_SL16_IMM] = {
+               .lhs = { [0] = { REG_VIRT0, REGCM_GPR16 } },
+               .rhs = { 
+                       [0] = { REG_VIRT0,    REGCM_GPR16 },
+                       [1] = { REG_UNNEEDED, REGCM_IMM8 },
                },
        },
-       [TEMPLATE_SL_IMM] = {
+       [TEMPLATE_SL32_IMM] = {
                .lhs = { [0] = { REG_VIRT0, REGCM_GPR32 } },
                .rhs = { 
                        [0] = { REG_VIRT0,    REGCM_GPR32 },
                        [1] = { REG_UNNEEDED, REGCM_IMM8 },
                },
        },
-       [TEMPLATE_UNARY] = {
+       [TEMPLATE_UNARY8] = {
+               .lhs = { [0] = { REG_VIRT0, REGCM_GPR8_LO } },
+               .rhs = { [0] = { REG_VIRT0, REGCM_GPR8_LO } },
+       },
+       [TEMPLATE_UNARY16] = {
+               .lhs = { [0] = { REG_VIRT0, REGCM_GPR16 } },
+               .rhs = { [0] = { REG_VIRT0, REGCM_GPR16 } },
+       },
+       [TEMPLATE_UNARY32] = {
                .lhs = { [0] = { REG_VIRT0, REGCM_GPR32 } },
                .rhs = { [0] = { REG_VIRT0, REGCM_GPR32 } },
        },
-       [TEMPLATE_CMP_REG] = {
+       [TEMPLATE_CMP8_REG] = {
+               .lhs = { [0] = { REG_EFLAGS, REGCM_FLAGS } },
+               .rhs = {
+                       [0] = { REG_UNSET, REGCM_GPR8_LO },
+                       [1] = { REG_UNSET, REGCM_GPR8_LO },
+               },
+       },
+       [TEMPLATE_CMP16_REG] = {
+               .lhs = { [0] = { REG_EFLAGS, REGCM_FLAGS } },
+               .rhs = {
+                       [0] = { REG_UNSET, REGCM_GPR16 },
+                       [1] = { REG_UNSET, REGCM_GPR16 },
+               },
+       },
+       [TEMPLATE_CMP32_REG] = {
                .lhs = { [0] = { REG_EFLAGS, REGCM_FLAGS } },
                .rhs = {
                        [0] = { REG_UNSET, REGCM_GPR32 },
                        [1] = { REG_UNSET, REGCM_GPR32 },
                },
        },
-       [TEMPLATE_CMP_IMM] = {
+       [TEMPLATE_CMP8_IMM] = {
+               .lhs = { [0] = { REG_EFLAGS, REGCM_FLAGS } },
+               .rhs = {
+                       [0] = { REG_UNSET, REGCM_GPR8_LO },
+                       [1] = { REG_UNNEEDED, REGCM_IMM8 },
+               },
+       },
+       [TEMPLATE_CMP16_IMM] = {
+               .lhs = { [0] = { REG_EFLAGS, REGCM_FLAGS } },
+               .rhs = {
+                       [0] = { REG_UNSET, REGCM_GPR16 },
+                       [1] = { REG_UNNEEDED, REGCM_IMM16 },
+               },
+       },
+       [TEMPLATE_CMP32_IMM] = {
                .lhs = { [0] = { REG_EFLAGS, REGCM_FLAGS } },
                .rhs = {
                        [0] = { REG_UNSET, REGCM_GPR32 },
                        [1] = { REG_UNNEEDED, REGCM_IMM32 },
                },
        },
-       [TEMPLATE_TEST] = {
+       [TEMPLATE_TEST8] = {
+               .lhs = { [0] = { REG_EFLAGS, REGCM_FLAGS } },
+               .rhs = { [0] = { REG_UNSET, REGCM_GPR8_LO } },
+       },
+       [TEMPLATE_TEST16] = {
+               .lhs = { [0] = { REG_EFLAGS, REGCM_FLAGS } },
+               .rhs = { [0] = { REG_UNSET, REGCM_GPR16 } },
+       },
+       [TEMPLATE_TEST32] = {
                .lhs = { [0] = { REG_EFLAGS, REGCM_FLAGS } },
                .rhs = { [0] = { REG_UNSET, REGCM_GPR32 } },
        },
        [TEMPLATE_SET] = {
-               .lhs = { [0] = { REG_UNSET, REGCM_GPR8 } },
+               .lhs = { [0] = { REG_UNSET, REGCM_GPR8_LO } },
                .rhs = { [0] = { REG_EFLAGS, REGCM_FLAGS } },
        },
        [TEMPLATE_JMP] = {
                .rhs = { [0] = { REG_EFLAGS, REGCM_FLAGS } },
        },
+       [TEMPLATE_RET] = {
+               .rhs = { [0] = { REG_UNSET, REGCM_GPR32 } },
+       },
        [TEMPLATE_INB_DX] = {
-               .lhs = { [0] = { REG_AL,  REGCM_GPR8 } },  
+               .lhs = { [0] = { REG_AL,  REGCM_GPR8_LO } },  
                .rhs = { [0] = { REG_DX, REGCM_GPR16 } },
        },
        [TEMPLATE_INB_IMM] = {
-               .lhs = { [0] = { REG_AL,  REGCM_GPR8 } },  
+               .lhs = { [0] = { REG_AL,  REGCM_GPR8_LO } },  
                .rhs = { [0] = { REG_UNNEEDED, REGCM_IMM8 } },
        },
        [TEMPLATE_INW_DX]  = { 
@@ -15567,13 +17810,13 @@ static struct ins_template templates[] = {
        },
        [TEMPLATE_OUTB_DX] = { 
                .rhs = {
-                       [0] = { REG_AL,  REGCM_GPR8 },
+                       [0] = { REG_AL,  REGCM_GPR8_LO },
                        [1] = { REG_DX, REGCM_GPR16 },
                },
        },
        [TEMPLATE_OUTB_IMM] = { 
                .rhs = {
-                       [0] = { REG_AL,  REGCM_GPR8 },  
+                       [0] = { REG_AL,  REGCM_GPR8_LO },  
                        [1] = { REG_UNNEEDED, REGCM_IMM8 },
                },
        },
@@ -15619,8 +17862,85 @@ static struct ins_template templates[] = {
                        [2] = { REG_EDX, REGCM_GPR32 },
                },
        },
+       [TEMPLATE_UMUL8] = {
+               .lhs = { [0] = { REG_AX, REGCM_GPR16 } },
+               .rhs = { 
+                       [0] = { REG_AL, REGCM_GPR8_LO },
+                       [1] = { REG_UNSET, REGCM_GPR8_LO },
+               },
+       },
+       [TEMPLATE_UMUL16] = {
+               .lhs = { [0] = { REG_DXAX, REGCM_DIVIDEND32 } },
+               .rhs = { 
+                       [0] = { REG_AX, REGCM_GPR16 },
+                       [1] = { REG_UNSET, REGCM_GPR16 },
+               },
+       },
+       [TEMPLATE_UMUL32] = {
+               .lhs = { [0] = { REG_EDXEAX, REGCM_DIVIDEND64 } },
+               .rhs = { 
+                       [0] = { REG_EAX, REGCM_GPR32 },
+                       [1] = { REG_UNSET, REGCM_GPR32 },
+               },
+       },
+       [TEMPLATE_DIV8] = {
+               .lhs = { 
+                       [0] = { REG_AL, REGCM_GPR8_LO },
+                       [1] = { REG_AH, REGCM_GPR8 },
+               },
+               .rhs = {
+                       [0] = { REG_AX, REGCM_GPR16 },
+                       [1] = { REG_UNSET, REGCM_GPR8_LO },
+               },
+       },
+       [TEMPLATE_DIV16] = {
+               .lhs = { 
+                       [0] = { REG_AX, REGCM_GPR16 },
+                       [1] = { REG_DX, REGCM_GPR16 },
+               },
+               .rhs = {
+                       [0] = { REG_DXAX, REGCM_DIVIDEND32 },
+                       [1] = { REG_UNSET, REGCM_GPR16 },
+               },
+       },
+       [TEMPLATE_DIV32] = {
+               .lhs = { 
+                       [0] = { REG_EAX, REGCM_GPR32 },
+                       [1] = { REG_EDX, REGCM_GPR32 },
+               },
+               .rhs = {
+                       [0] = { REG_EDXEAX, REGCM_DIVIDEND64 },
+                       [1] = { REG_UNSET, REGCM_GPR32 },
+               },
+       },
 };
 
+static void fixup_branch(struct compile_state *state,
+       struct triple *branch, int jmp_op, int cmp_op, struct type *cmp_type,
+       struct triple *left, struct triple *right)
+{ /* Rewrite `branch` into an arch-level conditional jump.  A new
+   * cmp_op triple over left/right is built with pre_triple() and
+   * replaces RHS(branch, 0); branch itself becomes jmp_op with
+   * TEMPLATE_JMP.  Only the 32-bit TEST/CMP templates are selected here.
+   */
+       struct triple *test;
+       if (!left) { /* nothing to test: reported as an internal error */
+               internal_error(state, branch, "no branch test?");
+       }
+       test = pre_triple(state, branch,
+               cmp_op, cmp_type, left, right);
+       test->template_id = TEMPLATE_TEST32; /* default; refined below when cmp_op is OP_CMP */
+       if (cmp_op == OP_CMP) {
+               test->template_id = TEMPLATE_CMP32_REG;
+               if (get_imm32(test, &RHS(test, 1))) { /* NOTE(review): presumably true when the right operand can be used as a 32-bit immediate -- confirm in get_imm32 */
+                       test->template_id = TEMPLATE_CMP32_IMM;
+               }
+       }
+       use_triple(RHS(test, 0), test); /* record test as a user of its operands */
+       use_triple(RHS(test, 1), test);
+       unuse_triple(RHS(branch, 0), branch); /* branch no longer uses its old condition value */
+       RHS(branch, 0) = test; /* the jump now takes its condition from test */
+       branch->op = jmp_op;
+       branch->template_id = TEMPLATE_JMP;
+       use_triple(RHS(branch, 0), branch);
+}
+
 static void fixup_branches(struct compile_state *state,
        struct triple *cmp, struct triple *use, int jmp_op)
 {
@@ -15630,8 +17950,8 @@ static void fixup_branches(struct compile_state *state,
                if (entry->member->op == OP_COPY) {
                        fixup_branches(state, cmp, entry->member, jmp_op);
                }
-               else if (entry->member->op == OP_BRANCH) {
-                       struct triple *branch, *test;
+               else if (entry->member->op == OP_CBRANCH) {
+                       struct triple *branch;
                        struct triple *left, *right;
                        left = right = 0;
                        left = RHS(cmp, 0);
@@ -15639,22 +17959,8 @@ static void fixup_branches(struct compile_state *state,
                                right = RHS(cmp, 1);
                        }
                        branch = entry->member;
-                       test = pre_triple(state, branch,
+                       fixup_branch(state, branch, jmp_op, 
                                cmp->op, cmp->type, left, right);
-                       test->template_id = TEMPLATE_TEST; 
-                       if (cmp->op == OP_CMP) {
-                               test->template_id = TEMPLATE_CMP_REG;
-                               if (get_imm32(test, &RHS(test, 1))) {
-                                       test->template_id = TEMPLATE_CMP_IMM;
-                               }
-                       }
-                       use_triple(RHS(test, 0), test);
-                       use_triple(RHS(test, 1), test);
-                       unuse_triple(RHS(branch, 0), branch);
-                       RHS(branch, 0) = test;
-                       branch->op = jmp_op;
-                       branch->template_id = TEMPLATE_JMP;
-                       use_triple(RHS(branch, 0), branch);
                }
        }
 }
@@ -15672,17 +17978,17 @@ static void bool_cmp(struct compile_state *state,
 
        /* Modify the comparison operator */
        ins->op = cmp_op;
-       ins->template_id = TEMPLATE_TEST;
+       ins->template_id = TEMPLATE_TEST32;
        if (cmp_op == OP_CMP) {
-               ins->template_id = TEMPLATE_CMP_REG;
+               ins->template_id = TEMPLATE_CMP32_REG;
                if (get_imm32(ins, &RHS(ins, 1))) {
-                       ins->template_id =  TEMPLATE_CMP_IMM;
+                       ins->template_id =  TEMPLATE_CMP32_IMM;
                }
        }
        /* Generate the instruction sequence that will transform the
         * result of the comparison into a logical value.
         */
-       set = post_triple(state, ins, set_op, ins->type, ins, 0);
+       set = post_triple(state, ins, set_op, &char_type, ins, 0);
        use_triple(ins, set);
        set->template_id = TEMPLATE_SET;
 
@@ -15787,16 +18093,58 @@ struct reg_info arch_reg_rhs(struct compile_state *state, struct triple *ins, in
        return result;
 }
 
+static struct triple *mod_div(struct compile_state *state,
+       struct triple *ins, int div_op, int index)
+{ /* Replace the divide/modulo triple `ins` with a combined div_op
+   * triple that has two results, each exposed through an OP_PIECE.
+   * index picks the result that takes over the uses of ins:
+   * 0 is the quotient piece, 1 is the remainder piece.
+   * ins is released; the return value is piece1->next.
+   * NOTE(review): presumably post_triple() inserts directly after ins,
+   * leaving the order div, piece0, piece1 -- confirm in post_triple.
+   */
+       struct triple *div, *piece0, *piece1;
+       
+       /* Generate a piece to hold the remainder (result slot 1) */
+       piece1 = post_triple(state, ins, OP_PIECE, ins->type, 0, 0);
+       piece1->u.cval = 1;
+
+       /* Generate a piece to hold the quotient (result slot 0) */
+       piece0 = post_triple(state, ins, OP_PIECE, ins->type, 0, 0);
+       piece0->u.cval = 0;
+
+       /* Generate the appropriate division instruction; it takes over
+        * both operands of ins and always uses the 32-bit DIV template.
+        */
+       div = post_triple(state, ins, div_op, ins->type, 0, 0);
+       RHS(div, 0) = RHS(ins, 0);
+       RHS(div, 1) = RHS(ins, 1);
+       LHS(div, 0) = piece0;
+       LHS(div, 1) = piece1;
+       div->template_id  = TEMPLATE_DIV32;
+       use_triple(RHS(div, 0), div);
+       use_triple(RHS(div, 1), div);
+       use_triple(LHS(div, 0), div);
+       use_triple(LHS(div, 1), div);
+
+       /* Hook on piece0: point it back at the div that produces it */
+       MISC(piece0, 0) = div;
+       use_triple(div, piece0);
+
+       /* Hook on piece1: point it back at the div that produces it */
+       MISC(piece1, 0) = div;
+       use_triple(div, piece1);
+       
+       /* Replace uses of ins with the appropriate piece of the div */
+       propogate_use(state, ins, LHS(div, index));
+       release_triple(state, ins);
+
+       /* Return the address of the next instruction */
+       return piece1->next;
+}
+
 static struct triple *transform_to_arch_instruction(
        struct compile_state *state, struct triple *ins)
 {
        /* Transform from generic 3 address instructions
         * to archtecture specific instructions.
-        * And apply architecture specific constrains to instructions.
+        * And apply architecture specific constraints to instructions.
         * Copies are inserted to preserve the register flexibility
         * of 3 address instructions.
         */
        struct triple *next;
+       size_t size;
        next = ins->next;
        switch(ins->op) {
        case OP_INTCONST:
@@ -15815,22 +18163,46 @@ static struct triple *transform_to_arch_instruction(
                ins->template_id = TEMPLATE_NOP;
                break;
        case OP_COPY:
-               ins->template_id = TEMPLATE_COPY_REG;
-               if (is_imm8(RHS(ins, 0))) {
+               size = size_of(state, ins->type);
+               if (is_imm8(RHS(ins, 0)) && (size <= 1)) {
                        ins->template_id = TEMPLATE_COPY_IMM8;
                }
-               else if (is_imm16(RHS(ins, 0))) {
+               else if (is_imm16(RHS(ins, 0)) && (size <= 2)) {
                        ins->template_id = TEMPLATE_COPY_IMM16;
                }
-               else if (is_imm32(RHS(ins, 0))) {
+               else if (is_imm32(RHS(ins, 0)) && (size <= 4)) {
                        ins->template_id = TEMPLATE_COPY_IMM32;
                }
                else if (is_const(RHS(ins, 0))) {
                        internal_error(state, ins, "bad constant passed to copy");
                }
+               else if (size <= 1) {
+                       ins->template_id = TEMPLATE_COPY8_REG;
+               }
+               else if (size <= 2) {
+                       ins->template_id = TEMPLATE_COPY16_REG;
+               }
+               else if (size <= 4) {
+                       ins->template_id = TEMPLATE_COPY32_REG;
+               }
+               else {
+                       internal_error(state, ins, "bad type passed to copy");
+               }
                break;
        case OP_PHI:
-               ins->template_id = TEMPLATE_PHI;
+               size = size_of(state, ins->type);
+               if (size <= 1) {
+                       ins->template_id = TEMPLATE_PHI8;
+               }
+               else if (size <= 2) {
+                       ins->template_id = TEMPLATE_PHI16;
+               }
+               else if (size <= 4) {
+                       ins->template_id = TEMPLATE_PHI32;
+               }
+               else {
+                       internal_error(state, ins, "bad type passed to phi");
+               }
                break;
        case OP_STORE:
                switch(ins->type->type & TYPE_MASK) {
@@ -15853,23 +18225,16 @@ static struct triple *transform_to_arch_instruction(
        case OP_LOAD:
                switch(ins->type->type & TYPE_MASK) {
                case TYPE_CHAR:   case TYPE_UCHAR:
-                       ins->template_id = TEMPLATE_LOAD8;
-                       break;
-               case TYPE_SHORT:
-               case TYPE_USHORT:
-                       ins->template_id = TEMPLATE_LOAD16;
-                       break;
-               case TYPE_INT:
-               case TYPE_UINT:
-               case TYPE_LONG:
-               case TYPE_ULONG:
+               case TYPE_SHORT:  case TYPE_USHORT:
+               case TYPE_INT:    case TYPE_UINT:
+               case TYPE_LONG:   case TYPE_ULONG:
                case TYPE_POINTER:
-                       ins->template_id = TEMPLATE_LOAD32;
                        break;
                default:
                        internal_error(state, ins, "unknown type in load");
                        break;
                }
+               ins->template_id = TEMPLATE_LOAD32;
                break;
        case OP_ADD:
        case OP_SUB:
@@ -15877,22 +18242,45 @@ static struct triple *transform_to_arch_instruction(
        case OP_XOR:
        case OP_OR:
        case OP_SMUL:
-               ins->template_id = TEMPLATE_BINARY_REG;
+               ins->template_id = TEMPLATE_BINARY32_REG;
                if (get_imm32(ins, &RHS(ins, 1))) {
-                       ins->template_id = TEMPLATE_BINARY_IMM;
+                       ins->template_id = TEMPLATE_BINARY32_IMM;
                }
                break;
+       case OP_SDIVT:
+       case OP_UDIVT:
+               ins->template_id = TEMPLATE_DIV32;
+               next = after_lhs(state, ins);
+               break;
+               /* FIXME UMUL does not work yet.. */
+       case OP_UMUL:
+               ins->template_id = TEMPLATE_UMUL32;
+               break;
+       case OP_UDIV:
+               next = mod_div(state, ins, OP_UDIVT, 0);
+               break;
+       case OP_SDIV:
+               next = mod_div(state, ins, OP_SDIVT, 0);
+               break;
+       case OP_UMOD:
+               next = mod_div(state, ins, OP_UDIVT, 1);
+               break;
+       case OP_SMOD:
+               next = mod_div(state, ins, OP_SDIVT, 1);
+               break;
        case OP_SL:
        case OP_SSR:
        case OP_USR:
-               ins->template_id = TEMPLATE_SL_CL;
+               ins->template_id = TEMPLATE_SL32_CL;
                if (get_imm8(ins, &RHS(ins, 1))) {
-                       ins->template_id = TEMPLATE_SL_IMM;
+                       ins->template_id = TEMPLATE_SL32_IMM;
+               } else if (size_of(state, RHS(ins, 1)->type) > 1) {
+                       typed_pre_copy(state, &char_type, ins, 1);
                }
                break;
        case OP_INVERT:
        case OP_NEG:
-               ins->template_id = TEMPLATE_UNARY;
+               ins->template_id = TEMPLATE_UNARY32;
                break;
        case OP_EQ: 
                bool_cmp(state, ins, OP_CMP, OP_JMP_EQ, OP_SET_EQ); 
@@ -15931,12 +18319,19 @@ static struct triple *transform_to_arch_instruction(
                bool_cmp(state, ins, OP_TEST, OP_JMP_EQ, OP_SET_EQ);
                break;
        case OP_BRANCH:
-               if (TRIPLE_RHS(ins->sizes) > 0) {
-                       internal_error(state, ins, "bad branch test");
-               }
                ins->op = OP_JMP;
                ins->template_id = TEMPLATE_NOP;
                break;
+       case OP_CBRANCH:
+               fixup_branch(state, ins, OP_JMP_NOTEQ, OP_TEST, 
+                       RHS(ins, 0)->type, RHS(ins, 0), 0);
+               break;
+       case OP_CALL:
+               ins->template_id = TEMPLATE_NOP;
+               break;
+       case OP_RET:
+               ins->template_id = TEMPLATE_RET;
+               break;
        case OP_INB:
        case OP_INW:
        case OP_INL:
@@ -15981,14 +18376,17 @@ static struct triple *transform_to_arch_instruction(
                break;
                /* Already transformed instructions */
        case OP_TEST:
-               ins->template_id = TEMPLATE_TEST;
+               ins->template_id = TEMPLATE_TEST32;
                break;
        case OP_CMP:
-               ins->template_id = TEMPLATE_CMP_REG;
+               ins->template_id = TEMPLATE_CMP32_REG;
                if (get_imm32(ins, &RHS(ins, 1))) {
-                       ins->template_id = TEMPLATE_CMP_IMM;
+                       ins->template_id = TEMPLATE_CMP32_IMM;
                }
                break;
+       case OP_JMP:
+               ins->template_id = TEMPLATE_NOP;
+               break;
        case OP_JMP_EQ:      case OP_JMP_NOTEQ:
        case OP_JMP_SLESS:   case OP_JMP_ULESS:
        case OP_JMP_SMORE:   case OP_JMP_UMORE:
@@ -16013,18 +18411,21 @@ static struct triple *transform_to_arch_instruction(
        return next;
 }
 
+static long next_label(struct compile_state *state)
+{ /* Hand out the next label number.  The first call returns 1, so 0
+   * is never produced.  `state` is not used by this function.
+   */
+       static long label_counter = 0; /* persists across calls and is never reset */
+       return ++label_counter;
+}
 static void generate_local_labels(struct compile_state *state)
 {
        struct triple *first, *label;
-       int label_counter;
-       label_counter = 0;
-       first = RHS(state->main_function, 0);
+       first = state->first;
        label = first;
        do {
                if ((label->op == OP_LABEL) || 
                        (label->op == OP_SDECL)) {
                        if (label->use) {
-                               label->u.cval = ++label_counter;
+                               label->u.cval = next_label(state);
                        } else {
                                label->u.cval = 0;
                        }
@@ -16053,6 +18454,9 @@ static int check_reg(struct compile_state *state,
 
 static const char *arch_reg_str(int reg)
 {
+#if REG_XMM7 != 44
+#error "Registers have renumberd fix arch_reg_str"
+#endif
        static const char *regs[] = {
                "%unset",
                "%unneeded",
@@ -16061,6 +18465,7 @@ static const char *arch_reg_str(int reg)
                "%ax", "%bx", "%cx", "%dx", "%si", "%di", "%bp", "%sp",
                "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi", "%ebp", "%esp",
                "%edx:%eax",
+               "%dx:%ax",
                "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7",
                "%xmm0", "%xmm1", "%xmm2", "%xmm3", 
                "%xmm4", "%xmm5", "%xmm6", "%xmm7",
@@ -16101,13 +18506,21 @@ static void print_const_val(
        switch(ins->op) {
        case OP_INTCONST:
                fprintf(fp, " $%ld ", 
-                       (long_t)(ins->u.cval));
+                       (long)(ins->u.cval));
                break;
        case OP_ADDRCONST:
+               if ((MISC(ins, 0)->op != OP_SDECL) &&
+                       (MISC(ins, 0)->op != OP_LABEL))
+               {
+                       internal_error(state, ins, "bad base for addrconst");
+               }
+               if (MISC(ins, 0)->u.cval <= 0) {
+                       internal_error(state, ins, "unlabeled constant");
+               }
                fprintf(fp, " $L%s%lu+%lu ",
-                       state->label_prefix, 
-                       MISC(ins, 0)->u.cval,
-                       ins->u.cval);
+                       state->compiler->label_prefix, 
+                       (unsigned long)(MISC(ins, 0)->u.cval),
+                       (unsigned long)(ins->u.cval));
                break;
        default:
                internal_error(state, ins, "unknown constant type");
@@ -16115,12 +18528,86 @@ static void print_const_val(
        }
 }
 
+static void print_const(struct compile_state *state, /* Write constant triple `ins` to `fp` as assembler data directives. */
+       struct triple *ins, FILE *fp)
+{
+       switch(ins->op) {
+       case OP_INTCONST: /* integer: directive width follows the constant's type */
+               switch(ins->type->type & TYPE_MASK) {
+               case TYPE_CHAR:
+               case TYPE_UCHAR:
+                       fprintf(fp, ".byte 0x%02lx\n", 
+                               (unsigned long)(ins->u.cval)); /* cast matches the %lx conversion */
+                       break;
+               case TYPE_SHORT:
+               case TYPE_USHORT:
+                       fprintf(fp, ".short 0x%04lx\n", 
+                               (unsigned long)(ins->u.cval));
+                       break;
+               case TYPE_INT:
+               case TYPE_UINT:
+               case TYPE_LONG:
+               case TYPE_ULONG:
+                       fprintf(fp, ".int %lu\n", /* the only integer case printed in decimal */
+                               (unsigned long)(ins->u.cval));
+                       break;
+               default: /* no break: control leaves the inner switch and reaches the break below */
+                       internal_error(state, ins, "Unknown constant type");
+               }
+               break;
+       case OP_ADDRCONST: /* address: emitted as label + u.cval */
+               if ((MISC(ins, 0)->op != OP_SDECL) && /* base must be an OP_SDECL or OP_LABEL triple */
+                       (MISC(ins, 0)->op != OP_LABEL)) {
+                       internal_error(state, ins, "bad base for addrconst");
+               }
+               if (MISC(ins, 0)->u.cval <= 0) { /* base's u.cval is printed as the label number below; values <= 0 are rejected */
+                       internal_error(state, ins, "unlabeled constant");
+               }
+               fprintf(fp, ".int L%s%lu+%lu\n",
+                       state->compiler->label_prefix,
+                       (unsigned long)(MISC(ins, 0)->u.cval), /* label number of the base */
+                       (unsigned long)(ins->u.cval)); /* value added to that label */
+               break;
+       case OP_BLOBCONST: /* raw bytes: one .byte directive per byte */
+       {
+               unsigned char *blob;
+               size_t size, i;
+               size = size_of(state, ins->type); /* number of bytes printed comes from the constant's type */
+               blob = ins->u.blob;
+               for(i = 0; i < size; i++) {
+                       fprintf(fp, ".byte 0x%02x\n",
+                               blob[i]);
+               }
+               break;
+       }
+       default:
+               internal_error(state, ins, "Unknown constant type");
+               break;
+       }
+}
+
+#define TEXT_SECTION ".rom.text"
+#define DATA_SECTION ".rom.data"
+
+static long get_const_pool_ref( /* Emit constant `ins` into DATA_SECTION under a fresh label; returns that label's number. */
+       struct compile_state *state, struct triple *ins, FILE *fp)
+{
+       long ref;
+       ref = next_label(state); /* same counter that generate_local_labels draws from */
+       fprintf(fp, ".section \"" DATA_SECTION "\"\n");
+       fprintf(fp, ".balign %d\n", align_of(state, ins->type)); /* NOTE(review): %d assumes align_of returns int - confirm */
+       fprintf(fp, "L%s%lu:\n", state->compiler->label_prefix, (unsigned long)ref); /* %lu needs an unsigned long argument */
+       print_const(state, ins, fp);
+       fprintf(fp, ".section \"" TEXT_SECTION "\"\n"); /* switch back so following output lands in the text section */
+       return ref;
+}
+
 static void print_binary_op(struct compile_state *state,
        const char *op, struct triple *ins, FILE *fp) 
 {
        unsigned mask;
-       mask = REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8;
-       if (RHS(ins, 0)->id != ins->id) {
+       mask = REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8_LO;
+       if (ID_REG(RHS(ins, 0)->id) != ID_REG(ins->id)) {
                internal_error(state, ins, "invalid register assignment");
        }
        if (is_const(RHS(ins, 1))) {
@@ -16147,7 +18634,7 @@ static void print_unary_op(struct compile_state *state,
        const char *op, struct triple *ins, FILE *fp)
 {
        unsigned mask;
-       mask = REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8;
+       mask = REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8_LO;
        fprintf(fp, "\t%s %s\n",
                op,
                reg(state, RHS(ins, 0), mask));
@@ -16157,8 +18644,8 @@ static void print_op_shift(struct compile_state *state,
        const char *op, struct triple *ins, FILE *fp)
 {
        unsigned mask;
-       mask = REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8;
-       if (RHS(ins, 0)->id != ins->id) {
+       mask = REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8_LO;
+       if (ID_REG(RHS(ins, 0)->id) != ID_REG(ins->id)) {
                internal_error(state, ins, "invalid register assignment");
        }
        if (is_const(RHS(ins, 1))) {
@@ -16170,7 +18657,7 @@ static void print_op_shift(struct compile_state *state,
        else {
                fprintf(fp, "\t%s %s, %s\n",
                        op,
-                       reg(state, RHS(ins, 1), REGCM_GPR8),
+                       reg(state, RHS(ins, 1), REGCM_GPR8_LO),
                        reg(state, RHS(ins, 0), mask));
        }
 }
@@ -16182,7 +18669,7 @@ static void print_op_in(struct compile_state *state, struct triple *ins, FILE *f
        int dreg;
        mask = 0;
        switch(ins->op) {
-       case OP_INB: op = "inb", mask = REGCM_GPR8; break;
+       case OP_INB: op = "inb", mask = REGCM_GPR8_LO; break;
        case OP_INW: op = "inw", mask = REGCM_GPR16; break;
        case OP_INL: op = "inl", mask = REGCM_GPR32; break;
        default:
@@ -16220,7 +18707,7 @@ static void print_op_out(struct compile_state *state, struct triple *ins, FILE *
        int lreg;
        mask = 0;
        switch(ins->op) {
-       case OP_OUTB: op = "outb", mask = REGCM_GPR8; break;
+       case OP_OUTB: op = "outb", mask = REGCM_GPR8_LO; break;
        case OP_OUTW: op = "outw", mask = REGCM_GPR16; break;
        case OP_OUTL: op = "outl", mask = REGCM_GPR32; break;
        default:
@@ -16265,10 +18752,6 @@ static void print_op_move(struct compile_state *state,
                src = RHS(ins, 0);
                dst = ins;
        }
-       else if (ins->op == OP_WRITE) {
-               dst = LHS(ins, 0);
-               src = RHS(ins, 0);
-       }
        else {
                internal_error(state, ins, "unknown move operation");
                src = dst = 0;
@@ -16276,13 +18759,13 @@ static void print_op_move(struct compile_state *state,
        if (!is_const(src)) {
                int src_reg, dst_reg;
                int src_regcm, dst_regcm;
-               src_reg = ID_REG(src->id);
+               src_reg   = ID_REG(src->id);
                dst_reg   = ID_REG(dst->id);
                src_regcm = arch_reg_regcm(state, src_reg);
-               dst_regcm   = arch_reg_regcm(state, dst_reg);
+               dst_regcm = arch_reg_regcm(state, dst_reg);
                /* If the class is the same just move the register */
                if (src_regcm & dst_regcm & 
-                       (REGCM_GPR8 | REGCM_GPR16 | REGCM_GPR32)) {
+                       (REGCM_GPR8_LO | REGCM_GPR16 | REGCM_GPR32)) {
                        if ((src_reg != dst_reg) || !omit_copy) {
                                fprintf(fp, "\tmov %s, %s\n",
                                        reg(state, src, src_regcm),
@@ -16299,9 +18782,19 @@ static void print_op_move(struct compile_state *state,
                                        arch_reg_str(dst_reg));
                        }
                }
+               /* Move from 32bit gprs to 16bit gprs */
+               else if ((src_regcm & REGCM_GPR32) &&
+                       (dst_regcm & REGCM_GPR16)) {
+                       dst_reg = (dst_reg - REGC_GPR16_FIRST) + REGC_GPR32_FIRST;
+                       if ((src_reg != dst_reg) || !omit_copy) {
+                               fprintf(fp, "\tmov %s, %s\n",
+                                       arch_reg_str(src_reg),
+                                       arch_reg_str(dst_reg));
+                       }
+               }
                /* Move 32bit to 8bit */
                else if ((src_regcm & REGCM_GPR32_8) &&
-                       (dst_regcm & REGCM_GPR8))
+                       (dst_regcm & REGCM_GPR8_LO))
                {
                        src_reg = (src_reg - REGC_GPR32_8_FIRST) + REGC_GPR8_FIRST;
                        if ((src_reg != dst_reg) || !omit_copy) {
@@ -16312,7 +18805,7 @@ static void print_op_move(struct compile_state *state,
                }
                /* Move 16bit to 8bit */
                else if ((src_regcm & REGCM_GPR16_8) &&
-                       (dst_regcm & REGCM_GPR8))
+                       (dst_regcm & REGCM_GPR8_LO))
                {
                        src_reg = (src_reg - REGC_GPR16_8_FIRST) + REGC_GPR8_FIRST;
                        if ((src_reg != dst_reg) || !omit_copy) {
@@ -16322,7 +18815,7 @@ static void print_op_move(struct compile_state *state,
                        }
                }
                /* Move 8/16bit to 16/32bit */
-               else if ((src_regcm & (REGCM_GPR8 | REGCM_GPR16)) && 
+               else if ((src_regcm & (REGCM_GPR8_LO | REGCM_GPR16)) && 
                        (dst_regcm & (REGCM_GPR16 | REGCM_GPR32))) {
                        const char *op;
                        op = is_signed(src->type)? "movsx": "movzx";
@@ -16339,15 +18832,26 @@ static void print_op_move(struct compile_state *state,
                                        reg(state, dst, dst_regcm));
                        }
                }
-               /* Move between mmx registers or mmx & sse  registers */
-               else if ((src_regcm & (REGCM_MMX | REGCM_XMM)) &&
-                       (dst_regcm & (REGCM_MMX | REGCM_XMM))) {
+               /* Move between mmx registers */
+               else if ((src_regcm & dst_regcm & REGCM_MMX)) {
                        if ((src_reg != dst_reg) || !omit_copy) {
                                fprintf(fp, "\tmovq %s, %s\n",
                                        reg(state, src, src_regcm),
                                        reg(state, dst, dst_regcm));
                        }
                }
+               /* Move from sse to mmx registers */
+               else if ((src_regcm & REGCM_XMM) && (dst_regcm & REGCM_MMX)) {
+                       fprintf(fp, "\tmovdq2q %s, %s\n",
+                               reg(state, src, src_regcm),
+                               reg(state, dst, dst_regcm));
+               }
+               /* Move from mmx to sse registers */
+               else if ((src_regcm & REGCM_MMX) && (dst_regcm & REGCM_XMM)) {
+                       fprintf(fp, "\tmovq2dq %s, %s\n",
+                               reg(state, src, src_regcm),
+                               reg(state, dst, dst_regcm));
+               }
                /* Move between 32bit gprs & mmx/sse registers */
                else if ((src_regcm & (REGCM_GPR32 | REGCM_MMX | REGCM_XMM)) &&
                        (dst_regcm & (REGCM_GPR32 | REGCM_MMX | REGCM_XMM))) {
@@ -16355,9 +18859,71 @@ static void print_op_move(struct compile_state *state,
                                reg(state, src, src_regcm),
                                reg(state, dst, dst_regcm));
                }
+               /* Move from 16bit gprs &  mmx/sse registers */
+               else if ((src_regcm & REGCM_GPR16) &&
+                       (dst_regcm & (REGCM_MMX | REGCM_XMM))) {
+                       const char *op;
+                       int mid_reg;
+                       op = is_signed(src->type)? "movsx":"movzx";
+                       mid_reg = (src_reg - REGC_GPR16_FIRST) + REGC_GPR32_FIRST;
+                       fprintf(fp, "\t%s %s, %s\n\tmovd %s, %s\n",
+                               op,
+                               arch_reg_str(src_reg),
+                               arch_reg_str(mid_reg),
+                               arch_reg_str(mid_reg),
+                               arch_reg_str(dst_reg));
+               }
+               /* Move from mmx/sse registers to 16bit gprs */
+               else if ((src_regcm & (REGCM_MMX | REGCM_XMM)) &&
+                       (dst_regcm & REGCM_GPR16)) {
+                       dst_reg = (dst_reg - REGC_GPR16_FIRST) + REGC_GPR32_FIRST;
+                       fprintf(fp, "\tmovd %s, %s\n",
+                               arch_reg_str(src_reg),
+                               arch_reg_str(dst_reg));
+               }
+               /* Move from gpr to 64bit dividend */
+               else if ((src_regcm & (REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8_LO))  &&
+                       (dst_regcm & REGCM_DIVIDEND64)) {
+                       const char *extend;
+                       extend = is_signed(src->type)? "cltd":"movl $0, %edx";
+                       fprintf(fp, "\tmov %s, %%eax\n\t%s\n",
+                               arch_reg_str(src_reg), 
+                               extend);
+               }
+               /* Move from 64bit gpr to gpr */
+               else if ((src_regcm & REGCM_DIVIDEND64) &&
+                       (dst_regcm & (REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8_LO))) {
+                       if (dst_regcm & REGCM_GPR32) {
+                               src_reg = REG_EAX;
+                       } 
+                       else if (dst_regcm & REGCM_GPR16) {
+                               src_reg = REG_AX;
+                       }
+                       else if (dst_regcm & REGCM_GPR8_LO) {
+                               src_reg = REG_AL;
+                       }
+                       fprintf(fp, "\tmov %s, %s\n",
+                               arch_reg_str(src_reg),
+                               arch_reg_str(dst_reg));
+               }
+               /* Move from mmx/sse registers to 64bit gpr */
+               else if ((src_regcm & (REGCM_MMX | REGCM_XMM)) &&
+                       (dst_regcm & REGCM_DIVIDEND64)) {
+                       const char *extend;
+                       extend = is_signed(src->type)? "cltd": "movl $0, %edx";
+                       fprintf(fp, "\tmovd %s, %%eax\n\t%s\n",
+                               arch_reg_str(src_reg),
+                               extend);
+               }
+               /* Move from 64bit gpr to mmx/sse register */
+               else if ((src_regcm & REGCM_DIVIDEND64) &&
+                       (dst_regcm & (REGCM_XMM | REGCM_MMX))) {
+                       fprintf(fp, "\tmovd %%eax, %s\n",
+                               arch_reg_str(dst_reg));
+               }
 #if X86_4_8BIT_GPRS
                /* Move from 8bit gprs to  mmx/sse registers */
-               else if ((src_regcm & REGCM_GPR8) && (src_reg <= REG_DL) &&
+               else if ((src_regcm & REGCM_GPR8_LO) && (src_reg <= REG_DL) &&
                        (dst_regcm & (REGCM_MMX | REGCM_XMM))) {
                        const char *op;
                        int mid_reg;
@@ -16372,26 +18938,16 @@ static void print_op_move(struct compile_state *state,
                }
                /* Move from mmx/sse registers and 8bit gprs */
                else if ((src_regcm & (REGCM_MMX | REGCM_XMM)) &&
-                       (dst_regcm & REGCM_GPR8) && (dst_reg <= REG_DL)) {
+                       (dst_regcm & REGCM_GPR8_LO) && (dst_reg <= REG_DL)) {
                        int mid_reg;
                        mid_reg = (dst_reg - REGC_GPR8_FIRST) + REGC_GPR32_FIRST;
                        fprintf(fp, "\tmovd %s, %s\n",
                                reg(state, src, src_regcm),
                                arch_reg_str(mid_reg));
                }
-               /* Move from 32bit gprs to 16bit gprs */
-               else if ((src_regcm & REGCM_GPR32) &&
-                       (dst_regcm & REGCM_GPR16)) {
-                       dst_reg = (dst_reg - REGC_GPR16_FIRST) + REGC_GPR32_FIRST;
-                       if ((src_reg != dst_reg) || !omit_copy) {
-                               fprintf(fp, "\tmov %s, %s\n",
-                                       arch_reg_str(src_reg),
-                                       arch_reg_str(dst_reg));
-                       }
-               }
                /* Move from 32bit gprs to 8bit gprs */
                else if ((src_regcm & REGCM_GPR32) &&
-                       (dst_regcm & REGCM_GPR8)) {
+                       (dst_regcm & REGCM_GPR8_LO)) {
                        dst_reg = (dst_reg - REGC_GPR8_FIRST) + REGC_GPR32_FIRST;
                        if ((src_reg != dst_reg) || !omit_copy) {
                                fprintf(fp, "\tmov %s, %s\n",
@@ -16401,7 +18957,7 @@ static void print_op_move(struct compile_state *state,
                }
                /* Move from 16bit gprs to 8bit gprs */
                else if ((src_regcm & REGCM_GPR16) &&
-                       (dst_regcm & REGCM_GPR8)) {
+                       (dst_regcm & REGCM_GPR8_LO)) {
                        dst_reg = (dst_reg - REGC_GPR8_FIRST) + REGC_GPR16_FIRST;
                        if ((src_reg != dst_reg) || !omit_copy) {
                                fprintf(fp, "\tmov %s, %s\n",
@@ -16415,10 +18971,44 @@ static void print_op_move(struct compile_state *state,
                }
        }
        else {
-               fprintf(fp, "\tmov ");
-               print_const_val(state, src, fp);
-               fprintf(fp, ", %s\n",
-                       reg(state, dst, REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8));
+               int dst_reg;
+               int dst_regcm;
+               dst_reg = ID_REG(dst->id);
+               dst_regcm = arch_reg_regcm(state, dst_reg); /* the destination's register class mask selects how the constant is emitted */
+               if (dst_regcm & (REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8_LO)) { /* plain gpr: mov immediate */
+                       fprintf(fp, "\tmov ");
+                       print_const_val(state, src, fp);
+                       fprintf(fp, ", %s\n",
+                               reg(state, dst, REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8_LO));
+               }
+               else if (dst_regcm & REGCM_DIVIDEND64) { /* writes %edx then %eax: high half zeroed, constant in low half */
+                       if (size_of(state, dst->type) > 4) {
+                               internal_error(state, ins, "64bit constant...");
+                       }
+                       fprintf(fp, "\tmov $0, %%edx\n");
+                       fprintf(fp, "\tmov ");
+                       print_const_val(state, src, fp);
+                       fprintf(fp, ", %%eax\n");
+               }
+               else if (dst_regcm & REGCM_DIVIDEND32) { /* same scheme using %dx and %ax */
+                       if (size_of(state, dst->type) > 2) {
+                               internal_error(state, ins, "32bit constant...");
+                       }
+                       fprintf(fp, "\tmov $0, %%dx\n");
+                       fprintf(fp, "\tmov ");
+                       print_const_val(state, src, fp);
+                       fprintf(fp, ", %%ax\n"); /* newline ends the instruction, as in the %eax branch above */
+               }
+               else if (dst_regcm & (REGCM_XMM | REGCM_MMX)) { /* constant is placed in the data section and loaded with movd */
+                       long ref;
+                       ref = get_const_pool_ref(state, src, fp);
+                       fprintf(fp, "\tmovd L%s%lu, %s\n",
+                               state->compiler->label_prefix, (unsigned long)ref, /* cast matches the %lu conversion */
+                               reg(state, dst, (REGCM_XMM | REGCM_MMX)));
+               }
+               else {
+                       internal_error(state, ins, "unknown copy immediate type");
+               }
        }
 }
 
@@ -16426,14 +19016,31 @@ static void print_op_load(struct compile_state *state,
        struct triple *ins, FILE *fp)
 {
        struct triple *dst, *src;
+       const char *op;
        dst = ins;
        src = RHS(ins, 0);
        if (is_const(src) || is_const(dst)) {
                internal_error(state, ins, "unknown load operation");
        }
-       fprintf(fp, "\tmov (%s), %s\n",
+       switch(ins->type->type & TYPE_MASK) {
+       case TYPE_CHAR:   op = "movsbl"; break;
+       case TYPE_UCHAR:  op = "movzbl"; break;
+       case TYPE_SHORT:  op = "movswl"; break;
+       case TYPE_USHORT: op = "movzwl"; break;
+       case TYPE_INT:    case TYPE_UINT:
+       case TYPE_LONG:   case TYPE_ULONG:
+       case TYPE_POINTER:
+               op = "movl"; 
+               break;
+       default:
+               internal_error(state, ins, "unknown type in load");
+               op = "<invalid opcode>";
+               break;
+       }
+       fprintf(fp, "\t%s (%s), %s\n",
+               op, 
                reg(state, src, REGCM_GPR32),
-               reg(state, dst, REGCM_GPR8 | REGCM_GPR16 | REGCM_GPR32));
+               reg(state, dst, REGCM_GPR32));
 }
 
 
@@ -16441,21 +19048,21 @@ static void print_op_store(struct compile_state *state,
        struct triple *ins, FILE *fp)
 {
        struct triple *dst, *src;
-       dst = LHS(ins, 0);
-       src = RHS(ins, 0);
+       dst = RHS(ins, 0);
+       src = RHS(ins, 1);
        if (is_const(src) && (src->op == OP_INTCONST)) {
                long_t value;
                value = (long_t)(src->u.cval);
                fprintf(fp, "\tmov%s $%ld, (%s)\n",
                        type_suffix(state, src->type),
-                       value,
+                       (long)(value),
                        reg(state, dst, REGCM_GPR32));
        }
        else if (is_const(dst) && (dst->op == OP_INTCONST)) {
                fprintf(fp, "\tmov%s %s, 0x%08lx\n",
                        type_suffix(state, src->type),
-                       reg(state, src, REGCM_GPR8 | REGCM_GPR16 | REGCM_GPR32),
-                       dst->u.cval);
+                       reg(state, src, REGCM_GPR8_LO | REGCM_GPR16 | REGCM_GPR32),
+                       (unsigned long)(dst->u.cval));
        }
        else {
                if (is_const(src) || is_const(dst)) {
@@ -16463,7 +19070,7 @@ static void print_op_store(struct compile_state *state,
                }
                fprintf(fp, "\tmov%s %s, (%s)\n",
                        type_suffix(state, src->type),
-                       reg(state, src, REGCM_GPR8 | REGCM_GPR16 | REGCM_GPR32),
+                       reg(state, src, REGCM_GPR8_LO | REGCM_GPR16 | REGCM_GPR32),
                        reg(state, dst, REGCM_GPR32));
        }
        
@@ -16490,7 +19097,7 @@ static void print_op_cmp(struct compile_state *state,
 {
        unsigned mask;
        int dreg;
-       mask = REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8;
+       mask = REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8_LO;
        dreg = check_reg(state, ins, REGCM_FLAGS);
        if (!reg_is_reg(state, dreg, REG_EFLAGS)) {
                internal_error(state, ins, "bad dest register for cmp");
@@ -16518,7 +19125,7 @@ static void print_op_test(struct compile_state *state,
        struct triple *ins, FILE *fp)
 {
        unsigned mask;
-       mask = REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8;
+       mask = REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8_LO;
        fprintf(fp, "\ttest %s, %s\n",
                reg(state, RHS(ins, 0), mask),
                reg(state, RHS(ins, 0), mask));
@@ -16528,7 +19135,7 @@ static void print_op_branch(struct compile_state *state,
        struct triple *branch, FILE *fp)
 {
        const char *bop = "j";
-       if (branch->op == OP_JMP) {
+       if ((branch->op == OP_JMP) || (branch->op == OP_CALL)) {
                if (TRIPLE_RHS(branch->sizes) != 0) {
                        internal_error(state, branch, "jmp with condition?");
                }
@@ -16570,8 +19177,15 @@ static void print_op_branch(struct compile_state *state,
        }
        fprintf(fp, "\t%s L%s%lu\n",
                bop, 
-               state->label_prefix,
-               TARG(branch, 0)->u.cval);
+               state->compiler->label_prefix,
+               (unsigned long)(TARG(branch, 0)->u.cval));
+}
+
+static void print_op_ret(struct compile_state *state, /* Emit the code for an OP_RET triple: an indirect jump, not a `ret` instruction. */
+       struct triple *branch, FILE *fp)
+{
+       fprintf(fp, "\tjmp *%s\n", /* `jmp *reg` jumps to the address held in the register */
+               reg(state, RHS(branch, 0), REGCM_GPR32)); /* operand is RHS(branch, 0), requested with the REGCM_GPR32 class mask */
 }
 
 static void print_op_set(struct compile_state *state,
@@ -16605,7 +19219,7 @@ static void print_op_set(struct compile_state *state,
                break;
        }
        fprintf(fp, "\t%s %s\n",
-               sop, reg(state, set, REGCM_GPR8));
+               sop, reg(state, set, REGCM_GPR8_LO));
 }
 
 static void print_op_bit_scan(struct compile_state *state, 
@@ -16631,57 +19245,14 @@ static void print_op_bit_scan(struct compile_state *state,
                reg(state, ins, REGCM_GPR32));
 }
 
-static void print_const(struct compile_state *state,
-       struct triple *ins, FILE *fp)
-{
-       switch(ins->op) {
-       case OP_INTCONST:
-               switch(ins->type->type & TYPE_MASK) {
-               case TYPE_CHAR:
-               case TYPE_UCHAR:
-                       fprintf(fp, ".byte 0x%02lx\n", ins->u.cval);
-                       break;
-               case TYPE_SHORT:
-               case TYPE_USHORT:
-                       fprintf(fp, ".short 0x%04lx\n", ins->u.cval);
-                       break;
-               case TYPE_INT:
-               case TYPE_UINT:
-               case TYPE_LONG:
-               case TYPE_ULONG:
-                       fprintf(fp, ".int %lu\n", ins->u.cval);
-                       break;
-               default:
-                       internal_error(state, ins, "Unknown constant type");
-               }
-               break;
-       case OP_BLOBCONST:
-       {
-               unsigned char *blob;
-               size_t size, i;
-               size = size_of(state, ins->type);
-               blob = ins->u.blob;
-               for(i = 0; i < size; i++) {
-                       fprintf(fp, ".byte 0x%02x\n",
-                               blob[i]);
-               }
-               break;
-       }
-       default:
-               internal_error(state, ins, "Unknown constant type");
-               break;
-       }
-}
-
-#define TEXT_SECTION ".rom.text"
-#define DATA_SECTION ".rom.data"
 
 static void print_sdecl(struct compile_state *state,
        struct triple *ins, FILE *fp)
 {
        fprintf(fp, ".section \"" DATA_SECTION "\"\n");
        fprintf(fp, ".balign %d\n", align_of(state, ins->type));
-       fprintf(fp, "L%s%lu:\n", state->label_prefix, ins->u.cval);
+       fprintf(fp, "L%s%lu:\n", 
+               state->compiler->label_prefix, (unsigned long)(ins->u.cval));
        print_const(state, MISC(ins, 0), fp);
        fprintf(fp, ".section \"" TEXT_SECTION "\"\n");
                
@@ -16718,7 +19289,6 @@ static void print_instruction(struct compile_state *state,
        case OP_SDECL:
                print_sdecl(state, ins, fp);
                break;
-       case OP_WRITE: 
        case OP_COPY:   
                print_op_move(state, ins, fp);
                break;
@@ -16739,8 +19309,12 @@ static void print_instruction(struct compile_state *state,
        case OP_JMP_SMORE:   case OP_JMP_UMORE:
        case OP_JMP_SLESSEQ: case OP_JMP_ULESSEQ:
        case OP_JMP_SMOREEQ: case OP_JMP_UMOREEQ:
+       case OP_CALL:
                print_op_branch(state, ins, fp);
                break;
+       case OP_RET:
+               print_op_ret(state, ins, fp);
+               break;
        case OP_SET_EQ:      case OP_SET_NOTEQ:
        case OP_SET_SLESS:   case OP_SET_ULESS:
        case OP_SET_SMORE:   case OP_SET_UMORE:
@@ -16768,20 +19342,28 @@ static void print_instruction(struct compile_state *state,
        case OP_HLT:
                fprintf(fp, "\thlt\n");
                break;
+       case OP_SDIVT:
+               fprintf(fp, "\tidiv %s\n", reg(state, RHS(ins, 1), REGCM_GPR32));
+               break;
+       case OP_UDIVT:
+               fprintf(fp, "\tdiv %s\n", reg(state, RHS(ins, 1), REGCM_GPR32));
+               break;
+       case OP_UMUL:
+               fprintf(fp, "\tmul %s\n", reg(state, RHS(ins, 1), REGCM_GPR32));
+               break;
        case OP_LABEL:
                if (!ins->use) {
                        return;
                }
-               fprintf(fp, "L%s%lu:\n", state->label_prefix, ins->u.cval);
+               fprintf(fp, "L%s%lu:\n", 
+                       state->compiler->label_prefix, (unsigned long)(ins->u.cval));
                break;
                /* Ignore OP_PIECE */
        case OP_PIECE:
                break;
-               /* Operations I am not yet certain how to handle */
-       case OP_UMUL:
+               /* Operations that should never get here */
        case OP_SDIV: case OP_UDIV:
        case OP_SMOD: case OP_UMOD:
-               /* Operations that should never get here */
        case OP_LTRUE:   case OP_LFALSE:  case OP_EQ:      case OP_NOTEQ:
        case OP_SLESS:   case OP_ULESS:   case OP_SMORE:   case OP_UMORE:
        case OP_SLESSEQ: case OP_ULESSEQ: case OP_SMOREEQ: case OP_UMOREEQ:
@@ -16798,11 +19380,13 @@ static void print_instructions(struct compile_state *state)
        int print_location;
        struct occurance *last_occurance;
        FILE *fp;
+       int max_inline_depth;
+       max_inline_depth = 0;
        print_location = 1;
        last_occurance = 0;
        fp = state->output;
        fprintf(fp, ".section \"" TEXT_SECTION "\"\n");
-       first = RHS(state->main_function, 0);
+       first = state->first;
        ins = first;
        do {
                if (print_location && 
@@ -16816,8 +19400,11 @@ static void print_instructions(struct compile_state *state)
                        }
                        else {
                                struct occurance *ptr;
+                               int inline_depth;
                                fprintf(fp, "\t/*\n");
+                               inline_depth = 0;
                                for(ptr = ins->occurance; ptr; ptr = ptr->parent) {
+                                       inline_depth++;
                                        fprintf(fp, "\t * %s,%s:%d.%d\n",
                                                ptr->function,
                                                ptr->filename,
@@ -16825,7 +19412,9 @@ static void print_instructions(struct compile_state *state)
                                                ptr->col);
                                }
                                fprintf(fp, "\t */\n");
-                               
+                               if (inline_depth > max_inline_depth) {
+                                       max_inline_depth = inline_depth;
+                               }
                        }
                        if (last_occurance) {
                                put_occurance(last_occurance);
@@ -16837,8 +19426,12 @@ static void print_instructions(struct compile_state *state)
                print_instruction(state, ins, fp);
                ins = ins->next;
        } while(ins != first);
-       
+       if (print_location) {
+               fprintf(fp, "/* max inline depth %d */\n",
+                       max_inline_depth);
+       }
 }
+
 static void generate_code(struct compile_state *state)
 {
        generate_local_labels(state);
@@ -16865,30 +19458,26 @@ static void print_tokens(struct compile_state *state)
        } while(tk->tok != TOK_EOF);
 }
 
-static void compile(const char *filename, const char *ofilename, 
-       int cpu, int debug, int opt, const char *label_prefix)
+static void compile(const char *filename, 
+       struct compiler_state *compiler, struct arch_state *arch)
 {
        int i;
        struct compile_state state;
+       struct triple *ptr;
        memset(&state, 0, sizeof(state));
+       state.compiler = compiler;
+       state.arch     = arch;
        state.file = 0;
        for(i = 0; i < sizeof(state.token)/sizeof(state.token[0]); i++) {
                memset(&state.token[i], 0, sizeof(state.token[i]));
                state.token[i].tok = -1;
        }
-       /* Remember the debug settings */
-       state.cpu      = cpu;
-       state.debug    = debug;
-       state.optimize = opt;
        /* Remember the output filename */
-       state.ofilename = ofilename;
-       state.output    = fopen(state.ofilename, "w");
+       state.output    = fopen(state.compiler->ofilename, "w");
        if (!state.output) {
                error(&state, 0, "Cannot open output file %s\n",
-                       ofilename);
+                       state.compiler->ofilename);
        }
-       /* Remember the label prefix */
-       state.label_prefix = label_prefix;
        /* Prep the preprocessor */
        state.if_depth = 0;
        state.if_value = 0;
@@ -16897,8 +19486,28 @@ static void compile(const char *filename, const char *ofilename,
        /* register the keywords the macro preprocessor knows */
        register_macro_keywords(&state);
        /* Memorize where some special keywords are. */
+       state.i_switch   = lookup(&state, "switch", 6);
+       state.i_case     = lookup(&state, "case", 4);
        state.i_continue = lookup(&state, "continue", 8);
        state.i_break    = lookup(&state, "break", 5);
+       state.i_default  = lookup(&state, "default", 7);
+       state.i_return   = lookup(&state, "return", 6);
+
+       /* Allocate beginning bounding labels for the function list */
+       state.first = label(&state);
+       state.first->id |= TRIPLE_FLAG_VOLATILE;
+       use_triple(state.first, state.first);
+       ptr = label(&state);
+       ptr->id |= TRIPLE_FLAG_VOLATILE;
+       use_triple(ptr, ptr);
+       flatten(&state, state.first, ptr);
+
+       /* Allocate a label for the pool of global variables */
+       state.global_pool = label(&state);
+       state.global_pool->id |= TRIPLE_FLAG_VOLATILE;
+       flatten(&state, state.first, state.global_pool);
+
+
        /* Enter the globl definition scope */
        start_scope(&state);
        register_builtins(&state);
@@ -16907,16 +19516,20 @@ static void compile(const char *filename, const char *ofilename,
        print_tokens(&state);
 #endif 
        decls(&state);
+
        /* Exit the global definition scope */
        end_scope(&state);
 
+       /* Join all of the functions into one giant function */
+       join_functions(&state);
+
        /* Now that basic compilation has happened 
         * optimize the intermediate code 
         */
        optimize(&state);
 
        generate_code(&state);
-       if (state.debug) {
+       if (state.compiler->debug) {
                fprintf(stderr, "done\n");
        }
 }
@@ -16948,61 +19561,58 @@ static void arg_error(char *fmt, ...)
 int main(int argc, char **argv)
 {
        const char *filename;
-       const char *ofilename;
-       const char *label_prefix;
-       int cpu;
-       int last_argc;
-       int debug;
-       int optimize;
-       cpu = CPU_DEFAULT;
-       label_prefix = "";
-       ofilename = "auto.inc";
-       optimize = 0;
-       debug = 0;
-       last_argc = -1;
-       while((argc > 1) && (argc != last_argc)) {
-               last_argc = argc;
-               if (strncmp(argv[1], "--debug=", 8) == 0) {
-                       debug = atoi(argv[1] + 8);
-                       argv++;
-                       argc--;
-               }
-               else if (strncmp(argv[1], "--label-prefix=", 15) == 0) {
-                       label_prefix= argv[1] + 15;
-                       argv++;
-                       argc--;
-               }
-               else if ((strcmp(argv[1],"-O") == 0) ||
-                       (strcmp(argv[1], "-O1") == 0)) {
-                       optimize = 1;
-                       argv++;
-                       argc--;
+       struct compiler_state compiler;
+       struct arch_state arch;
+       int all_opts;
+       init_compiler_state(&compiler);
+       init_arch_state(&arch);
+       filename = 0;
+       all_opts = 0;
+       while(argc > 1) {
+               if (!all_opts && (strcmp(argv[1], "-o") == 0) && (argc > 2)) {
+                       compiler.ofilename = argv[2];
+                       argv += 2;
+                       argc -= 2;
                }
-               else if (strcmp(argv[1],"-O2") == 0) {
-                       optimize = 2;
+               else if (!all_opts && argv[1][0] == '-') {
+                       int result;
+                       result = -1;
+                       if (strcmp(argv[1], "--") == 0) {
+                               result = 0;
+                               all_opts = 1;
+                       }
+                       else if (strncmp(argv[1],"-O", 2) == 0) {
+                               result = compiler_encode_flag(&compiler, argv[1]);
+                       }
+                       else if (strncmp(argv[1], "--label-prefix=", 15) == 0) {
+                               result = compiler_encode_flag(&compiler, argv[1]+2);
+                       }
+                       else if (strncmp(argv[1], "-f", 2) == 0) {
+                               result = compiler_encode_flag(&compiler, argv[1]+2);
+                       }
+                       else if (strncmp(argv[1], "-m", 2) == 0) {
+                               result = arch_encode_flag(&arch, argv[1]+2);
+                       }
+                       if (result < 0) {
+                               arg_error("Invalid option specified: %s\n",
+                                       argv[1]);
+                       }
                        argv++;
                        argc--;
                }
-               else if ((strcmp(argv[1], "-o") == 0) && (argc > 2)) {
-                       ofilename = argv[2];
-                       argv += 2;
-                       argc -= 2;
-               }
-               else if (strncmp(argv[1], "-mcpu=", 6) == 0) {
-                       cpu = arch_encode_cpu(argv[1] + 6);
-                       if (cpu == BAD_CPU) {
-                               arg_error("Invalid cpu specified: %s\n",
-                                       argv[1] + 6);
+               else {
+                       if (filename) {
+                               arg_error("Only one filename may be specified\n");
                        }
+                       filename = argv[1];
                        argv++;
                        argc--;
                }
        }
-       if (argc != 2) {
-               arg_error("Wrong argument count %d\n", argc);
+       if (!filename) {
+               arg_error("No filename specified\n");
        }
-       filename = argv[1];
-       compile(filename, ofilename, cpu, debug, optimize, label_prefix);
+       compile(filename, &compiler, &arch);
 
        return 0;
 }