- Massive set of cleanups/fixes for romcc. Lots of corner cases now work
author Eric Biederman <ebiederm@xmission.com>
Tue, 1 Jul 2003 10:05:30 +0000 (10:05 +0000)
committer Eric Biederman <ebiederm@xmission.com>
Tue, 1 Jul 2003 10:05:30 +0000 (10:05 +0000)
  properly.  And a few long standing bugs have been rooted out and removed.

git-svn-id: svn://svn.coreboot.org/coreboot/trunk@931 2b7e53f0-3cfb-0310-b3e9-8179ed1497e1

util/romcc/Makefile
util/romcc/romcc.c
util/romcc/tests/raminit_test5.c [new file with mode: 0644]
util/romcc/tests/simple_test48.c [new file with mode: 0644]
util/romcc/tests/simple_test49.c [new file with mode: 0644]
util/romcc/tests/simple_test50.c [new file with mode: 0644]
util/romcc/tests/simple_test51.c [new file with mode: 0644]
util/romcc/tests/simple_test52.c [new file with mode: 0644]
util/romcc/tests/simple_test53.c [new file with mode: 0644]
util/romcc/tests/simple_test54.c [new file with mode: 0644]

index c6b654eb4c8689f2ed39b0d0fdae3a0020e19eb9..58778218ed4e36848b2b2990cccbb045dcca1aaf 100644 (file)
@@ -1,5 +1,5 @@
-VERSION:=0.32
-RELEASE_DATE:=28 June 2003
+VERSION:=0.33
+RELEASE_DATE:=1 July 2003
 PACKAGE:=romcc
 
 
@@ -67,10 +67,18 @@ TESTS=\
        simple_test45.c \
        simple_test46.c \
        simple_test47.c \
+       simple_test48.c \
+       simple_test49.c \
+       simple_test50.c \
+       simple_test51.c \
+       simple_test52.c \
+       simple_test53.c \
+       simple_test54.c \
        raminit_test.c \
        raminit_test2.c \
        raminit_test3.c \
-       raminit_test4.c
+       raminit_test4.c \
+       raminit_test5.c
 
 FAIL_TESTS = \
        fail_test1.c
@@ -85,7 +93,7 @@ FAIL_OUT:=$(patsubst %.c, tests/%.out, $(FAIL_TESTS))
 
 
 $(TEST_ASM): %.S: %.c romcc
-       export ALLOC_CHECK_=2; ./romcc -mcpu=k8 -O -o $@ $< > $*.debug
+       export ALLOC_CHECK_=2; ./romcc -O -mcpu=k8 -o $@ $< > $*.debug
 
 $(FAIL_OUT): %.out: %.c romcc
        export ALLOC_CHECK_=2; if ./romcc -O -o $*.S $< > $*.debug 2> $@ ; then exit 1 ; else exit 0 ; fi
@@ -96,7 +104,7 @@ $(TEST_OBJ): %.o: %.S
 $(TEST_ELF): %.elf: %.o tests/ldscript.ld
        ld -T tests/ldscript.ld $< -o $@
 
-test: $(TEST_ELF)
+test: $(TEST_ELF) $(FAIL_OUT)
 
 echo:
        echo "TEST_SRCS=$(TEST_SRCS)"
index 6f3335fe493a28f28b073d69a30ff3ee187fd8f3..7d380d681a289f2dbc072f0c5a82f2ebbfe183f1 100644 (file)
 #define DEBUG_CONSISTENCY 2
 #define DEBUG_RANGE_CONFLICTS 0
 #define DEBUG_COALESCING 0
+#define DEBUG_SDP_BLOCKS 0
+#define DEBUG_TRIPLE_COLOR 0
 
 #warning "FIXME boundary cases with small types in larger registers"
 #warning "FIXME give clear error messages about unused variables"
+#warning "FIXME properly handle multi dimensional arrays"
+#warning "FIXME fix scc_transform"
 
 /*  Control flow graph of a loop without goto.
  * 
@@ -261,23 +265,25 @@ struct token {
 /* Operations on general purpose registers.
  */
 
-#define OP_SMUL       0
-#define OP_UMUL       1
-#define OP_SDIV       2
-#define OP_UDIV       3
-#define OP_SMOD       4
-#define OP_UMOD       5
-#define OP_ADD        6
-#define OP_SUB        7
-#define OP_SL         8
-#define OP_USR        9
-#define OP_SSR       10 
-#define OP_AND       11 
-#define OP_XOR       12
-#define OP_OR        13
-#define OP_POS       14 /* Dummy positive operator don't use it */
-#define OP_NEG       15
-#define OP_INVERT    16
+#define OP_SDIVT      0
+#define OP_UDIVT      1
+#define OP_SMUL       2
+#define OP_UMUL       3
+#define OP_SDIV       4
+#define OP_UDIV       5
+#define OP_SMOD       6
+#define OP_UMOD       7
+#define OP_ADD        8
+#define OP_SUB        9
+#define OP_SL        10
+#define OP_USR       11
+#define OP_SSR       12 
+#define OP_AND       13 
+#define OP_XOR       14
+#define OP_OR        15
+#define OP_POS       16 /* Dummy positive operator don't use it */
+#define OP_NEG       17
+#define OP_INVERT    18
                     
 #define OP_EQ        20
 #define OP_NOTEQ     21
@@ -295,6 +301,10 @@ struct token {
 
 #define OP_LOAD      32
 #define OP_STORE     33
+/* For OP_STORE ->type holds the type
+ * RHS(0) holds the destination address
+ * RHS(1) holds the value to store.
+ */
 
 #define OP_NOOP      34
 
@@ -318,8 +328,8 @@ struct token {
 
 #define OP_WRITE     60 
 /* OP_WRITE moves one pseudo register to another.
- * LHS(0) holds the destination pseudo register, which must be an OP_DECL.
- * RHS(0) holds the psuedo to move.
+ * RHS(0) holds the destination pseudo register, which must be an OP_DECL.
+ * RHS(1) holds the pseudo register to move.
  */
 
 #define OP_READ      61
@@ -509,6 +519,8 @@ struct op_info {
        .targ = (TARG), \
         }
 static const struct op_info table_ops[] = {
+[OP_SDIVT      ] = OP( 2,  2, 0, 0, PURE | BLOCK , "sdivt"),
+[OP_UDIVT      ] = OP( 2,  2, 0, 0, PURE | BLOCK , "udivt"),
 [OP_SMUL       ] = OP( 0,  2, 0, 0, PURE | DEF | BLOCK , "smul"),
 [OP_UMUL       ] = OP( 0,  2, 0, 0, PURE | DEF | BLOCK , "umul"),
 [OP_SDIV       ] = OP( 0,  2, 0, 0, PURE | DEF | BLOCK , "sdiv"),
@@ -541,7 +553,7 @@ static const struct op_info table_ops[] = {
 [OP_LTRUE      ] = OP( 0,  1, 0, 0, PURE | DEF | BLOCK , "ltrue"),
 
 [OP_LOAD       ] = OP( 0,  1, 0, 0, IMPURE | DEF | BLOCK, "load"),
-[OP_STORE      ] = OP( 1,  1, 0, 0, IMPURE | BLOCK , "store"),
+[OP_STORE      ] = OP( 0,  2, 0, 0, IMPURE | BLOCK , "store"),
 
 [OP_NOOP       ] = OP( 0,  0, 0, 0, PURE | BLOCK, "noop"),
 
@@ -549,7 +561,7 @@ static const struct op_info table_ops[] = {
 [OP_BLOBCONST  ] = OP( 0,  0, 0, 0, PURE, "blobconst"),
 [OP_ADDRCONST  ] = OP( 0,  0, 1, 0, PURE | DEF, "addrconst"),
 
-[OP_WRITE      ] = OP( 1,  1, 0, 0, PURE | BLOCK, "write"),
+[OP_WRITE      ] = OP( 0,  2, 0, 0, PURE | BLOCK, "write"),
 [OP_READ       ] = OP( 0,  1, 0, 0, PURE | DEF | BLOCK, "read"),
 [OP_COPY       ] = OP( 0,  1, 0, 0, PURE | DEF | BLOCK, "copy"),
 [OP_PIECE      ] = OP( 0,  0, 1, 0, PURE | DEF, "piece"),
@@ -864,9 +876,9 @@ struct type {
 #define MAX_REG_EQUIVS     16
 #define REGISTER_BITS      16
 #define MAX_VIRT_REGISTERS (1<<REGISTER_BITS)
-#define TEMPLATE_BITS      6
+#define TEMPLATE_BITS      7
 #define MAX_TEMPLATES      (1<<TEMPLATE_BITS)
-#define MAX_REGC           12
+#define MAX_REGC           14
 #define REG_UNSET          0
 #define REG_UNNEEDED       1
 #define REG_VIRT0          (MAX_REGISTERS + 0)
@@ -961,7 +973,7 @@ static int get_col(struct file_state *file)
 static void loc(FILE *fp, struct compile_state *state, struct triple *triple)
 {
        int col;
-       if (triple) {
+       if (triple && triple->occurance) {
                struct occurance *spot;
                spot = triple->occurance;
                while(spot->parent) {
@@ -1557,7 +1569,7 @@ static struct triple *post_triple(struct compile_state *state,
        }
        /* If I have a left hand side skip over it */
        zlhs = TRIPLE_LHS(base->sizes);
-       if (zlhs && (base->op != OP_WRITE) && (base->op != OP_STORE)) {
+       if (zlhs) {
                base = LHS(base, zlhs - 1);
        }
 
@@ -1626,6 +1638,14 @@ static void display_triple(FILE *fp, struct triple *ins)
                        ptr->col);
        }
        fprintf(fp, "\n");
+#if 0
+       {
+               struct triple_set *user;
+               for(user = ptr->use; user; user = user->next) {
+                       fprintf(fp, "use: %p\n", user->member);
+               }
+       }
+#endif
        fflush(fp);
 }
 
@@ -1656,6 +1676,23 @@ static int triple_is_branch(struct compile_state *state, struct triple *ins)
        return is_branch;
 }
 
+static int triple_is_cond_branch(struct compile_state *state, struct triple *ins)
+{
+       /* Anything that is not a branch is rejected up front.  Among
+        * branches, the conditional ones are those with exactly one
+        * RHS parameter: the condition argument.
+        */
+       if (!triple_is_branch(state, ins)) {
+               return 0;
+       }
+       return TRIPLE_RHS(ins->sizes) == 1;
+}
+
+static int triple_is_uncond_branch(struct compile_state *state, struct triple *ins)
+{
+       /* Anything that is not a branch is rejected up front.  An
+        * unconditional branch is a branch with no RHS parameters.
+        */
+       if (!triple_is_branch(state, ins)) {
+               return 0;
+       }
+       return TRIPLE_RHS(ins->sizes) == 0;
+}
+
 static int triple_is_def(struct compile_state *state, struct triple *ins)
 {
        /* This function is used to determine which triples need
@@ -4191,6 +4228,10 @@ static int equiv_types(struct type *left, struct type *right)
                return 0;
        }
        type = left->type & TYPE_MASK;
+       /* If the basic types match and it is a void type we are done */
+       if (type == TYPE_VOID) {
+               return 1;
+       }
        /* if the basic types match and it is an arithmetic type we are done */
        if (TYPE_ARITHMETIC(type)) {
                return 1;
@@ -5132,13 +5173,6 @@ static struct triple *flatten(
                        return ptr;
                }
                switch(ptr->op) {
-               case OP_WRITE:
-               case OP_STORE:
-                       RHS(ptr, 0) = flatten(state, first, RHS(ptr, 0));
-                       LHS(ptr, 0) = flatten(state, first, LHS(ptr, 0));
-                       use_triple(LHS(ptr, 0), ptr);
-                       use_triple(RHS(ptr, 0), ptr);
-                       break;
                case OP_COMMA:
                        RHS(ptr, 0) = flatten(state, first, RHS(ptr, 0));
                        ptr = RHS(ptr, 1);
@@ -5474,6 +5508,22 @@ static int is_one(struct triple *ins)
        return is_const(ins) && (ins->u.cval == 1);
 }
 
+/* Return the number of bits that are set in value. */
+static long_t bit_count(ulong_t value)
+{
+       int bits;
+       int pos;
+       bits = 0;
+       /* Test every bit position of a ulong_t, highest position first */
+       for(pos = (sizeof(ulong_t)*8) - 1; pos >= 0; pos--) {
+               if (value & (((ulong_t)1) << pos)) {
+                       bits++;
+               }
+       }
+       return bits;
+}
 static long_t bsr(ulong_t value)
 {
        int i;
@@ -5700,8 +5750,8 @@ static void flatten_structures(struct compile_state *state)
                                ulong_t i;
 
                                op = ins->op;
-                               src = RHS(ins, 0);
-                               dst = LHS(ins, 0);
+                               src = RHS(ins, 1);
+                               dst = RHS(ins, 0);
                                get_occurance(ins->occurance);
                                next = alloc_triple(state, OP_VAL_VEC, ins->type, -1, -1,
                                        ins->occurance);
@@ -5949,13 +5999,13 @@ static void simplify_add(struct compile_state *state, struct triple *ins)
                RHS(ins, 1) = tmp;
        }
        if (is_const(RHS(ins, 0)) && is_const(RHS(ins, 1))) {
-               if (!is_pointer(RHS(ins, 0))) {
+               if (RHS(ins, 0)->op == OP_INTCONST) {
                        ulong_t left, right;
                        left  = read_const(state, ins, &RHS(ins, 0));
                        right = read_const(state, ins, &RHS(ins, 1));
                        mkconst(state, ins, left + right);
                }
-               else /* op == OP_ADDRCONST */ {
+               else if (RHS(ins, 0)->op == OP_ADDRCONST) {
                        struct triple *sdecl;
                        ulong_t left, right;
                        sdecl = MISC(RHS(ins, 0), 0);
@@ -5963,6 +6013,9 @@ static void simplify_add(struct compile_state *state, struct triple *ins)
                        right = RHS(ins, 1)->u.cval;
                        mkaddr_const(state, ins, sdecl, left + right);
                }
+               else {
+                       internal_warning(state, ins, "Optimize me!");
+               }
        }
        else if (is_const(RHS(ins, 0)) && !is_const(RHS(ins, 1))) {
                struct triple *tmp;
@@ -5975,13 +6028,13 @@ static void simplify_add(struct compile_state *state, struct triple *ins)
 static void simplify_sub(struct compile_state *state, struct triple *ins)
 {
        if (is_const(RHS(ins, 0)) && is_const(RHS(ins, 1))) {
-               if (!is_pointer(RHS(ins, 0))) {
+               if (RHS(ins, 0)->op == OP_INTCONST) {
                        ulong_t left, right;
                        left  = read_const(state, ins, &RHS(ins, 0));
                        right = read_const(state, ins, &RHS(ins, 1));
                        mkconst(state, ins, left - right);
                }
-               else /* op == OP_ADDRCONST */ {
+               else if (RHS(ins, 0)->op == OP_ADDRCONST) {
                        struct triple *sdecl;
                        ulong_t left, right;
                        sdecl = MISC(RHS(ins, 0), 0);
@@ -5989,6 +6042,9 @@ static void simplify_sub(struct compile_state *state, struct triple *ins)
                        right = RHS(ins, 1)->u.cval;
                        mkaddr_const(state, ins, sdecl, left - right);
                }
+               else {
+                       internal_warning(state, ins, "Optimize me!");
+               }
        }
 }
 
@@ -6372,6 +6428,65 @@ static void simplify_branch(struct compile_state *state, struct triple *ins)
        }
 }
 
+/* Report whether a basic block contains a phi function.
+ *
+ * Returns 1 if any triple from block->first through block->last
+ * (inclusive) has op OP_PHI.  Returns 0 otherwise, and also when
+ * block is a null pointer.
+ */
+int phi_present(struct block *block)
+{
+       struct triple *ptr;
+       if (!block) {
+               return 0;
+       }
+       /* Walk first..last inclusive and test each triple before the
+        * termination check.  A phi sitting in block->last is then
+        * seen, and a block with first == last stops after its single
+        * triple instead of running on through the triple list.
+        */
+       for(ptr = block->first; ; ptr = ptr->next) {
+               if (ptr->op == OP_PHI) {
+                       return 1;
+               }
+               if (ptr == block->last) {
+                       break;
+               }
+       }
+       return 0;
+}
+
+/* Simplify an OP_LABEL triple.
+ *
+ * A label without users is turned into an OP_NOOP.  A label that
+ * directly follows another label is turned into an OP_NOOP as well,
+ * with its users redirected to the previous label, unless a successor
+ * of the previous label's block contains a phi function.
+ */
+static void simplify_label(struct compile_state *state, struct triple *ins)
+{
+#warning "FIXME enable simplify_label"
+       struct triple *first, *last;
+       struct triple_set *user, *next;
+       struct block *block;
+
+       first = RHS(state->main_function, 0);
+       last = first->prev;
+       /* Leave the first and last instructions alone */
+       if ((ins == first) || (ins == last)) {
+               return;
+       }
+       /* A label nobody uses just becomes a noop */
+       if (ins->use == 0) {
+               ins->op = OP_NOOP;
+               return;
+       }
+       /* Past this point only a label right behind another label is handled */
+       if (ins->prev->op != OP_LABEL) {
+               return;
+       }
+       /* In general it is not safe to merge one label that
+        * immediately follows another.  The problem is that the empty
+        * looking block may have phi functions that depend on it.
+        */
+       block = ins->prev->u.block;
+       if (block &&
+               (phi_present(block->left) || phi_present(block->right))) {
+               return;
+       }
+       ins->op = OP_NOOP;
+       for(user = ins->use; user; user = next) {
+               struct triple *use;
+               /* Pick up the successor before calling unuse_triple();
+                * presumably that call unlinks the current entry.
+                */
+               next = user->next;
+               use = user->member;
+               if (TARG(use, 0) == ins) {
+                       /* Retarget the user at the previous label */
+                       TARG(use, 0) = ins->prev;
+                       unuse_triple(ins, use);
+                       use_triple(ins->prev, use);
+               }
+       }
+       /* Every user must have been moved over by now */
+       if (ins->use) {
+               internal_error(state, ins, "noop use != 0");
+       }
+}
+
 static void simplify_phi(struct compile_state *state, struct triple *ins)
 {
        struct triple **expr;
@@ -6414,6 +6529,10 @@ static void simplify_bsr(struct compile_state *state, struct triple *ins)
 
 typedef void (*simplify_t)(struct compile_state *state, struct triple *ins);
 static const simplify_t table_simplify[] = {
+#if 1
+#define simplify_sdivt    simplify_noop
+#define simplify_udivt    simplify_noop
+#endif
 #if 0
 #define simplify_smul     simplify_noop
 #define simplify_umul    simplify_noop
@@ -6472,6 +6591,9 @@ static const simplify_t table_simplify[] = {
 #if 0
 #define simplify_branch          simplify_noop
 #endif
+#if 1
+#define simplify_label   simplify_noop
+#endif
 
 #if 0
 #define simplify_phi     simplify_noop
@@ -6482,6 +6604,8 @@ static const simplify_t table_simplify[] = {
 #define simplify_bsr      simplify_noop
 #endif
 
+[OP_SDIVT      ] = simplify_sdivt,
+[OP_UDIVT      ] = simplify_udivt,
 [OP_SMUL       ] = simplify_smul,
 [OP_UMUL       ] = simplify_umul,
 [OP_SDIV       ] = simplify_sdiv,
@@ -6533,7 +6657,7 @@ static const simplify_t table_simplify[] = {
 
 [OP_LIST       ] = simplify_noop,
 [OP_BRANCH     ] = simplify_branch,
-[OP_LABEL      ] = simplify_noop,
+[OP_LABEL      ] = simplify_label,
 [OP_ADECL      ] = simplify_noop,
 [OP_SDECL      ] = simplify_noop,
 [OP_PHI        ] = simplify_phi,
@@ -6581,7 +6705,7 @@ static void simplify_all(struct compile_state *state)
        do {
                simplify(state, ins);
                ins = ins->next;
-       } while(ins != first);
+       }while(ins != first);
 }
 
 /*
@@ -6758,8 +6882,32 @@ static struct type *register_builtin_type(struct compile_state *state,
 
 static void register_builtins(struct compile_state *state)
 {
+       struct type *div_type, *ldiv_type;
+       struct type *udiv_type, *uldiv_type;
        struct type *msr_type;
 
+       div_type = register_builtin_type(state, "__builtin_div_t",
+               partial_struct(state, "quot", &int_type,
+               partial_struct(state, "rem",  &int_type, 0)));
+       ldiv_type = register_builtin_type(state, "__builtin_ldiv_t",
+               partial_struct(state, "quot", &long_type,
+               partial_struct(state, "rem",  &long_type, 0)));
+       udiv_type = register_builtin_type(state, "__builtin_udiv_t",
+               partial_struct(state, "quot", &uint_type,
+               partial_struct(state, "rem",  &uint_type, 0)));
+       uldiv_type = register_builtin_type(state, "__builtin_uldiv_t",
+               partial_struct(state, "quot", &ulong_type,
+               partial_struct(state, "rem",  &ulong_type, 0)));
+
+       register_builtin_function(state, "__builtin_div",   OP_SDIVT, div_type,
+               &int_type, &int_type);
+       register_builtin_function(state, "__builtin_ldiv",  OP_SDIVT, ldiv_type,
+               &long_type, &long_type);
+       register_builtin_function(state, "__builtin_udiv",  OP_UDIVT, udiv_type,
+               &uint_type, &uint_type);
+       register_builtin_function(state, "__builtin_uldiv", OP_UDIVT, uldiv_type,
+               &ulong_type, &ulong_type);
+
        register_builtin_function(state, "__builtin_inb", OP_INB, &uchar_type, 
                &ushort_type);
        register_builtin_function(state, "__builtin_inw", OP_INW, &ushort_type,
@@ -8404,24 +8552,23 @@ static struct type *enum_specifier(
        return type;
 }
 
-#if 0
 static struct type *struct_declarator(
        struct compile_state *state, struct type *type, struct hash_entry **ident)
 {
        int tok;
-#warning "struct_declarator is complicated because of bitfields, kill them?"
        tok = peek(state);
        if (tok != TOK_COLON) {
                type = declarator(state, type, ident, 1);
        }
        if ((tok == TOK_COLON) || (peek(state) == TOK_COLON)) {
+               struct triple *value;
                eat(state, TOK_COLON);
-               constant_expr(state);
+               value = constant_expr(state);
+#warning "FIXME implement bitfields to reduce register usage"
+               error(state, 0, "bitfields not yet implemented");
        }
-       FINISHME();
        return type;
 }
-#endif
 
 static struct type *struct_or_union_specifier(
        struct compile_state *state, unsigned int spec)
@@ -8466,7 +8613,7 @@ static struct type *struct_or_union_specifier(
                                struct type *type;
                                struct hash_entry *fident;
                                done = 1;
-                               type = declarator(state, base_type, &fident, 1);
+                               type = struct_declarator(state, base_type, &fident);
                                elements++;
                                if (peek(state) == TOK_COMMA) {
                                        done = 0;
@@ -9363,14 +9510,6 @@ static int do_print_triple(struct compile_state *state, struct triple *ins, int
        if ((ins->op == OP_BRANCH) && ins->use) {
                internal_error(state, ins, "branch used?");
        }
-#if 0
-       {
-               struct triple_set *user;
-               for(user = ins->use; user; user = user->next) {
-                       printf("use: %p\n", user->member);
-               }
-       }
-#endif
        if (triple_is_branch(state, ins)) {
                printf("\n");
        }
@@ -9497,13 +9636,16 @@ static void walk_blocks(struct compile_state *state,
        ptr = first;
        do {
                struct block *block;
-               if (ptr->op == OP_LABEL) {
+               if (triple_stores_block(state, ptr)) {
                        block = ptr->u.block;
                        if (block && (block != last_block)) {
                                cb(state, block, arg);
                        }
                        last_block = block;
                }
+               if (block && (block->last == ptr)) {
+                       block = 0;
+               }
                ptr = ptr->next;
        } while(ptr != first);
 }
@@ -9511,10 +9653,11 @@ static void walk_blocks(struct compile_state *state,
 static void print_block(
        struct compile_state *state, struct block *block, void *arg)
 {
+       struct block_set *user;
        struct triple *ptr;
        FILE *fp = arg;
 
-       fprintf(fp, "\nblock: %p (%d), %p<-%p %p<-%p\n", 
+       fprintf(fp, "\nblock: %p (%d)  %p<-%p %p<-%p\n", 
                block, 
                block->vertex,
                block->left, 
@@ -9525,51 +9668,17 @@ static void print_block(
                fprintf(fp, "%p:\n", block->first);
        }
        for(ptr = block->first; ; ptr = ptr->next) {
-               struct triple_set *user;
-               int op = ptr->op;
-               
-               if (triple_stores_block(state, ptr)) {
-                       if (ptr->u.block != block) {
-                               internal_error(state, ptr, 
-                                       "Wrong block pointer: %p\n",
-                                       ptr->u.block);
-                       }
-               }
-               if (op == OP_ADECL) {
-                       for(user = ptr->use; user; user = user->next) {
-                               if (!user->member->u.block) {
-                                       internal_error(state, user->member, 
-                                               "Use %p not in a block?\n",
-                                               user->member);
-                               }
-                       }
-               }
                display_triple(fp, ptr);
-
-#if 0
-               for(user = ptr->use; user; user = user->next) {
-                       fprintf(fp, "use: %p\n", user->member);
-               }
-#endif
-
-               /* Sanity checks... */
-               valid_ins(state, ptr);
-               for(user = ptr->use; user; user = user->next) {
-                       struct triple *use;
-                       use = user->member;
-                       valid_ins(state, use);
-                       if (triple_stores_block(state, user->member) &&
-                               !user->member->u.block) {
-                               internal_error(state, user->member,
-                                       "Use %p not in a block?",
-                                       user->member);
-                       }
-               }
-
                if (ptr == block->last)
                        break;
        }
-       fprintf(fp,"\n");
+       fprintf(fp, "users %d: ", block->users);
+       for(user = block->use; user; user = user->next) {
+               fprintf(fp, "%p (%d) ", 
+                       user->member,
+                       user->member->vertex);
+       }
+       fprintf(fp,"\n\n");
 }
 
 
@@ -9595,6 +9704,9 @@ static void prune_nonblock_triples(struct compile_state *state)
                if (!block) {
                        release_triple(state, ins);
                }
+               if (block && block->last == ins) {
+                       block = 0;
+               }
                ins = next;
        } while(ins != first);
 }
@@ -9615,10 +9727,6 @@ static void setup_basic_blocks(struct compile_state *state)
        if (!state->last_block) {
                internal_error(state, 0, "end not used?");
        }
-       /* Insert an extra unused edge from start to the end 
-        * This helps with reverse control flow calculations.
-        */
-       use_block(state->first_block, state->last_block);
        /* If we are debugging print what I have just done */
        if (state->debug & DEBUG_BASIC_BLOCKS) {
                print_blocks(state, stdout);
@@ -9770,7 +9878,8 @@ static int initialize_sdblock(struct sdom_block *sd,
        return vertex;
 }
 
-static int initialize_sdpblock(struct sdom_block *sd,
+static int initialize_sdpblock(
+       struct compile_state *state, struct sdom_block *sd,
        struct block *parent, struct block *block, int vertex)
 {
        struct block_set *user;
@@ -9787,7 +9896,38 @@ static int initialize_sdpblock(struct sdom_block *sd,
        sd[vertex].ancestor = 0;
        sd[vertex].vertex   = vertex;
        for(user = block->use; user; user = user->next) {
-               vertex = initialize_sdpblock(sd, block, user->member, vertex);
+               vertex = initialize_sdpblock(state, sd, block, user->member, vertex);
+       }
+       return vertex;
+}
+
+/* Fill in the sdom_block array used for the post dominator computation.
+ *
+ * Returns the last vertex number handed back by initialize_sdpblock().
+ */
+static int setup_sdpblocks(struct compile_state *state, struct sdom_block *sd)
+{
+       struct block *cur;
+       int count;
+       /* Set up as many sdpblocks as possible without any fake edges */
+       count = initialize_sdpblock(state, sd, 0, state->last_block, 0);
+
+       /* Now walk the blocks in instruction order, starting behind
+        * the first block, looking for unconnected ones.  Where it is
+        * possible such a block gets a fake edge to the end of the
+        * graph.
+        */
+       cur = state->first_block->last->next->u.block;
+       while(cur && (cur != state->first_block)) {
+               /* Only a block that has no sd entry yet and whose left
+                * edge is still free receives the fake edge.
+                */
+               if ((sd[cur->vertex].block != cur) && (cur->left == 0)) {
+#if DEBUG_SDP_BLOCKS
+                       fprintf(stderr, "Adding %d\n", count +1);
+#endif
+
+                       cur->left = state->last_block;
+                       use_block(cur->left, cur);
+                       count = initialize_sdpblock(state, sd, state->last_block, cur, count);
+               }
+               cur = cur->last->next->u.block;
+       }
+       return count;
+}
@@ -10027,10 +10167,15 @@ static void find_immediate_dominators(struct compile_state *state)
 static void find_post_dominators(struct compile_state *state)
 {
        struct sdom_block *sd;
+       int vertex;
        /* Step 1 initialize the basic block information */
        sd = xcmalloc(sizeof(*sd) * (state->last_vertex + 1), "sdom_state");
 
-       initialize_sdpblock(sd, 0, state->last_block, 0);
+       vertex = setup_sdpblocks(state, sd);
+       if (vertex != state->last_vertex) {
+               internal_error(state, 0, "missing %d blocks\n",
+                       state->last_vertex - vertex);
+       }
 
        /* Step 2 compute the semidominators */
        /* Step 3 implicitly define the immediate dominator of each vertex */
@@ -10440,8 +10585,8 @@ static void rename_block_variables(
                /* LHS(A) */
                if (ptr->op == OP_WRITE) {
                        struct triple *var, *val, *tval;
-                       var = LHS(ptr, 0);
-                       tval = val = RHS(ptr, 0);
+                       var = RHS(ptr, 0);
+                       tval = val = RHS(ptr, 1);
                        if ((val->op == OP_WRITE) || (val->op == OP_READ)) {
                                internal_error(state, val, "bad value in write");
                        }
@@ -10456,7 +10601,7 @@ static void rename_block_variables(
                                        use_triple(val, tval);
                                }
                                unuse_triple(val, ptr);
-                               RHS(ptr, 0) = tval;
+                               RHS(ptr, 1) = tval;
                                use_triple(tval, ptr);
                        }
                        propogate_use(state, ptr, tval);
@@ -10491,9 +10636,9 @@ static void rename_block_variables(
                }
                if (ptr->op == OP_WRITE) {
                        struct triple *var;
-                       var = LHS(ptr, 0);
+                       var = RHS(ptr, 0);
                        /* Pop OP_WRITE ptr->right from the stack of variable uses */
-                       pop_triple(var, RHS(ptr, 0));
+                       pop_triple(var, RHS(ptr, 1));
                        release_triple(state, ptr);
                        continue;
                }
@@ -10645,6 +10790,7 @@ static void transform_from_ssa_form(struct compile_state *state)
                        unuse_triple(phi, use->member);
                }
 
+#warning "CHECK_ME does the OP_ADECL need to be placed somewhere that dominates all of the incoming phi edges?"
                /* A variable to replace the phi function */
                var = post_triple(state, phi, OP_ADECL, phi->type, 0,0);
                /* A read of the single value that is set into the variable */
@@ -10659,7 +10805,7 @@ static void transform_from_ssa_form(struct compile_state *state)
                for(edge = 0, set = block->use; set; set = set->next, edge++) {
                        struct block *eblock;
                        struct triple *move;
-                       struct triple *val;
+                       struct triple *val, *base;
                        eblock = set->member;
                        val = slot[edge];
                        slot[edge] = 0;
@@ -10671,20 +10817,21 @@ static void transform_from_ssa_form(struct compile_state *state)
                                continue;
                        }
                        
-                       move = post_triple(state, 
-                               val, OP_WRITE, phi->type, var, val);
+                       /* Make certain the write is placed in the edge block... */
+                       base = eblock->first;
+                       if (block_of_triple(state, val) == eblock) {
+                               base = val;
+                       }
+                       move = post_triple(state, base, OP_WRITE, phi->type, var, val);
                        use_triple(val, move);
                        use_triple(var, move);
                }               
                /* See if there are any writers of var */
                used = 0;
                for(use = var->use; use; use = use->next) {
-                       struct triple **expr;
-                       expr = triple_lhs(state, use->member, 0);
-                       for(; expr; expr = triple_lhs(state, use->member, expr)) {
-                               if (*expr == var) {
-                                       used = 1;
-                               }
+                       if ((use->member->op == OP_WRITE) &&
+                               (RHS(use->member, 0) == var)) {
+                               used = 1;
                        }
                }
                /* If var is not used free it */
@@ -10792,7 +10939,7 @@ static struct reg_info find_lhs_post_color(
        struct triple_set *set;
        struct reg_info info;
        struct triple *lhs;
-#if 0
+#if DEBUG_TRIPLE_COLOR
        fprintf(stderr, "find_lhs_post_color(%p, %d)\n",
                ins, index);
 #endif
@@ -10836,7 +10983,7 @@ static struct reg_info find_lhs_post_color(
                        info.regcm &= rinfo.regcm;
                }
        }
-#if 0
+#if DEBUG_TRIPLE_COLOR
        fprintf(stderr, "find_lhs_post_color(%p, %d) -> ( %d, %x)\n",
                ins, index, info.reg, info.regcm);
 #endif
@@ -10848,7 +10995,7 @@ static struct reg_info find_rhs_post_color(
 {
        struct reg_info info, rinfo;
        int zlhs, i;
-#if 0
+#if DEBUG_TRIPLE_COLOR
        fprintf(stderr, "find_rhs_post_color(%p, %d)\n",
                ins, index);
 #endif
@@ -10871,7 +11018,7 @@ static struct reg_info find_rhs_post_color(
                        if (tinfo.reg >= MAX_REGISTERS) {
                                tinfo.reg = REG_UNSET;
                        }
-                       info.regcm &= linfo.reg;
+                       info.regcm &= linfo.regcm;
                        info.regcm &= tinfo.regcm;
                        if (info.reg != REG_UNSET) {
                                internal_error(state, ins, "register conflict");
@@ -10882,7 +11029,7 @@ static struct reg_info find_rhs_post_color(
                        info.reg = tinfo.reg;
                }
        }
-#if 0
+#if DEBUG_TRIPLE_COLOR
        fprintf(stderr, "find_rhs_post_color(%p, %d) -> ( %d, %x)\n",
                ins, index, info.reg, info.regcm);
 #endif
@@ -10893,7 +11040,7 @@ static struct reg_info find_lhs_color(
        struct compile_state *state, struct triple *ins, int index)
 {
        struct reg_info pre, post, info;
-#if 0
+#if DEBUG_TRIPLE_COLOR
        fprintf(stderr, "find_lhs_color(%p, %d)\n",
                ins, index);
 #endif
@@ -10909,9 +11056,10 @@ static struct reg_info find_lhs_color(
        if (info.reg == REG_UNSET) {
                info.reg = post.reg;
        }
-#if 0
-       fprintf(stderr, "find_lhs_color(%p, %d) -> ( %d, %x)\n",
-               ins, index, info.reg, info.regcm);
+#if DEBUG_TRIPLE_COLOR
+       fprintf(stderr, "find_lhs_color(%p, %d) -> ( %d, %x) ... (%d, %x) (%d, %x)\n",
+               ins, index, info.reg, info.regcm,
+               pre.reg, pre.regcm, post.reg, post.regcm);
 #endif
        return info;
 }
@@ -11503,6 +11651,12 @@ static void eliminate_inefectual_code(struct compile_state *state)
                if (!triple_is_pure(state, ins) || triple_is_branch(state, ins)) {
                        awaken(state, dtriple, &ins, &work_list_tail);
                }
+#if 1
+               /* Unconditionally keep the very last instruction */
+               else if (ins->next == first) {
+                       awaken(state, dtriple, &ins, &work_list_tail);
+               }
+#endif
                i++;
                ins = ins->next;
        } while(ins != first);
@@ -11829,7 +11983,6 @@ static void print_interference_block(
                fprintf(fp, "%p:\n", block->first);
        }
        for(done = 0, ptr = block->first; !done; ptr = ptr->next) {
-               struct triple_set *user;
                struct live_range *lr;
                unsigned id;
                int op;
@@ -11837,23 +11990,6 @@ static void print_interference_block(
                done = (ptr == block->last);
                lr = rstate->lrd[ptr->id].lr;
                
-               if (triple_stores_block(state, ptr)) {
-                       if (ptr->u.block != block) {
-                               internal_error(state, ptr, 
-                                       "Wrong block pointer: %p",
-                                       ptr->u.block);
-                       }
-               }
-               if (op == OP_ADECL) {
-                       for(user = ptr->use; user; user = user->next) {
-                               if (!user->member->u.block) {
-                                       internal_error(state, user->member, 
-                                               "Use %p not in a block?",
-                                               user->member);
-                               }
-                               
-                       }
-               }
                id = ptr->id;
                ptr->id = rstate->lrd[id].orig_id;
                SET_REG(ptr->id, lr->color);
@@ -11895,23 +12031,6 @@ static void print_interference_block(
                        internal_error(state, ptr, "Invalid triple id: %d",
                                ptr->id);
                }
-               for(user = ptr->use; user; user = user->next) {
-                       struct triple *use;
-                       struct live_range *ulr;
-                       use = user->member;
-                       valid_ins(state, use);
-                       if ((use->id < 0) || (use->id > rstate->defs)) {
-                               internal_error(state, use, "Invalid triple id: %d",
-                                       use->id);
-                       }
-                       ulr = rstate->lrd[user->member->id].lr;
-                       if (triple_stores_block(state, user->member) &&
-                               !user->member->u.block) {
-                               internal_error(state, user->member,
-                                       "Use %p not in a block?",
-                                       user->member);
-                       }
-               }
        }
        if (rb->out) {
                struct triple_reg_set *out_set;
@@ -12451,7 +12570,6 @@ static void initialize_live_ranges(
 #if DEBUG_COALESCING > 1
                fprintf(stderr, "mandatory coalesce: %p %d %d\n",
                        ins, zlhs, zrhs);
-               
 #endif         
                for(i = 0; i < zlhs; i++) {
                        struct reg_info linfo;
@@ -13028,6 +13146,10 @@ struct triple *find_constrained_def(
                 * least dominated one first.
                 */
                if (is_constrained) {
+#if DEBUG_RANGE_CONFLICTS
+                       fprintf(stderr, "canidate: %p %-8s regcm: %x %x\n",
+                               lrd->def, tops(lrd->def->op), regcm, info.regcm);
+#endif
                        if (!constrained || 
                                tdominates(state, lrd->def, constrained))
                        {
@@ -13060,8 +13182,8 @@ static int split_constrained_ranges(
                constrained = find_constrained_def(state, range, constrained);
        }
 #if DEBUG_RANGE_CONFLICTS
-       fprintf(stderr, "constrained: %s %p\n",
-               tops(constrained->op), constrained);
+       fprintf(stderr, "constrained: %p %-8s\n",
+               constrained, tops(constrained->op));
 #endif
        if (constrained) {
                ids_from_rstate(state, rstate);
@@ -13107,7 +13229,6 @@ static int split_ranges(
        return split;
 }
 
-
 #if DEBUG_COLOR_GRAPH > 1
 #define cgdebug_printf(...) fprintf(stdout, __VA_ARGS__)
 #define cgdebug_flush() fflush(stdout)
@@ -13165,8 +13286,6 @@ static int select_free_color(struct compile_state *state,
        }       
 #endif
 
-#warning "FIXME detect conflicts caused by the source and destination being the same register"
-
        /* If a color is already assigned see if it will work */
        if (range->color != REG_UNSET) {
                struct live_range_def *lrd;
@@ -13207,6 +13326,7 @@ static int select_free_color(struct compile_state *state,
                entry = lrd->def->use;
                for(;(range->color == REG_UNSET) && entry; entry = entry->next) {
                        struct live_range_def *insd;
+                       unsigned regcm;
                        insd = &rstate->lrd[entry->member->id];
                        if (insd->lr->defs == 0) {
                                continue;
@@ -13215,8 +13335,11 @@ static int select_free_color(struct compile_state *state,
                                !interfere(rstate, range, insd->lr)) {
                                phi = insd;
                        }
-                       if ((insd->lr->color == REG_UNSET) ||
-                               ((insd->lr->classes & range->classes) == 0) ||
+                       if (insd->lr->color == REG_UNSET) {
+                               continue;
+                       }
+                       regcm = insd->lr->classes;
+                       if (((regcm & range->classes) == 0) ||
                                (used[insd->lr->color])) {
                                continue;
                        }
@@ -13239,12 +13362,16 @@ static int select_free_color(struct compile_state *state,
                        expr = triple_rhs(state, phi->def, 0);
                        for(; expr; expr = triple_rhs(state, phi->def, expr)) {
                                struct live_range *lr;
+                               unsigned regcm;
                                if (!*expr) {
                                        continue;
                                }
                                lr = rstate->lrd[(*expr)->id].lr;
-                               if ((lr->color == REG_UNSET) || 
-                                       ((lr->classes & range->classes) == 0) ||
+                               if (lr->color == REG_UNSET) {
+                                       continue;
+                               }
+                               regcm = lr->classes;
+                               if (((regcm & range->classes) == 0) ||
                                        (used[lr->color])) {
                                        continue;
                                }
@@ -13261,12 +13388,16 @@ static int select_free_color(struct compile_state *state,
                expr = triple_rhs(state, lrd->def, 0);
                for(; expr; expr = triple_rhs(state, lrd->def, expr)) {
                        struct live_range *lr;
+                       unsigned regcm;
                        if (!*expr) {
                                continue;
                        }
                        lr = rstate->lrd[(*expr)->id].lr;
-                       if ((lr->color == -1) || 
-                               ((lr->classes & range->classes) == 0) ||
+                       if (lr->color == REG_UNSET) {
+                               continue;
+                       }
+                       regcm = lr->classes;
+                       if (((regcm & range->classes) == 0) ||
                                (used[lr->color])) {
                                continue;
                        }
@@ -13322,8 +13453,8 @@ static int select_free_color(struct compile_state *state,
                internal_error(state, range->defs->def, "too few registers");
 #endif
        }
-       range->classes = arch_reg_regcm(state, range->color);
-       if (range->color == -1) {
+       range->classes &= arch_reg_regcm(state, range->color);
+       if ((range->color == REG_UNSET) || (range->classes == 0)) {
                internal_error(state, range->defs->def, "select_free_color did not?");
        }
        return 1;
@@ -14503,6 +14634,7 @@ static void verify_blocks_present(struct compile_state *state)
        first = RHS(state->main_function, 0);
        ins = first;
        do {
+               valid_ins(state, ins);
                if (triple_stores_block(state, ins)) {
                        if (!ins->u.block) {
                                internal_error(state, ins, 
@@ -14518,19 +14650,73 @@ static void verify_blocks(struct compile_state *state)
 {
        struct triple *ins;
        struct block *block;
+       int blocks;
        block = state->first_block;
        if (!block) {
                return;
        }
+       blocks = 0;
        do {
+               int users;
+               struct block_set *user;
+               blocks++;
                for(ins = block->first; ins != block->last->next; ins = ins->next) {
-                       if (!triple_stores_block(state, ins)) {
+                       if (triple_stores_block(state, ins) && (ins->u.block != block)) {
+                               internal_error(state, ins, "inconsitent block specified");
+                       }
+                       valid_ins(state, ins);
+               }
+               users = 0;
+               for(user = block->use; user; user = user->next) {
+                       users++;
+                       if ((block == state->last_block) &&
+                               (user->member == state->first_block)) {
                                continue;
                        }
-                       if (ins->u.block != block) {
-                               internal_error(state, ins, "inconsitent block specified");
+                       if ((user->member->left != block) &&
+                               (user->member->right != block)) {
+                               internal_error(state, user->member->first,
+                                       "user does not use block");
+                       }
+               }
+               if (triple_is_branch(state, block->last) &&
+                       (block->right != block_of_triple(state, TARG(block->last, 0))))
+               {
+                       internal_error(state, block->last, "block->right != TARG(0)");
+               }
+               if (!triple_is_uncond_branch(state, block->last) &&
+                       (block != state->last_block) &&
+                       (block->left != block_of_triple(state, block->last->next)))
+               {
+                       internal_error(state, block->last, "block->left != block->last->next");
+               }
+               if (block->left) {
+                       for(user = block->left->use; user; user = user->next) {
+                               if (user->member == block) {
+                                       break;
+                               }
+                       }
+                       if (!user || user->member != block) {
+                               internal_error(state, block->first,
+                                       "block does not use left");
+                       }
+               }
+               if (block->right) {
+                       for(user = block->right->use; user; user = user->next) {
+                               if (user->member == block) {
+                                       break;
+                               }
+                       }
+                       if (!user || user->member != block) {
+                               internal_error(state, block->first,
+                                       "block does not use right");
                        }
                }
+               if (block->users != users) {
+                       internal_error(state, block->first, 
+                               "computed users %d != stored users %d\n",
+                               users, block->users);
+               }
                if (!triple_stores_block(state, block->last->next)) {
                        internal_error(state, block->last->next, 
                                "cannot find next block");
@@ -14541,6 +14727,10 @@ static void verify_blocks(struct compile_state *state)
                                "bad next block");
                }
        } while(block != state->first_block);
+       if (blocks != state->last_vertex) {
+               internal_error(state, 0, "computed blocks != stored blocks %d\n",
+                       blocks, state->last_vertex);
+       }
 }
 
 static void verify_domination(struct compile_state *state)
@@ -14585,9 +14775,6 @@ static void verify_piece(struct compile_state *state)
                struct triple *ptr;
                int lhs, i;
                lhs = TRIPLE_LHS(ins->sizes);
-               if ((ins->op == OP_WRITE) || (ins->op == OP_STORE)) {
-                       lhs = 0;
-               }
                for(ptr = ins->next, i = 0; i < lhs; i++, ptr = ptr->next) {
                        if (ptr != LHS(ins, i)) {
                                internal_error(state, ins, "malformed lhs on %s",
@@ -14644,8 +14831,18 @@ static void optimize(struct compile_state *state)
        analyze_idominators(state);
        analyze_ipdominators(state);
 
-       /* Transform the code to ssa form */
+       /* Transform the code to ssa form. */
+       /*
+        * The transformation to ssa form puts a phi function
+        * on each edge of a dominance frontier where that
+        * phi function might be needed.  At -O2, if we don't
+        * eliminate the excess phi functions, we can get
+        * exponential code size growth.  So I kill the extra
+        * phi functions early and I kill them often.
+        */
        transform_to_ssa_form(state);
+       eliminate_inefectual_code(state);
+
        verify_consistency(state);
        if (state->debug & DEBUG_CODE_ELIMINATION) {
                fprintf(stdout, "After transform_to_ssa_form\n");
@@ -14654,11 +14851,21 @@ static void optimize(struct compile_state *state)
        /* Do strength reduction and simple constant optimizations */
        if (state->optimize >= 1) {
                simplify_all(state);
+               transform_from_ssa_form(state);
+               free_basic_blocks(state);
+               setup_basic_blocks(state);
+               analyze_idominators(state);
+               analyze_ipdominators(state);
+               transform_to_ssa_form(state);
+               eliminate_inefectual_code(state);
+       }
+       if (state->debug & DEBUG_CODE_ELIMINATION) {
+               fprintf(stdout, "After simplify_all\n");
+               print_blocks(state, stdout);
        }
        verify_consistency(state);
        /* Propogate constants throughout the code */
        if (state->optimize >= 2) {
-#warning "FIXME fix scc_transform"
                scc_transform(state);
                transform_from_ssa_form(state);
                free_basic_blocks(state);
@@ -14666,6 +14873,7 @@ static void optimize(struct compile_state *state)
                analyze_idominators(state);
                analyze_ipdominators(state);
                transform_to_ssa_form(state);
+               eliminate_inefectual_code(state);
        }
        verify_consistency(state);
 #warning "WISHLIST implement single use constants (least possible register pressure)"
@@ -14781,37 +14989,41 @@ static void print_op_asm(struct compile_state *state,
 #define CPU_DEFAULT  CPU_I386
 
 /* The x86 register classes */
-#define REGC_FLAGS    0
-#define REGC_GPR8     1
-#define REGC_GPR16    2
-#define REGC_GPR32    3
-#define REGC_GPR64    4
-#define REGC_MMX      5
-#define REGC_XMM      6
-#define REGC_GPR32_8  7
-#define REGC_GPR16_8  8
-#define REGC_IMM32    9
-#define REGC_IMM16   10
-#define REGC_IMM8    11
+#define REGC_FLAGS       0
+#define REGC_GPR8        1
+#define REGC_GPR16       2
+#define REGC_GPR32       3
+#define REGC_DIVIDEND64  4
+#define REGC_DIVIDEND32  5
+#define REGC_MMX         6
+#define REGC_XMM         7
+#define REGC_GPR32_8     8
+#define REGC_GPR16_8     9
+#define REGC_GPR8_LO    10
+#define REGC_IMM32      11
+#define REGC_IMM16      12
+#define REGC_IMM8       13
 #define LAST_REGC  REGC_IMM8
 #if LAST_REGC >= MAX_REGC
 #error "MAX_REGC is to low"
 #endif
 
 /* Register class masks */
-#define REGCM_FLAGS   (1 << REGC_FLAGS)
-#define REGCM_GPR8    (1 << REGC_GPR8)
-#define REGCM_GPR16   (1 << REGC_GPR16)
-#define REGCM_GPR32   (1 << REGC_GPR32)
-#define REGCM_GPR64   (1 << REGC_GPR64)
-#define REGCM_MMX     (1 << REGC_MMX)
-#define REGCM_XMM     (1 << REGC_XMM)
-#define REGCM_GPR32_8 (1 << REGC_GPR32_8)
-#define REGCM_GPR16_8 (1 << REGC_GPR16_8)
-#define REGCM_IMM32   (1 << REGC_IMM32)
-#define REGCM_IMM16   (1 << REGC_IMM16)
-#define REGCM_IMM8    (1 << REGC_IMM8)
-#define REGCM_ALL     ((1 << (LAST_REGC + 1)) - 1)
+#define REGCM_FLAGS      (1 << REGC_FLAGS)
+#define REGCM_GPR8       (1 << REGC_GPR8)
+#define REGCM_GPR16      (1 << REGC_GPR16)
+#define REGCM_GPR32      (1 << REGC_GPR32)
+#define REGCM_DIVIDEND64 (1 << REGC_DIVIDEND64)
+#define REGCM_DIVIDEND32 (1 << REGC_DIVIDEND32)
+#define REGCM_MMX        (1 << REGC_MMX)
+#define REGCM_XMM        (1 << REGC_XMM)
+#define REGCM_GPR32_8    (1 << REGC_GPR32_8)
+#define REGCM_GPR16_8    (1 << REGC_GPR16_8)
+#define REGCM_GPR8_LO    (1 << REGC_GPR8_LO)
+#define REGCM_IMM32      (1 << REGC_IMM32)
+#define REGCM_IMM16      (1 << REGC_IMM16)
+#define REGCM_IMM8       (1 << REGC_IMM8)
+#define REGCM_ALL        ((1 << (LAST_REGC + 1)) - 1)
 
 /* The x86 registers */
 #define REG_EFLAGS  2
@@ -14825,12 +15037,10 @@ static void print_op_asm(struct compile_state *state,
 #define REG_BH      8
 #define REG_CH      9
 #define REG_DH      10
+#define REGC_GPR8_LO_FIRST REG_AL
+#define REGC_GPR8_LO_LAST  REG_DL
 #define REGC_GPR8_FIRST  REG_AL
-#if X86_4_8BIT_GPRS
-#define REGC_GPR8_LAST   REG_DL
-#else 
 #define REGC_GPR8_LAST   REG_DH
-#endif
 #define REG_AX     11
 #define REG_BX     12
 #define REG_CX     13
@@ -14852,26 +15062,29 @@ static void print_op_asm(struct compile_state *state,
 #define REGC_GPR32_FIRST REG_EAX
 #define REGC_GPR32_LAST  REG_ESP
 #define REG_EDXEAX 27
-#define REGC_GPR64_FIRST REG_EDXEAX
-#define REGC_GPR64_LAST  REG_EDXEAX
-#define REG_MMX0   28
-#define REG_MMX1   29
-#define REG_MMX2   30
-#define REG_MMX3   31
-#define REG_MMX4   32
-#define REG_MMX5   33
-#define REG_MMX6   34
-#define REG_MMX7   35
+#define REGC_DIVIDEND64_FIRST REG_EDXEAX
+#define REGC_DIVIDEND64_LAST  REG_EDXEAX
+#define REG_DXAX   28
+#define REGC_DIVIDEND32_FIRST REG_DXAX
+#define REGC_DIVIDEND32_LAST  REG_DXAX
+#define REG_MMX0   29
+#define REG_MMX1   30
+#define REG_MMX2   31
+#define REG_MMX3   32
+#define REG_MMX4   33
+#define REG_MMX5   34
+#define REG_MMX6   35
+#define REG_MMX7   36
 #define REGC_MMX_FIRST REG_MMX0
 #define REGC_MMX_LAST  REG_MMX7
-#define REG_XMM0   36
-#define REG_XMM1   37
-#define REG_XMM2   38
-#define REG_XMM3   39
-#define REG_XMM4   40
-#define REG_XMM5   41
-#define REG_XMM6   42
-#define REG_XMM7   43
+#define REG_XMM0   37
+#define REG_XMM1   38
+#define REG_XMM2   39
+#define REG_XMM3   40
+#define REG_XMM4   41
+#define REG_XMM5   42
+#define REG_XMM6   43
+#define REG_XMM7   44
 #define REGC_XMM_FIRST REG_XMM0
 #define REGC_XMM_LAST  REG_XMM7
 #warning "WISHLIST figure out how to use pinsrw and pextrw to better use extended regs"
@@ -14895,35 +15108,39 @@ static void print_op_asm(struct compile_state *state,
 
 
 static unsigned regc_size[LAST_REGC +1] = {
-       [REGC_FLAGS]   = REGC_FLAGS_LAST   - REGC_FLAGS_FIRST + 1,
-       [REGC_GPR8]    = REGC_GPR8_LAST    - REGC_GPR8_FIRST + 1,
-       [REGC_GPR16]   = REGC_GPR16_LAST   - REGC_GPR16_FIRST + 1,
-       [REGC_GPR32]   = REGC_GPR32_LAST   - REGC_GPR32_FIRST + 1,
-       [REGC_GPR64]   = REGC_GPR64_LAST   - REGC_GPR64_FIRST + 1,
-       [REGC_MMX]     = REGC_MMX_LAST     - REGC_MMX_FIRST + 1,
-       [REGC_XMM]     = REGC_XMM_LAST     - REGC_XMM_FIRST + 1,
-       [REGC_GPR32_8] = REGC_GPR32_8_LAST - REGC_GPR32_8_FIRST + 1,
-       [REGC_GPR16_8] = REGC_GPR16_8_LAST - REGC_GPR16_8_FIRST + 1,
-       [REGC_IMM32]   = 0,
-       [REGC_IMM16]   = 0,
-       [REGC_IMM8]    = 0,
+       [REGC_FLAGS]      = REGC_FLAGS_LAST      - REGC_FLAGS_FIRST + 1,
+       [REGC_GPR8]       = REGC_GPR8_LAST       - REGC_GPR8_FIRST + 1,
+       [REGC_GPR16]      = REGC_GPR16_LAST      - REGC_GPR16_FIRST + 1,
+       [REGC_GPR32]      = REGC_GPR32_LAST      - REGC_GPR32_FIRST + 1,
+       [REGC_DIVIDEND64] = REGC_DIVIDEND64_LAST - REGC_DIVIDEND64_FIRST + 1,
+       [REGC_DIVIDEND32] = REGC_DIVIDEND32_LAST - REGC_DIVIDEND32_FIRST + 1,
+       [REGC_MMX]        = REGC_MMX_LAST        - REGC_MMX_FIRST + 1,
+       [REGC_XMM]        = REGC_XMM_LAST        - REGC_XMM_FIRST + 1,
+       [REGC_GPR32_8]    = REGC_GPR32_8_LAST    - REGC_GPR32_8_FIRST + 1,
+       [REGC_GPR16_8]    = REGC_GPR16_8_LAST    - REGC_GPR16_8_FIRST + 1,
+       [REGC_GPR8_LO]    = REGC_GPR8_LO_LAST    - REGC_GPR8_LO_FIRST + 1,
+       [REGC_IMM32]      = 0,
+       [REGC_IMM16]      = 0,
+       [REGC_IMM8]       = 0,
 };
 
 static const struct {
        int first, last;
 } regcm_bound[LAST_REGC + 1] = {
-       [REGC_FLAGS]   = { REGC_FLAGS_FIRST,   REGC_FLAGS_LAST },
-       [REGC_GPR8]    = { REGC_GPR8_FIRST,    REGC_GPR8_LAST },
-       [REGC_GPR16]   = { REGC_GPR16_FIRST,   REGC_GPR16_LAST },
-       [REGC_GPR32]   = { REGC_GPR32_FIRST,   REGC_GPR32_LAST },
-       [REGC_GPR64]   = { REGC_GPR64_FIRST,   REGC_GPR64_LAST },
-       [REGC_MMX]     = { REGC_MMX_FIRST,     REGC_MMX_LAST },
-       [REGC_XMM]     = { REGC_XMM_FIRST,     REGC_XMM_LAST },
-       [REGC_GPR32_8] = { REGC_GPR32_8_FIRST, REGC_GPR32_8_LAST },
-       [REGC_GPR16_8] = { REGC_GPR16_8_FIRST, REGC_GPR16_8_LAST },
-       [REGC_IMM32]   = { REGC_IMM32_FIRST,   REGC_IMM32_LAST },
-       [REGC_IMM16]   = { REGC_IMM16_FIRST,   REGC_IMM16_LAST },
-       [REGC_IMM8]    = { REGC_IMM8_FIRST,    REGC_IMM8_LAST },
+       [REGC_FLAGS]      = { REGC_FLAGS_FIRST,      REGC_FLAGS_LAST },
+       [REGC_GPR8]       = { REGC_GPR8_FIRST,       REGC_GPR8_LAST },
+       [REGC_GPR16]      = { REGC_GPR16_FIRST,      REGC_GPR16_LAST },
+       [REGC_GPR32]      = { REGC_GPR32_FIRST,      REGC_GPR32_LAST },
+       [REGC_DIVIDEND64] = { REGC_DIVIDEND64_FIRST, REGC_DIVIDEND64_LAST },
+       [REGC_DIVIDEND32] = { REGC_DIVIDEND32_FIRST, REGC_DIVIDEND32_LAST },
+       [REGC_MMX]        = { REGC_MMX_FIRST,        REGC_MMX_LAST },
+       [REGC_XMM]        = { REGC_XMM_FIRST,        REGC_XMM_LAST },
+       [REGC_GPR32_8]    = { REGC_GPR32_8_FIRST,    REGC_GPR32_8_LAST },
+       [REGC_GPR16_8]    = { REGC_GPR16_8_FIRST,    REGC_GPR16_8_LAST },
+       [REGC_GPR8_LO]    = { REGC_GPR8_LO_FIRST,    REGC_GPR8_LO_LAST },
+       [REGC_IMM32]      = { REGC_IMM32_FIRST,      REGC_IMM32_LAST },
+       [REGC_IMM16]      = { REGC_IMM16_FIRST,      REGC_IMM16_LAST },
+       [REGC_IMM8]       = { REGC_IMM8_FIRST,       REGC_IMM8_LAST },
 };
 
 static int arch_encode_cpu(const char *cpu)
@@ -14959,8 +15176,9 @@ static unsigned arch_regc_size(struct compile_state *state, int class)
 static int arch_regcm_intersect(unsigned regcm1, unsigned regcm2)
 {
        /* See if two register classes may have overlapping registers */
-       unsigned gpr_mask = REGCM_GPR8 | REGCM_GPR16_8 | REGCM_GPR16 |
-               REGCM_GPR32_8 | REGCM_GPR32 | REGCM_GPR64;
+       unsigned gpr_mask = REGCM_GPR8 | REGCM_GPR8_LO | REGCM_GPR16_8 | REGCM_GPR16 |
+               REGCM_GPR32_8 | REGCM_GPR32 | 
+               REGCM_DIVIDEND32 | REGCM_DIVIDEND64;
 
        /* Special case for the immediates */
        if ((regcm1 & (REGCM_IMM32 | REGCM_IMM16 | REGCM_IMM8)) &&
@@ -14987,6 +15205,7 @@ static void arch_reg_equivs(
 #endif
                *equiv++ = REG_AX;
                *equiv++ = REG_EAX;
+               *equiv++ = REG_DXAX;
                *equiv++ = REG_EDXEAX;
                break;
        case REG_AH:
@@ -14995,6 +15214,7 @@ static void arch_reg_equivs(
 #endif
                *equiv++ = REG_AX;
                *equiv++ = REG_EAX;
+               *equiv++ = REG_DXAX;
                *equiv++ = REG_EDXEAX;
                break;
        case REG_BL:  
@@ -15033,6 +15253,7 @@ static void arch_reg_equivs(
 #endif
                *equiv++ = REG_DX;
                *equiv++ = REG_EDX;
+               *equiv++ = REG_DXAX;
                *equiv++ = REG_EDXEAX;
                break;
        case REG_DH:
@@ -15041,12 +15262,14 @@ static void arch_reg_equivs(
 #endif
                *equiv++ = REG_DX;
                *equiv++ = REG_EDX;
+               *equiv++ = REG_DXAX;
                *equiv++ = REG_EDXEAX;
                break;
        case REG_AX:
                *equiv++ = REG_AL;
                *equiv++ = REG_AH;
                *equiv++ = REG_EAX;
+               *equiv++ = REG_DXAX;
                *equiv++ = REG_EDXEAX;
                break;
        case REG_BX:
@@ -15063,6 +15286,7 @@ static void arch_reg_equivs(
                *equiv++ = REG_DL;
                *equiv++ = REG_DH;
                *equiv++ = REG_EDX;
+               *equiv++ = REG_DXAX;
                *equiv++ = REG_EDXEAX;
                break;
        case REG_SI:  
@@ -15081,6 +15305,7 @@ static void arch_reg_equivs(
                *equiv++ = REG_AL;
                *equiv++ = REG_AH;
                *equiv++ = REG_AX;
+               *equiv++ = REG_DXAX;
                *equiv++ = REG_EDXEAX;
                break;
        case REG_EBX:
@@ -15097,6 +15322,7 @@ static void arch_reg_equivs(
                *equiv++ = REG_DL;
                *equiv++ = REG_DH;
                *equiv++ = REG_DX;
+               *equiv++ = REG_DXAX;
                *equiv++ = REG_EDXEAX;
                break;
        case REG_ESI: 
@@ -15111,6 +15337,17 @@ static void arch_reg_equivs(
        case REG_ESP: 
                *equiv++ = REG_SP;
                break;
+       case REG_DXAX: 
+               *equiv++ = REG_AL;
+               *equiv++ = REG_AH;
+               *equiv++ = REG_DL;
+               *equiv++ = REG_DH;
+               *equiv++ = REG_AX;
+               *equiv++ = REG_DX;
+               *equiv++ = REG_EAX;
+               *equiv++ = REG_EDX;
+               *equiv++ = REG_EDXEAX;
+               break;
        case REG_EDXEAX: 
                *equiv++ = REG_AL;
                *equiv++ = REG_AH;
@@ -15120,6 +15357,7 @@ static void arch_reg_equivs(
                *equiv++ = REG_DX;
                *equiv++ = REG_EAX;
                *equiv++ = REG_EDX;
+               *equiv++ = REG_DXAX;
                break;
        }
        *equiv++ = REG_UNSET; 
@@ -15128,8 +15366,10 @@ static void arch_reg_equivs(
 static unsigned arch_avail_mask(struct compile_state *state)
 {
        unsigned avail_mask;
-       avail_mask = REGCM_GPR8 | REGCM_GPR16_8 | REGCM_GPR16 | 
-               REGCM_GPR32 | REGCM_GPR32_8 | REGCM_GPR64 |
+       /* REGCM_GPR8 is not available */
+       avail_mask = REGCM_GPR8_LO | REGCM_GPR16_8 | REGCM_GPR16 | 
+               REGCM_GPR32 | REGCM_GPR32_8 | 
+               REGCM_DIVIDEND32 | REGCM_DIVIDEND64 |
                REGCM_IMM32 | REGCM_IMM16 | REGCM_IMM8 | REGCM_FLAGS;
        switch(state->cpu) {
        case CPU_P3:
@@ -15141,12 +15381,6 @@ static unsigned arch_avail_mask(struct compile_state *state)
                avail_mask |= REGCM_MMX | REGCM_XMM;
                break;
        }
-#if 0
-       /* Don't enable 8 bit values until I can force both operands
-        * to be 8bits simultaneously.
-        */
-       avail_mask &= ~(REGCM_GPR8 | REGCM_GPR16_8 | REGCM_GPR16);
-#endif
        return avail_mask;
 }
 
@@ -15155,7 +15389,6 @@ static unsigned arch_regcm_normalize(struct compile_state *state, unsigned regcm
        unsigned mask, result;
        int class, class2;
        result = regcm;
-       result &= arch_avail_mask(state);
 
        for(class = 0, mask = 1; mask; mask <<= 1, class++) {
                if ((result & mask) == 0) {
@@ -15171,6 +15404,7 @@ static unsigned arch_regcm_normalize(struct compile_state *state, unsigned regcm
                        }
                }
        }
+       result &= arch_avail_mask(state);
        return result;
 }
 
@@ -15209,19 +15443,19 @@ static struct reg_info arch_reg_constraint(
                unsigned int mask;
                unsigned int reg;
        } constraints[] = {
-               { 'r', REGCM_GPR32, REG_UNSET },
-               { 'g', REGCM_GPR32, REG_UNSET },
-               { 'p', REGCM_GPR32, REG_UNSET },
-               { 'q', REGCM_GPR8 REG_UNSET },
+               { 'r', REGCM_GPR32,   REG_UNSET },
+               { 'g', REGCM_GPR32,   REG_UNSET },
+               { 'p', REGCM_GPR32,   REG_UNSET },
+               { 'q', REGCM_GPR8_LO, REG_UNSET },
                { 'Q', REGCM_GPR32_8, REG_UNSET },
-               { 'x', REGCM_XMM,   REG_UNSET },
-               { 'y', REGCM_MMX,   REG_UNSET },
-               { 'a', REGCM_GPR32, REG_EAX },
-               { 'b', REGCM_GPR32, REG_EBX },
-               { 'c', REGCM_GPR32, REG_ECX },
-               { 'd', REGCM_GPR32, REG_EDX },
-               { 'D', REGCM_GPR32, REG_EDI },
-               { 'S', REGCM_GPR32, REG_ESI },
+               { 'x', REGCM_XMM,     REG_UNSET },
+               { 'y', REGCM_MMX,     REG_UNSET },
+               { 'a', REGCM_GPR32,   REG_EAX },
+               { 'b', REGCM_GPR32,   REG_EBX },
+               { 'c', REGCM_GPR32,   REG_ECX },
+               { 'd', REGCM_GPR32,   REG_EDX },
+               { 'D', REGCM_GPR32,   REG_EDI },
+               { 'S', REGCM_GPR32,   REG_ESI },
                { '\0', 0, REG_UNSET },
        };
        unsigned int regcm;
@@ -15368,7 +15602,13 @@ static int arch_select_free_register(
        for(i = REGC_GPR8_FIRST; (reg == REG_UNSET) && (i <= REGC_GPR8_LAST); i++) {
                reg = do_select_reg(state, used, i, classes);
        }
-       for(i = REGC_GPR64_FIRST; (reg == REG_UNSET) && (i <= REGC_GPR64_LAST); i++) {
+       for(i = REGC_GPR8_LO_FIRST; (reg == REG_UNSET) && (i <= REGC_GPR8_LO_LAST); i++) {
+               reg = do_select_reg(state, used, i, classes);
+       }
+       for(i = REGC_DIVIDEND32_FIRST; (reg == REG_UNSET) && (i <= REGC_DIVIDEND32_LAST); i++) {
+               reg = do_select_reg(state, used, i, classes);
+       }
+       for(i = REGC_DIVIDEND64_FIRST; (reg == REG_UNSET) && (i <= REGC_DIVIDEND64_LAST); i++) {
                reg = do_select_reg(state, used, i, classes);
        }
        for(i = REGC_FLAGS_FIRST; (reg == REG_UNSET) && (i <= REGC_FLAGS_LAST); i++) {
@@ -15390,10 +15630,10 @@ static unsigned arch_type_to_regcm(struct compile_state *state, struct type *typ
                break;
        case TYPE_CHAR:
        case TYPE_UCHAR:
-               mask = REGCM_GPR8 | 
+               mask = REGCM_GPR8 | REGCM_GPR8_LO |
                        REGCM_GPR16 | REGCM_GPR16_8 | 
                        REGCM_GPR32 | REGCM_GPR32_8 |
-                       REGCM_GPR64 |
+                       REGCM_DIVIDEND32 | REGCM_DIVIDEND64 |
                        REGCM_MMX | REGCM_XMM |
                        REGCM_IMM32 | REGCM_IMM16 | REGCM_IMM8;
                break;
@@ -15401,7 +15641,7 @@ static unsigned arch_type_to_regcm(struct compile_state *state, struct type *typ
        case TYPE_USHORT:
                mask =  REGCM_GPR16 | REGCM_GPR16_8 |
                        REGCM_GPR32 | REGCM_GPR32_8 |
-                       REGCM_GPR64 |
+                       REGCM_DIVIDEND32 | REGCM_DIVIDEND64 |
                        REGCM_MMX | REGCM_XMM |
                        REGCM_IMM32 | REGCM_IMM16;
                break;
@@ -15411,7 +15651,8 @@ static unsigned arch_type_to_regcm(struct compile_state *state, struct type *typ
        case TYPE_ULONG:
        case TYPE_POINTER:
                mask =  REGCM_GPR32 | REGCM_GPR32_8 |
-                       REGCM_GPR64 | REGCM_MMX | REGCM_XMM |
+                       REGCM_DIVIDEND32 | REGCM_DIVIDEND64 |
+                       REGCM_MMX | REGCM_XMM |
                        REGCM_IMM32;
                break;
        default:
@@ -15469,63 +15710,79 @@ static int get_imm8(struct triple *ins, struct triple **expr)
        return 1;
 }
 
-#define TEMPLATE_NOP         0
-#define TEMPLATE_INTCONST8   1
-#define TEMPLATE_INTCONST32  2
-#define TEMPLATE_COPY8_REG   3
-#define TEMPLATE_COPY16_REG  4
-#define TEMPLATE_COPY32_REG  5
-#define TEMPLATE_COPY_IMM8   6
-#define TEMPLATE_COPY_IMM16  7
-#define TEMPLATE_COPY_IMM32  8
-#define TEMPLATE_PHI8        9
-#define TEMPLATE_PHI16      10
-#define TEMPLATE_PHI32      11
-#define TEMPLATE_STORE8     12
-#define TEMPLATE_STORE16    13
-#define TEMPLATE_STORE32    14
-#define TEMPLATE_LOAD8      15
-#define TEMPLATE_LOAD16     16
-#define TEMPLATE_LOAD32     17
-#define TEMPLATE_BINARY_REG 18
-#define TEMPLATE_BINARY_IMM 19
-#define TEMPLATE_SL_CL      20
-#define TEMPLATE_SL_IMM     21
-#define TEMPLATE_UNARY      22
-#define TEMPLATE_CMP_REG    23
-#define TEMPLATE_CMP_IMM    24
-#define TEMPLATE_TEST       25
-#define TEMPLATE_SET        26
-#define TEMPLATE_JMP        27
-#define TEMPLATE_INB_DX     28
-#define TEMPLATE_INB_IMM    29
-#define TEMPLATE_INW_DX     30
-#define TEMPLATE_INW_IMM    31
-#define TEMPLATE_INL_DX     32
-#define TEMPLATE_INL_IMM    33
-#define TEMPLATE_OUTB_DX    34
-#define TEMPLATE_OUTB_IMM   35
-#define TEMPLATE_OUTW_DX    36
-#define TEMPLATE_OUTW_IMM   37
-#define TEMPLATE_OUTL_DX    38
-#define TEMPLATE_OUTL_IMM   39
-#define TEMPLATE_BSF        40
-#define TEMPLATE_RDMSR      41
-#define TEMPLATE_WRMSR      42
-#define TEMPLATE_UMUL       43
-#define TEMPLATE_DIV        44
-#define TEMPLATE_MOD        45
-#define LAST_TEMPLATE       TEMPLATE_MOD
+#define TEMPLATE_NOP           0
+#define TEMPLATE_INTCONST8     1
+#define TEMPLATE_INTCONST32    2
+#define TEMPLATE_COPY8_REG     3
+#define TEMPLATE_COPY16_REG    4
+#define TEMPLATE_COPY32_REG    5
+#define TEMPLATE_COPY_IMM8     6
+#define TEMPLATE_COPY_IMM16    7
+#define TEMPLATE_COPY_IMM32    8
+#define TEMPLATE_PHI8          9
+#define TEMPLATE_PHI16        10
+#define TEMPLATE_PHI32        11
+#define TEMPLATE_STORE8       12
+#define TEMPLATE_STORE16      13
+#define TEMPLATE_STORE32      14
+#define TEMPLATE_LOAD8        15
+#define TEMPLATE_LOAD16       16
+#define TEMPLATE_LOAD32       17
+#define TEMPLATE_BINARY8_REG  18
+#define TEMPLATE_BINARY16_REG 19
+#define TEMPLATE_BINARY32_REG 20
+#define TEMPLATE_BINARY8_IMM  21
+#define TEMPLATE_BINARY16_IMM 22
+#define TEMPLATE_BINARY32_IMM 23
+#define TEMPLATE_SL8_CL       24
+#define TEMPLATE_SL16_CL      25
+#define TEMPLATE_SL32_CL      26
+#define TEMPLATE_SL8_IMM      27
+#define TEMPLATE_SL16_IMM     28
+#define TEMPLATE_SL32_IMM     29
+#define TEMPLATE_UNARY8       30
+#define TEMPLATE_UNARY16      31
+#define TEMPLATE_UNARY32      32
+#define TEMPLATE_CMP8_REG     33
+#define TEMPLATE_CMP16_REG    34
+#define TEMPLATE_CMP32_REG    35
+#define TEMPLATE_CMP8_IMM     36
+#define TEMPLATE_CMP16_IMM    37
+#define TEMPLATE_CMP32_IMM    38
+#define TEMPLATE_TEST8        39
+#define TEMPLATE_TEST16       40
+#define TEMPLATE_TEST32       41
+#define TEMPLATE_SET          42
+#define TEMPLATE_JMP          43
+#define TEMPLATE_INB_DX       44
+#define TEMPLATE_INB_IMM      45
+#define TEMPLATE_INW_DX       46
+#define TEMPLATE_INW_IMM      47
+#define TEMPLATE_INL_DX       48
+#define TEMPLATE_INL_IMM      49
+#define TEMPLATE_OUTB_DX      50
+#define TEMPLATE_OUTB_IMM     51
+#define TEMPLATE_OUTW_DX      52
+#define TEMPLATE_OUTW_IMM     53
+#define TEMPLATE_OUTL_DX      54
+#define TEMPLATE_OUTL_IMM     55
+#define TEMPLATE_BSF          56
+#define TEMPLATE_RDMSR        57
+#define TEMPLATE_WRMSR        58
+#define TEMPLATE_UMUL8        59
+#define TEMPLATE_UMUL16       60
+#define TEMPLATE_UMUL32       61
+#define TEMPLATE_DIV8         62
+#define TEMPLATE_DIV16        63
+#define TEMPLATE_DIV32        64
+#define LAST_TEMPLATE       TEMPLATE_DIV32
 #if LAST_TEMPLATE >= MAX_TEMPLATES
 #error "MAX_TEMPLATES to low"
 #endif
 
-#define COPY8_REGCM     (REGCM_GPR64 | REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8 | REGCM_MMX | REGCM_XMM)
-#define COPY16_REGCM    (REGCM_GPR64 | REGCM_GPR32 | REGCM_GPR16 | REGCM_MMX | REGCM_XMM)  
-#define COPY32_REGCM    (REGCM_GPR64 | REGCM_GPR32 | REGCM_MMX | REGCM_XMM)
-#define COPYIMM8_REGCM  (REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8)
-#define COPYIMM16_REGCM (REGCM_GPR32 | REGCM_GPR16)
-#define COPYIMM32_REGCM (REGCM_GPR32)
+#define COPY8_REGCM     (REGCM_DIVIDEND64 | REGCM_DIVIDEND32 | REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8_LO | REGCM_MMX | REGCM_XMM)
+#define COPY16_REGCM    (REGCM_DIVIDEND64 | REGCM_DIVIDEND32 | REGCM_GPR32 | REGCM_GPR16 | REGCM_MMX | REGCM_XMM)  
+#define COPY32_REGCM    (REGCM_DIVIDEND64 | REGCM_DIVIDEND32 | REGCM_GPR32 | REGCM_MMX | REGCM_XMM)
 
 
 static struct ins_template templates[] = {
@@ -15549,15 +15806,15 @@ static struct ins_template templates[] = {
                .rhs = { [0] = { REG_UNSET, COPY32_REGCM }  },
        },
        [TEMPLATE_COPY_IMM8] = {
-               .lhs = { [0] = { REG_UNSET, COPYIMM8_REGCM } },
+               .lhs = { [0] = { REG_UNSET, COPY8_REGCM } },
                .rhs = { [0] = { REG_UNNEEDED, REGCM_IMM8 } },
        },
        [TEMPLATE_COPY_IMM16] = {
-               .lhs = { [0] = { REG_UNSET, COPYIMM16_REGCM } },
+               .lhs = { [0] = { REG_UNSET, COPY16_REGCM } },
                .rhs = { [0] = { REG_UNNEEDED, REGCM_IMM16 | REGCM_IMM8 } },
        },
        [TEMPLATE_COPY_IMM32] = {
-               .lhs = { [0] = { REG_UNSET, COPYIMM32_REGCM } },
+               .lhs = { [0] = { REG_UNSET, COPY32_REGCM } },
                .rhs = { [0] = { REG_UNNEEDED, REGCM_IMM32 | REGCM_IMM16 | REGCM_IMM8 } },
        },
        [TEMPLATE_PHI8] = { 
@@ -15621,19 +15878,25 @@ static struct ins_template templates[] = {
                        [15] = { REG_VIRT0, COPY32_REGCM },
                }, },
        [TEMPLATE_STORE8] = {
-               .lhs = { [0] = { REG_UNSET, REGCM_GPR32 } },
-               .rhs = { [0] = { REG_UNSET, REGCM_GPR8 } },
+               .rhs = { 
+                       [0] = { REG_UNSET, REGCM_GPR32 },
+                       [1] = { REG_UNSET, REGCM_GPR8_LO },
+               },
        },
        [TEMPLATE_STORE16] = {
-               .lhs = { [0] = { REG_UNSET, REGCM_GPR32 } },
-               .rhs = { [0] = { REG_UNSET, REGCM_GPR16 } },
+               .rhs = { 
+                       [0] = { REG_UNSET, REGCM_GPR32 },
+                       [1] = { REG_UNSET, REGCM_GPR16 },
+               },
        },
        [TEMPLATE_STORE32] = {
-               .lhs = { [0] = { REG_UNSET, REGCM_GPR32 } },
-               .rhs = { [0] = { REG_UNSET, REGCM_GPR32 } },
+               .rhs = { 
+                       [0] = { REG_UNSET, REGCM_GPR32 },
+                       [1] = { REG_UNSET, REGCM_GPR32 },
+               },
        },
        [TEMPLATE_LOAD8] = {
-               .lhs = { [0] = { REG_UNSET, REGCM_GPR8 } },
+               .lhs = { [0] = { REG_UNSET, REGCM_GPR8_LO } },
                .rhs = { [0] = { REG_UNSET, REGCM_GPR32 } },
        },
        [TEMPLATE_LOAD16] = {
@@ -15644,69 +15907,169 @@ static struct ins_template templates[] = {
                .lhs = { [0] = { REG_UNSET, REGCM_GPR32 } },
                .rhs = { [0] = { REG_UNSET, REGCM_GPR32 } },
        },
-       [TEMPLATE_BINARY_REG] = {
+       [TEMPLATE_BINARY8_REG] = {
+               .lhs = { [0] = { REG_VIRT0, REGCM_GPR8_LO } },
+               .rhs = { 
+                       [0] = { REG_VIRT0, REGCM_GPR8_LO },
+                       [1] = { REG_UNSET, REGCM_GPR8_LO },
+               },
+       },
+       [TEMPLATE_BINARY16_REG] = {
+               .lhs = { [0] = { REG_VIRT0, REGCM_GPR16 } },
+               .rhs = { 
+                       [0] = { REG_VIRT0, REGCM_GPR16 },
+                       [1] = { REG_UNSET, REGCM_GPR16 },
+               },
+       },
+       [TEMPLATE_BINARY32_REG] = {
                .lhs = { [0] = { REG_VIRT0, REGCM_GPR32 } },
                .rhs = { 
                        [0] = { REG_VIRT0, REGCM_GPR32 },
                        [1] = { REG_UNSET, REGCM_GPR32 },
                },
        },
-       [TEMPLATE_BINARY_IMM] = {
+       [TEMPLATE_BINARY8_IMM] = {
+               .lhs = { [0] = { REG_VIRT0, REGCM_GPR8_LO } },
+               .rhs = { 
+                       [0] = { REG_VIRT0,    REGCM_GPR8_LO },
+                       [1] = { REG_UNNEEDED, REGCM_IMM8 },
+               },
+       },
+       [TEMPLATE_BINARY16_IMM] = {
+               .lhs = { [0] = { REG_VIRT0, REGCM_GPR16 } },
+               .rhs = { 
+                       [0] = { REG_VIRT0,    REGCM_GPR16 },
+                       [1] = { REG_UNNEEDED, REGCM_IMM16 },
+               },
+       },
+       [TEMPLATE_BINARY32_IMM] = {
                .lhs = { [0] = { REG_VIRT0, REGCM_GPR32 } },
                .rhs = { 
                        [0] = { REG_VIRT0,    REGCM_GPR32 },
                        [1] = { REG_UNNEEDED, REGCM_IMM32 },
                },
        },
-       [TEMPLATE_SL_CL] = {
+       [TEMPLATE_SL8_CL] = {
+               .lhs = { [0] = { REG_VIRT0, REGCM_GPR8_LO } },
+               .rhs = { 
+                       [0] = { REG_VIRT0, REGCM_GPR8_LO },
+                       [1] = { REG_CL, REGCM_GPR8_LO },
+               },
+       },
+       [TEMPLATE_SL16_CL] = {
+               .lhs = { [0] = { REG_VIRT0, REGCM_GPR16 } },
+               .rhs = { 
+                       [0] = { REG_VIRT0, REGCM_GPR16 },
+                       [1] = { REG_CL, REGCM_GPR8_LO },
+               },
+       },
+       [TEMPLATE_SL32_CL] = {
                .lhs = { [0] = { REG_VIRT0, REGCM_GPR32 } },
                .rhs = { 
                        [0] = { REG_VIRT0, REGCM_GPR32 },
-                       [1] = { REG_CL, REGCM_GPR8 },
+                       [1] = { REG_CL, REGCM_GPR8_LO },
+               },
+       },
+       [TEMPLATE_SL8_IMM] = {
+               .lhs = { [0] = { REG_VIRT0, REGCM_GPR8_LO } },
+               .rhs = { 
+                       [0] = { REG_VIRT0,    REGCM_GPR8_LO },
+                       [1] = { REG_UNNEEDED, REGCM_IMM8 },
                },
        },
-       [TEMPLATE_SL_IMM] = {
+       [TEMPLATE_SL16_IMM] = {
+               .lhs = { [0] = { REG_VIRT0, REGCM_GPR16 } },
+               .rhs = { 
+                       [0] = { REG_VIRT0,    REGCM_GPR16 },
+                       [1] = { REG_UNNEEDED, REGCM_IMM8 },
+               },
+       },
+       [TEMPLATE_SL32_IMM] = {
                .lhs = { [0] = { REG_VIRT0, REGCM_GPR32 } },
                .rhs = { 
                        [0] = { REG_VIRT0,    REGCM_GPR32 },
                        [1] = { REG_UNNEEDED, REGCM_IMM8 },
                },
        },
-       [TEMPLATE_UNARY] = {
+       [TEMPLATE_UNARY8] = {
+               .lhs = { [0] = { REG_VIRT0, REGCM_GPR8_LO } },
+               .rhs = { [0] = { REG_VIRT0, REGCM_GPR8_LO } },
+       },
+       [TEMPLATE_UNARY16] = {
+               .lhs = { [0] = { REG_VIRT0, REGCM_GPR16 } },
+               .rhs = { [0] = { REG_VIRT0, REGCM_GPR16 } },
+       },
+       [TEMPLATE_UNARY32] = {
                .lhs = { [0] = { REG_VIRT0, REGCM_GPR32 } },
                .rhs = { [0] = { REG_VIRT0, REGCM_GPR32 } },
        },
-       [TEMPLATE_CMP_REG] = {
+       [TEMPLATE_CMP8_REG] = {
+               .lhs = { [0] = { REG_EFLAGS, REGCM_FLAGS } },
+               .rhs = {
+                       [0] = { REG_UNSET, REGCM_GPR8_LO },
+                       [1] = { REG_UNSET, REGCM_GPR8_LO },
+               },
+       },
+       [TEMPLATE_CMP16_REG] = {
+               .lhs = { [0] = { REG_EFLAGS, REGCM_FLAGS } },
+               .rhs = {
+                       [0] = { REG_UNSET, REGCM_GPR16 },
+                       [1] = { REG_UNSET, REGCM_GPR16 },
+               },
+       },
+       [TEMPLATE_CMP32_REG] = {
                .lhs = { [0] = { REG_EFLAGS, REGCM_FLAGS } },
                .rhs = {
                        [0] = { REG_UNSET, REGCM_GPR32 },
                        [1] = { REG_UNSET, REGCM_GPR32 },
                },
        },
-       [TEMPLATE_CMP_IMM] = {
+       [TEMPLATE_CMP8_IMM] = {
+               .lhs = { [0] = { REG_EFLAGS, REGCM_FLAGS } },
+               .rhs = {
+                       [0] = { REG_UNSET, REGCM_GPR8_LO },
+                       [1] = { REG_UNNEEDED, REGCM_IMM8 },
+               },
+       },
+       [TEMPLATE_CMP16_IMM] = {
+               .lhs = { [0] = { REG_EFLAGS, REGCM_FLAGS } },
+               .rhs = {
+                       [0] = { REG_UNSET, REGCM_GPR16 },
+                       [1] = { REG_UNNEEDED, REGCM_IMM16 },
+               },
+       },
+       [TEMPLATE_CMP32_IMM] = {
                .lhs = { [0] = { REG_EFLAGS, REGCM_FLAGS } },
                .rhs = {
                        [0] = { REG_UNSET, REGCM_GPR32 },
                        [1] = { REG_UNNEEDED, REGCM_IMM32 },
                },
        },
-       [TEMPLATE_TEST] = {
+       [TEMPLATE_TEST8] = {
+               .lhs = { [0] = { REG_EFLAGS, REGCM_FLAGS } },
+               .rhs = { [0] = { REG_UNSET, REGCM_GPR8_LO } },
+       },
+       [TEMPLATE_TEST16] = {
+               .lhs = { [0] = { REG_EFLAGS, REGCM_FLAGS } },
+               .rhs = { [0] = { REG_UNSET, REGCM_GPR16 } },
+       },
+       [TEMPLATE_TEST32] = {
                .lhs = { [0] = { REG_EFLAGS, REGCM_FLAGS } },
                .rhs = { [0] = { REG_UNSET, REGCM_GPR32 } },
        },
        [TEMPLATE_SET] = {
-               .lhs = { [0] = { REG_UNSET, REGCM_GPR8 } },
+               .lhs = { [0] = { REG_UNSET, REGCM_GPR8_LO } },
                .rhs = { [0] = { REG_EFLAGS, REGCM_FLAGS } },
        },
        [TEMPLATE_JMP] = {
                .rhs = { [0] = { REG_EFLAGS, REGCM_FLAGS } },
        },
        [TEMPLATE_INB_DX] = {
-               .lhs = { [0] = { REG_AL,  REGCM_GPR8 } },  
+               .lhs = { [0] = { REG_AL,  REGCM_GPR8_LO } },  
                .rhs = { [0] = { REG_DX, REGCM_GPR16 } },
        },
        [TEMPLATE_INB_IMM] = {
-               .lhs = { [0] = { REG_AL,  REGCM_GPR8 } },  
+               .lhs = { [0] = { REG_AL,  REGCM_GPR8_LO } },  
                .rhs = { [0] = { REG_UNNEEDED, REGCM_IMM8 } },
        },
        [TEMPLATE_INW_DX]  = { 
@@ -15727,13 +16090,13 @@ static struct ins_template templates[] = {
        },
        [TEMPLATE_OUTB_DX] = { 
                .rhs = {
-                       [0] = { REG_AL,  REGCM_GPR8 },
+                       [0] = { REG_AL,  REGCM_GPR8_LO },
                        [1] = { REG_DX, REGCM_GPR16 },
                },
        },
        [TEMPLATE_OUTB_IMM] = { 
                .rhs = {
-                       [0] = { REG_AL,  REGCM_GPR8 },  
+                       [0] = { REG_AL,  REGCM_GPR8_LO },  
                        [1] = { REG_UNNEEDED, REGCM_IMM8 },
                },
        },
@@ -15779,30 +16142,54 @@ static struct ins_template templates[] = {
                        [2] = { REG_EDX, REGCM_GPR32 },
                },
        },
-       [TEMPLATE_UMUL] = {
-               .lhs = { [0] = { REG_EDXEAX, REGCM_GPR64 } },
+       [TEMPLATE_UMUL8] = {
+               .lhs = { [0] = { REG_AX, REGCM_GPR16 } },
+               .rhs = { 
+                       [0] = { REG_AL, REGCM_GPR8_LO },
+                       [1] = { REG_UNSET, REGCM_GPR8_LO },
+               },
+       },
+       [TEMPLATE_UMUL16] = {
+               .lhs = { [0] = { REG_DXAX, REGCM_DIVIDEND32 } },
+               .rhs = { 
+                       [0] = { REG_AX, REGCM_GPR16 },
+                       [1] = { REG_UNSET, REGCM_GPR16 },
+               },
+       },
+       [TEMPLATE_UMUL32] = {
+               .lhs = { [0] = { REG_EDXEAX, REGCM_DIVIDEND64 } },
                .rhs = { 
                        [0] = { REG_EAX, REGCM_GPR32 },
                        [1] = { REG_UNSET, REGCM_GPR32 },
                },
        },
-       [TEMPLATE_DIV] = {
+       [TEMPLATE_DIV8] = {
                .lhs = { 
-                       [0] = { REG_EAX, REGCM_GPR32 },
-                       [1] = { REG_EDX, REGCM_GPR32 },
+                       [0] = { REG_AL, REGCM_GPR8_LO },
+                       [1] = { REG_AH, REGCM_GPR8 },
                },
                .rhs = {
-                       [0] = { REG_EDXEAX, REGCM_GPR64 },
-                       [1] = { REG_UNSET, REGCM_GPR32 },
+                       [0] = { REG_AX, REGCM_GPR16 },
+                       [1] = { REG_UNSET, REGCM_GPR8_LO },
                },
        },
-       [TEMPLATE_MOD] = {
+       [TEMPLATE_DIV16] = {
                .lhs = { 
-                       [0] = { REG_EDX, REGCM_GPR32 },
-                       [1] = { REG_EAX, REGCM_GPR32 },
+                       [0] = { REG_AX, REGCM_GPR16 },
+                       [1] = { REG_DX, REGCM_GPR16 },
                },
                .rhs = {
-                       [0] = { REG_EDXEAX, REGCM_GPR64 },
+                       [0] = { REG_DXAX, REGCM_DIVIDEND32 },
+                       [1] = { REG_UNSET, REGCM_GPR16 },
+               },
+       },
+       [TEMPLATE_DIV32] = {
+               .lhs = { 
+                       [0] = { REG_EAX, REGCM_GPR32 },
+                       [1] = { REG_EDX, REGCM_GPR32 },
+               },
+               .rhs = {
+                       [0] = { REG_EDXEAX, REGCM_DIVIDEND64 },
                        [1] = { REG_UNSET, REGCM_GPR32 },
                },
        },
@@ -15828,11 +16215,11 @@ static void fixup_branches(struct compile_state *state,
                        branch = entry->member;
                        test = pre_triple(state, branch,
                                cmp->op, cmp->type, left, right);
-                       test->template_id = TEMPLATE_TEST; 
+                       test->template_id = TEMPLATE_TEST32;
                        if (cmp->op == OP_CMP) {
-                               test->template_id = TEMPLATE_CMP_REG;
+                               test->template_id = TEMPLATE_CMP32_REG;
                                if (get_imm32(test, &RHS(test, 1))) {
-                                       test->template_id = TEMPLATE_CMP_IMM;
+                                       test->template_id = TEMPLATE_CMP32_IMM;
                                }
                        }
                        use_triple(RHS(test, 0), test);
@@ -15859,11 +16246,11 @@ static void bool_cmp(struct compile_state *state,
 
        /* Modify the comparison operator */
        ins->op = cmp_op;
-       ins->template_id = TEMPLATE_TEST;
+       ins->template_id = TEMPLATE_TEST32;
        if (cmp_op == OP_CMP) {
-               ins->template_id = TEMPLATE_CMP_REG;
+               ins->template_id = TEMPLATE_CMP32_REG;
                if (get_imm32(ins, &RHS(ins, 1))) {
-                       ins->template_id =  TEMPLATE_CMP_IMM;
+                       ins->template_id =  TEMPLATE_CMP32_IMM;
                }
        }
        /* Generate the instruction sequence that will transform the
@@ -15974,6 +16361,47 @@ struct reg_info arch_reg_rhs(struct compile_state *state, struct triple *ins, in
        return result;
 }
 
+static struct triple *mod_div(struct compile_state *state,
+       struct triple *ins, int div_op, int index)
+{
+       struct triple *div, *piece0, *piece1;
+       
+       /* Generate a piece to hold the remainder */
+       piece1 = post_triple(state, ins, OP_PIECE, ins->type, 0, 0);
+       piece1->u.cval = 1;
+
+       /* Generate a piece to hold the quotient */
+       piece0 = post_triple(state, ins, OP_PIECE, ins->type, 0, 0);
+       piece0->u.cval = 0;
+
+       /* Generate the appropriate division instruction */
+       div = post_triple(state, ins, div_op, ins->type, 0, 0);
+       RHS(div, 0) = RHS(ins, 0);
+       RHS(div, 1) = RHS(ins, 1);
+       LHS(div, 0) = piece0;
+       LHS(div, 1) = piece1;
+       div->template_id  = TEMPLATE_DIV32;
+       use_triple(RHS(div, 0), div);
+       use_triple(RHS(div, 1), div);
+       use_triple(LHS(div, 0), div);
+       use_triple(LHS(div, 1), div);
+
+       /* Hook on piece0 */
+       MISC(piece0, 0) = div;
+       use_triple(div, piece0);
+
+       /* Hook on piece1 */
+       MISC(piece1, 0) = div;
+       use_triple(div, piece1);
+       
+       /* Replace uses of ins with the appropriate piece of the div */
+       propogate_use(state, ins, LHS(div, index));
+       release_triple(state, ins);
+
+       /* Return the address of the next instruction */
+       return piece1->next;
+}
+
 static struct triple *transform_to_arch_instruction(
        struct compile_state *state, struct triple *ins)
 {
@@ -16089,38 +16517,45 @@ static struct triple *transform_to_arch_instruction(
        case OP_XOR:
        case OP_OR:
        case OP_SMUL:
-               ins->template_id = TEMPLATE_BINARY_REG;
+               ins->template_id = TEMPLATE_BINARY32_REG;
                if (get_imm32(ins, &RHS(ins, 1))) {
-                       ins->template_id = TEMPLATE_BINARY_IMM;
+                       ins->template_id = TEMPLATE_BINARY32_IMM;
                }
                break;
-#if 0
-               /* This code does not work yet */
+       case OP_SDIVT:
+       case OP_UDIVT:
+               ins->template_id = TEMPLATE_DIV32;
+               next = after_lhs(state, ins);
+               break;
+               /* FIXME UMUL does not work yet.. */
        case OP_UMUL:
-               ins->template_id = TEMPLATE_UMUL;
+               ins->template_id = TEMPLATE_UMUL32;
                break;
        case OP_UDIV:
+               next = mod_div(state, ins, OP_UDIVT, 0);
+               break;
        case OP_SDIV:
-               ins->template_id = TEMPLATE_DIV;
+               next = mod_div(state, ins, OP_SDIVT, 0);
                break;
        case OP_UMOD:
+               next = mod_div(state, ins, OP_UDIVT, 1);
+               break;
        case OP_SMOD:
-               ins->template_id = TEMPLATE_MOD;
+               next = mod_div(state, ins, OP_SDIVT, 1);
                break;
-#endif
        case OP_SL:
        case OP_SSR:
        case OP_USR:
-               ins->template_id = TEMPLATE_SL_CL;
+               ins->template_id = TEMPLATE_SL32_CL;
                if (get_imm8(ins, &RHS(ins, 1))) {
-                       ins->template_id = TEMPLATE_SL_IMM;
+                       ins->template_id = TEMPLATE_SL32_IMM;
                } else if (size_of(state, RHS(ins, 1)->type) > 1) {
                        typed_pre_copy(state, &char_type, ins, 1);
                }
                break;
        case OP_INVERT:
        case OP_NEG:
-               ins->template_id = TEMPLATE_UNARY;
+               ins->template_id = TEMPLATE_UNARY32;
                break;
        case OP_EQ: 
                bool_cmp(state, ins, OP_CMP, OP_JMP_EQ, OP_SET_EQ); 
@@ -16209,12 +16644,12 @@ static struct triple *transform_to_arch_instruction(
                break;
                /* Already transformed instructions */
        case OP_TEST:
-               ins->template_id = TEMPLATE_TEST;
+               ins->template_id = TEMPLATE_TEST32;
                break;
        case OP_CMP:
-               ins->template_id = TEMPLATE_CMP_REG;
+               ins->template_id = TEMPLATE_CMP32_REG;
                if (get_imm32(ins, &RHS(ins, 1))) {
-                       ins->template_id = TEMPLATE_CMP_IMM;
+                       ins->template_id = TEMPLATE_CMP32_IMM;
                }
                break;
        case OP_JMP_EQ:      case OP_JMP_NOTEQ:
@@ -16241,18 +16676,21 @@ static struct triple *transform_to_arch_instruction(
        return next;
 }
 
+static long next_label(struct compile_state *state)
+{
+       static long label_counter = 0;
+       return ++label_counter;
+}
 static void generate_local_labels(struct compile_state *state)
 {
        struct triple *first, *label;
-       int label_counter;
-       label_counter = 0;
        first = RHS(state->main_function, 0);
        label = first;
        do {
                if ((label->op == OP_LABEL) || 
                        (label->op == OP_SDECL)) {
                        if (label->use) {
-                               label->u.cval = ++label_counter;
+                               label->u.cval = next_label(state);
                        } else {
                                label->u.cval = 0;
                        }
@@ -16281,6 +16719,9 @@ static int check_reg(struct compile_state *state,
 
 static const char *arch_reg_str(int reg)
 {
+#if REG_XMM7 != 44
+#error "Registers have renumberd fix arch_reg_str"
+#endif
        static const char *regs[] = {
                "%unset",
                "%unneeded",
@@ -16289,6 +16730,7 @@ static const char *arch_reg_str(int reg)
                "%ax", "%bx", "%cx", "%dx", "%si", "%di", "%bp", "%sp",
                "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi", "%ebp", "%esp",
                "%edx:%eax",
+               "%dx:%ax",
                "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7",
                "%xmm0", "%xmm1", "%xmm2", "%xmm3", 
                "%xmm4", "%xmm5", "%xmm6", "%xmm7",
@@ -16343,11 +16785,75 @@ static void print_const_val(
        }
 }
 
+static void print_const(struct compile_state *state,
+       struct triple *ins, FILE *fp)
+{
+       switch(ins->op) {
+       case OP_INTCONST:
+               switch(ins->type->type & TYPE_MASK) {
+               case TYPE_CHAR:
+               case TYPE_UCHAR:
+                       fprintf(fp, ".byte 0x%02lx\n", ins->u.cval);
+                       break;
+               case TYPE_SHORT:
+               case TYPE_USHORT:
+                       fprintf(fp, ".short 0x%04lx\n", ins->u.cval);
+                       break;
+               case TYPE_INT:
+               case TYPE_UINT:
+               case TYPE_LONG:
+               case TYPE_ULONG:
+                       fprintf(fp, ".int %lu\n", ins->u.cval);
+                       break;
+               default:
+                       internal_error(state, ins, "Unknown constant type");
+               }
+               break;
+       case OP_ADDRCONST:
+               fprintf(fp, " .int L%s%lu+%lu ",
+                       state->label_prefix,
+                       MISC(ins, 0)->u.cval,
+                       ins->u.cval);
+               break;
+       case OP_BLOBCONST:
+       {
+               unsigned char *blob;
+               size_t size, i;
+               size = size_of(state, ins->type);
+               blob = ins->u.blob;
+               for(i = 0; i < size; i++) {
+                       fprintf(fp, ".byte 0x%02x\n",
+                               blob[i]);
+               }
+               break;
+       }
+       default:
+               internal_error(state, ins, "Unknown constant type");
+               break;
+       }
+}
+
+#define TEXT_SECTION ".rom.text"
+#define DATA_SECTION ".rom.data"
+
+static long get_const_pool_ref(
+       struct compile_state *state, struct triple *ins, FILE *fp)
+{
+       long ref;
+       ref = next_label(state);
+       fprintf(fp, ".section \"" DATA_SECTION "\"\n");
+       fprintf(fp, ".balign %d\n", align_of(state, ins->type));
+       fprintf(fp, "L%s%lu:\n", state->label_prefix, ref);
+       print_const(state, ins, fp);
+       fprintf(fp, ".section \"" TEXT_SECTION "\"\n");
+       return ref;
+}
+
 static void print_binary_op(struct compile_state *state,
        const char *op, struct triple *ins, FILE *fp) 
 {
        unsigned mask;
-       mask = REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8;
+       mask = REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8_LO;
        if (RHS(ins, 0)->id != ins->id) {
                internal_error(state, ins, "invalid register assignment");
        }
@@ -16375,7 +16881,7 @@ static void print_unary_op(struct compile_state *state,
        const char *op, struct triple *ins, FILE *fp)
 {
        unsigned mask;
-       mask = REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8;
+       mask = REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8_LO;
        fprintf(fp, "\t%s %s\n",
                op,
                reg(state, RHS(ins, 0), mask));
@@ -16385,7 +16891,7 @@ static void print_op_shift(struct compile_state *state,
        const char *op, struct triple *ins, FILE *fp)
 {
        unsigned mask;
-       mask = REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8;
+       mask = REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8_LO;
        if (RHS(ins, 0)->id != ins->id) {
                internal_error(state, ins, "invalid register assignment");
        }
@@ -16398,7 +16904,7 @@ static void print_op_shift(struct compile_state *state,
        else {
                fprintf(fp, "\t%s %s, %s\n",
                        op,
-                       reg(state, RHS(ins, 1), REGCM_GPR8),
+                       reg(state, RHS(ins, 1), REGCM_GPR8_LO),
                        reg(state, RHS(ins, 0), mask));
        }
 }
@@ -16410,7 +16916,7 @@ static void print_op_in(struct compile_state *state, struct triple *ins, FILE *f
        int dreg;
        mask = 0;
        switch(ins->op) {
-       case OP_INB: op = "inb", mask = REGCM_GPR8; break;
+       case OP_INB: op = "inb", mask = REGCM_GPR8_LO; break;
        case OP_INW: op = "inw", mask = REGCM_GPR16; break;
        case OP_INL: op = "inl", mask = REGCM_GPR32; break;
        default:
@@ -16448,7 +16954,7 @@ static void print_op_out(struct compile_state *state, struct triple *ins, FILE *
        int lreg;
        mask = 0;
        switch(ins->op) {
-       case OP_OUTB: op = "outb", mask = REGCM_GPR8; break;
+       case OP_OUTB: op = "outb", mask = REGCM_GPR8_LO; break;
        case OP_OUTW: op = "outw", mask = REGCM_GPR16; break;
        case OP_OUTL: op = "outl", mask = REGCM_GPR32; break;
        default:
@@ -16493,10 +16999,6 @@ static void print_op_move(struct compile_state *state,
                src = RHS(ins, 0);
                dst = ins;
        }
-       else if (ins->op == OP_WRITE) {
-               dst = LHS(ins, 0);
-               src = RHS(ins, 0);
-       }
        else {
                internal_error(state, ins, "unknown move operation");
                src = dst = 0;
@@ -16504,13 +17006,13 @@ static void print_op_move(struct compile_state *state,
        if (!is_const(src)) {
                int src_reg, dst_reg;
                int src_regcm, dst_regcm;
-               src_reg = ID_REG(src->id);
+               src_reg   = ID_REG(src->id);
                dst_reg   = ID_REG(dst->id);
                src_regcm = arch_reg_regcm(state, src_reg);
-               dst_regcm   = arch_reg_regcm(state, dst_reg);
+               dst_regcm = arch_reg_regcm(state, dst_reg);
                /* If the class is the same just move the register */
                if (src_regcm & dst_regcm & 
-                       (REGCM_GPR8 | REGCM_GPR16 | REGCM_GPR32)) {
+                       (REGCM_GPR8_LO | REGCM_GPR16 | REGCM_GPR32)) {
                        if ((src_reg != dst_reg) || !omit_copy) {
                                fprintf(fp, "\tmov %s, %s\n",
                                        reg(state, src, src_regcm),
@@ -16539,7 +17041,7 @@ static void print_op_move(struct compile_state *state,
                }
                /* Move 32bit to 8bit */
                else if ((src_regcm & REGCM_GPR32_8) &&
-                       (dst_regcm & REGCM_GPR8))
+                       (dst_regcm & REGCM_GPR8_LO))
                {
                        src_reg = (src_reg - REGC_GPR32_8_FIRST) + REGC_GPR8_FIRST;
                        if ((src_reg != dst_reg) || !omit_copy) {
@@ -16550,7 +17052,7 @@ static void print_op_move(struct compile_state *state,
                }
                /* Move 16bit to 8bit */
                else if ((src_regcm & REGCM_GPR16_8) &&
-                       (dst_regcm & REGCM_GPR8))
+                       (dst_regcm & REGCM_GPR8_LO))
                {
                        src_reg = (src_reg - REGC_GPR16_8_FIRST) + REGC_GPR8_FIRST;
                        if ((src_reg != dst_reg) || !omit_copy) {
@@ -16560,7 +17062,7 @@ static void print_op_move(struct compile_state *state,
                        }
                }
                /* Move 8/16bit to 16/32bit */
-               else if ((src_regcm & (REGCM_GPR8 | REGCM_GPR16)) && 
+               else if ((src_regcm & (REGCM_GPR8_LO | REGCM_GPR16)) && 
                        (dst_regcm & (REGCM_GPR16 | REGCM_GPR32))) {
                        const char *op;
                        op = is_signed(src->type)? "movsx": "movzx";
@@ -16577,15 +17079,26 @@ static void print_op_move(struct compile_state *state,
                                        reg(state, dst, dst_regcm));
                        }
                }
-               /* Move between mmx registers or mmx & sse  registers */
-               else if ((src_regcm & (REGCM_MMX | REGCM_XMM)) &&
-                       (dst_regcm & (REGCM_MMX | REGCM_XMM))) {
+               /* Move between mmx registers */
+               else if ((src_regcm & dst_regcm & REGCM_MMX)) {
                        if ((src_reg != dst_reg) || !omit_copy) {
                                fprintf(fp, "\tmovq %s, %s\n",
                                        reg(state, src, src_regcm),
                                        reg(state, dst, dst_regcm));
                        }
                }
+               /* Move from sse to mmx registers */
+               else if ((src_regcm & REGCM_XMM) && (dst_regcm & REGCM_MMX)) {
+                       fprintf(fp, "\tmovdq2q %s, %s\n",
+                               reg(state, src, src_regcm),
+                               reg(state, dst, dst_regcm));
+               }
+               /* Move from mmx to sse registers */
+               else if ((src_regcm & REGCM_MMX) && (dst_regcm & REGCM_XMM)) {
+                       fprintf(fp, "\tmovq2dq %s, %s\n",
+                               reg(state, src, src_regcm),
+                               reg(state, dst, dst_regcm));
+               }
                /* Move between 32bit gprs & mmx/sse registers */
                else if ((src_regcm & (REGCM_GPR32 | REGCM_MMX | REGCM_XMM)) &&
                        (dst_regcm & (REGCM_GPR32 | REGCM_MMX | REGCM_XMM))) {
@@ -16607,7 +17120,6 @@ static void print_op_move(struct compile_state *state,
                                arch_reg_str(mid_reg),
                                arch_reg_str(dst_reg));
                }
-
                /* Move from mmx/sse registers to 16bit gprs */
                else if ((src_regcm & (REGCM_MMX | REGCM_XMM)) &&
                        (dst_regcm & REGCM_GPR16)) {
@@ -16616,10 +17128,49 @@ static void print_op_move(struct compile_state *state,
                                arch_reg_str(src_reg),
                                arch_reg_str(dst_reg));
                }
-
+               /* Move from gpr to 64bit dividend */
+               else if ((src_regcm & (REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8_LO))  &&
+                       (dst_regcm & REGCM_DIVIDEND64)) {
+                       const char *extend;
+                       extend = is_signed(src->type)? "cltd":"movl $0, %edx";
+                       fprintf(fp, "\tmov %s, %%eax\n\t%s\n",
+                               arch_reg_str(src_reg), 
+                               extend);
+               }
+               /* Move from 64bit gpr to gpr */
+               else if ((src_regcm & REGCM_DIVIDEND64) &&
+                       (dst_regcm & (REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8_LO))) {
+                       if (dst_regcm & REGCM_GPR32) {
+                               src_reg = REG_EAX;
+                       } 
+                       else if (dst_regcm & REGCM_GPR16) {
+                               src_reg = REG_AX;
+                       }
+                       else if (dst_regcm & REGCM_GPR8_LO) {
+                               src_reg = REG_AL;
+                       }
+                       fprintf(fp, "\tmov %s, %s\n",
+                               arch_reg_str(src_reg),
+                               arch_reg_str(dst_reg));
+               }
+               /* Move from mmx/sse registers to 64bit gpr */
+               else if ((src_regcm & (REGCM_MMX | REGCM_XMM)) &&
+                       (dst_regcm & REGCM_DIVIDEND64)) {
+                       const char *extend;
+                       extend = is_signed(src->type)? "cltd": "movl $0, %edx";
+                       fprintf(fp, "\tmovd %s, %%eax\n\t%s\n",
+                               arch_reg_str(src_reg),
+                               extend);
+               }
+               /* Move from 64bit gpr to mmx/sse register */
+               else if ((src_regcm & REGCM_DIVIDEND64) &&
+                       (dst_regcm & (REGCM_XMM | REGCM_MMX))) {
+                       fprintf(fp, "\tmovd %%eax, %s\n",
+                               arch_reg_str(dst_reg));
+               }
 #if X86_4_8BIT_GPRS
                /* Move from 8bit gprs to  mmx/sse registers */
-               else if ((src_regcm & REGCM_GPR8) && (src_reg <= REG_DL) &&
+               else if ((src_regcm & REGCM_GPR8_LO) && (src_reg <= REG_DL) &&
                        (dst_regcm & (REGCM_MMX | REGCM_XMM))) {
                        const char *op;
                        int mid_reg;
@@ -16634,7 +17185,7 @@ static void print_op_move(struct compile_state *state,
                }
                /* Move from mmx/sse registers and 8bit gprs */
                else if ((src_regcm & (REGCM_MMX | REGCM_XMM)) &&
-                       (dst_regcm & REGCM_GPR8) && (dst_reg <= REG_DL)) {
+                       (dst_regcm & REGCM_GPR8_LO) && (dst_reg <= REG_DL)) {
                        int mid_reg;
                        mid_reg = (dst_reg - REGC_GPR8_FIRST) + REGC_GPR32_FIRST;
                        fprintf(fp, "\tmovd %s, %s\n",
@@ -16643,7 +17194,7 @@ static void print_op_move(struct compile_state *state,
                }
                /* Move from 32bit gprs to 8bit gprs */
                else if ((src_regcm & REGCM_GPR32) &&
-                       (dst_regcm & REGCM_GPR8)) {
+                       (dst_regcm & REGCM_GPR8_LO)) {
                        dst_reg = (dst_reg - REGC_GPR8_FIRST) + REGC_GPR32_FIRST;
                        if ((src_reg != dst_reg) || !omit_copy) {
                                fprintf(fp, "\tmov %s, %s\n",
@@ -16653,7 +17204,7 @@ static void print_op_move(struct compile_state *state,
                }
                /* Move from 16bit gprs to 8bit gprs */
                else if ((src_regcm & REGCM_GPR16) &&
-                       (dst_regcm & REGCM_GPR8)) {
+                       (dst_regcm & REGCM_GPR8_LO)) {
                        dst_reg = (dst_reg - REGC_GPR8_FIRST) + REGC_GPR16_FIRST;
                        if ((src_reg != dst_reg) || !omit_copy) {
                                fprintf(fp, "\tmov %s, %s\n",
@@ -16667,10 +17218,44 @@ static void print_op_move(struct compile_state *state,
                }
        }
        else {
-               fprintf(fp, "\tmov ");
-               print_const_val(state, src, fp);
-               fprintf(fp, ", %s\n",
-                       reg(state, dst, REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8));
+               int dst_reg;
+               int dst_regcm;
+               dst_reg = ID_REG(dst->id);
+               dst_regcm = arch_reg_regcm(state, dst_reg);
+               if (dst_regcm & (REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8_LO)) {
+                       fprintf(fp, "\tmov ");
+                       print_const_val(state, src, fp);
+                       fprintf(fp, ", %s\n",
+                               reg(state, dst, REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8_LO));
+               }
+               else if (dst_regcm & REGCM_DIVIDEND64) {
+                       if (size_of(state, dst->type) > 4) {
+                               internal_error(state, ins, "64bit constant...");
+                       }
+                       fprintf(fp, "\tmov $0, %%edx\n");
+                       fprintf(fp, "\tmov ");
+                       print_const_val(state, src, fp);
+                       fprintf(fp, ", %%eax\n");
+               }
+               else if (dst_regcm & REGCM_DIVIDEND32) {
+                       if (size_of(state, dst->type) > 2) {
+                               internal_error(state, ins, "32bit constant...");
+                       }
+                       fprintf(fp, "\tmov $0, %%dx\n");
+                       fprintf(fp, "\tmov ");
+                       print_const_val(state, src, fp);
+                       fprintf(fp, ", %%ax\n"); /* end the line, as every other branch does */
+               }
+               else if (dst_regcm & (REGCM_XMM | REGCM_MMX)) {
+                       long ref;
+                       ref = get_const_pool_ref(state, src, fp);
+                       fprintf(fp, "\tmovq L%s%lu, %s\n",
+                               state->label_prefix, ref,
+                               reg(state, dst, (REGCM_XMM | REGCM_MMX)));
+               }
+               else {
+                       internal_error(state, ins, "unknown copy immediate type");
+               }
        }
 }
 
@@ -16685,7 +17270,7 @@ static void print_op_load(struct compile_state *state,
        }
        fprintf(fp, "\tmov (%s), %s\n",
                reg(state, src, REGCM_GPR32),
-               reg(state, dst, REGCM_GPR8 | REGCM_GPR16 | REGCM_GPR32));
+               reg(state, dst, REGCM_GPR8_LO | REGCM_GPR16 | REGCM_GPR32));
 }
 
 
@@ -16693,8 +17278,8 @@ static void print_op_store(struct compile_state *state,
        struct triple *ins, FILE *fp)
 {
        struct triple *dst, *src;
-       dst = LHS(ins, 0);
-       src = RHS(ins, 0);
+       dst = RHS(ins, 0);
+       src = RHS(ins, 1);
        if (is_const(src) && (src->op == OP_INTCONST)) {
                long_t value;
                value = (long_t)(src->u.cval);
@@ -16706,7 +17291,7 @@ static void print_op_store(struct compile_state *state,
        else if (is_const(dst) && (dst->op == OP_INTCONST)) {
                fprintf(fp, "\tmov%s %s, 0x%08lx\n",
                        type_suffix(state, src->type),
-                       reg(state, src, REGCM_GPR8 | REGCM_GPR16 | REGCM_GPR32),
+                       reg(state, src, REGCM_GPR8_LO | REGCM_GPR16 | REGCM_GPR32),
                        dst->u.cval);
        }
        else {
@@ -16715,7 +17300,7 @@ static void print_op_store(struct compile_state *state,
                }
                fprintf(fp, "\tmov%s %s, (%s)\n",
                        type_suffix(state, src->type),
-                       reg(state, src, REGCM_GPR8 | REGCM_GPR16 | REGCM_GPR32),
+                       reg(state, src, REGCM_GPR8_LO | REGCM_GPR16 | REGCM_GPR32),
                        reg(state, dst, REGCM_GPR32));
        }
        
@@ -16742,7 +17327,7 @@ static void print_op_cmp(struct compile_state *state,
 {
        unsigned mask;
        int dreg;
-       mask = REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8;
+       mask = REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8_LO;
        dreg = check_reg(state, ins, REGCM_FLAGS);
        if (!reg_is_reg(state, dreg, REG_EFLAGS)) {
                internal_error(state, ins, "bad dest register for cmp");
@@ -16770,7 +17355,7 @@ static void print_op_test(struct compile_state *state,
        struct triple *ins, FILE *fp)
 {
        unsigned mask;
-       mask = REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8;
+       mask = REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8_LO;
        fprintf(fp, "\ttest %s, %s\n",
                reg(state, RHS(ins, 0), mask),
                reg(state, RHS(ins, 0), mask));
@@ -16857,7 +17442,7 @@ static void print_op_set(struct compile_state *state,
                break;
        }
        fprintf(fp, "\t%s %s\n",
-               sop, reg(state, set, REGCM_GPR8));
+               sop, reg(state, set, REGCM_GPR8_LO));
 }
 
 static void print_op_bit_scan(struct compile_state *state, 
@@ -16883,50 +17468,6 @@ static void print_op_bit_scan(struct compile_state *state,
                reg(state, ins, REGCM_GPR32));
 }
 
-static void print_const(struct compile_state *state,
-       struct triple *ins, FILE *fp)
-{
-       switch(ins->op) {
-       case OP_INTCONST:
-               switch(ins->type->type & TYPE_MASK) {
-               case TYPE_CHAR:
-               case TYPE_UCHAR:
-                       fprintf(fp, ".byte 0x%02lx\n", ins->u.cval);
-                       break;
-               case TYPE_SHORT:
-               case TYPE_USHORT:
-                       fprintf(fp, ".short 0x%04lx\n", ins->u.cval);
-                       break;
-               case TYPE_INT:
-               case TYPE_UINT:
-               case TYPE_LONG:
-               case TYPE_ULONG:
-                       fprintf(fp, ".int %lu\n", ins->u.cval);
-                       break;
-               default:
-                       internal_error(state, ins, "Unknown constant type");
-               }
-               break;
-       case OP_BLOBCONST:
-       {
-               unsigned char *blob;
-               size_t size, i;
-               size = size_of(state, ins->type);
-               blob = ins->u.blob;
-               for(i = 0; i < size; i++) {
-                       fprintf(fp, ".byte 0x%02x\n",
-                               blob[i]);
-               }
-               break;
-       }
-       default:
-               internal_error(state, ins, "Unknown constant type");
-               break;
-       }
-}
-
-#define TEXT_SECTION ".rom.text"
-#define DATA_SECTION ".rom.data"
 
 static void print_sdecl(struct compile_state *state,
        struct triple *ins, FILE *fp)
@@ -16970,7 +17511,6 @@ static void print_instruction(struct compile_state *state,
        case OP_SDECL:
                print_sdecl(state, ins, fp);
                break;
-       case OP_WRITE: 
        case OP_COPY:   
                print_op_move(state, ins, fp);
                break;
@@ -17020,6 +17560,15 @@ static void print_instruction(struct compile_state *state,
        case OP_HLT:
                fprintf(fp, "\thlt\n");
                break;
+       case OP_SDIVT:
+               fprintf(fp, "\tidiv %s\n", reg(state, RHS(ins, 1), REGCM_GPR32));
+               break;
+       case OP_UDIVT:
+               fprintf(fp, "\tdiv %s\n", reg(state, RHS(ins, 1), REGCM_GPR32));
+               break;
+       case OP_UMUL:
+               fprintf(fp, "\tmul %s\n", reg(state, RHS(ins, 1), REGCM_GPR32));
+               break;
        case OP_LABEL:
                if (!ins->use) {
                        return;
@@ -17029,11 +17578,9 @@ static void print_instruction(struct compile_state *state,
                /* Ignore OP_PIECE */
        case OP_PIECE:
                break;
-               /* Operations I am not yet certain how to handle */
-       case OP_UMUL:
+               /* Operations that should never get here */
        case OP_SDIV: case OP_UDIV:
        case OP_SMOD: case OP_UMOD:
-               /* Operations that should never get here */
        case OP_LTRUE:   case OP_LFALSE:  case OP_EQ:      case OP_NOTEQ:
        case OP_SLESS:   case OP_ULESS:   case OP_SMORE:   case OP_UMORE:
        case OP_SLESSEQ: case OP_ULESSEQ: case OP_SMOREEQ: case OP_UMOREEQ:
@@ -17050,6 +17597,8 @@ static void print_instructions(struct compile_state *state)
        int print_location;
        struct occurance *last_occurance;
        FILE *fp;
+       int max_inline_depth;
+       max_inline_depth = 0;
        print_location = 1;
        last_occurance = 0;
        fp = state->output;
@@ -17068,8 +17617,11 @@ static void print_instructions(struct compile_state *state)
                        }
                        else {
                                struct occurance *ptr;
+                               int inline_depth;
                                fprintf(fp, "\t/*\n");
+                               inline_depth = 0;
                                for(ptr = ins->occurance; ptr; ptr = ptr->parent) {
+                                       inline_depth++;
                                        fprintf(fp, "\t * %s,%s:%d.%d\n",
                                                ptr->function,
                                                ptr->filename,
@@ -17077,7 +17629,9 @@ static void print_instructions(struct compile_state *state)
                                                ptr->col);
                                }
                                fprintf(fp, "\t */\n");
-                               
+                               if (inline_depth > max_inline_depth) {
+                                       max_inline_depth = inline_depth;
+                               }
                        }
                        if (last_occurance) {
                                put_occurance(last_occurance);
@@ -17089,8 +17643,12 @@ static void print_instructions(struct compile_state *state)
                print_instruction(state, ins, fp);
                ins = ins->next;
        } while(ins != first);
-       
+       if (print_location) {
+               fprintf(fp, "/* max inline depth %d */\n",
+                       max_inline_depth);
+       }
 }
+
 static void generate_code(struct compile_state *state)
 {
        generate_local_labels(state);
diff --git a/util/romcc/tests/raminit_test5.c b/util/romcc/tests/raminit_test5.c
new file mode 100644 (file)
index 0000000..f386a75
--- /dev/null
@@ -0,0 +1,1392 @@
+
+
+
+
+
+
+/* Integer type names in the style of <stdint.h>, built from char/short/int. */
+typedef unsigned char uint8_t;
+typedef signed char int8_t;
+
+typedef unsigned short uint16_t;
+typedef signed short int16_t;
+
+typedef unsigned int uint32_t;
+typedef signed int int32_t;
+
+
+
+
+
+
+/* "least" variants: each maps to the same base type as the matching name above. */
+typedef unsigned char uint_least8_t;
+typedef signed char int_least8_t;
+
+typedef unsigned short uint_least16_t;
+typedef signed short int_least16_t;
+
+typedef unsigned int uint_least32_t;
+typedef signed int int_least32_t;
+
+
+
+
+
+
+/* "fast" variants: the 16-bit names map to int rather than short. */
+typedef unsigned char uint_fast8_t;
+typedef signed char int_fast8_t;
+
+typedef unsigned int uint_fast16_t;
+typedef signed int int_fast16_t;
+
+typedef unsigned int uint_fast32_t;
+typedef signed int int_fast32_t;
+
+
+
+
+
+
+/* Pointer-sized integers are plain int / unsigned int here. */
+typedef int intptr_t;
+typedef unsigned int uintptr_t;
+
+
+
+
+
+/* The widest integer names map to long int / unsigned long int. */
+typedef long int intmax_t;
+typedef unsigned long int uintmax_t;
+
+
+
+/* Write the byte `value` to I/O port `port` through __builtin_outb. */
+static void outb(unsigned char value, unsigned short port)
+{
+        __builtin_outb(value, port);
+}
+/* Write the unsigned short `value` to I/O port `port` through __builtin_outw. */
+static void outw(unsigned short value, unsigned short port)
+{
+        __builtin_outw(value, port);
+}
+/* Write the unsigned int `value` to I/O port `port` through __builtin_outl. */
+static void outl(unsigned int value, unsigned short port)
+{
+        __builtin_outl(value, port);
+}
+
+/* Read one byte from I/O port `port` through __builtin_inb. */
+static unsigned char inb(unsigned short port)
+{
+        return __builtin_inb(port);
+}
+
+/* Read a 16-bit word from I/O port `port`; returns unsigned short so the high byte survives. */
+static unsigned short inw(unsigned short port)
+{
+        return __builtin_inw(port);
+}
+/* Read a 32-bit dword from I/O port `port`; returns unsigned int so no bits are truncated. */
+static unsigned int inl(unsigned short port)
+{
+        return __builtin_inl(port);
+}
+/* Thin wrapper around __builtin_hlt(). */
+static void hlt(void)
+{
+        __builtin_hlt();
+}
+/* Return __builtin_bsr(value); presumably the index of the highest set bit -- TODO confirm value == 0. */
+int log2(int value)
+{
+
+
+
+
+
+
+        return __builtin_bsr(value);
+}
+
+/* Alias for the compiler's builtin MSR value type; wrmsr() reads its .lo and .hi members. */
+typedef __builtin_msr_t msr_t;
+/* Return the value produced by __builtin_rdmsr for the register selected by `index`. */
+static msr_t rdmsr(unsigned long index)
+{
+        return __builtin_rdmsr(index);
+}
+/* Pass `index` plus the .lo and .hi halves of `msr` to __builtin_wrmsr. */
+static void wrmsr(unsigned long index, msr_t msr)
+{
+        __builtin_wrmsr(index, msr.lo, msr.hi);
+}
+/* A device handle is a plain unsigned value that gets OR-ed with the register offset. */
+typedef unsigned device_t;
+/* Read one byte: write 0x80000000 | dword-aligned (dev | where) to 0xCF8, then read port 0xCFC + (addr & 3). */
+static unsigned char pci_read_config8(device_t dev, unsigned where)
+{
+        unsigned addr;
+        addr = dev | where;
+        outl(0x80000000 | (addr & ~3), 0xCF8);
+        return inb(0xCFC + (addr & 3));
+}
+/* Read via inw(): write 0x80000000 | dword-aligned (dev | where) to 0xCF8, then read port 0xCFC + (addr & 2). */
+static unsigned short pci_read_config16(device_t dev, unsigned where)
+{
+        unsigned addr;
+        addr = dev | where;
+        outl(0x80000000 | (addr & ~3), 0xCF8);
+        return inw(0xCFC + (addr & 2));
+}
+/* Read via inl(): write 0x80000000 | dword-aligned (dev | where) to 0xCF8, then read port 0xCFC. */
+static unsigned int pci_read_config32(device_t dev, unsigned where)
+{
+        unsigned addr;
+        addr = dev | where;
+        outl(0x80000000 | (addr & ~3), 0xCF8);
+        return inl(0xCFC);
+}
+/* Write one byte: select the dword at (dev | where) through 0xCF8, then outb to port 0xCFC + (addr & 3). */
+static void pci_write_config8(device_t dev, unsigned where, unsigned char value)
+{
+        unsigned addr;
+        addr = dev | where;
+        outl(0x80000000 | (addr & ~3), 0xCF8);
+        outb(value, 0xCFC + (addr & 3));
+}
+/* Write a 16-bit value: select the dword at (dev | where) through 0xCF8, then outw to port 0xCFC + (addr & 2). */
+static void pci_write_config16(device_t dev, unsigned where, unsigned short value)
+{
+        unsigned addr;
+        addr = dev | where;
+        outl(0x80000000 | (addr & ~3), 0xCF8);
+        outw(value, 0xCFC + (addr & 2));
+}
+/* Write a 32-bit value: select the dword at (dev | where) through 0xCF8, then outl to port 0xCFC. */
+static void pci_write_config32(device_t dev, unsigned where, unsigned int value)
+{
+        unsigned addr;
+        addr = dev | where;
+        outl(0x80000000 | (addr & ~3), 0xCF8);
+        outl(value, 0xCFC);
+}
+/* Starting at `dev`, step by 0x100 up to and including 0xFFFF00, reading config dword 0 of each
+ * handle; return the first handle whose dword equals `pci_id`, or 0xffffffffU if none does. */
+static device_t pci_locate_device(unsigned pci_id, device_t dev)
+{
+        for(; dev <= ( (((255) & 0xFF) << 16) | (((31) & 0x1f) << 11) | (((7) & 0x7) << 8)); dev += ( (((0) & 0xFF) << 16) | (((0) & 0x1f) << 11) | (((1) & 0x7) << 8))) {
+                unsigned int id;
+                id = pci_read_config32(dev, 0);
+                if (id == pci_id) {
+                        return dev;
+                }
+        }
+        return (0xffffffffU);
+}
+
+
+
+
+/* Non-zero when bit 0x20 of port 0x3f8 + 0x05 is set (presumably the UART "transmit holding empty" flag). */
+static int uart_can_tx_byte(void)
+{
+        return inb(0x3f8 + 0x05) & 0x20;
+}
+/* Busy-wait until uart_can_tx_byte() returns non-zero. */
+static void uart_wait_to_tx_byte(void)
+{
+        while(!uart_can_tx_byte())
+                ;
+}
+/* Busy-wait until bit 0x40 of port 0x3f8 + 0x05 is set (presumably "transmitter empty"). */
+static void uart_wait_until_sent(void)
+{
+        while(!(inb(0x3f8 + 0x05) & 0x40))
+                ;
+}
+/* Send `data`: wait for uart_can_tx_byte(), write the byte to port 0x3f8 + 0x00, then wait again. */
+static void uart_tx_byte(unsigned char data)
+{
+        uart_wait_to_tx_byte();
+        outb(data, 0x3f8 + 0x00);
+        /* Do not return until uart_wait_until_sent() sees its status bit. */
+        uart_wait_until_sent();
+}
+/* Program the UART at port base 0x3f8 with a fixed sequence of register writes. */
+static void uart_init(void)
+{
+        /* Register +1 <- 0 (presumably: disable UART interrupts). */
+        outb(0x0, 0x3f8 + 0x01);
+        /* Register +2 <- 1 (presumably: FIFO control). */
+        outb(0x01, 0x3f8 + 0x02);
+        /* Register +3 <- 0x80 | 0x3; 0x80 presumably exposes the baud divisor at +0/+1. */
+        outb(0x80 | 0x3, 0x3f8 + 0x03);
+        /* Divisor is 115200/115200 == 1: low byte to +0, high byte to +1. */
+        outb((115200/115200) & 0xFF, 0x3f8 + 0x00);
+        outb(((115200/115200) >> 8) & 0xFF, 0x3f8 + 0x01);
+        /* Rewrite register +3 with 0x3 only, dropping the 0x80 bit set above. */
+        outb(0x3, 0x3f8 + 0x03);
+}
+
+
+
+
+/* Unconditionally send `byte` through uart_tx_byte(). */
+static void __console_tx_byte(unsigned char byte)
+{
+        uart_tx_byte(byte);
+}
+/* Send one hex digit for `nibble`: '0'-'9', or 'a'-'f' for 10-15 (':' + 39 == 'a'). */
+static void __console_tx_nibble(unsigned nibble)
+{
+        unsigned char digit;
+        digit = nibble + '0';
+        if (digit > '9') {
+                digit += 39;
+        }
+        __console_tx_byte(digit);
+}
+/* Send `byte` only when `loglevel` is below 8; otherwise do nothing. */
+static void __console_tx_char(int loglevel, unsigned char byte)
+{
+        if (8 > loglevel) {
+                uart_tx_byte(byte);
+        }
+}
+/* When `loglevel` is below 8, send `value` as two hex digits, high nibble first. */
+static void __console_tx_hex8(int loglevel, unsigned char value)
+{
+        if (8 > loglevel) {
+                __console_tx_nibble((value >> 4U) & 0x0fU);
+                __console_tx_nibble(value & 0x0fU);
+        }
+}
+/* When `loglevel` is below 8, send `value` as four hex digits, most significant nibble first. */
+static void __console_tx_hex16(int loglevel, unsigned short value)
+{
+        if (8 > loglevel) {
+                __console_tx_nibble((value >> 12U) & 0x0fU);
+                __console_tx_nibble((value >> 8U) & 0x0fU);
+                __console_tx_nibble((value >> 4U) & 0x0fU);
+                __console_tx_nibble(value & 0x0fU);
+        }
+}
+/* When `loglevel` is below 8, send `value` as eight hex digits, most significant nibble first. */
+static void __console_tx_hex32(int loglevel, unsigned int value)
+{
+        if (8 > loglevel) {
+                __console_tx_nibble((value >> 28U) & 0x0fU);
+                __console_tx_nibble((value >> 24U) & 0x0fU);
+                __console_tx_nibble((value >> 20U) & 0x0fU);
+                __console_tx_nibble((value >> 16U) & 0x0fU);
+                __console_tx_nibble((value >> 12U) & 0x0fU);
+                __console_tx_nibble((value >> 8U) & 0x0fU);
+                __console_tx_nibble((value >> 4U) & 0x0fU);
+                __console_tx_nibble(value & 0x0fU);
+        }
+}
+/* When `loglevel` is below 8, send every byte of `str` up to (not including) the terminating NUL. */
+static void __console_tx_string(int loglevel, const char *str)
+{
+        if (8 > loglevel) {
+                unsigned char ch;
+                while((ch = *str++) != '\0') {
+                        __console_tx_byte(ch);
+                }
+        }
+}
+/* Level 0 (emerg) wrappers: char / hex8 / hex16 / hex32 / string. Levels 0-7 pass the "8 > loglevel" test. */
+static void print_emerg_char(unsigned char byte) { __console_tx_char(0, byte); }
+static void print_emerg_hex8(unsigned char value){ __console_tx_hex8(0, value); }
+static void print_emerg_hex16(unsigned short value){ __console_tx_hex16(0, value); }
+static void print_emerg_hex32(unsigned int value) { __console_tx_hex32(0, value); }
+static void print_emerg(const char *str) { __console_tx_string(0, str); }
+/* Level 1 (alert) wrappers. */
+static void print_alert_char(unsigned char byte) { __console_tx_char(1, byte); }
+static void print_alert_hex8(unsigned char value) { __console_tx_hex8(1, value); }
+static void print_alert_hex16(unsigned short value){ __console_tx_hex16(1, value); }
+static void print_alert_hex32(unsigned int value) { __console_tx_hex32(1, value); }
+static void print_alert(const char *str) { __console_tx_string(1, str); }
+/* Level 2 (crit) wrappers. */
+static void print_crit_char(unsigned char byte) { __console_tx_char(2, byte); }
+static void print_crit_hex8(unsigned char value) { __console_tx_hex8(2, value); }
+static void print_crit_hex16(unsigned short value){ __console_tx_hex16(2, value); }
+static void print_crit_hex32(unsigned int value) { __console_tx_hex32(2, value); }
+static void print_crit(const char *str) { __console_tx_string(2, str); }
+/* Level 3 (err) wrappers. */
+static void print_err_char(unsigned char byte) { __console_tx_char(3, byte); }
+static void print_err_hex8(unsigned char value) { __console_tx_hex8(3, value); }
+static void print_err_hex16(unsigned short value){ __console_tx_hex16(3, value); }
+static void print_err_hex32(unsigned int value) { __console_tx_hex32(3, value); }
+static void print_err(const char *str) { __console_tx_string(3, str); }
+/* Level 4 (warning) wrappers. */
+static void print_warning_char(unsigned char byte) { __console_tx_char(4, byte); }
+static void print_warning_hex8(unsigned char value) { __console_tx_hex8(4, value); }
+static void print_warning_hex16(unsigned short value){ __console_tx_hex16(4, value); }
+static void print_warning_hex32(unsigned int value) { __console_tx_hex32(4, value); }
+static void print_warning(const char *str) { __console_tx_string(4, str); }
+/* Level 5 (notice) wrappers. */
+static void print_notice_char(unsigned char byte) { __console_tx_char(5, byte); }
+static void print_notice_hex8(unsigned char value) { __console_tx_hex8(5, value); }
+static void print_notice_hex16(unsigned short value){ __console_tx_hex16(5, value); }
+static void print_notice_hex32(unsigned int value) { __console_tx_hex32(5, value); }
+static void print_notice(const char *str) { __console_tx_string(5, str); }
+/* Level 6 (info) wrappers. */
+static void print_info_char(unsigned char byte) { __console_tx_char(6, byte); }
+static void print_info_hex8(unsigned char value) { __console_tx_hex8(6, value); }
+static void print_info_hex16(unsigned short value){ __console_tx_hex16(6, value); }
+static void print_info_hex32(unsigned int value) { __console_tx_hex32(6, value); }
+static void print_info(const char *str) { __console_tx_string(6, str); }
+/* Level 7 (debug) wrappers. */
+static void print_debug_char(unsigned char byte) { __console_tx_char(7, byte); }
+static void print_debug_hex8(unsigned char value) { __console_tx_hex8(7, value); }
+static void print_debug_hex16(unsigned short value){ __console_tx_hex16(7, value); }
+static void print_debug_hex32(unsigned int value) { __console_tx_hex32(7, value); }
+static void print_debug(const char *str) { __console_tx_string(7, str); }
+/* Level 8 (spew) wrappers: "8 > 8" is false in the __console_tx_* helpers, so these emit nothing. */
+static void print_spew_char(unsigned char byte) { __console_tx_char(8, byte); }
+static void print_spew_hex8(unsigned char value) { __console_tx_hex8(8, value); }
+static void print_spew_hex16(unsigned short value){ __console_tx_hex16(8, value); }
+static void print_spew_hex32(unsigned int value) { __console_tx_hex32(8, value); }
+static void print_spew(const char *str) { __console_tx_string(8, str); }
+/* Print the fixed start-up banner string at info level. */
+static void console_init(void)
+{
+        static const char console_test[] =
+                "\r\n\r\nLinuxBIOS-"
+                "1.1.0"
+                ".0Fallback"
+                " "
+                "Thu Jun 19 05:42:16 MDT 2003"
+                " starting...\r\n";
+        print_info(console_test);
+}
+
+/* Print `str` at emerg level, then call hlt() in an endless loop; this function never returns. */
+static void die(const char *str)
+{
+        print_emerg(str);
+        do {
+                hlt();
+        } while(1);
+}
+
+
+
+
+
+
+
+
+/* Store `value` at the memory address `addr` using an inline "movnti" instruction. */
+static void write_phys(unsigned long addr, unsigned long value)
+{
+        /* %0 is addr, %1 is value, both in registers (presumably a non-temporal store -- confirm). */
+        asm volatile(
+                "movnti %1, (%0)"
+                :
+                : "r" (addr), "r" (value)
+                :
+                );
+
+
+
+
+
+}
+/* Return the unsigned long stored at address `addr`, read through a volatile pointer. */
+static unsigned long read_phys(unsigned long addr)
+{
+        volatile unsigned long *ptr;
+        ptr = (void *)addr;
+        return *ptr;
+}
+/* Write each address in [start, stop), stepping by 4, with its own address as the stored value. */
+static void ram_fill(unsigned long start, unsigned long stop)
+{
+        unsigned long addr;
+
+
+        /* Announce the range at debug level. */
+        print_debug("DRAM fill: ");
+        print_debug_hex32(start);
+        print_debug("-");
+        print_debug_hex32(stop);
+        print_debug("\r\n");
+        for(addr = start; addr < stop ; addr += 4) {
+                /* Progress output whenever the low 16 bits of addr are all zero. */
+                if (!(addr & 0xffff)) {
+                        print_debug_hex32(addr);
+                        print_debug("\r");
+                }
+                write_phys(addr, addr);
+        };
+
+        print_debug_hex32(addr);
+        print_debug("\r\nDRAM filled\r\n");
+}
+/* Read back [start, stop) in steps of 4 and report every word that does not equal its own address. */
+static void ram_verify(unsigned long start, unsigned long stop)
+{
+        unsigned long addr;
+
+
+        /* Announce the range at debug level. */
+        print_debug("DRAM verify: ");
+        print_debug_hex32(start);
+        print_debug_char('-');
+        print_debug_hex32(stop);
+        print_debug("\r\n");
+        for(addr = start; addr < stop ; addr += 4) {
+                unsigned long value;
+                /* Progress output whenever the low 16 bits of addr are all zero. */
+                if (!(addr & 0xffff)) {
+                        print_debug_hex32(addr);
+                        print_debug("\r");
+                }
+                value = read_phys(addr);
+                if (value != addr) {
+                        /* Mismatch: print "address:value" at err level and keep going. */
+                        print_err_hex32(addr);
+                        print_err_char(':');
+                        print_err_hex32(value);
+                        print_err("\r\n");
+                }
+        }
+
+        print_debug_hex32(addr);
+        print_debug("\r\nDRAM verified\r\n");
+}
+
+
+void ram_check(unsigned long start, unsigned long stop)
+{
+        int result;  /* declared but never used in this function */
+        /* Memory test over [start, stop): announce the range, fill it with
+         * ram_fill(), then check it with ram_verify().  No status is returned;
+         * failures are visible only in the console output of ram_verify(). */
+
+
+        print_debug("Testing DRAM : ");
+        print_debug_hex32(start);
+        print_debug("-");
+        print_debug_hex32(stop);
+        print_debug("\r\n");
+        ram_fill(start, stop);
+        ram_verify(start, stop);
+        print_debug("Done.\n");
+}
+
+
+static void enumerate_ht_chain(void)
+{
+        /* Repeatedly probe the PCI function encoded as 0:0.0 and, when it exposes a
+         * capability with ID 0x08 (presumably the HyperTransport capability, going by
+         * the function name -- TODO confirm), write the next free unit id into the low
+         * 5 bits of that capability's 16-bit word at pos + 2.  The outer loop ends when
+         * a pass assigns nothing or next_unitid exceeds 0x1f. */
+        unsigned next_unitid, last_unitid;;
+        next_unitid = 1;
+        do {
+                uint32_t id;
+                uint8_t hdr_type, pos;
+                last_unitid = next_unitid;
+                /* 32-bit ID word at config offset 0x00. */
+                id = pci_read_config32(( (((0) & 0xFF) << 16) | (((0) & 0x1f) << 11) | (((0) & 0x7) << 8)), 0x00);
+                /* Stop as soon as either 16-bit half of the ID is 0x0000 or 0xffff. */
+                if (((id & 0xffff) == 0x0000) || ((id & 0xffff) == 0xffff) ||
+                        (((id >> 16) & 0xffff) == 0xffff) ||
+                        (((id >> 16) & 0xffff) == 0x0000)) {
+                        break;
+                }
+                hdr_type = pci_read_config8(( (((0) & 0xFF) << 16) | (((0) & 0x1f) << 11) | (((0) & 0x7) << 8)), 0x0e);
+                pos = 0;
+                hdr_type &= 0x7f;
+                /* Only header types 0 and 1 (bit 7 masked off) get a list head from offset 0x34. */
+                if ((hdr_type == 0) ||
+                        (hdr_type == 1)) {
+                        pos = pci_read_config8(( (((0) & 0xFF) << 16) | (((0) & 0x1f) << 11) | (((0) & 0x7) << 8)), 0x34);
+                }
+                while(pos != 0) {  /* walk the list: ID byte at pos, next pointer at pos + 1 */
+                        uint8_t cap;
+                        cap = pci_read_config8(( (((0) & 0xFF) << 16) | (((0) & 0x1f) << 11) | (((0) & 0x7) << 8)), pos + 0);
+                        if (cap == 0x08) {
+                                uint16_t flags;
+                                flags = pci_read_config16(( (((0) & 0xFF) << 16) | (((0) & 0x1f) << 11) | (((0) & 0x7) << 8)), pos + 2);
+                                if ((flags >> 13) == 0) {  /* top three bits of the word must be zero */
+                                        unsigned count;
+                                        flags &= ~0x1f;
+                                        flags |= next_unitid & 0x1f;
+                                        count = (flags >> 5) & 0x1f;  /* bits 5..9: how many unit ids to advance by */
+                                        pci_write_config16(( (((0) & 0xFF) << 16) | (((0) & 0x1f) << 11) | (((0) & 0x7) << 8)), pos + 2, flags);
+                                        next_unitid += count;
+                                        break;
+                                }
+                        }
+                        pos = pci_read_config8(( (((0) & 0xFF) << 16) | (((0) & 0x1f) << 11) | (((0) & 0x7) << 8)), pos + 1);
+                }
+        } while((last_unitid != next_unitid) && (next_unitid <= 0x1f));
+}
+
+
+
+static void enable_smbus(void)  /* Locate the SMBus controller on PCI and program it; dies if it is absent. */
+{
+        device_t dev;
+        dev = pci_locate_device(((((0x746b) & 0xFFFF) << 16) | ((0x1022) & 0xFFFF)), 0);  /* search key: 0x746b in the high 16 bits, 0x1022 in the low 16 */
+        if (dev == (0xffffffffU)) {  /* 0xffffffff is treated as "not found" */
+                die("SMBUS controller not found\r\n");
+        }
+        uint8_t enable;
+        print_debug("SMBus controller enabled\r\n");  /* note: printed before the writes below are issued */
+        pci_write_config32(dev, 0x58, 0x1000 | 1);  /* 0x1000 is the port base the smbus_* helpers use; bit 0 presumably enables it -- TODO confirm */
+        enable = pci_read_config8(dev, 0x41);
+        pci_write_config8(dev, 0x41, enable | (1 << 7));  /* read-modify-write: set bit 7 of config byte 0x41 */
+}
+
+
+static inline void smbus_delay(void)  /* Short delay used between SMBus status polls. */
+{
+        outb(0x80, 0x80);  /* a single byte write of 0x80 to I/O port 0x80 */
+}
+
+static int smbus_wait_until_ready(void)  /* Poll until status bit 0x800 clears; 0 on success, -1 on timeout. */
+{
+        unsigned long loops;
+        loops = (100*1000*10);  /* upper bound on the number of polls */
+        do {
+                unsigned short val;
+                smbus_delay();
+                val = inw(0x1000 + 0xe0);  /* 16-bit status word at port 0x10e0 */
+                if ((val & 0x800) == 0) {
+                        break;  /* leaves loops non-zero, which selects the 0 return below */
+                }
+        } while(--loops);
+        return loops?0:-1;  /* loops reaches 0 only if the bit never cleared */
+}
+
+static int smbus_wait_until_done(void)  /* Poll until the transfer is no longer in progress; 0 on success, -1 on timeout. */
+{
+        unsigned long loops;
+        loops = (100*1000*10);  /* upper bound on the number of polls */
+        do {
+                unsigned short val;
+                smbus_delay();
+                /* Stop polling when bit 0x8 is clear or any bit of mask 0x437 is set. */
+                val = inw(0x1000 + 0xe0);
+                if (((val & 0x8) == 0) | ((val & 0x437) != 0)) {  /* bitwise | of two 0/1 results: both sides are always evaluated */
+                        break;
+                }
+        } while(--loops);
+        return loops?0:-1;  /* loops reaches 0 only if neither condition ever held */
+}
+
+static int smbus_read_byte(unsigned device, unsigned address)
+{
+        unsigned char global_control_register;  /* declared but never used */
+        unsigned char global_status_register;  /* 8 bits wide: keeps only the low byte of the 16-bit status read */
+        unsigned char byte;
+        /* Returns the byte read (0..255) or -1 on timeout / unexpected status. */
+        if (smbus_wait_until_ready() < 0) {
+                return -1;
+        }
+
+        /* Transaction setup through the I/O ports at base 0x1000 (outw/outb take value, then port). */
+        /* Clear bits 10, 9, 8 and 4 of the control word at +0xe2. */
+        outw(inw(0x1000 + 0xe2) & ~((1<<10)|(1<<9)|(1<<8)|(1<<4)), 0x1000 + 0xe2);
+        /* 7-bit device number shifted left by one, with bit 0 set. */
+        outw(((device & 0x7f) << 1) | 1, 0x1000 + 0xe4);
+        /* Low 8 bits of the byte address to read. */
+        outb(address & 0xFF, 0x1000 + 0xe8);
+        /* Low three control bits := 2 (presumably selects a byte-read cycle -- TODO confirm). */
+        outw((inw(0x1000 + 0xe2) & ~7) | (0x2), 0x1000 + 0xe2);
+
+
+        /* Write the status word back to itself (presumably clears latched status bits -- TODO confirm). */
+        outw(inw(0x1000 + 0xe0), 0x1000 + 0xe0);
+
+        /* Zero the data word at +0xe6 before the transfer. */
+        outw(0, 0x1000 + 0xe6);
+
+        /* Set control bit 3 (presumably starts the transaction -- TODO confirm). */
+        outw((inw(0x1000 + 0xe2) | (1 << 3)), 0x1000 + 0xe2);
+
+
+
+        if (smbus_wait_until_done() < 0) {
+                return -1;
+        }
+
+        global_status_register = inw(0x1000 + 0xe0);
+
+        /* The result is the low byte of the data word at +0xe6. */
+        byte = inw(0x1000 + 0xe6) & 0xff;
+        /* Accept the byte only when the (truncated) status is exactly bit 4. */
+        if (global_status_register != (1 << 4)) {
+                return -1;
+        }
+        return byte;
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+static void setup_resource_map(const unsigned int *register_values, int max)
+{
+        int i;
+        print_debug("setting up resource map....\r\n");
+        for(i = 0; i < max; i += 3) {
+                device_t dev;
+                unsigned where;
+                unsigned long reg;
+                /* register_values holds max entries, consumed three at a time:
+                 *   [i]   packed target: low 8 bits = config offset, remaining bits = device
+                 *   [i+1] AND mask (bits of the current register value to keep)
+                 *   [i+2] OR value (bits to set)
+                 * max is expected to be a multiple of 3; the loop does not check this. */
+
+                dev = register_values[i] & ~0xff;
+                where = register_values[i] & 0xff;
+                reg = pci_read_config32(dev, where);
+                reg &= register_values[i+1];
+                reg |= register_values[i+2];
+                pci_write_config32(dev, where, reg);
+
+
+
+
+
+
+        }
+        print_debug("done.\r\n");
+}
+
+static void setup_default_resource_map(void)
+{
+        static const unsigned int register_values[] = {
+        /* Rows are (packed target, AND mask, OR value) triples for setup_resource_map(); all target 0:0x18.1. */
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x44) & 0xFF)), 0x0000f8f8, 0x00000000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x4C) & 0xFF)), 0x0000f8f8, 0x00000001,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x54) & 0xFF)), 0x0000f8f8, 0x00000002,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x5C) & 0xFF)), 0x0000f8f8, 0x00000003,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x64) & 0xFF)), 0x0000f8f8, 0x00000004,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x6C) & 0xFF)), 0x0000f8f8, 0x00000005,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x74) & 0xFF)), 0x0000f8f8, 0x00000006,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x7C) & 0xFF)), 0x0000f8f8, 0x00000007,
+        /* Offsets 0x40..0x78 (step 8): keep mask 0x0000f8fc, nothing ORed in. */
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x40) & 0xFF)), 0x0000f8fc, 0x00000000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x48) & 0xFF)), 0x0000f8fc, 0x00000000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x50) & 0xFF)), 0x0000f8fc, 0x00000000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x58) & 0xFF)), 0x0000f8fc, 0x00000000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x60) & 0xFF)), 0x0000f8fc, 0x00000000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x68) & 0xFF)), 0x0000f8fc, 0x00000000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x70) & 0xFF)), 0x0000f8fc, 0x00000000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x78) & 0xFF)), 0x0000f8fc, 0x00000000,
+        /* Offsets 0x84..0xBC (step 8): keep mask 0x00000048, nothing ORed in. */
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x84) & 0xFF)), 0x00000048, 0x00000000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x8C) & 0xFF)), 0x00000048, 0x00000000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x94) & 0xFF)), 0x00000048, 0x00000000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x9C) & 0xFF)), 0x00000048, 0x00000000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xA4) & 0xFF)), 0x00000048, 0x00000000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xAC) & 0xFF)), 0x00000048, 0x00000000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xB4) & 0xFF)), 0x00000048, 0x00000000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xBC) & 0xFF)), 0x00000048, 0x00000000,
+        /* Offsets 0x80..0xB8 (step 8): keep mask 0x000000f0, nothing ORed in. */
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x80) & 0xFF)), 0x000000f0, 0x00000000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x88) & 0xFF)), 0x000000f0, 0x00000000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x90) & 0xFF)), 0x000000f0, 0x00000000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x98) & 0xFF)), 0x000000f0, 0x00000000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xA0) & 0xFF)), 0x000000f0, 0x00000000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xA8) & 0xFF)), 0x000000f0, 0x00000000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xB0) & 0xFF)), 0x000000f0, 0x00000000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xB8) & 0xFF)), 0x000000f0, 0x00000000,
+        /* Offsets 0xC4..0xDC (step 8): keep mask 0xFE000FC8; only 0xC4 gets a non-zero OR value. */
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xC4) & 0xFF)), 0xFE000FC8, 0x01fff000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xCC) & 0xFF)), 0xFE000FC8, 0x00000000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xD4) & 0xFF)), 0xFE000FC8, 0x00000000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xDC) & 0xFF)), 0xFE000FC8, 0x00000000,
+        /* Offsets 0xC0..0xD8 (step 8): keep mask 0xFE000FCC; only 0xC0 gets a non-zero OR value. */
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xC0) & 0xFF)), 0xFE000FCC, 0x00000003,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xC8) & 0xFF)), 0xFE000FCC, 0x00000000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xD0) & 0xFF)), 0xFE000FCC, 0x00000000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xD8) & 0xFF)), 0xFE000FCC, 0x00000000,
+        /* Offsets 0xE0..0xEC (step 4): keep mask 0x0000FC88; only 0xE0 gets a non-zero OR value. */
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xE0) & 0xFF)), 0x0000FC88, 0xff000003,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xE4) & 0xFF)), 0x0000FC88, 0x00000000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xE8) & 0xFF)), 0x0000FC88, 0x00000000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xEC) & 0xFF)), 0x0000FC88, 0x00000000,
+        };
+        int max;
+        max = sizeof(register_values)/sizeof(register_values[0]);  /* element count, not triple count */
+        setup_resource_map(register_values, max);
+}
+
+static void sdram_set_registers(void)
+{
+        static const unsigned int register_values[] = {
+        /* Rows are (packed target, AND mask, OR value) triples; the first part targets 0:0x18.1. */
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x44) & 0xFF)), 0x0000f8f8, 0x003f0000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x4C) & 0xFF)), 0x0000f8f8, 0x00000001,
+
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x54) & 0xFF)), 0x0000f8f8, 0x00000002,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x5C) & 0xFF)), 0x0000f8f8, 0x00000003,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x64) & 0xFF)), 0x0000f8f8, 0x00000004,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x6C) & 0xFF)), 0x0000f8f8, 0x00000005,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x74) & 0xFF)), 0x0000f8f8, 0x00000006,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x7C) & 0xFF)), 0x0000f8f8, 0x00000007,
+
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x40) & 0xFF)), 0x0000f8fc, 0x00000003,
+
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x48) & 0xFF)), 0x0000f8fc, 0x00400000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x50) & 0xFF)), 0x0000f8fc, 0x00400000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x58) & 0xFF)), 0x0000f8fc, 0x00400000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x60) & 0xFF)), 0x0000f8fc, 0x00400000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x68) & 0xFF)), 0x0000f8fc, 0x00400000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x70) & 0xFF)), 0x0000f8fc, 0x00400000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x78) & 0xFF)), 0x0000f8fc, 0x00400000,
+
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x84) & 0xFF)), 0x00000048, 0x00e1ff00,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x8C) & 0xFF)), 0x00000048, 0x00dfff00,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x94) & 0xFF)), 0x00000048, 0x00e3ff00,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x9C) & 0xFF)), 0x00000048, 0x00000000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xA4) & 0xFF)), 0x00000048, 0x00000000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xAC) & 0xFF)), 0x00000048, 0x00000000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xB4) & 0xFF)), 0x00000048, 0x00000b00,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xBC) & 0xFF)), 0x00000048, 0x00fe0b00,
+
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x80) & 0xFF)), 0x000000f0, 0x00e00003,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x88) & 0xFF)), 0x000000f0, 0x00d80003,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x90) & 0xFF)), 0x000000f0, 0x00e20003,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x98) & 0xFF)), 0x000000f0, 0x00000000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xA0) & 0xFF)), 0x000000f0, 0x00000000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xA8) & 0xFF)), 0x000000f0, 0x00000000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xB0) & 0xFF)), 0x000000f0, 0x00000a03,
+
+
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xB8) & 0xFF)), 0x000000f0, 0x00400003,
+
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xC4) & 0xFF)), 0xFE000FC8, 0x0000d000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xCC) & 0xFF)), 0xFE000FC8, 0x000ff000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xD4) & 0xFF)), 0xFE000FC8, 0x00000000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xDC) & 0xFF)), 0xFE000FC8, 0x00000000,
+
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xC0) & 0xFF)), 0xFE000FCC, 0x0000d003,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xC8) & 0xFF)), 0xFE000FCC, 0x00001013,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xD0) & 0xFF)), 0xFE000FCC, 0x00000000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xD8) & 0xFF)), 0xFE000FCC, 0x00000000,
+
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xE0) & 0xFF)), 0x0000FC88, 0xff000003,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xE4) & 0xFF)), 0x0000FC88, 0x00000000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xE8) & 0xFF)), 0x0000FC88, 0x00000000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xEC) & 0xFF)), 0x0000FC88, 0x00000000,
+        /* From here on the rows target function 2 of the same device (0:0x18.2). */
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x07) << 8) | ((0x40) & 0xFF)), 0x001f01fe, 0x00000000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x07) << 8) | ((0x44) & 0xFF)), 0x001f01fe, 0x00000000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x07) << 8) | ((0x48) & 0xFF)), 0x001f01fe, 0x00000000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x07) << 8) | ((0x4C) & 0xFF)), 0x001f01fe, 0x00000000,
+
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x07) << 8) | ((0x50) & 0xFF)), 0x001f01fe, 0x00000000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x07) << 8) | ((0x54) & 0xFF)), 0x001f01fe, 0x00000000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x07) << 8) | ((0x58) & 0xFF)), 0x001f01fe, 0x00000000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x07) << 8) | ((0x5C) & 0xFF)), 0x001f01fe, 0x00000000,
+
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x07) << 8) | ((0x60) & 0xFF)), 0xC01f01ff, 0x00000000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x07) << 8) | ((0x64) & 0xFF)), 0xC01f01ff, 0x00000000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x07) << 8) | ((0x68) & 0xFF)), 0xC01f01ff, 0x00000000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x07) << 8) | ((0x6C) & 0xFF)), 0xC01f01ff, 0x00000000,
+
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x07) << 8) | ((0x70) & 0xFF)), 0xC01f01ff, 0x00000000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x07) << 8) | ((0x74) & 0xFF)), 0xC01f01ff, 0x00000000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x07) << 8) | ((0x78) & 0xFF)), 0xC01f01ff, 0x00000000,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x07) << 8) | ((0x7C) & 0xFF)), 0xC01f01ff, 0x00000000,
+
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x07) << 8) | ((0x80) & 0xFF)), 0xffff8888, 0x00000000,
+
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x07) << 8) | ((0x88) & 0xFF)), 0xe8088008, 0x03623125,
+
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x07) << 8) | ((0x8c) & 0xFF)), 0xff8fe08e, 0x00000930,
+        /* The OR value for offset 0x90 is spelled out term by term; only (4 << 25), (1 << 19), (1 << 18) and (2 << 14) are non-zero. */
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x07) << 8) | ((0x90) & 0xFF)), 0xf0000000,
+        (4 << 25)|(0 << 24)|
+        (0 << 23)|(0 << 22)|(0 << 21)|(0 << 20)|
+        (1 << 19)|(1 << 18)|(0 << 17)|(0 << 16)|
+        (2 << 14)|(0 << 13)|(0 << 12)|
+        (0 << 11)|(0 << 10)|(0 << 9)|(0 << 8)|
+        (0 << 3) |(0 << 1) |(0 << 0),
+
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x07) << 8) | ((0x94) & 0xFF)), 0xc180f0f0, 0x0e2b0a05,
+
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x07) << 8) | ((0x98) & 0xFF)), 0xfc00ffff, 0x00000000,
+        /* The last three rows target function 3 (0:0x18.3). */
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((3) & 0x07) << 8) | ((0x58) & 0xFF)), 0xffe0e0e0, 0x00000000,
+
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((3) & 0x07) << 8) | ((0x5C) & 0xFF)), 0x0000003e, 0x00000000,
+
+
+
+
+
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((3) & 0x07) << 8) | ((0x60) & 0xFF)), 0xffffff00, 0x00000000,
+        };
+        int i;
+        int max;
+        print_debug("setting up CPU0 northbridge registers\r\n");
+        max = sizeof(register_values)/sizeof(register_values[0]);
+        for(i = 0; i < max; i += 3) {
+                device_t dev;
+                unsigned where;
+                unsigned long reg;
+                /* Same read-modify-write as setup_resource_map(): split the packed target
+                 * into device (upper bits) and offset (low 8 bits), keep the bits selected
+                 * by the AND mask, then OR in the new bits. */
+
+
+
+                dev = register_values[i] & ~0xff;
+                where = register_values[i] & 0xff;
+                reg = pci_read_config32(dev, where);
+                reg &= register_values[i+1];
+                reg |= register_values[i+2];
+                pci_write_config32(dev, where, reg);
+
+
+
+
+
+
+
+        }
+        print_debug("done.\r\n");
+}
+
+
+struct dimm_size {  /* Result of spd_get_dimm_size(): one size figure per DIMM side. */
+        unsigned long side1;  /* sum of exponents for side 1 (presumably log2 of its capacity -- TODO confirm units); 0 if unknown */
+        unsigned long side2;  /* same for side 2; stays 0 when the DIMM reports at most one side */
+};
+static struct dimm_size spd_get_dimm_size(unsigned device)
+{
+        /* Build the size figures for the DIMM whose SMBus address is `device`. */
+        struct dimm_size sz;
+        int value, low;
+        sz.side1 = 0;
+        sz.side2 = 0;
+        /* side1 is accumulated as a sum of exponents: the low nibbles of bytes 3 and 4,
+         * log2 of byte 17 and log2 of the 16-bit value formed from bytes 7 (high) and
+         * 6 (low).  Any failed SMBus read returns whatever has been accumulated so far.
+         * NOTE(review): the byte numbers look like JEDEC SPD fields (row bits, column
+         * bits, banks, module width) -- confirm against the SPD specification. */
+        value = smbus_read_byte(device, 3);
+        if (value < 0) return sz;
+        sz.side1 += value & 0xf;
+
+        value = smbus_read_byte(device, 4);
+        if (value < 0) return sz;
+        sz.side1 += value & 0xf;
+
+        value = smbus_read_byte(device, 17);
+        if (value < 0) return sz;
+        sz.side1 += log2(value & 0xff);
+
+        /* Bytes 7 and 6 are combined into one 16-bit quantity before taking log2. */
+        value = smbus_read_byte(device, 7);
+        if (value < 0) return sz;
+        value &= 0xff;
+        value <<= 8;
+
+        low = smbus_read_byte(device, 6);
+        if (low < 0) return sz;
+        value = value | (low & 0xff);
+        sz.side1 += log2(value);
+
+        /* Byte 5 <= 1 (including a failed read, which is negative): side2 stays 0. */
+        value = smbus_read_byte(device, 5);
+        if (value <= 1) return sz;
+
+        /* Otherwise side 2 starts out equal to side 1 ... */
+        sz.side2 = sz.side1;
+        /* ... and, if byte 3 has a non-zero high nibble, swaps the low-nibble counts of bytes 3 and 4 for the high-nibble ones. */
+        value = smbus_read_byte(device, 3);
+        if (value < 0) return sz;
+        if ((value & 0xf0) == 0) return sz;
+        sz.side2 -= (value & 0x0f);
+        sz.side2 += ((value >> 4) & 0x0f);
+
+        value = smbus_read_byte(device, 4);
+        if (value < 0) return sz;
+        sz.side2 -= (value & 0x0f);
+        sz.side2 += ((value >> 4) & 0x0f);
+        return sz;
+}
+
+static unsigned spd_to_dimm(unsigned device)  /* Map an SMBus device address to a zero-based DIMM index. */
+{
+        return (device - (0xa << 3));  /* 0xa << 3 (0x50) is the first address spd_set_ram_size() scans */
+}
+
+static void set_dimm_size(struct dimm_size sz, unsigned index)
+{
+        uint32_t base0, base1, map;
+        /* Program the two base registers and the 4-bit map field that belong to DIMM
+         * `index`, from the per-side size exponents in sz (see spd_get_dimm_size()). */
+        print_debug("set_dimm_size: (");
+        print_debug_hex32(sz.side1);
+        print_debug_char(',');
+        print_debug_hex32(sz.side2);
+        print_debug_char(',');
+        print_debug_hex32(index);
+        print_debug(")\r\n");
+        /* Sides of different size are not handled: only side 1 is kept. */
+        if (sz.side1 != sz.side2) {
+                sz.side2 = 0;
+        }
+        map = pci_read_config32(( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x7) << 8)), 0x80);
+        map &= ~(0xf << (index * 4));
+        /* The clear mask has to cover the same 4-bit field that is ORed in below, i.e.
+         * bits [index*4+3 : index*4].  The earlier shift of (index + 4) cleared a
+         * different nibble (for index 0: bits 4..7), which left stale bits in this
+         * DIMM's field and wiped part of a neighbouring one. */
+
+
+
+        base0 = base1 = 0;
+        /* A side is enabled only when its exponent is at least 25 + 3; bit 0 of the
+         * base value marks it as enabled (order_dimms() tests that bit). */
+        if (sz.side1 >= (25 + 3)) {
+                base0 = (1 << ((sz.side1 - (25 + 3)) + 21)) | 1;
+                map |= (sz.side1 - (25 + 3)) << (index *4);
+        }
+
+
+        if (sz.side2 >= (25 + 3)) {
+                base1 = (1 << ((sz.side2 - (25 + 3)) + 21)) | 1;
+        }
+
+        /* Base registers for DIMM index live at 0x40 + 8*index (side 1) and the next dword (side 2). */
+        pci_write_config32(( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x7) << 8)), 0x40 + (((index << 1)+0)<<2), base0);
+        pci_write_config32(( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x7) << 8)), 0x40 + (((index << 1)+1)<<2), base1);
+        pci_write_config32(( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x7) << 8)), 0x80, map);
+}
+
+static void spd_set_ram_size(void)  /* Size every scanned DIMM and program its registers. */
+{
+        unsigned device;
+        for(device = (0xa << 3);  /* first SMBus address scanned: 0x50 */
+                device <= ((0xa << 3) +1);  /* inclusive bound: two addresses, 0x50 and 0x51 */
+                device += 1)
+        {
+                struct dimm_size sz;
+                sz = spd_get_dimm_size(device);
+                set_dimm_size(sz, spd_to_dimm(device));  /* called even when both sizes are 0 */
+        }
+}
+
+static void set_top_mem(unsigned tom_k)
+{
+        /* Write tom_k * 1024 into MSR 0xC001001A (main() prints this MSR as "TOP_MEM"). */
+        if (!tom_k) {
+                die("No memory");  /* does not return */
+        }
+        /* tom_k << 10 as a 64-bit value, split into the two 32-bit halves of msr. */
+        msr_t msr;
+        msr.lo = (tom_k & 0x003fffff) << 10;
+        msr.hi = (tom_k & 0xffc00000) >> 22;
+        wrmsr(0xC001001A, msr);
+
+
+
+
+
+
+
+}
+
+static void order_dimms(void)
+{
+        unsigned long tom;
+        unsigned mask;  /* declared but never used */
+        unsigned index;
+        /* Give each enabled base register (0x40 + 4*index, index 0..7) an ascending base, largest first, then set the top of memory. */
+        /* tom does double duty: bit (index + 24) flags an already placed entry, the low bits hold the running total. */
+        tom = 0;
+        for(;;) {
+                /* One pass = pick the largest entry that has not been placed yet. */
+                unsigned canidate;
+                uint32_t csbase, csmask;
+                unsigned size;
+                csbase = 0;
+                canidate = 0;
+                for(index = 0; index < 8; index++) {
+                        uint32_t value;
+                        value = pci_read_config32(( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x7) << 8)), 0x40 + (index << 2));
+
+                        /* Skip entries whose bit 0 (enable) is clear. */
+                        if (!(value & 1)) {
+                                continue;
+                        }
+
+                        /* Skip entries that are not larger than the best one seen so far. */
+                        if (value <= csbase) {
+                                continue;
+                        }
+
+                        /* Skip entries already placed in an earlier pass. */
+                        if (tom & (1 << (index + 24))) {
+                                continue;
+                        }
+
+                        csbase = value;
+                        canidate = index;
+                }
+                /* Nothing left to place: done. */
+                if (csbase == 0) {
+                        break;
+                }
+
+                /* Mark the chosen entry as placed. */
+                tom |= (1 << (canidate + 24));
+
+                /* Its size is the value set_dimm_size() stored from bit 21 upwards. */
+                size = csbase >> 21;
+
+                /* New base: the running total shifted into the same position, with bit 0 (enable) set. */
+                csbase = (tom << 21) | 1;
+
+                /* Advance the running total past this entry. */
+                tom += size;
+
+                /* Mask register value: (size - 1) from bit 21 upwards, plus bits 0xfe00. */
+                csmask = ((size -1) << 21);
+                csmask |= 0xfe00;
+
+                /* Base goes to 0x40 + 4*canidate, the matching mask to 0x60 + 4*canidate. */
+                pci_write_config32(( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x7) << 8)), 0x40 + (canidate << 2), csbase);
+
+                pci_write_config32(( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x7) << 8)), 0x60 + (canidate << 2), csmask);
+
+        }
+        set_top_mem((tom & ~0xff000000) << 15);  /* strip the flag bits, then scale the total by 2^15 for set_top_mem() */
+}
+
+static void spd_set_dram_timing(void)
+{
+        /* Intentionally empty: no timing values are programmed here. */
+}
+
+static void spd_set_ecc_mode(void)
+{
+        unsigned long dcl;
+        dcl = pci_read_config32(( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x7) << 8)), 0x90);
+        /* Clear bit 17 of the register at offset 0x90 (presumably turns ECC off, going by the function name -- TODO confirm). */
+        dcl &= ~(1<<17);
+        pci_write_config32(( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x7) << 8)), 0x90, dcl);
+
+}
+static void sdram_set_spd_registers(void)  /* Run the DIMM-dependent setup steps in a fixed order. */
+{
+        spd_set_ram_size();  /* writes the per-DIMM base and map values ... */
+        spd_set_dram_timing();
+        spd_set_ecc_mode();
+        order_dimms();  /* ... which order_dimms() reads back, so it has to run after spd_set_ram_size() */
+}
+
+
+static void sdram_enable(void)
+{
+        unsigned long dcl;
+        /* Sequence on the register at offset 0x90 of 0:0x18.2: write it with bit 3 set, then write it again */
+        /* with bits 0..3 clear and bit 8 set, then poll until bit 8 reads back as 0 (or 300000 polls pass). */
+        dcl = pci_read_config32(( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x7) << 8)), 0x90);
+        print_debug("dcl: ");
+        print_debug_hex32(dcl);
+        print_debug("\r\n");
+        dcl |= (1<<3);
+        pci_write_config32(( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x7) << 8)), 0x90, dcl);
+        dcl &= ~(1<<3);
+        dcl &= ~(1<<0);
+        dcl &= ~(1<<1);
+        dcl &= ~(1<<2);
+        dcl |= (1<<8);  /* the bit the polling loop below waits on */
+        pci_write_config32(( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x7) << 8)), 0x90, dcl);
+        /* Poll; one '.' is printed every 1024 reads. */
+        print_debug("Initializing memory: ");
+        int loops = 0;
+        do {
+                dcl = pci_read_config32(( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x7) << 8)), 0x90);
+                loops += 1;
+                if ((loops & 1023) == 0) {
+                        print_debug(".");
+                }
+        } while(((dcl & (1<<8)) != 0) && (loops < 300000));
+        if (loops >= 300000) {  /* timeout is only reported; execution continues either way */
+                print_debug(" failed\r\n");
+        } else {
+                print_debug(" done\r\n");
+        }
+
+}
+
+static void sdram_first_normal_reference(void) {}  /* empty stub; called from sdram_initialize() */
+static void sdram_enable_refresh(void) {}  /* empty stub; called from sdram_initialize() */
+static void sdram_special_finishup(void) {}  /* empty stub; called from sdram_initialize() */
+
+
+static void setup_coherent_ht_domain(void)
+{
+        static const unsigned int register_values[] = {
+        /* Rows are (packed target, AND mask, OR value) triples; every row targets 0:0x18.0. */
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((0) & 0x07) << 8) | ((0x40) & 0xFF)), 0xfff0f0f0, 0x00010101,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((0) & 0x07) << 8) | ((0x44) & 0xFF)), 0xfff0f0f0, 0x00010101,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((0) & 0x07) << 8) | ((0x48) & 0xFF)), 0xfff0f0f0, 0x00010101,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((0) & 0x07) << 8) | ((0x4c) & 0xFF)), 0xfff0f0f0, 0x00010101,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((0) & 0x07) << 8) | ((0x50) & 0xFF)), 0xfff0f0f0, 0x00010101,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((0) & 0x07) << 8) | ((0x54) & 0xFF)), 0xfff0f0f0, 0x00010101,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((0) & 0x07) << 8) | ((0x58) & 0xFF)), 0xfff0f0f0, 0x00010101,
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((0) & 0x07) << 8) | ((0x5c) & 0xFF)), 0xfff0f0f0, 0x00010101,
+
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((0) & 0x07) << 8) | ((0x68) & 0xFF)), 0x00800000, 0x0f00840f,
+        /* Offset 0x6C: bits 4, 5 and 6 are set (cpu_init_detected() later tests bit 6 of this register). */
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((0) & 0x07) << 8) | ((0x6C) & 0xFF)), 0xffffff8c, 0x00000000 | (1 << 6) |(1 << 5)| (1 << 4),
+
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((0) & 0x07) << 8) | ((0x84) & 0xFF)), 0x00009c05, 0x11110020,
+
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((0) & 0x07) << 8) | ((0x88) & 0xFF)), 0xfffff0ff, 0x00000200,
+
+        ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((0) & 0x07) << 8) | ((0x94) & 0xFF)), 0xff000000, 0x00ff0000,
+
+
+
+
+
+        };
+        int i;
+        int max;
+        print_debug("setting up coherent ht domain....\r\n");
+        max = sizeof(register_values)/sizeof(register_values[0]);
+        for(i = 0; i < max; i += 3) {
+                device_t dev;
+                unsigned where;
+                unsigned long reg;
+                /* Same read-modify-write as setup_resource_map(): split the packed target
+                 * into device (upper bits) and offset (low 8 bits), keep the bits selected
+                 * by the AND mask, then OR in the new bits. */
+
+
+
+                dev = register_values[i] & ~0xff;
+                where = register_values[i] & 0xff;
+                reg = pci_read_config32(dev, where);
+                reg &= register_values[i+1];
+                reg |= register_values[i+2];
+                pci_write_config32(dev, where, reg);
+
+
+
+
+
+
+        }
+        print_debug("done.\r\n");
+}
+
+
+void sdram_no_memory(void)  /* Report "No memory!!" at error level, then halt forever. */
+{
+        print_err("No memory!!\r\n");
+        while(1) {
+                hlt();  /* called again every time it returns; this function never returns */
+        }
+}
+
+
+void sdram_initialize(void)
+{
+        print_debug("Ram1\r\n");
+        /* Top-level RAM bring-up; the "RamN" lines are progress markers on the debug console. */
+        sdram_set_registers();
+
+        print_debug("Ram2\r\n");
+        /* DIMM-dependent register values. */
+        sdram_set_spd_registers();
+
+        print_debug("Ram3\r\n");
+
+        /* Kick off the initialisation sequence and wait for it
+         * (sdram_enable() only reports a timeout, it does not stop). */
+
+        sdram_enable();
+
+        print_debug("Ram4\r\n");
+        sdram_first_normal_reference();
+        /* The three calls around here are empty stubs in this file. */
+        print_debug("Ram5\r\n");
+        sdram_enable_refresh();
+        sdram_special_finishup();
+
+        print_debug("Ram6\r\n");
+}
+
+
+static int boot_cpu(void)  /* Return 1 on the bootstrap processor, 0 otherwise; logs which one it is. */
+{
+        volatile unsigned long *local_apic;  /* declared but never used */
+        unsigned long apic_id;  /* declared but never used */
+        int bsp;
+        msr_t msr;
+        msr = rdmsr(0x1b);
+        bsp = !!(msr.lo & (1 << 8));  /* bit 8 of the low half of MSR 0x1b, normalised to 0 or 1 */
+        if (bsp) {
+                print_debug("Bootstrap processor\r\n");
+        } else {
+                print_debug("Application processor\r\n");
+        }
+
+        return bsp;
+}
+
+static int cpu_init_detected(void)
+{
+        unsigned long dcl;  /* declared but never used */
+        int cpu_init;
+        /* Returns non-zero (the raw bit value 0x40, not 1) when bit 6 of the register at 0:0x18.0 offset 0x6c is set. */
+        unsigned long htic;
+
+        htic = pci_read_config32(( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((0) & 0x7) << 8)), 0x6c);
+
+        cpu_init = (htic & (1<<6));
+        if (cpu_init) {
+                print_debug("CPU INIT Detected.\r\n");
+        }
+        return cpu_init;
+}
+
+
+static void print_debug_pci_dev(unsigned dev)  /* Print a packed device value as "PCI: BB:DD.FF" in hex, without a newline. */
+{
+        print_debug("PCI: ");
+        print_debug_hex8((dev >> 16) & 0xff);  /* bits 16..23 */
+        print_debug_char(':');
+        print_debug_hex8((dev >> 11) & 0x1f);  /* bits 11..15 */
+        print_debug_char('.');
+        print_debug_hex8((dev >> 8) & 7);  /* bits 8..10 */
+}
+
+static void print_pci_devices(void)  /* List every responding function from 0:0.0 to 0:0x1f.7 on the debug console. */
+{
+        device_t dev;
+        for(dev = ( (((0) & 0xFF) << 16) | (((0) & 0x1f) << 11) | (((0) & 0x7) << 8));  /* start at 0:0.0 */
+                dev <= ( (((0) & 0xFF) << 16) | (((0x1f) & 0x1f) << 11) | (((0x7) & 0x7) << 8));  /* inclusive end: 0:0x1f.7 */
+                dev += ( (((0) & 0xFF) << 16) | (((0) & 0x1f) << 11) | (((1) & 0x7) << 8))) {  /* step of 0x100: the 3-bit field carries into the 5-bit one */
+                uint32_t id;
+                id = pci_read_config32(dev, 0x00);
+                if (((id & 0xffff) == 0x0000) || ((id & 0xffff) == 0xffff) ||  /* skip when either 16-bit half of the ID ... */
+                        (((id >> 16) & 0xffff) == 0xffff) ||  /* ... is 0xffff ... */
+                        (((id >> 16) & 0xffff) == 0x0000)) {  /* ... or 0x0000 */
+                        continue;
+                }
+                print_debug_pci_dev(dev);
+                print_debug("\r\n");
+        }
+}
+
+
+static void dump_pci_device(unsigned dev)
+{
+        int i;
+        print_debug_pci_dev(dev);
+        print_debug("\r\n");
+        /* Hex dump of config bytes 0x00..0xff of dev, 16 per row, each row prefixed with its starting offset. */
+        for(i = 0; i <= 255; i++) {
+                unsigned char val;
+                if ((i & 0x0f) == 0) {  /* first byte of a row: print the offset label */
+                        print_debug_hex8(i);
+                        print_debug_char(':');
+                }
+                val = pci_read_config8(dev, i);
+                print_debug_char(' ');
+                print_debug_hex8(val);
+                if ((i & 0x0f) == 0x0f) {  /* last byte of a row: end the line */
+                        print_debug("\r\n");
+                }
+        }
+}
+
+static void dump_spd_registers(void)  /* Hex-dump bytes 0..255 of each scanned SMBus device to the debug console. */
+{
+        unsigned device;
+        device = (0xa << 3);  /* first address: 0x50 */
+        print_debug("\r\n");
+        while(device <= ((0xa << 3) +1)) {  /* inclusive bound: two devices, 0x50 and 0x51 */
+                int i;
+                print_debug("dimm: ");
+                print_debug_hex8(device);
+                for(i = 0; i < 256; i++) {
+                        int status;
+                        unsigned char byte;
+                        if ((i & 0xf) == 0) {  /* start of a 16-byte row: newline plus offset label */
+                                print_debug("\r\n");
+                                print_debug_hex8(i);
+                                print_debug(": ");
+                        }
+                        status = smbus_read_byte(device, i);
+                        if (status < 0) {  /* first failed read ends the dump of this device */
+                                print_debug("bad device\r\n");
+                                break;
+                        }
+                        byte = status & 0xff;
+                        print_debug_hex8(byte);
+                        print_debug_char(' ');
+                }
+                device += 1;
+                print_debug("\r\n");
+        }
+}
+
+
+static void main(void)
+{
+        uart_init();
+        console_init();
+        /* Entry point of this test program.  After console setup, everything else
+         * runs only when boot_cpu() reports the bootstrap processor and
+         * cpu_init_detected() returns 0; otherwise main() simply returns. */
+
+
+
+
+        if (boot_cpu() && !cpu_init_detected()) {
+                setup_default_resource_map();
+                setup_coherent_ht_domain();
+                enumerate_ht_chain();
+                print_pci_devices();
+                enable_smbus();
+                sdram_initialize();
+                /* Diagnostics: dump the SMBus devices and the config space of 0:0x18.2. */
+                dump_spd_registers();
+                dump_pci_device(( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x7) << 8)));
+
+                /* Read back the MSR written by set_top_mem() and test RAM from 0 up to its low 32 bits. */
+                msr_t msr;
+                msr = rdmsr(0xC001001A);
+                print_debug("TOP_MEM: ");
+                print_debug_hex32(msr.hi);
+                print_debug_hex32(msr.lo);
+                print_debug("\r\n");
+                ram_check(0x00000000, msr.lo);  /* msr.hi is printed above but not used for the range */
+        }
+}
diff --git a/util/romcc/tests/simple_test48.c b/util/romcc/tests/simple_test48.c
new file mode 100644 (file)
index 0000000..779ecfd
--- /dev/null
@@ -0,0 +1,13 @@
+
+static void main(void)  /* Test: a forward goto that jumps over a later declaration and the statements using it. */
+{
+       int i;
+       i = __builtin_inb(0x1234);
+       goto next;  /* unconditional: the three lines below are never executed */
+       int j;  /* declaration placed after the goto, inside the skipped region */
+       j = __builtin_inb(0xabcd);
+       __builtin_outb(j, 0xef90);
+ next:
+       __builtin_outb(i, 0x5678);  /* only i is live here */
+               
+}
diff --git a/util/romcc/tests/simple_test49.c b/util/romcc/tests/simple_test49.c
new file mode 100644 (file)
index 0000000..a5a12c1
--- /dev/null
@@ -0,0 +1,15 @@
+
+static void main(void)  /* Test: a conditionally entered infinite loop with a loop-local variable. */
+{
+       int i;
+       i = __builtin_inb(0x1234);
+       if (i == 23) {
+               for(;;) {  /* no exit: once entered, control never reaches the final outb */
+                       int j;
+                       j = __builtin_inb(0xabcd);
+                       __builtin_outb(j, 0xef90);
+               }
+       }
+       __builtin_outb(i, 0x5678);  /* reached only when i != 23 */
+               
+}
diff --git a/util/romcc/tests/simple_test50.c b/util/romcc/tests/simple_test50.c
new file mode 100644 (file)
index 0000000..51c7c50
--- /dev/null
@@ -0,0 +1,43 @@
+typedef __builtin_div_t div_t;
+typedef __builtin_ldiv_t ldiv_t;
+typedef __builtin_udiv_t udiv_t;
+typedef __builtin_uldiv_t uldiv_t;
+
+static div_t div(int numer, int denom)  /* Thin wrapper: returns __builtin_div(numer, denom) as a div_t. */
+{ 
+       return __builtin_div(numer, denom); 
+}
+static ldiv_t ldiv(long numer, long denom)  /* Thin wrapper: returns __builtin_ldiv(numer, denom) as an ldiv_t. */
+{ 
+       return __builtin_ldiv(numer, denom); 
+}
+static udiv_t udiv(unsigned numer, unsigned denom) /* Thin wrapper: returns __builtin_udiv(numer, denom) as a udiv_t. */
+{ 
+       return __builtin_udiv(numer, denom); 
+}
+static uldiv_t uldiv(unsigned long numer, unsigned long denom) /* Thin wrapper: returns __builtin_uldiv(numer, denom) as a uldiv_t. */
+{
+       return __builtin_uldiv(numer, denom);
+}
+
+static void main(void)  /* Test: signed and unsigned long division through the ldiv/uldiv wrappers, with struct results. */
+{
+       volatile long *sval = (volatile long *)0x1234;  /* fixed addresses; volatile keeps every load and store */
+       volatile unsigned long *uval = (volatile unsigned long *)0x5678;
+       long int a, b, c;  /* c is never used */
+       unsigned long e, f, g;  /* g is never used */
+       ldiv_t lresult;
+       uldiv_t ulresult;
+
+       a = sval[0];
+       b = sval[1];
+       lresult = ldiv(a,b);
+       sval[2] = lresult.quot;  /* both members of the returned struct are stored back */
+       sval[3] = lresult.rem;
+
+       e = uval[0];
+       f = uval[1];
+       ulresult = uldiv(e, f);
+       uval[2] = ulresult.quot;
+       uval[3] = ulresult.rem;
+}
diff --git a/util/romcc/tests/simple_test51.c b/util/romcc/tests/simple_test51.c
new file mode 100644 (file)
index 0000000..424ed7a
--- /dev/null
@@ -0,0 +1,11 @@
+static void main(void)  /* Test: unsigned long multiply followed by a divide whose result is never read. */
+{
+       unsigned long a,b,c, d;
+       volatile unsigned long *val = (volatile unsigned long *)0x1234;  /* fixed address; volatile keeps the accesses */
+       a = val[0];
+       b = val[1];
+       d = val[2];
+       c = a*b;
+       val[3] = c;  /* the product is the only value stored back */
+       a = c / d;  /* quotient is assigned but not used afterwards */
+}
diff --git a/util/romcc/tests/simple_test52.c b/util/romcc/tests/simple_test52.c
new file mode 100644 (file)
index 0000000..3349de2
--- /dev/null
@@ -0,0 +1,5 @@
+static void main(void)  /* Test: an if whose condition is a builtin call and whose body is empty. */
+{
+        if (__builtin_inb(0x1b)) {  /* the call is the only operation in the function; its result selects nothing */
+        }
+}
diff --git a/util/romcc/tests/simple_test53.c b/util/romcc/tests/simple_test53.c
new file mode 100644 (file)
index 0000000..a27f986
--- /dev/null
@@ -0,0 +1,10 @@
+static void main(void)  /* Test: a conditionally entered do/while(1) loop around __builtin_hlt(). */
+{
+       unsigned dev;
+       dev = __builtin_inl(0xcd);
+       if (dev == (0xffffffffU)) {  /* all-ones value enters the endless loop */
+               do {
+                       __builtin_hlt();
+               } while(1);  /* never exits */
+       }
+}
diff --git a/util/romcc/tests/simple_test54.c b/util/romcc/tests/simple_test54.c
new file mode 100644 (file)
index 0000000..ec3208f
--- /dev/null
@@ -0,0 +1,771 @@
+struct syscall_result {  /* Decoded result of a system call, as produced by syscall_return(). */
+       long val;  /* raw result on success, -1 on failure */
+       int errno;  /* 0 on success, otherwise the negated raw result */
+};
+
+static struct syscall_result syscall_return(long result)  /* Split a raw syscall return value into a (val, errno) pair. */
+{
+       struct syscall_result res;
+       if (((unsigned long)result) >= ((unsigned long)-125)) {  /* i.e. result is in -125..-1: treated as an error code */
+               res.errno = - result;
+               res.val = -1;
+       } else {
+               res.errno = 0;
+               res.val = result;
+       }
+       return res;
+}
+
+static struct syscall_result syscall0(unsigned long nr)  /* Issue "int $0x80" with nr under the "a" constraint and no arguments. */
+{
+       long res;
+       asm volatile(
+               "int $0x80"
+               : "=a" (res)
+               : "a" (nr));
+       return syscall_return(res);  /* decode the raw value into val/errno */
+}
+
+static struct syscall_result syscall1(unsigned long nr, unsigned long arg1)  /* "int $0x80" with one argument ("b"). */
+{
+       long res;
+       asm volatile(
+               "int $0x80"
+               : "=a" (res)
+               : "a" (nr), "b" (arg1));
+       return syscall_return(res);  /* decode the raw value into val/errno */
+       
+}
+
+static struct syscall_result syscall2(unsigned long nr, unsigned long arg1, unsigned long arg2)  /* "int $0x80" with two arguments ("b", "c"). */
+{
+       long res;
+       asm volatile(
+               "int $0x80"
+               : "=a" (res)
+               : "a" (nr), "b" (arg1), "c" (arg2));
+       return syscall_return(res);  /* decode the raw value into val/errno */
+       
+}
+
+
+static struct syscall_result syscall3(unsigned long nr, unsigned long arg1, unsigned long arg2,  /* "int $0x80" with three arguments ("b", "c", "d"). */
+       unsigned long arg3)
+{
+       long res;
+       asm volatile(
+               "int $0x80"
+               : "=a" (res)
+               : "a" (nr), "b" (arg1), "c" (arg2), "d" (arg3));
+       return syscall_return(res);  /* decode the raw value into val/errno */
+       
+}
+
+static struct syscall_result syscall4(unsigned long nr, unsigned long arg1, unsigned long arg2,  /* "int $0x80" with four arguments ("b", "c", "d", "S"). */
+       unsigned long arg3, unsigned long arg4)
+{
+       long res;
+       asm volatile(
+               "int $0x80"
+               : "=a" (res)
+               : "a" (nr), "b" (arg1), "c" (arg2), "d" (arg3), "S" (arg4));
+       return syscall_return(res);  /* decode the raw value into val/errno */
+       
+}
+
+static struct syscall_result syscall5(unsigned long nr, unsigned long arg1, unsigned long arg2,  /* "int $0x80" with five arguments ("b", "c", "d", "S", "D"). */
+       unsigned long arg3, unsigned long arg4, unsigned long arg5)
+{
+       long res;
+       asm volatile(
+               "int $0x80"
+               : "=a" (res)
+               : "a" (nr), "b" (arg1), "c" (arg2), "d" (arg3), 
+               "S" (arg4), "D" (arg5));
+       return syscall_return(res);  /* decode the raw value into val/errno */
+       
+}
+
+#define NR_exit                 1
+#define NR_fork                 2
+#define NR_read                 3
+#define NR_write                4
+#define NR_open                 5
+#define NR_close                6
+#define NR_waitpid              7
+#define NR_creat                8
+#define NR_link                 9
+#define NR_unlink              10
+#define NR_execve              11
+#define NR_chdir               12
+#define NR_time                13
+#define NR_mknod               14
+#define NR_chmod               15
+#define NR_lchown              16
+#define NR_break               17
+#define NR_oldstat             18
+#define NR_lseek               19
+#define NR_getpid              20
+#define NR_mount               21
+#define NR_umount              22
+#define NR_setuid              23
+#define NR_getuid              24
+#define NR_stime               25
+#define NR_ptrace              26
+#define NR_alarm               27
+#define NR_oldfstat            28
+#define NR_pause               29
+#define NR_utime               30
+#define NR_stty                31
+#define NR_gtty                32
+#define NR_access              33
+#define NR_nice                34
+#define NR_ftime               35
+#define NR_sync                36
+#define NR_kill                37
+#define NR_rename              38
+#define NR_mkdir               39
+#define NR_rmdir               40
+#define NR_dup                 41
+#define NR_pipe                42
+#define NR_times               43
+#define NR_prof                44
+#define NR_brk                 45
+#define NR_setgid              46
+#define NR_getgid              47
+#define NR_signal              48
+#define NR_geteuid             49
+#define NR_getegid             50
+#define NR_acct                51
+#define NR_umount2             52
+#define NR_lock                53
+#define NR_ioctl               54
+#define NR_fcntl               55
+#define NR_mpx                 56
+#define NR_setpgid             57
+#define NR_ulimit              58
+#define NR_oldolduname         59
+#define NR_umask               60
+#define NR_chroot              61
+#define NR_ustat               62
+#define NR_dup2                63
+#define NR_getppid             64
+#define NR_getpgrp             65
+#define NR_setsid              66
+#define NR_sigaction           67
+#define NR_sgetmask            68
+#define NR_ssetmask            69
+#define NR_setreuid            70
+#define NR_setregid            71
+#define NR_sigsuspend          72
+#define NR_sigpending          73
+#define NR_sethostname         74
+#define NR_setrlimit           75
+#define NR_getrlimit           76
+#define NR_getrusage           77
+#define NR_gettimeofday        78
+#define NR_settimeofday        79
+#define NR_getgroups           80
+#define NR_setgroups           81
+#define NR_select              82
+#define NR_symlink             83
+#define NR_oldlstat            84
+#define NR_readlink            85
+#define NR_uselib              86
+#define NR_swapon              87
+#define NR_reboot              88
+#define NR_readdir             89
+#define NR_mmap                90
+#define NR_munmap              91
+#define NR_truncate            92
+#define NR_ftruncate           93
+#define NR_fchmod              94
+#define NR_fchown              95
+#define NR_getpriority         96
+#define NR_setpriority         97
+#define NR_profil              98
+#define NR_statfs              99
+#define NR_fstatfs            100
+#define NR_ioperm             101
+#define NR_socketcall         102
+#define NR_syslog             103
+#define NR_setitimer          104
+#define NR_getitimer          105
+#define NR_stat               106
+#define NR_lstat              107
+#define NR_fstat              108
+#define NR_olduname           109
+#define NR_iopl               110
+#define NR_vhangup            111
+#define NR_idle               112
+#define NR_vm86old            113
+#define NR_wait4              114
+#define NR_swapoff            115
+#define NR_sysinfo            116
+#define NR_ipc                117
+#define NR_fsync              118
+#define NR_sigreturn          119
+#define NR_clone              120
+#define NR_setdomainname      121
+#define NR_uname              122
+#define NR_modify_ldt         123
+#define NR_adjtimex           124
+#define NR_mprotect           125
+#define NR_sigprocmask        126
+#define NR_create_module      127
+#define NR_init_module        128
+#define NR_delete_module      129
+#define NR_get_kernel_syms    130
+#define NR_quotactl           131
+#define NR_getpgid            132
+#define NR_fchdir             133
+#define NR_bdflush            134
+#define NR_sysfs              135
+#define NR_personality        136
+#define NR_afs_syscall        137 /* Syscall for Andrew File System */
+#define NR_setfsuid           138
+#define NR_setfsgid           139
+#define NR__llseek            140
+#define NR_getdents           141
+#define NR__newselect         142
+#define NR_flock              143
+#define NR_msync              144
+#define NR_readv              145
+#define NR_writev             146
+#define NR_getsid             147
+#define NR_fdatasync          148
+#define NR__sysctl            149
+#define NR_mlock              150
+#define NR_munlock            151
+#define NR_mlockall           152
+#define NR_munlockall         153
+#define NR_sched_setparam             154
+#define NR_sched_getparam             155
+#define NR_sched_setscheduler         156
+#define NR_sched_getscheduler         157
+#define NR_sched_yield                158
+#define NR_sched_get_priority_max     159
+#define NR_sched_get_priority_min     160
+#define NR_sched_rr_get_interval      161
+#define NR_nanosleep          162
+#define NR_mremap             163
+#define NR_setresuid          164
+#define NR_getresuid          165
+#define NR_vm86               166
+#define NR_query_module       167
+#define NR_poll               168
+#define NR_nfsservctl         169
+#define NR_setresgid          170
+#define NR_getresgid          171
+#define NR_prctl              172
+#define NR_rt_sigreturn       173
+#define NR_rt_sigaction       174
+#define NR_rt_sigprocmask     175
+#define NR_rt_sigpending      176
+#define NR_rt_sigtimedwait    177
+#define NR_rt_sigqueueinfo    178
+#define NR_rt_sigsuspend      179
+#define NR_pread              180
+#define NR_pwrite             181
+#define NR_chown              182
+#define NR_getcwd             183
+#define NR_capget             184
+#define NR_capset             185
+#define NR_sigaltstack        186
+#define NR_sendfile           187
+#define NR_getpmsg            188     /* some people actually want streams */
+#define NR_putpmsg            189     /* some people actually want streams */
+#define NR_vfork              190
+
+typedef long ssize_t;
+typedef unsigned long size_t;
+
+/* Standard file descriptors */
+#define STDIN_FILENO    0  /* Standard input */
+#define STDOUT_FILENO   1  /* Standard output */
+#define STDERR_FILENO   2  /* Standard error output */
+
+static ssize_t write(int fd, const void *buf, size_t count)  /* Invoke syscall NR_write with (fd, buf, count). */
+{
+       struct syscall_result res;
+       res = syscall3(NR_write, fd, (unsigned long)buf, count);
+       return res.val;  /* -1 when syscall_return() flagged an error; res.errno is discarded */
+}
+
+static void _exit(int status)  /* Invoke syscall NR_exit with status. */
+{
+       struct syscall_result res;
+       res = syscall1(NR_exit, status);  /* result is stored but never examined */
+}
+
+static const char *addr_of_char(unsigned char ch)  /* Return a pointer to a static byte whose value is ch. */
+{
+       static const char byte[] = {  /* identity table: entry i is initialized with the value i, for i = 0x00..0xff */
+               0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 
+               0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+               0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 
+               0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+               0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 
+               0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
+               0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 
+               0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
+               0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 
+               0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
+               0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 
+               0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
+               0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
+               0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
+               0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
+               0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
+               0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 
+               0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+               0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 
+               0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
+               0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 
+               0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
+               0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 
+               0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
+               0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 
+               0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
+               0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 
+               0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
+               0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 
+               0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
+               0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 
+               0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
+       };
+       return byte + ch;  /* gives write() a memory address holding the character */
+}
+
+static void console_tx_byte(unsigned char ch)  /* Write one byte to STDOUT_FILENO. */
+{
+       write(STDOUT_FILENO, addr_of_char(ch), 1);  /* return value of write() is ignored */
+}
+
+static void console_tx_nibble(unsigned nibble)  /* Print one hex digit; callers pass a value already masked to 0..15. */
+{
+       unsigned char digit;
+       digit = nibble + '0';
+       if (digit > '9') {
+               digit += 39;  /* 39 == 'a' - '0' - 10, so 10..15 map to 'a'..'f' */
+       }
+       console_tx_byte(digit);
+}
+
+static void console_tx_char(unsigned char byte)  /* Print one character; forwards directly to console_tx_byte(). */
+{
+       console_tx_byte(byte);
+}
+
+static void console_tx_hex8(unsigned char value)  /* Print value as two hex digits, high nibble first. */
+{
+       console_tx_nibble((value >> 4U) & 0x0fU);
+       console_tx_nibble(value & 0x0fU);
+}
+
+static void console_tx_hex16(unsigned short value)  /* Print value as four hex digits, most significant nibble first. */
+{
+       console_tx_nibble((value >> 12U) & 0x0FU);
+       console_tx_nibble((value >>  8U) & 0x0FU);
+       console_tx_nibble((value >>  4U) & 0x0FU);
+       console_tx_nibble(value & 0x0FU);
+}
+
+static void console_tx_hex32(unsigned int value)  /* Print value as eight hex digits, most significant nibble first. */
+{
+       console_tx_nibble((value >> 28U) & 0x0FU);  /* value must be a full 32-bit quantity: a 16-bit parameter would make the top four digits always 0 */
+       console_tx_nibble((value >> 24U) & 0x0FU);
+       console_tx_nibble((value >> 20U) & 0x0FU);
+       console_tx_nibble((value >> 16U) & 0x0FU);
+       console_tx_nibble((value >> 12U) & 0x0FU);
+       console_tx_nibble((value >>  8U) & 0x0FU);
+       console_tx_nibble((value >>  4U) & 0x0FU);
+       console_tx_nibble(value & 0x0FU);
+}
+
+static void console_tx_string(const char *str)  /* Print every character of str up to, but not including, the terminating NUL. */
+{
+       unsigned char ch;
+       while((ch = *str++) != '\0') {  /* fetch, advance, and stop at the terminator */
+               console_tx_byte(ch);
+       }
+}
+
+static void print_emerg_char(unsigned char byte) { console_tx_char(byte); }  /* print_emerg_* family: each forwards unchanged to the matching console_tx_* routine */
+static void print_emerg_hex8(unsigned char value) { console_tx_hex8(value); }  /* two hex digits */
+static void print_emerg_hex16(unsigned short value){ console_tx_hex16(value); }  /* four hex digits */
+static void print_emerg_hex32(unsigned int value) { console_tx_hex32(value); }  /* eight hex digits */
+static void print_emerg(const char *str) { console_tx_string(str); }  /* NUL-terminated string */
+
+static void print_debug_char(unsigned char byte) { console_tx_char(byte); }  /* print_debug_* family: each forwards unchanged to the matching console_tx_* routine */
+static void print_debug_hex8(unsigned char value) { console_tx_hex8(value); }  /* two hex digits */
+static void print_debug_hex16(unsigned short value){ console_tx_hex16(value); }  /* four hex digits */
+static void print_debug_hex32(unsigned int value) { console_tx_hex32(value); }  /* eight hex digits */
+static void print_debug(const char *str) { console_tx_string(str); }  /* NUL-terminated string */
+
+
+int log2(int value)  /* Index of the highest set bit of value, or -1 when value is zero. */
+{
+       /* __builtin_bsr is exactly equivalent to the x86 machine
+        * instruction, with the exception that it returns -1
+        * when the value presented to it is zero.
+        * Otherwise __builtin_bsr returns the zero-based index of
+        * the highest bit set.
+        */
+       return __builtin_bsr(value);
+}
+
+
+static void die(const char *str)  /* Print str through print_emerg() and then spin forever; never returns. */
+{
+       print_emerg(str);
+       do {
+               asm(" ");  /* empty asm statement as the loop body */
+       } while(1);
+
+}
+
+static int smbus_read_byte(unsigned device, unsigned address)  /* Stand-in for an SMBus read: returns a byte from a canned table. */
+{
+       static const unsigned char dimm[] = {  /* two identical 256-byte images, one per device index 0 and 1 */
+0x80, 0x08, 0x07, 0x0d, 0x0a, 0x02, 0x48, 0x00, 0x04, 0x60, 0x70, 0x02, 0x82, 0x08, 0x08, 0x01,
+0x0e, 0x04, 0x0c, 0x01, 0x02, 0x20, 0x00, 0x75, 0x70, 0x00, 0x00, 0x48, 0x30, 0x48, 0x2a, 0x40,
+0x80, 0x80, 0x45, 0x45, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x33,
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+
+0x80, 0x08, 0x07, 0x0d, 0x0a, 0x02, 0x48, 0x00, 0x04, 0x60, 0x70, 0x02, 0x82, 0x08, 0x08, 0x01,
+0x0e, 0x04, 0x0c, 0x01, 0x02, 0x20, 0x00, 0x75, 0x70, 0x00, 0x00, 0x48, 0x30, 0x48, 0x2a, 0x40,
+0x80, 0x80, 0x45, 0x45, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x33,
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+       };
+       return dimm[(device << 8) + address];  /* no bounds check; the element is an unsigned char, so the result is always 0..255 and never negative */
+}
+
+#define SMBUS_MEM_DEVICE_START 0x00
+#define SMBUS_MEM_DEVICE_END   0x01
+#define SMBUS_MEM_DEVICE_INC   1
+
+/* Function 2 */
+#define DRAM_CONFIG_HIGH   0x94
+#define  DCH_MEMCLK_SHIFT  20
+#define  DCH_MEMCLK_MASK   7
+#define  DCH_MEMCLK_100MHZ 0
+#define  DCH_MEMCLK_133MHZ 2
+#define  DCH_MEMCLK_166MHZ 5
+#define  DCH_MEMCLK_200MHZ 7
+
+/* Function 3: Northbridge Capabilities register and its bit fields */
+#define NORTHBRIDGE_CAP    0xE8
+#define  NBCAP_128Bit         0x0001
+#define  NBCAP_MP             0x0002
+#define  NBCAP_BIG_MP         0x0004
+#define  NBCAP_ECC            0x0008  /* bit 3; must not share a value with NBCAP_BIG_MP (bit 2) */
+#define  NBCAP_CHIPKILL_ECC   0x0010
+#define  NBCAP_MEMCLK_SHIFT   5  /* two-bit memory clock field at bits 6:5 */
+#define  NBCAP_MEMCLK_MASK    3
+#define  NBCAP_MEMCLK_100MHZ  3  /* field encoding: a larger value means a slower clock */
+#define  NBCAP_MEMCLK_133MHZ  2
+#define  NBCAP_MEMCLK_166MHZ  1
+#define  NBCAP_MEMCLK_200MHZ  0
+#define  NBCAP_MEMCTRL        0x0100
+
+typedef unsigned char uint8_t;
+typedef unsigned int uint32_t;
+
+static unsigned spd_to_dimm(unsigned device)  /* Convert an SMBus device number into a zero-based dimm index. */
+{
+       return (device - SMBUS_MEM_DEVICE_START);
+}
+
+static void disable_dimm(unsigned index)  /* Report that dimm `index` is being disabled; in this build it only prints. */
+{
+       print_debug("disabling dimm"); 
+       print_debug_hex8(index); 
+       print_debug("\r\n");
+#if 0
+       pci_write_config32(PCI_DEV(0, 0x18, 2), DRAM_CSBASE + (((index << 1)+0)<<2), 0);  /* compiled out: the two register writes are not performed */
+       pci_write_config32(PCI_DEV(0, 0x18, 2), DRAM_CSBASE + (((index << 1)+1)<<2), 0);
+#endif
+}
+
+
+struct mem_param {  /* One row of the memory speed table used by get_mem_param(). */
+       uint8_t cycle_time;  /* same encoding as the values in the table, e.g. 0x75 commented as 7.5ns; 0 marks the end of the table */
+       uint32_t dch_memclk;  /* DCH_MEMCLK_* code already shifted by DCH_MEMCLK_SHIFT */
+};
+
+static const struct mem_param *get_mem_param(unsigned min_cycle_time)  /* Look up the speed-table row for min_cycle_time; dies if none is selected. */
+{
+       static const struct mem_param speed[] = {  /* ordered from the largest cycle_time to the smallest, then a zero sentinel */
+               {
+                       .cycle_time = 0xa0,
+                       .dch_memclk = DCH_MEMCLK_100MHZ << DCH_MEMCLK_SHIFT,
+               },
+               {
+                       .cycle_time = 0x75,
+                       .dch_memclk = DCH_MEMCLK_133MHZ << DCH_MEMCLK_SHIFT,
+               },
+               {
+                       .cycle_time = 0x60,
+                       .dch_memclk = DCH_MEMCLK_166MHZ << DCH_MEMCLK_SHIFT,
+               },
+               {
+                       .cycle_time = 0x50,
+                       .dch_memclk = DCH_MEMCLK_200MHZ << DCH_MEMCLK_SHIFT,
+               },
+               {
+                       .cycle_time = 0x00,  /* sentinel: terminates the loop below; dch_memclk is left zero */
+               },
+       };
+       const struct mem_param *param;
+       for(param = &speed[0]; param->cycle_time ; param++) {
+               if (min_cycle_time > (param+1)->cycle_time) {  /* stop at the first row whose successor is faster than requested */
+                       break;
+               }
+       }
+       if (!param->cycle_time) {  /* the walk reached the sentinel without breaking (happens when min_cycle_time is 0) */
+               die("min_cycle_time to low");
+       }
+       return param;
+}
+
+#if 1
+static void debug(int c)  /* Trace helper: print the single character c followed by CR LF. */
+{
+       print_debug_char(c);
+       print_debug_char('\r');
+       print_debug_char('\n');
+}
+#endif
+static const struct mem_param *spd_set_memclk(void)  /* Pick a common cycle time and CAS latency for all dimms and return the matching speed-table row. */
+{
+       /* Compute the minimum cycle time for these dimms */
+       const struct mem_param *param;
+       unsigned min_cycle_time, min_latency;
+       unsigned device;
+       uint32_t value;
+
+       static const int latency_indicies[] = { 26, 23, 9 };  /* SPD byte offsets read for steps 0, 1 and 2 of the latency walk */
+       static const unsigned char min_cycle_times[] = {  /* indexed by the NBCAP_MEMCLK_* field value */
+               [NBCAP_MEMCLK_200MHZ] = 0x50, /* 5ns */
+               [NBCAP_MEMCLK_166MHZ] = 0x60, /* 6ns */
+               [NBCAP_MEMCLK_133MHZ] = 0x75, /* 7.5ns */
+               [NBCAP_MEMCLK_100MHZ] = 0xa0, /* 10ns */
+       };
+
+
+#if 0
+       value = pci_read_config32(PCI_DEV(0, 0x18, 3), NORTHBRIDGE_CAP);
+#else
+       value = 0x50;  /* canned register value: (0x50 >> 5) & 3 == 2, i.e. NBCAP_MEMCLK_133MHZ */
+#endif
+       min_cycle_time = min_cycle_times[(value >> NBCAP_MEMCLK_SHIFT) & NBCAP_MEMCLK_MASK];
+       min_latency = 2;
+
+#if 1
+       print_debug("min_cycle_time: "); 
+       print_debug_hex8(min_cycle_time); 
+       print_debug(" min_latency: ");
+       print_debug_hex8(min_latency);
+       print_debug("\r\n");
+#endif
+
+       /* Compute the least latency with the fastest clock supported
+        * by both the memory controller and the dimms.
+        */
+       for(device = SMBUS_MEM_DEVICE_START;
+               device <= SMBUS_MEM_DEVICE_END;
+               device += SMBUS_MEM_DEVICE_INC)
+       {
+               int new_cycle_time, new_latency;
+               int index;
+               int latencies;
+               int latency;
+
+               debug('A');  /* the single-letter debug() calls trace the path taken through this loop */
+               /* First find the supported CAS latencies
+                * Byte 18 for DDR SDRAM is interpreted:
+                * bit 0 == CAS Latency = 1.0
+                * bit 1 == CAS Latency = 1.5
+                * bit 2 == CAS Latency = 2.0
+                * bit 3 == CAS Latency = 2.5
+                * bit 4 == CAS Latency = 3.0
+                * bit 5 == CAS Latency = 3.5
+                * bit 6 == TBD
+                * bit 7 == TBD
+                */
+               new_cycle_time = 0xa0;  /* start from the slowest table value */
+               new_latency = 5;  /* 5 means "nothing found yet"; anything above 4 is rejected after the walk */
+
+               latencies = smbus_read_byte(device, 18);
+               if (latencies <= 0) continue;  /* skip this device; no supported latency bits */
+
+               debug('B');
+               /* Compute the lowest cas latency supported */
+               latency = log2(latencies) -2;  /* starts two bit positions below the highest set bit */
+
+               /* Loop through and find a fast clock with a low latency */
+               for(index = 0; index < 3; index++, latency++) {
+                       int value;  /* shadows the function-level uint32_t value */
+                       debug('C');
+                       if ((latency < 2) || (latency > 4) ||
+                               (!(latencies & (1 << latency)))) {  /* range test comes first, so the shift count is always 2..4 here */
+                               continue;
+                       }
+                       debug('D');
+                       value = smbus_read_byte(device, latency_indicies[index]);
+                       if (value < 0) continue;  /* NOTE(review): the canned smbus_read_byte() in this file never returns a negative value */
+
+                       debug('E');
+                       /* Only increase the latency if we decrease the clock */
+                       if ((value >= min_cycle_time) && (value < new_cycle_time)) {
+                               new_cycle_time = value;
+                               new_latency = latency;
+#if 1
+                               print_debug("device: ");
+                               print_debug_hex8(device);
+                               print_debug(" new_cycle_time: "); 
+                               print_debug_hex8(new_cycle_time); 
+                               print_debug(" new_latency: ");
+                               print_debug_hex8(new_latency);
+                               print_debug("\r\n");
+#endif
+                       }
+                       debug('G');
+               }
+               debug('H');
+#if 1
+               print_debug("device: ");
+               print_debug_hex8(device);
+               print_debug(" new_cycle_time: "); 
+               print_debug_hex8(new_cycle_time); 
+               print_debug(" new_latency: ");
+               print_debug_hex8(new_latency);
+               print_debug("\r\n");
+#endif
+               if (new_latency > 4){  /* still the initial 5: no usable latency was found for this device */
+                       continue;
+               }
+               debug('I');
+               /* Does min_cycle_time need to be increased? */
+               if (new_cycle_time > min_cycle_time) {
+                       min_cycle_time = new_cycle_time;
+               }
+               /* Does min_latency need to be increased? */
+               if (new_latency > min_latency) {
+                       min_latency = new_latency;
+               }
+#if 1
+               print_debug("device: ");
+               print_debug_hex8(device);
+               print_debug(" min_cycle_time: "); 
+               print_debug_hex8(min_cycle_time); 
+               print_debug(" min_latency: ");
+               print_debug_hex8(min_latency);
+               print_debug("\r\n");
+#endif
+       }
+       /* Make a second pass through the dimms and disable
+        * any that cannot support the selected memclk and cas latency.
+        */
+       for(device = SMBUS_MEM_DEVICE_START;
+               device <= SMBUS_MEM_DEVICE_END;
+               device += SMBUS_MEM_DEVICE_INC)
+       {
+               int latencies;
+               int latency;
+               int index;
+               int value;  /* shadows the function-level uint32_t value */
+               int dimm;  /* never used */
+               latencies = smbus_read_byte(device, 18);
+               if (latencies <= 0) {
+                       goto dimm_err;
+               }
+
+               /* Compute the lowest cas latency supported */
+               latency = log2(latencies) -2;
+
+               /* Walk through searching for the selected latency */
+               for(index = 0; index < 3; index++, latency++) {
+                       if (!(latencies & (1 << latency))) {  /* NOTE(review): unlike the first pass there is no range guard, so latency can be negative here when latencies < 4 */
+                               continue;
+                       }
+                       if (latency == min_latency)
+                               break;
+               }
+               /* If I can't find the latency or my index is bad error */
+               if ((latency != min_latency) || (index >= 3)) {
+                       goto dimm_err;
+               }
+               
+               /* Read the min_cycle_time for this latency */
+               value = smbus_read_byte(device, latency_indicies[index]);
+               
+               /* All is good if the selected clock speed 
+                * is what I need or slower.
+                */
+               if (value <= min_cycle_time) {
+                       continue;  /* dimm is acceptable: skip the disable call below */
+               }
+               /* Otherwise I have an error, disable the dimm */
+       dimm_err:
+               disable_dimm(spd_to_dimm(device));
+       }
+#if 1
+       print_debug("min_cycle_time: "); 
+       print_debug_hex8(min_cycle_time); 
+       print_debug(" min_latency: ");
+       print_debug_hex8(min_latency);
+       print_debug("\r\n");
+#endif
+       /* Now that I know the minimum cycle time lookup the memory parameters */
+       param = get_mem_param(min_cycle_time);
+
+#if 0
+       /* Update DRAM Config High with our selected memory speed */
+       value = pci_read_config32(PCI_DEV(0, 0x18, 2), DRAM_CONFIG_HIGH);
+       value &= ~(DCH_MEMCLK_MASK << DCH_MEMCLK_SHIFT);
+       value |= param->dch_memclk;
+       pci_write_config32(PCI_DEV(0, 0x18, 2), DRAM_CONFIG_HIGH, value);
+
+       static const unsigned latencies[] = { 1, 5, 2 };
+       /* Update DRAM Timing Low with our selected cas latency */
+       value = pci_read_config32(PCI_DEV(0, 0x18, 2), DRAM_CONFIG_LOW);
+       value &= ~7;
+       value |= latencies[min_latency - 2];
+       pci_write_config32(PCI_DEV(0, 0x18, 2), DRAM_CONFIG_LOW, value);
+#endif
+       
+       return param;
+}
+
+static void main(void)  /* Test entry point: run spd_set_memclk() once and exit with status 0. */
+{
+       const struct mem_param *param;
+       param = spd_set_memclk();  /* the returned row is stored but not used */
+       _exit(0);
+}