Move C labels to start-of-line
[coreboot.git] / src / northbridge / amd / amdk8 / raminit.c
index e34c93d5c72a46455c4a6476e037a907cfa656b3..9cb7c60004b7d1eddefab539af5c68b695c8ec5b 100644 (file)
@@ -4,69 +4,53 @@
        2005.02 yhlu add E0 memory hole support
 */
 
-#include <cpu/x86/mem.h>
 #include <cpu/x86/cache.h>
 #include <cpu/x86/mtrr.h>
+#include <stdlib.h>
+#include <reset.h>
 #include "raminit.h"
 #include "amdk8.h"
-
-#if (CONFIG_LB_MEM_TOPK & (CONFIG_LB_MEM_TOPK -1)) != 0
-# error "CONFIG_LB_MEM_TOPK must be a power of 2"
+#if CONFIG_HAVE_OPTION_TABLE
+#include "option_table.h"
 #endif
 
-#ifndef K8_4RANK_DIMM_SUPPORT
-#define K8_4RANK_DIMM_SUPPORT 0
+#if (CONFIG_RAMTOP & (CONFIG_RAMTOP -1)) != 0
+# error "CONFIG_RAMTOP must be a power of 2"
 #endif
 
-#if 1
-static void setup_resource_map(const unsigned int *register_values, int max)
+void setup_resource_map(const unsigned int *register_values, int max)
 {
        int i;
-//     print_debug("setting up resource map....");
-#if 0
-       print_debug("\r\n");
-#endif
-       for(i = 0; i < max; i += 3) {
+//     printk(BIOS_DEBUG, "setting up resource map....");
+       for (i = 0; i < max; i += 3) {
                device_t dev;
                unsigned where;
                unsigned long reg;
-#if 0
-       #if CONFIG_USE_INIT
-               prink_debug("%08x <- %08x\r\n", register_values[i], register_values[i+2]);
-       #else
-               print_debug_hex32(register_values[i]);
-               print_debug(" <-");
-               print_debug_hex32(register_values[i+2]);
-               print_debug("\r\n");
-       #endif
-#endif
-               dev = register_values[i] & ~0xff;
-               where = register_values[i] & 0xff;
+               dev = register_values[i] & ~0xfff;
+               where = register_values[i] & 0xfff;
                reg = pci_read_config32(dev, where);
                reg &= register_values[i+1];
                reg |= register_values[i+2];
                pci_write_config32(dev, where, reg);
-#if 0
-               reg = pci_read_config32(register_values[i]);
-               reg &= register_values[i+1];
-               reg |= register_values[i+2] & ~register_values[i+1];
-               pci_write_config32(register_values[i], reg);
-#endif
        }
-//     print_debug("done.\r\n");
+//     printk(BIOS_DEBUG, "done.\n");
 }
-#endif
 
 static int controller_present(const struct mem_controller *ctrl)
 {
-        return pci_read_config32(ctrl->f0, 0) == 0x11001022;
+       return pci_read_config32(ctrl->f0, 0) == 0x11001022;
 }
 
+#if CONFIG_RAMINIT_SYSINFO
+static void sdram_set_registers(const struct mem_controller *ctrl, struct sys_info *sysinfo)
+#else
 static void sdram_set_registers(const struct mem_controller *ctrl)
+#endif
 {
        static const unsigned int register_values[] = {
 
-       /* Careful set limit registers before base registers which contain the enables */
+       /* Careful set limit registers before base registers which
+          contain the enables */
        /* DRAM Limit i Registers
         * F1:0x44 i = 0
         * F1:0x4C i = 1
@@ -186,7 +170,7 @@ static void sdram_set_registers(const struct mem_controller *ctrl)
         * [29:21] Address Mask (33-25)
         *         The bits with an address mask of 1 are excluded from address comparison
         * [31:30] Reserved
-        * 
+        *
         */
        PCI_ADDR(0, 0x18, 2, 0x60), 0xC01f01ff, 0x00000000,
        PCI_ADDR(0, 0x18, 2, 0x64), 0xC01f01ff, 0x00000000,
@@ -199,7 +183,7 @@ static void sdram_set_registers(const struct mem_controller *ctrl)
        /* DRAM Bank Address Mapping Register
         * F2:0x80
         * Specify the memory module size
-        * [ 2: 0] CS1/0 
+        * [ 2: 0] CS1/0
         * [ 6: 4] CS3/2
         * [10: 8] CS5/4
         * [14:12] CS7/6
@@ -210,7 +194,7 @@ static void sdram_set_registers(const struct mem_controller *ctrl)
         *         100 = 512Mbyte (Rows = 13 & Col = 11)|(Rows = 14 & Col = 10)
         *         101 = 1Gbyte   (Rows = 14 & Col = 11)|(Rows = 13 & Col = 12)
         *         110 = 2Gbyte   (Rows = 14 & Col = 12)
-        *         111 = reserved 
+        *         111 = reserved
         * [ 3: 3] Reserved
         * [ 7: 7] Reserved
         * [11:11] Reserved
@@ -326,8 +310,8 @@ static void sdram_set_registers(const struct mem_controller *ctrl)
         *         0 = Disabled
         *         1 = Enabled
         * [ 3: 3] Disable DQS Hystersis  (FIXME handle this one carefully)
-        *         0 = Enable DQS input filter 
-        *         1 = Disable DQS input filtering 
+        *         0 = Enable DQS input filter
+        *         1 = Disable DQS input filtering
         * [ 7: 4] Reserved
         * [ 8: 8] DRAM_Init
         *         0 = Initialization done or not yet started.
@@ -384,12 +368,12 @@ static void sdram_set_registers(const struct mem_controller *ctrl)
         *         111 = Oldest entry in DCQ can be bypassed 7 times
         * [31:28] Reserved
         */
-       PCI_ADDR(0, 0x18, 2, 0x90), 0xf0000000, 
-       (4 << 25)|(0 << 24)| 
-       (0 << 23)|(0 << 22)|(0 << 21)|(0 << 20)| 
-       (1 << 19)|(0 << 18)|(1 << 17)|(0 << 16)| 
-       (2 << 14)|(0 << 13)|(0 << 12)| 
-       (0 << 11)|(0 << 10)|(0 << 9)|(0 << 8)| 
+       PCI_ADDR(0, 0x18, 2, 0x90), 0xf0000000,
+       (4 << 25)|(0 << 24)|
+       (0 << 23)|(0 << 22)|(0 << 21)|(0 << 20)|
+       (1 << 19)|(0 << 18)|(1 << 17)|(0 << 16)|
+       (2 << 14)|(0 << 13)|(0 << 12)|
+       (0 << 11)|(0 << 10)|(0 << 9)|(0 << 8)|
        (0 << 3) |(0 << 1) |(0 << 0),
        /* DRAM Config High Register
         * F2:0x94
@@ -474,6 +458,14 @@ static void sdram_set_registers(const struct mem_controller *ctrl)
         * [31:26] Reserved
         */
        PCI_ADDR(0, 0x18, 2, 0x98), 0xfc00ffff, 0x00000000,
+       /* MCA NB Status Low reg */
+       PCI_ADDR(0, 0x18, 3, 0x48), 0x00f00000, 0x00000000,
+       /* MCA NB Status high reg */
+       PCI_ADDR(0, 0x18, 3, 0x4c), 0x01801e8c, 0x00000000,
+       /* MCA NB address Low reg */
+       PCI_ADDR(0, 0x18, 3, 0x50), 0x00000007, 0x00000000,
+       /* MCA NB address high reg */
+       PCI_ADDR(0, 0x18, 3, 0x54), 0xffffff00, 0x00000000,
        /* DRAM Scrub Control Register
         * F3:0x58
         * [ 4: 0] DRAM Scrube Rate
@@ -527,48 +519,26 @@ static void sdram_set_registers(const struct mem_controller *ctrl)
        int i;
        int max;
 
-#if 1
-        if (!controller_present(ctrl)) {
-//                print_debug("No memory controller present\r\n");
-                return;
-        }
-#endif
-       print_spew("setting up CPU");
-       print_spew_hex8(ctrl->node_id);
-       print_spew(" northbridge registers\r\n");
-       max = sizeof(register_values)/sizeof(register_values[0]);
-       for(i = 0; i < max; i += 3) {
+       if (!controller_present(ctrl)) {
+//             printk(BIOS_DEBUG, "No memory controller present\n");
+               return;
+       }
+       printk(BIOS_SPEW, "setting up CPU%02x northbridge registers\n", ctrl->node_id);
+       max = ARRAY_SIZE(register_values);
+       for (i = 0; i < max; i += 3) {
                device_t dev;
                unsigned where;
                unsigned long reg;
-#if 0
-        #if CONFIG_USE_INIT
-                prink_debug("%08x <- %08x\r\n", register_values[i], register_values[i+2]);
-        #else
-               print_spew_hex32(register_values[i]);
-               print_spew(" <-");
-               print_spew_hex32(register_values[i+2]);
-               print_spew("\r\n");
-       #endif
-#endif
-               dev = (register_values[i] & ~0xff) - PCI_DEV(0, 0x18, 0) + ctrl->f0;
-               where = register_values[i] & 0xff;
+               dev = (register_values[i] & ~0xfff) - PCI_DEV(0, 0x18, 0) + ctrl->f0;
+               where = register_values[i] & 0xfff;
                reg = pci_read_config32(dev, where);
                reg &= register_values[i+1];
                reg |= register_values[i+2];
                pci_write_config32(dev, where, reg);
-#if 0
-
-               reg = pci_read_config32(register_values[i]);
-               reg &= register_values[i+1];
-               reg |= register_values[i+2];
-               pci_write_config32(register_values[i], reg);
-#endif
        }
-       print_spew("done.\r\n");
+       printk(BIOS_SPEW, "done.\n");
 }
 
-
 static void hw_enable_ecc(const struct mem_controller *ctrl)
 {
        uint32_t dcl, nbcap;
@@ -578,11 +548,10 @@ static void hw_enable_ecc(const struct mem_controller *ctrl)
        if (nbcap & NBCAP_ECC) {
                dcl |= DCL_DimmEccEn;
        }
-       if (read_option(CMOS_VSTART_ECC_memory, CMOS_VLEN_ECC_memory, 1) == 0) {
+       if (read_option(ECC_memory, 1) == 0) {
                dcl &= ~DCL_DimmEccEn;
        }
        pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dcl);
-       
 }
 
 static int is_dual_channel(const struct mem_controller *ctrl)
@@ -594,11 +563,13 @@ static int is_dual_channel(const struct mem_controller *ctrl)
 
 static int is_opteron(const struct mem_controller *ctrl)
 {
-       /* Test to see if I am an Opteron.  
-        * FIXME Testing dual channel capability is correct for now
-        * but a beter test is probably required.
+       /* Test to see if I am an Opteron.  Socket 939 based Athlon64
+        * have dual channel capability, too, so we need a better test
+        * for Opterons.
+        * However, all code uses is_opteron() to find out whether to
+        * use dual channel, so if we really check for opteron here, we
+        * need to fix up all code using this function, too.
         */
-#warning "FIXME implement a better test for opterons"
        uint32_t nbcap;
        nbcap = pci_read_config32(ctrl->f3, NORTHBRIDGE_CAP);
        return !!(nbcap & NBCAP_128Bit);
@@ -612,7 +583,7 @@ static int is_registered(const struct mem_controller *ctrl)
         */
        uint32_t dcl;
        dcl = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
-       return !(dcl & DCL_UnBufDimm);
+       return !(dcl & DCL_UnBuffDimm);
 }
 
 struct dimm_size {
@@ -620,7 +591,7 @@ struct dimm_size {
        unsigned long side2;
        unsigned long rows;
        unsigned long col;
-#if K8_4RANK_DIMM_SUPPORT == 1
+#if CONFIG_QRANK_DIMM_SUPPORT
        unsigned long rank;
 #endif
 };
@@ -634,7 +605,7 @@ static struct dimm_size spd_get_dimm_size(unsigned device)
        sz.side2 = 0;
        sz.rows = 0;
        sz.col = 0;
-#if K8_4RANK_DIMM_SUPPORT == 1
+#if CONFIG_QRANK_DIMM_SUPPORT
        sz.rank = 0;
 #endif
 
@@ -664,7 +635,7 @@ static struct dimm_size spd_get_dimm_size(unsigned device)
        if (value < 0) goto hw_err;
        value &= 0xff;
        value <<= 8;
-       
+
        low = spd_read_byte(device, 6); /* (low byte) */
        if (low < 0) goto hw_err;
        value = value | (low & 0xff);
@@ -678,7 +649,7 @@ static struct dimm_size spd_get_dimm_size(unsigned device)
        if ((value != 2) && (value != 4 )) {
                goto val_err;
        }
-#if K8_4RANK_DIMM_SUPPORT == 1
+#if CONFIG_QRANK_DIMM_SUPPORT
        sz.rank = value;
 #endif
 
@@ -700,72 +671,43 @@ static struct dimm_size spd_get_dimm_size(unsigned device)
        goto out;
 
  val_err:
-       die("Bad SPD value\r\n");
+       die("Bad SPD value\n");
        /* If an hw_error occurs report that I have no memory */
 hw_err:
        sz.side1 = 0;
        sz.side2 = 0;
        sz.rows = 0;
        sz.col = 0;
-#if K8_4RANK_DIMM_SUPPORT == 1
+#if CONFIG_QRANK_DIMM_SUPPORT
        sz.rank = 0;
 #endif
- out:
+out:
        return sz;
 }
 
-static const unsigned cs_map_aa[15] = {
-       /* (row=12, col=8)(14, 12) ---> (0, 0) (2, 4) */
-       0, 1, 3, 6, 0,
-       0, 2, 4, 7, 9,
-       0, 0, 5, 8,10,
-};
 
 static void set_dimm_size(const struct mem_controller *ctrl, struct dimm_size sz, unsigned index)
 {
-       uint32_t base0, base1, map;
+       uint32_t base0, base1;
        uint32_t dch;
 
        if (sz.side1 != sz.side2) {
                sz.side2 = 0;
        }
-       map = pci_read_config32(ctrl->f2, DRAM_BANK_ADDR_MAP);
-       map &= ~(0xf << (index * 4));
-#if K8_4RANK_DIMM_SUPPORT == 1
-        if(sz.rank == 4) {
-                map &= ~(0xf << ( (index + 2) * 4));
-        }
-#endif
 
        /* For each base register.
         * Place the dimm size in 32 MB quantities in the bits 31 - 21.
         * The initialize dimm size is in bits.
         * Set the base enable bit0.
         */
-       
+
        base0 = base1 = 0;
 
        /* Make certain side1 of the dimm is at least 32MB */
        if (sz.side1 >= (25 +3)) {
-               if(is_cpu_pre_d0()) {
-                       map |= (sz.side1 - (25 + 3)) << (index *4);
-#if K8_4RANK_DIMM_SUPPORT == 1
-                       if(sz.rank == 4) {
-                             map |= (sz.side1 - (25 + 3)) << ( (index + 2) * 4);
-                               }
-#endif
-               }
-               else {
-                       map |= cs_map_aa[(sz.rows - 12) * 5 + (sz.col - 8) ] << (index*4);
-#if K8_4RANK_DIMM_SUPPORT == 1
-                       if(sz.rank == 4) {
-                              map |=  cs_map_aa[(sz.rows - 12) * 5 + (sz.col - 8) ] << ( (index + 2) * 4);
-                               }
-#endif
-               }
                base0 = (1 << ((sz.side1 - (25 + 3)) + 21)) | 1;
        }
-       
+
        /* Make certain side2 of the dimm is at least 32MB */
        if (sz.side2 >= (25 + 3)) {
                base1 = (1 << ((sz.side2 - (25 + 3)) + 21)) | 1;
@@ -784,21 +726,19 @@ static void set_dimm_size(const struct mem_controller *ctrl, struct dimm_size sz
        /* Set the appropriate DIMM base address register */
        pci_write_config32(ctrl->f2, DRAM_CSBASE + (((index << 1)+0)<<2), base0);
        pci_write_config32(ctrl->f2, DRAM_CSBASE + (((index << 1)+1)<<2), base1);
-#if K8_4RANK_DIMM_SUPPORT == 1
-       if(sz.rank == 4) {
+#if CONFIG_QRANK_DIMM_SUPPORT
+       if (sz.rank == 4) {
                pci_write_config32(ctrl->f2, DRAM_CSBASE + (((index << 1)+4)<<2), base0);
                pci_write_config32(ctrl->f2, DRAM_CSBASE + (((index << 1)+5)<<2), base1);
        }
 #endif
 
-       pci_write_config32(ctrl->f2, DRAM_BANK_ADDR_MAP, map);
-       
        /* Enable the memory clocks for this DIMM */
        if (base0) {
                dch = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH);
                dch |= DCH_MEMCLK_EN0 << index;
-#if K8_4RANK_DIMM_SUPPORT == 1
-               if(sz.rank == 4) {
+#if CONFIG_QRANK_DIMM_SUPPORT
+               if (sz.rank == 4) {
                        dch |= DCH_MEMCLK_EN0 << (index + 2);
                }
 #endif
@@ -806,11 +746,55 @@ static void set_dimm_size(const struct mem_controller *ctrl, struct dimm_size sz
        }
 }
 
+static void set_dimm_map(const struct mem_controller *ctrl, struct dimm_size sz, unsigned index)
+{
+       static const unsigned cs_map_aa[] = {
+               /* (row=12, col=8)(14, 12) ---> (0, 0) (2, 4) */
+               0, 1, 3, 6, 0,
+               0, 2, 4, 7, 9,
+               0, 0, 5, 8,10,
+       };
+
+       uint32_t map;
+
+       map = pci_read_config32(ctrl->f2, DRAM_BANK_ADDR_MAP);
+       map &= ~(0xf << (index * 4));
+#if CONFIG_QRANK_DIMM_SUPPORT
+       if (sz.rank == 4) {
+               map &= ~(0xf << ( (index + 2) * 4));
+       }
+#endif
+
+
+       /* Make certain side1 of the dimm is at least 32MB */
+       if (sz.side1 >= (25 +3)) {
+               if (is_cpu_pre_d0()) {
+                       map |= (sz.side1 - (25 + 3)) << (index *4);
+#if CONFIG_QRANK_DIMM_SUPPORT
+                       if (sz.rank == 4) {
+                               map |= (sz.side1 - (25 + 3)) << ( (index + 2) * 4);
+                       }
+#endif
+               }
+               else {
+                       map |= cs_map_aa[(sz.rows - 12) * 5 + (sz.col - 8) ] << (index*4);
+#if CONFIG_QRANK_DIMM_SUPPORT
+                       if (sz.rank == 4) {
+                               map |=  cs_map_aa[(sz.rows - 12) * 5 + (sz.col - 8) ] << ( (index + 2) * 4);
+                       }
+#endif
+               }
+       }
+
+       pci_write_config32(ctrl->f2, DRAM_BANK_ADDR_MAP, map);
+
+}
+
 static long spd_set_ram_size(const struct mem_controller *ctrl, long dimm_mask)
 {
        int i;
-       
-       for(i = 0; i < DIMM_SOCKETS; i++) {
+
+       for (i = 0; i < DIMM_SOCKETS; i++) {
                struct dimm_size sz;
                if (!(dimm_mask & (1 << i))) {
                        continue;
@@ -820,6 +804,7 @@ static long spd_set_ram_size(const struct mem_controller *ctrl, long dimm_mask)
                        return -1; /* Report SPD error */
                }
                set_dimm_size(ctrl, sz, i);
+               set_dimm_map (ctrl, sz, i);
        }
        return dimm_mask;
 }
@@ -847,13 +832,13 @@ static void route_dram_accesses(const struct mem_controller *ctrl,
 
        limit_reg = 0x44 + index;
        base_reg = 0x40 + index;
-       for(device = PCI_DEV(0, 0x18, 1); device <= PCI_DEV(0, 0x1f, 1); device += PCI_DEV(0, 1, 0)) {
+       for (device = PCI_DEV(0, 0x18, 1); device <= PCI_DEV(0, 0x1f, 1); device += PCI_DEV(0, 1, 0)) {
                pci_write_config32(device, limit_reg, limit);
                pci_write_config32(device, base_reg, base);
        }
 }
 
-static void set_top_mem(unsigned tom_k)
+static void set_top_mem(unsigned tom_k, unsigned hole_startk)
 {
        /* Error if I don't have memory */
        if (!tom_k) {
@@ -861,22 +846,32 @@ static void set_top_mem(unsigned tom_k)
        }
 
        /* Report the amount of memory. */
-       print_spew("RAM: 0x");
-       print_spew_hex32(tom_k);
-       print_spew(" KB\r\n");
+       printk(BIOS_DEBUG, "RAM end at 0x%08x kB\n", tom_k);
 
        /* Now set top of memory */
        msr_t msr;
-       msr.lo = (tom_k & 0x003fffff) << 10;
-       msr.hi = (tom_k & 0xffc00000) >> 22;
-       wrmsr(TOP_MEM2, msr);
+       if (tom_k > (4*1024*1024)) {
+               printk(BIOS_SPEW, "Handling memory mapped above 4 GB\n");
+               printk(BIOS_SPEW, "Upper RAM end at 0x%08x kB\n", tom_k);
+               msr.lo = (tom_k & 0x003fffff) << 10;
+               msr.hi = (tom_k & 0xffc00000) >> 22;
+               wrmsr(TOP_MEM2, msr);
+               printk(BIOS_SPEW, "Correcting memory amount mapped below 4 GB\n");
+       }
 
        /* Leave a 64M hole between TOP_MEM and TOP_MEM2
         * so I can see my rom chip and other I/O devices.
         */
        if (tom_k >= 0x003f0000) {
+#if CONFIG_HW_MEM_HOLE_SIZEK != 0
+               if (hole_startk != 0) {
+                       tom_k = hole_startk;
+               } else
+#endif
                tom_k = 0x3f0000;
+               printk(BIOS_SPEW, "Adjusting lower RAM end\n");
        }
+       printk(BIOS_SPEW, "Lower RAM end at 0x%08x kB\n", tom_k);
        msr.lo = (tom_k & 0x003fffff) << 10;
        msr.hi = (tom_k & 0xffc00000) >> 22;
        wrmsr(TOP_MEM, msr);
@@ -885,29 +880,29 @@ static void set_top_mem(unsigned tom_k)
 static unsigned long interleave_chip_selects(const struct mem_controller *ctrl)
 {
        /* 35 - 25 */
-       static const uint32_t csbase_low[] = { 
-       /* 32MB */      (1 << (13 - 4)),
-       /* 64MB */      (1 << (14 - 4)),
-       /* 128MB */     (1 << (14 - 4)), 
-       /* 256MB */     (1 << (15 - 4)),
-       /* 512MB */     (1 << (15 - 4)),
-       /* 1GB */       (1 << (16 - 4)),
-       /* 2GB */       (1 << (16 - 4)), 
+       static const uint8_t csbase_low_shift[] = {
+       /* 32MB */      (13 - 4),
+       /* 64MB */      (14 - 4),
+       /* 128MB */     (14 - 4),
+       /* 256MB */     (15 - 4),
+       /* 512MB */     (15 - 4),
+       /* 1GB */       (16 - 4),
+       /* 2GB */       (16 - 4),
        };
 
-        static const uint32_t csbase_low_d0[] = {
-        /* 32MB */      (1 << (13 - 4)),
-        /* 64MB */      (1 << (14 - 4)),
-        /* 128MB */     (1 << (14 - 4)),
-       /* 128MB */     (1 << (15 - 4)),
-        /* 256MB */     (1 << (15 - 4)),
-        /* 512MB */     (1 << (15 - 4)),
-        /* 256MB */     (1 << (16 - 4)),
-        /* 512MB */     (1 << (16 - 4)),
-        /* 1GB */       (1 << (16 - 4)),
-       /* 1GB */       (1 << (17 - 4)),
-        /* 2GB */       (1 << (17 - 4)),
-        };
+       static const uint8_t csbase_low_d0_shift[] = {
+       /* 32MB */      (13 - 4),
+       /* 64MB */      (14 - 4),
+       /* 128MB */     (14 - 4),
+       /* 128MB */     (15 - 4),
+       /* 256MB */     (15 - 4),
+       /* 512MB */     (15 - 4),
+       /* 256MB */     (16 - 4),
+       /* 512MB */     (16 - 4),
+       /* 1GB */       (16 - 4),
+       /* 1GB */       (17 - 4),
+       /* 2GB */       (17 - 4),
+       };
 
        /* cs_base_high is not changed */
 
@@ -924,13 +919,13 @@ static unsigned long interleave_chip_selects(const struct mem_controller *ctrl)
        chip_selects = 0;
        common_size = 0;
        common_cs_mode = 0;
-       for(index = 0; index < 8; index++) {
+       for (index = 0; index < 8; index++) {
                unsigned size;
                unsigned cs_mode;
                uint32_t value;
-               
+
                value = pci_read_config32(ctrl->f2, DRAM_CSBASE + (index << 2));
-               
+
                /* Is it enabled? */
                if (!(value & 1)) {
                        continue;
@@ -946,15 +941,15 @@ static unsigned long interleave_chip_selects(const struct mem_controller *ctrl)
                }
 
                value = pci_read_config32(ctrl->f2, DRAM_BANK_ADDR_MAP);
-                cs_mode =( value >> ((index>>1)*4)) & 0xf;
-                if(cs_mode == 0 ) continue;
-                if(common_cs_mode == 0) {
-                       common_cs_mode = cs_mode;
-                }
-                /* The size differed fail */
-                if(common_cs_mode != cs_mode) {
-                        return 0;
-                }
+               cs_mode =( value >> ((index>>1)*4)) & 0xf;
+               if (cs_mode == 0 ) continue;
+               if (common_cs_mode == 0) {
+                       common_cs_mode = cs_mode;
+               }
+               /* The cs_mode differed fail */
+               if (common_cs_mode != cs_mode) {
+                       return 0;
+               }
        }
 
        /* Chip selects can only be interleaved when there is
@@ -966,36 +961,36 @@ static unsigned long interleave_chip_selects(const struct mem_controller *ctrl)
        }
 
        /* Find the bits of csbase that we need to interleave on */
-       if(is_cpu_pre_d0()){
-               csbase_inc = csbase_low[common_cs_mode];
-               if(is_dual_channel(ctrl)) {
-                /* Also we run out of address mask bits if we try and interleave 8 4GB dimms */
-                       if ((bits == 3) && (common_size == (1 << (32 - 3)))) {
-//                                     print_debug("8 4GB chip selects cannot be interleaved\r\n");
-                               return 0;
-                       }  
+       if (is_cpu_pre_d0()){
+               csbase_inc = 1 << csbase_low_shift[common_cs_mode];
+               if (is_dual_channel(ctrl)) {
+               /* Also we run out of address mask bits if we try and interleave 8 4GB dimms */
+                       if ((bits == 3) && (common_size == (1 << (32 - 3)))) {
+//                                     printk(BIOS_DEBUG, "8 4GB chip selects cannot be interleaved\n");
+                               return 0;
+                       }
                        csbase_inc <<=1;
                }
        }
        else {
-               csbase_inc = csbase_low_d0[common_cs_mode];
-               if(is_dual_channel(ctrl)) {
-                       if( (bits==3) && (common_cs_mode > 8)) {
-//                             print_debug("8 cs_mode>8 chip selects cannot be interleaved\r\n");
-                               return 0;
+               csbase_inc = 1 << csbase_low_d0_shift[common_cs_mode];
+               if (is_dual_channel(ctrl)) {
+                       if ( (bits==3) && (common_cs_mode > 8)) {
+//                             printk(BIOS_DEBUG, "8 cs_mode>8 chip selects cannot be interleaved\n");
+                               return 0;
                        }
                        csbase_inc <<=1;
-                }   
+               }
        }
 
-       /* Compute the initial values for csbase and csbask. 
+       /* Compute the initial values for csbase and csbask.
         * In csbase just set the enable bit and the base to zero.
         * In csmask set the mask bits for the size and page level interleave.
         */
        csbase = 0 | 1;
        csmask = (((common_size  << bits) - 1) << 21);
        csmask |= 0xfe00 & ~((csbase_inc << bits) - csbase_inc);
-       for(index = 0; index < 8; index++) {
+       for (index = 0; index < 8; index++) {
                uint32_t value;
 
                value = pci_read_config32(ctrl->f2, DRAM_CSBASE + (index << 2));
@@ -1007,8 +1002,8 @@ static unsigned long interleave_chip_selects(const struct mem_controller *ctrl)
                pci_write_config32(ctrl->f2, DRAM_CSMASK + (index << 2), csmask);
                csbase += csbase_inc;
        }
-       
-       print_spew("Interleaved\r\n");
+
+       printk(BIOS_SPEW, "Interleaved\n");
 
        /* Return the memory size in K */
        return common_size << (15 + bits);
@@ -1020,14 +1015,14 @@ static unsigned long order_chip_selects(const struct mem_controller *ctrl)
 
        /* Remember which registers we have used in the high 8 bits of tom */
        tom = 0;
-       for(;;) {
-               /* Find the largest remaining canidate */
-               unsigned index, canidate;
+       for (;;) {
+               /* Find the largest remaining candidate */
+               unsigned index, candidate;
                uint32_t csbase, csmask;
                unsigned size;
                csbase = 0;
-               canidate = 0;
-               for(index = 0; index < 8; index++) {
+               candidate = 0;
+               for (index = 0; index < 8; index++) {
                        uint32_t value;
                        value = pci_read_config32(ctrl->f2, DRAM_CSBASE + (index << 2));
 
@@ -1035,21 +1030,22 @@ static unsigned long order_chip_selects(const struct mem_controller *ctrl)
                        if (!(value & 1)) {
                                continue;
                        }
-                       
+
                        /* Is it greater? */
                        if (value <= csbase) {
                                continue;
                        }
-                       
+
                        /* Has it already been selected */
                        if (tom & (1 << (index + 24))) {
                                continue;
                        }
-                       /* I have a new canidate */
+                       /* I have a new candidate */
                        csbase = value;
-                       canidate = index;
+                       candidate = index;
                }
-               /* See if I have found a new canidate */
+
+               /* See if I have found a new candidate */
                if (csbase == 0) {
                        break;
                }
@@ -1058,7 +1054,7 @@ static unsigned long order_chip_selects(const struct mem_controller *ctrl)
                size = csbase >> 21;
 
                /* Remember I have used this register */
-               tom |= (1 << (canidate + 24));
+               tom |= (1 << (candidate + 24));
 
                /* Recompute the cs base register value */
                csbase = (tom << 21) | 1;
@@ -1071,22 +1067,22 @@ static unsigned long order_chip_selects(const struct mem_controller *ctrl)
                csmask |= 0xfe00;               /* For now don't optimize */
 
                /* Write the new base register */
-               pci_write_config32(ctrl->f2, DRAM_CSBASE + (canidate << 2), csbase);
+               pci_write_config32(ctrl->f2, DRAM_CSBASE + (candidate << 2), csbase);
                /* Write the new mask register */
-               pci_write_config32(ctrl->f2, DRAM_CSMASK + (canidate << 2), csmask);
-               
+               pci_write_config32(ctrl->f2, DRAM_CSMASK + (candidate << 2), csmask);
+
        }
        /* Return the memory size in K */
        return (tom & ~0xff000000) << 15;
 }
 
-unsigned long memory_end_k(const struct mem_controller *ctrl, int max_node_id)
+static unsigned long memory_end_k(const struct mem_controller *ctrl, int max_node_id)
 {
        unsigned node_id;
        unsigned end_k;
        /* Find the last memory address used */
        end_k = 0;
-       for(node_id = 0; node_id < max_node_id; node_id++) {
+       for (node_id = 0; node_id < max_node_id; node_id++) {
                uint32_t limit, base;
                unsigned index;
                index = node_id << 3;
@@ -1100,73 +1096,48 @@ unsigned long memory_end_k(const struct mem_controller *ctrl, int max_node_id)
        return end_k;
 }
 
-#if K8_E0_MEM_HOLE_SIZEK != 0
-#define K8_E0_MEM_HOLE_LIMITK 4*1024*1024
-#define K8_E0_MEM_HOLE_BASEK (K8_E0_MEM_HOLE_LIMITK - K8_E0_MEM_HOLE_SIZEK )
-
-static void set_e0_mem_hole(const struct mem_controller *ctrl, unsigned base_k)
-{
-        /* Route the addresses to the controller node */
-        unsigned val;
-
-       val = pci_read_config32(ctrl->f1,0xf0);
-
-       val &= 0x00ff00fe;
-        val = (K8_E0_MEM_HOLE_BASEK << 10) | ((K8_E0_MEM_HOLE_SIZEK+base_k)>>(16-10)) | 1;
-
-       pci_write_config32(ctrl->f1, 0xf0, val);
-}
-       
-#endif
-
 static void order_dimms(const struct mem_controller *ctrl)
 {
        unsigned long tom_k, base_k;
 
-       if (read_option(CMOS_VSTART_interleave_chip_selects, CMOS_VLEN_interleave_chip_selects, 1) != 0) {
+       if (read_option(interleave_chip_selects, 1) != 0) {
                tom_k = interleave_chip_selects(ctrl);
        } else {
-               print_debug("Interleaving disabled\r\n");
+               printk(BIOS_DEBUG, "Interleaving disabled\n");
                tom_k = 0;
        }
+
        if (!tom_k) {
                tom_k = order_chip_selects(ctrl);
        }
+
        /* Compute the memory base address */
        base_k = memory_end_k(ctrl, ctrl->node_id);
        tom_k += base_k;
-#if K8_E0_MEM_HOLE_SIZEK != 0
-       if(!is_cpu_pre_e0()) {
-                /* See if I need to check the range cover hole */
-                if ((base_k <= K8_E0_MEM_HOLE_BASEK) && (tom_k > K8_E0_MEM_HOLE_BASEK)) {
-                       tom_k += K8_E0_MEM_HOLE_SIZEK;
-                }
-       }
-#endif
        route_dram_accesses(ctrl, base_k, tom_k);
-       set_top_mem(tom_k);
+       set_top_mem(tom_k, 0);
 }
 
 static long disable_dimm(const struct mem_controller *ctrl, unsigned index, long dimm_mask)
 {
-       print_debug("disabling dimm"); 
-       print_debug_hex8(index); 
-       print_debug("\r\n");
+       printk(BIOS_DEBUG, "disabling dimm %02x\n", index);
        pci_write_config32(ctrl->f2, DRAM_CSBASE + (((index << 1)+0)<<2), 0);
        pci_write_config32(ctrl->f2, DRAM_CSBASE + (((index << 1)+1)<<2), 0);
        dimm_mask &= ~(1 << index);
        return dimm_mask;
 }
 
-static long spd_handle_unbuffered_dimms(const struct mem_controller *ctrl, long dimm_mask)
+static long spd_handle_unbuffered_dimms(const struct mem_controller *ctrl,
+                                       long dimm_mask)
 {
        int i;
        int registered;
        int unbuffered;
+       int has_dualch = is_opteron(ctrl);
        uint32_t dcl;
        unbuffered = 0;
        registered = 0;
-       for(i = 0; (i < DIMM_SOCKETS); i++) {
+       for (i = 0; (i < DIMM_SOCKETS); i++) {
                int value;
                if (!(dimm_mask & (1 << i))) {
                        continue;
@@ -1175,10 +1146,11 @@ static long spd_handle_unbuffered_dimms(const struct mem_controller *ctrl, long
                if (value < 0) {
                        return -1;
                }
+
                /* Registered dimm ? */
                if (value & (1 << 1)) {
                        registered = 1;
-               } 
+               }
                /* Otherwise it must be an unbuffered dimm */
                else {
                        unbuffered = 1;
@@ -1187,26 +1159,30 @@ static long spd_handle_unbuffered_dimms(const struct mem_controller *ctrl, long
        if (unbuffered && registered) {
                die("Mixed buffered and registered dimms not supported");
        }
-#if 1
-       //By yhlu for debug Athlon64 939 can do dual channel, but it use unbuffer DIMM
-       if (unbuffered && is_opteron(ctrl)) {
-               die("Unbuffered Dimms not supported on Opteron");
-       }
-#endif
 
        dcl = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
-       dcl &= ~DCL_UnBufDimm;
+       dcl &= ~DCL_UnBuffDimm;
        if (unbuffered) {
-               dcl |= DCL_UnBufDimm;
+               if ((has_dualch) && (!is_cpu_pre_d0())) {
+                       dcl |= DCL_UnBuffDimm;
+#if CONFIG_CPU_AMD_SOCKET_939
+                       if ((cpuid_eax(1) & 0x30) == 0x30) {
+                               /* CS[7:4] is copy of CS[3:0], should be set for 939 socket */
+                               dcl |= DCL_UpperCSMap;
+                       }
+#endif
+               } else {
+                       dcl |= DCL_UnBuffDimm;
+               }
        }
        pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dcl);
-#if 0
+
        if (is_registered(ctrl)) {
-               print_debug("Registered\r\n");
+               printk(BIOS_SPEW, "Registered\n");
        } else {
-               print_debug("Unbuffered\r\n");
+               printk(BIOS_SPEW, "Unbuffered\n");
        }
-#endif
+
        return dimm_mask;
 }
 
@@ -1215,7 +1191,7 @@ static unsigned int spd_detect_dimms(const struct mem_controller *ctrl)
        unsigned dimm_mask;
        int i;
        dimm_mask = 0;
-       for(i = 0; i < DIMM_SOCKETS; i++) {
+       for (i = 0; i < DIMM_SOCKETS; i++) {
                int byte;
                unsigned device;
                device = ctrl->channel0[i];
@@ -1241,7 +1217,7 @@ static long spd_enable_2channels(const struct mem_controller *ctrl, long dimm_ma
        int i;
        uint32_t nbcap;
        /* SPD addresses to verify are identical */
-       static const unsigned addresses[] = {
+       static const uint8_t addresses[] = {
                2,      /* Type should be DDR SDRAM */
                3,      /* *Row addresses */
                4,      /* *Column addresses */
@@ -1254,8 +1230,8 @@ static long spd_enable_2channels(const struct mem_controller *ctrl, long dimm_ma
                17,     /* *Logical Banks */
                18,     /* *Supported CAS Latencies */
                21,     /* *SDRAM Module Attributes */
-               23,     /* *Cycle time at CAS Latnecy (CLX - 0.5) */
-               26,     /* *Cycle time at CAS Latnecy (CLX - 1.0) */
+               23,     /* *Cycle time at CAS Latency (CLX - 0.5) */
+               25,     /* *Cycle time at CAS Latency (CLX - 1.0) */
                27,     /* *tRP Row precharge time */
                28,     /* *Minimum Row Active to Row Active Delay (tRRD) */
                29,     /* *tRCD RAS to CAS */
@@ -1265,7 +1241,7 @@ static long spd_enable_2channels(const struct mem_controller *ctrl, long dimm_ma
        };
        /* If the dimms are not in pairs do not do dual channels */
        if ((dimm_mask & ((1 << DIMM_SOCKETS) - 1)) !=
-               ((dimm_mask >> DIMM_SOCKETS) & ((1 << DIMM_SOCKETS) - 1))) { 
+               ((dimm_mask >> DIMM_SOCKETS) & ((1 << DIMM_SOCKETS) - 1))) {
                goto single_channel;
        }
        /* If the cpu is not capable of doing dual channels don't do dual channels */
@@ -1273,7 +1249,7 @@ static long spd_enable_2channels(const struct mem_controller *ctrl, long dimm_ma
        if (!(nbcap & NBCAP_128Bit)) {
                goto single_channel;
        }
-       for(i = 0; (i < 4) && (ctrl->channel0[i]); i++) {
+       for (i = 0; (i < 4) && (ctrl->channel0[i]); i++) {
                unsigned device0, device1;
                int value0, value1;
                int j;
@@ -1283,7 +1259,7 @@ static long spd_enable_2channels(const struct mem_controller *ctrl, long dimm_ma
                }
                device0 = ctrl->channel0[i];
                device1 = ctrl->channel1[i];
-               for(j = 0; j < sizeof(addresses)/sizeof(addresses[0]); j++) {
+               for (j = 0; j < ARRAY_SIZE(addresses); j++) {
                        unsigned addr;
                        addr = addresses[j];
                        value0 = spd_read_byte(device0, addr);
@@ -1299,7 +1275,7 @@ static long spd_enable_2channels(const struct mem_controller *ctrl, long dimm_ma
                        }
                }
        }
-       print_spew("Enabling dual channel memory\r\n");
+       printk(BIOS_SPEW, "Enabling dual channel memory\n");
        uint32_t dcl;
        dcl = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
        dcl &= ~DCL_32ByteEn;
@@ -1319,14 +1295,17 @@ struct mem_param {
        uint32_t dch_memclk;
        uint16_t dch_tref4k, dch_tref8k;
        uint8_t  dtl_twr;
+       uint8_t  dtl_twtr;
+       uint8_t  dtl_trwt[3][3]; /* first index is CAS_LAT 2/2.5/3 and 128/registered64/64 */
+       uint8_t  rdpreamble[4]; /* 0 is for registered, 1 for 1-2 DIMMS, 2 and 3 for 3 or 4 unreg dimm slots */
        char name[9];
 };
 
-static const struct mem_param *get_mem_param(unsigned min_cycle_time)
+static const struct mem_param *get_mem_param(int freq)
 {
        static const struct mem_param speed[] = {
-               {
-                       .name       = "100Mhz\r\n",
+               [NBCAP_MEMCLK_100MHZ] = {
+                       .name       = "100MHz",
                        .cycle_time = 0xa0,
                        .divisor    = (10 <<1),
                        .tRC        = 0x46,
@@ -1335,9 +1314,12 @@ static const struct mem_param *get_mem_param(unsigned min_cycle_time)
                        .dch_tref4k = DTH_TREF_100MHZ_4K,
                        .dch_tref8k = DTH_TREF_100MHZ_8K,
                        .dtl_twr    = 2,
+                       .dtl_twtr   = 1,
+                       .dtl_trwt   = { { 2, 2, 3 }, { 3, 3, 4 }, { 3, 3, 4 }},
+                       .rdpreamble = { ((9 << 1) + 0), ((9 << 1) + 0), ((9 << 1) + 0), ((9 << 1) + 0) }
                },
-               {
-                       .name       = "133Mhz\r\n",
+               [NBCAP_MEMCLK_133MHZ] = {
+                       .name       = "133MHz",
                        .cycle_time = 0x75,
                        .divisor    = (7<<1)+1,
                        .tRC        = 0x41,
@@ -1346,9 +1328,12 @@ static const struct mem_param *get_mem_param(unsigned min_cycle_time)
                        .dch_tref4k = DTH_TREF_133MHZ_4K,
                        .dch_tref8k = DTH_TREF_133MHZ_8K,
                        .dtl_twr    = 2,
+                       .dtl_twtr   = 1,
+                       .dtl_trwt   = { { 2, 2, 3 }, { 3, 3, 4 }, { 3, 3, 4 }},
+                       .rdpreamble = { ((8 << 1) + 0), ((7 << 1) + 0), ((7 << 1) + 1), ((7 << 1) + 0) }
                },
-               {
-                       .name       = "166Mhz\r\n",
+               [NBCAP_MEMCLK_166MHZ] = {
+                       .name       = "166MHz",
                        .cycle_time = 0x60,
                        .divisor    = (6<<1),
                        .tRC        = 0x3C,
@@ -1357,9 +1342,12 @@ static const struct mem_param *get_mem_param(unsigned min_cycle_time)
                        .dch_tref4k = DTH_TREF_166MHZ_4K,
                        .dch_tref8k = DTH_TREF_166MHZ_8K,
                        .dtl_twr    = 3,
+                       .dtl_twtr   = 1,
+                       .dtl_trwt   = { { 3, 2, 3 }, { 3, 3, 4 }, { 4, 3, 4 }},
+                       .rdpreamble = { ((7 << 1) + 1), ((6 << 1) + 0), ((6 << 1) + 1), ((6 << 1) + 0) }
                },
-               {
-                       .name       = "200Mhz\r\n",
+               [NBCAP_MEMCLK_200MHZ] = {
+                       .name       = "200MHz",
                        .cycle_time = 0x50,
                        .divisor    = (5<<1),
                        .tRC        = 0x37,
@@ -1368,24 +1356,15 @@ static const struct mem_param *get_mem_param(unsigned min_cycle_time)
                        .dch_tref4k = DTH_TREF_200MHZ_4K,
                        .dch_tref8k = DTH_TREF_200MHZ_8K,
                        .dtl_twr    = 3,
-               },
-               {
-                       .cycle_time = 0x00,
-               },
+                       .dtl_twtr   = 2,
+                       .dtl_trwt   = { { 0, 2, 3 }, { 3, 3, 4 }, { 3, 3, 4 }},
+                       .rdpreamble = { ((7 << 1) + 0), ((5 << 1) + 0), ((5 << 1) + 1), ((5 << 1) + 1) }
+               }
        };
        const struct mem_param *param;
-       for(param = &speed[0]; param->cycle_time ; param++) {
-               if (min_cycle_time > (param+1)->cycle_time) {
-                       break;
-               }
-       }
-       if (!param->cycle_time) {
-               die("min_cycle_time to low");
-       }
-       print_spew(param->name);
-#ifdef DRAM_MIN_CYCLE_TIME
-       print_debug(param->name);
-#endif
+
+       param = speed + freq;
+       printk(BIOS_SPEW, "%s\n", param->name);
        return param;
 }
 
@@ -1393,178 +1372,310 @@ struct spd_set_memclk_result {
        const struct mem_param *param;
        long dimm_mask;
 };
+
+static int spd_dimm_loading_socket(const struct mem_controller *ctrl, long dimm_mask, int *freq_1t)
+{
+
+#if CONFIG_CPU_AMD_SOCKET_939
+
+/* values are raised by 1 so that a 0 (unset) table entry is detected as a bad field */
+#define DDR200 (NBCAP_MEMCLK_100MHZ + 1)
+#define DDR333 (NBCAP_MEMCLK_166MHZ + 1)
+#define DDR400 (NBCAP_MEMCLK_200MHZ + 1)
+#define DDR_2T 0x80
+#define DDR_MASK 0x7
+
+#define DDR200_2T (DDR_2T | DDR200)
+#define DDR333_2T (DDR_2T | DDR333)
+#define DDR400_2T (DDR_2T | DDR400)
+
+/*
+       Following table comes directly from BKDG (unbuffered DIMM support)
+       [Y][X] Y = ch0_0, ch1_0, ch0_1, ch1_1 1=present 0=empty
+         X uses same layout but 1 means double rank 0 is single rank/empty
+
+       Following tables come from BKDG the ch{0_0,1_0,0_1,1_1} maps to
+       MEMCS_{1L,1H,2L,2H} in the PDF. PreE is table 45, and revE table 46.
+*/
+
+       static const unsigned char dimm_loading_config_preE[16][16] = {
+               [0x8] = {[0x0] = DDR400,[0x8] = DDR400},
+               [0x2] = {[0x0] = DDR333,[0x2] = DDR400},
+               [0xa] = {[0x0] = DDR400_2T,[0x2] = DDR400_2T,
+                        [0x8] = DDR400_2T,[0xa] = DDR333_2T},
+               [0xc] = {[0x0] = DDR400,[0xc] = DDR400},
+               [0x3] = {[0x0] = DDR333,[0x3] = DDR400},
+               [0xf] = {[0x0] = DDR400_2T,[0x3] = DDR400_2T,
+                        [0xc] = DDR400_2T,[0xf] = DDR333_2T},
+       };
+
+       static const unsigned char dimm_loading_config_revE[16][16] = {
+               [0x8] = {[0x0] = DDR400, [0x8] = DDR400},
+               [0x2] = {[0x0] = DDR333, [0x2] = DDR400},
+               [0x4] = {[0x0] = DDR400, [0x4] = DDR400},
+               [0x1] = {[0x0] = DDR333, [0x1] = DDR400},
+               [0xa] = {[0x0] = DDR400_2T, [0x2] = DDR400_2T,
+                        [0x8] = DDR400_2T, [0xa] = DDR333_2T},
+               [0x5] = {[0x0] = DDR400_2T, [0x1] = DDR400_2T,
+                        [0x4] = DDR400_2T, [0x5] = DDR333_2T},
+               [0xc] = {[0x0] = DDR400, [0xc] = DDR400, [0x4] = DDR400, [0x8] = DDR400},
+               [0x3] = {[0x0] = DDR333, [0x1] = DDR333, [0x2] = DDR333, [0x3] = DDR400},
+               [0xe] = {[0x0] = DDR400_2T, [0x4] = DDR400_2T, [0x2] = DDR400_2T,
+                        [0x6] = DDR400_2T, [0x8] = DDR400_2T, [0xc] = DDR400_2T,
+                        [0xa] = DDR333_2T, [0xe] = DDR333_2T},
+               [0xb] = {[0x0] = DDR333, [0x1] = DDR400_2T, [0x2] = DDR333_2T,
+                        [0x3] = DDR400_2T, [0x8] = DDR333_2T, [0x9] = DDR400_2T,
+                        [0xa] = DDR333_2T, [0xb] = DDR333_2T},
+               [0xd] = {[0x0] = DDR400_2T, [0x8] = DDR400_2T, [0x1] = DDR400_2T,
+                        [0x9] = DDR333_2T, [0x4] = DDR400_2T, [0xc] = DDR400_2T,
+                        [0x5] = DDR333_2T, [0xd] = DDR333_2T},
+               [0x7] = {[0x0] = DDR333,    [0x2] = DDR400_2T, [0x1] = DDR333_2T,
+                        [0x3] = DDR400_2T, [0x4] = DDR333_2T, [0x6] = DDR400_2T,
+                        [0x5] = DDR333_2T, [0x7] = DDR333_2T},
+               [0xf] = {[0x0] = DDR400_2T, [0x1] = DDR400_2T, [0x4] = DDR400_2T,
+                        [0x5] = DDR333_2T, [0x2] = DDR400_2T, [0x3] = DDR400_2T,
+                        [0x6] = DDR400_2T, [0x7] = DDR333_2T, [0x8] = DDR400_2T,
+                        [0x9] = DDR400_2T, [0xc] = DDR400_2T, [0xd] = DDR333_2T,
+                        [0xa] = DDR333_2T, [0xb] = DDR333_2T, [0xe] = DDR333_2T,
+                        [0xf] = DDR333_2T},
+       };
+       /*The dpos matches channel positions defined in BKDG and above arrays
+         The rpos is bitmask of dual rank dimms in same order as dpos */
+       unsigned int dloading = 0, i, rpos = 0, dpos = 0;
+       const unsigned char (*dimm_loading_config)[16] = dimm_loading_config_revE;
+       int rank;
+       uint32_t dcl;
+
+       if (is_cpu_pre_e0()) {
+               dimm_loading_config = dimm_loading_config_preE;
+       }
+
+       /* only two DIMMs per channel */
+       for (i = 0; i < 2; i++) {
+               if ((dimm_mask & (1 << i))) {
+                       /* read rank channel 0 */
+                       rank = spd_read_byte(ctrl->channel0[i], 5);
+                       if (rank < 0) goto hw_error;
+                       rpos |= (rank == 2) ? (1 << (3 - (i * 2))) : 0;
+                       dpos |= (1 << (3 - (i * 2)));
+               }
+
+               if ((dimm_mask & (1 << (i+DIMM_SOCKETS)))) {
+                       /* read rank channel 1*/
+                       rank = spd_read_byte(ctrl->channel1[i], 5);
+                       if (rank < 0) goto hw_error;
+                       rpos |= (rank == 2) ? (1 << (2 - (i * 2))) : 0;
+                       dpos |= (1 << (2 - (i * 2)));
+               }
+       }
+       /* now the lookup, decode the max speed DDR400_2T etc */
+       dloading = dimm_loading_config[dpos][rpos] & DDR_MASK;
+#if 0
+       printk(BIOS_DEBUG, "XXX %x %x dload %x 2T %x\n", dpos,rpos, dloading, dimm_loading_config[dpos][rpos] & DDR_2T);
+#endif
+hw_error:
+       if (dloading != 0) {
+               /* we have valid combination check the restrictions */
+               dcl = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
+               dcl |= ((dimm_loading_config[dpos][rpos] & DDR_2T) || CONFIG_K8_FORCE_2T_DRAM_TIMING) ? (DCL_En2T) : 0;
+               /* Set DualDIMMen if the second channel is completely empty (revD+) */
+               if (((cpuid_eax(1) & 0xfff0f) >= 0x10f00) && ((dpos & 0x5) == 0)) {
+                       printk(BIOS_DEBUG, "Setting DualDIMMen\n");
+                       dcl |= DCL_DualDIMMen;
+               }
+               pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dcl);
+               return dloading - 1;
+       } else {
+               /* if we don't find it we set it to DDR400 */
+               printk(BIOS_WARNING, "Detected strange DIMM configuration, may not work! (or bug)\n");
+               return NBCAP_MEMCLK_200MHZ;
+       }
+
+#elif CONFIG_CPU_AMD_SOCKET_754
+
+#define CFGIDX(DIMM1,DIMM2,DIMM3) ((DIMM3)*9+(DIMM2)*3+(DIMM1))
+
+#define EMPTY 0
+#define X8S_X16 1
+#define X8D 2
+
+#define DDR200 NBCAP_MEMCLK_100MHZ
+#define DDR333 NBCAP_MEMCLK_166MHZ
+#define DDR400 NBCAP_MEMCLK_200MHZ
+
+       /* this is table 42 from the BKDG, ignoring footnote 4,
+        * with the EMPTY, EMPTY, EMPTY row added */
+       static const unsigned char cfgtable[][2] = {
+               [CFGIDX(EMPTY,          EMPTY,          EMPTY   )] = { DDR400, DDR400 },
+               [CFGIDX(X8S_X16,        EMPTY,          EMPTY   )] = { DDR400, DDR400 },
+               [CFGIDX(EMPTY,          X8S_X16,        EMPTY   )] = { DDR400, DDR400 },
+               [CFGIDX(EMPTY,          EMPTY,          X8S_X16 )] = { DDR400, DDR400 },
+               [CFGIDX(X8D,            EMPTY,          EMPTY   )] = { DDR400, DDR400 },
+               [CFGIDX(EMPTY,          X8D,            EMPTY   )] = { DDR400, DDR400 },
+               [CFGIDX(EMPTY,          EMPTY,          X8D     )] = { DDR400, DDR400 },
+               [CFGIDX(X8S_X16,        X8S_X16,        EMPTY   )] = { DDR400, DDR400 },
+               [CFGIDX(X8S_X16,        X8D,            EMPTY   )] = { DDR400, DDR400 },
+               [CFGIDX(X8S_X16,        EMPTY,          X8S_X16 )] = { DDR400, DDR400 },
+               [CFGIDX(X8S_X16,        EMPTY,          X8D     )] = { DDR400, DDR400 },
+               [CFGIDX(X8D,            X8S_X16,        EMPTY   )] = { DDR400, DDR400 },
+               [CFGIDX(X8D,            X8D,            EMPTY   )] = { DDR333, DDR333 },
+               [CFGIDX(X8D,            EMPTY,          X8S_X16 )] = { DDR400, DDR400 },
+               [CFGIDX(X8D,            EMPTY,          X8D     )] = { DDR333, DDR333 },
+               [CFGIDX(EMPTY,          X8S_X16,        X8S_X16 )] = { DDR333, DDR400 },
+               [CFGIDX(EMPTY,          X8S_X16,        X8D     )] = { DDR200, DDR400 },
+               [CFGIDX(EMPTY,          X8D,            X8S_X16 )] = { DDR200, DDR400 },
+               [CFGIDX(EMPTY,          X8D,            X8D     )] = { DDR200, DDR333 },
+               [CFGIDX(X8S_X16,        X8S_X16,        X8S_X16 )] = { DDR333, DDR400 },
+               [CFGIDX(X8S_X16,        X8S_X16,        X8D     )] = { DDR200, DDR333 },
+               [CFGIDX(X8S_X16,        X8D,            X8S_X16 )] = { DDR200, DDR333 },
+               [CFGIDX(X8S_X16,        X8D,            X8D     )] = { DDR200, DDR333 },
+               [CFGIDX(X8D,            X8S_X16,        X8S_X16 )] = { DDR333, DDR333 },
+               [CFGIDX(X8D,            X8S_X16,        X8D     )] = { DDR200, DDR333 },
+               [CFGIDX(X8D,            X8D,            X8S_X16 )] = { DDR200, DDR333 },
+               [CFGIDX(X8D,            X8D,            X8D     )] = { DDR200, DDR333 }
+       };
+
+       int i, rank, width, dimmtypes[3];
+       const unsigned char *cfg;
+
+       for (i = 0; i < 3; i++) {
+               if (dimm_mask & (1 << i)) {
+                       rank = spd_read_byte(ctrl->channel0[i], 5);
+                       width = spd_read_byte(ctrl->channel0[i], 13);
+                       if (rank < 0 || width < 0) die("failed to read SPD");
+                       width &= 0x7f;
+                       /* this is my guess as to how the criteria in the table
+                        * are to be understood:
+                        */
+                       dimmtypes[i] = width >= (rank == 1 ? 8 : 16) ? X8S_X16 : X8D;
+               } else {
+                       dimmtypes[i] = EMPTY;
+               }
+       }
+       cfg = cfgtable[CFGIDX(dimmtypes[0], dimmtypes[1], dimmtypes[2])];
+       *freq_1t = cfg[0];
+       return is_cpu_c0() ? cfg[0] : cfg[1];
+
+#else /* CONFIG_CPU_AMD_SOCKET_* */
+
+/* well, there are socket 940 boards supported which obviously fail to
+ * compile with this */
+//     #error load dependent memory clock limiting is not implemented for this socket
+
+       /* see BKDG 4.1.3--if you just want to test a setup that doesn't
+        * require limiting, you may use the following code */
+
+       *freq_1t = NBCAP_MEMCLK_200MHZ;
+       return NBCAP_MEMCLK_200MHZ;
+
+#endif /* CONFIG_CPU_AMD_SOCKET_* */
+
+}
+
 static struct spd_set_memclk_result spd_set_memclk(const struct mem_controller *ctrl, long dimm_mask)
 {
-       /* Compute the minimum cycle time for these dimms */
        struct spd_set_memclk_result result;
-       unsigned min_cycle_time, min_latency, bios_cycle_time;
-       int i;
+       unsigned char cl_at_freq[NBCAP_MEMCLK_MASK + 1];
+       int dimm, freq, max_freq_bios, max_freq_dloading, max_freq_1t;
        uint32_t value;
 
-       static const int latency_indicies[] = { 26, 23, 9 };
-       static const unsigned char min_cycle_times[] = {
+       static const uint8_t spd_min_cycle_time_indices[] = { 9, 23, 25 };
+       static const unsigned char cycle_time_at_freq[] = {
                [NBCAP_MEMCLK_200MHZ] = 0x50, /* 5ns */
                [NBCAP_MEMCLK_166MHZ] = 0x60, /* 6ns */
                [NBCAP_MEMCLK_133MHZ] = 0x75, /* 7.5ns */
                [NBCAP_MEMCLK_100MHZ] = 0xa0, /* 10ns */
        };
 
-
-       value = pci_read_config32(ctrl->f3, NORTHBRIDGE_CAP);
-       min_cycle_time = min_cycle_times[(value >> NBCAP_MEMCLK_SHIFT) & NBCAP_MEMCLK_MASK];
-       bios_cycle_time = min_cycle_times[
-               read_option(CMOS_VSTART_max_mem_clock, CMOS_VLEN_max_mem_clock, 0)];
-       if (bios_cycle_time > min_cycle_time) {
-               min_cycle_time = bios_cycle_time;
-       }
-       min_latency = 2;
-
-       /* Compute the least latency with the fastest clock supported
-        * by both the memory controller and the dimms.
+       /* BEWARE that the constants for frequencies order in reverse of what
+        * would be intuitive. 200 MHz has the lowest constant, 100 MHz the
+        * highest. Thus, all comparisons and traversal directions having to
+        * do with frequencies are/have to be the opposite of what would be
+        * intuitive.
         */
-       for(i = 0; i < DIMM_SOCKETS; i++) {
-               int new_cycle_time, new_latency;
-               int index;
-               int latencies;
-               int latency;
 
-               if (!(dimm_mask & (1 << i))) {
+       /* the CLs supported by the controller: */
+       memset(cl_at_freq, 0x1c, sizeof(cl_at_freq));
+       memset(cl_at_freq, 0x00,
+               (pci_read_config32(ctrl->f3, NORTHBRIDGE_CAP) >>
+                NBCAP_MEMCLK_SHIFT) & NBCAP_MEMCLK_MASK);
+       max_freq_bios = read_option(max_mem_clock, 0);
+       if (max_freq_bios <= NBCAP_MEMCLK_100MHZ)
+               memset(cl_at_freq, 0x00, max_freq_bios);
+       for (dimm = 0; dimm < DIMM_SOCKETS; dimm++) {
+               int x,i,spd_cls,cl,spd_min_cycle_time;
+               unsigned char cl_at_freq_mask[sizeof(cl_at_freq)];
+
+               if (!(dimm_mask & (1 << dimm)))
                        continue;
-               }
-
-               /* First find the supported CAS latencies
-                * Byte 18 for DDR SDRAM is interpreted:
+               /* Byte 18 for DDR SDRAM is interpreted:
                 * bit 0 == CAS Latency = 1.0
                 * bit 1 == CAS Latency = 1.5
                 * bit 2 == CAS Latency = 2.0
                 * bit 3 == CAS Latency = 2.5
                 * bit 4 == CAS Latency = 3.0
                 * bit 5 == CAS Latency = 3.5
-                * bit 6 == TBD
+                * bit 6 == CAS Latency = 4.0
                 * bit 7 == TBD
                 */
-               new_cycle_time = 0xa0;
-               new_latency = 5;
-
-               latencies = spd_read_byte(ctrl->channel0[i], 18);
-               if (latencies <= 0) continue;
-
-               /* Compute the lowest cas latency supported */
-               latency = log2(latencies) -2;
-
-               /* Loop through and find a fast clock with a low latency */
-               for(index = 0; index < 3; index++, latency++) {
-                       int value;
-                       if ((latency < 2) || (latency > 4) ||
-                               (!(latencies & (1 << latency)))) {
+               spd_cls = spd_read_byte(ctrl->channel0[dimm], 18);
+               if (spd_cls <= 0)
+                       goto hw_error;
+               memset(cl_at_freq_mask, 0x00, sizeof(cl_at_freq_mask));
+               for (cl = 1 << log2(spd_cls), i = 0; i < 3; cl >>= 1, i++) {
+                       if (!(spd_cls & cl))
                                continue;
-                       }
-                       value = spd_read_byte(ctrl->channel0[i], latency_indicies[index]);
-                       if (value < 0) {
+                       spd_min_cycle_time = spd_read_byte(ctrl->channel0[dimm],
+                                       spd_min_cycle_time_indices[i]);
+                       if (spd_min_cycle_time < 0)
                                goto hw_error;
-                       }
-
-                       /* Only increase the latency if we decreas the clock */
-                       if ((value >= min_cycle_time) && (value < new_cycle_time)) {
-                               new_cycle_time = value;
-                               new_latency = latency;
-                       }
-               }
-               if (new_latency > 4){
-                       continue;
-               }
-               /* Does min_latency need to be increased? */
-               if (new_cycle_time > min_cycle_time) {
-                       min_cycle_time = new_cycle_time;
-               }
-               /* Does min_cycle_time need to be increased? */
-               if (new_latency > min_latency) {
-                       min_latency = new_latency;
+                       if ((!spd_min_cycle_time) || (spd_min_cycle_time & 0x0f) > 9)
+                               continue;
+                       for (x = 0; x < sizeof(cl_at_freq_mask); x++)
+                               if (cycle_time_at_freq[x] >= spd_min_cycle_time)
+                                       cl_at_freq_mask[x] |= cl;
                }
+               for (x = 0; x < sizeof(cl_at_freq_mask); x++)
+                       cl_at_freq[x] &= cl_at_freq_mask[x];
        }
-       /* Make a second pass through the dimms and disable
-        * any that cannot support the selected memclk and cas latency.
-        */
-       
-       for(i = 0; (i < 4) && (ctrl->channel0[i]); i++) {
-               int latencies;
-               int latency;
-               int index;
-               int value;
-               if (!(dimm_mask & (1 << i))) {
-                       continue;
-               }
-               latencies = spd_read_byte(ctrl->channel0[i], 18);
-               if (latencies < 0) goto hw_error;
-               if (latencies == 0) {
-                       goto dimm_err;
-               }
 
-               /* Compute the lowest cas latency supported */
-               latency = log2(latencies) -2;
+       freq = NBCAP_MEMCLK_200MHZ;
+       while (freq < sizeof(cl_at_freq) && !cl_at_freq[freq])
+               freq++;
 
-               /* Walk through searching for the selected latency */
-               for(index = 0; index < 3; index++, latency++) {
-                       if (!(latencies & (1 << latency))) {
-                               continue;
-                       }
-                       if (latency == min_latency)
-                               break;
-               }
-               /* If I can't find the latency or my index is bad error */
-               if ((latency != min_latency) || (index >= 3)) {
-                       goto dimm_err;
-               }
-               
-               /* Read the min_cycle_time for this latency */
-               value = spd_read_byte(ctrl->channel0[i], latency_indicies[index]);
-               if (value < 0) goto hw_error;
-               
-               /* All is good if the selected clock speed 
-                * is what I need or slower.
-                */
-               if (value <= min_cycle_time) {
-                       continue;
-               }
-               /* Otherwise I have an error, disable the dimm */
-       dimm_err:
-               dimm_mask = disable_dimm(ctrl, i, dimm_mask);
+       max_freq_dloading = spd_dimm_loading_socket(ctrl, dimm_mask, &max_freq_1t);
+       if (max_freq_dloading > freq) {
+               printk(BIOS_WARNING, "Memory speed reduced due to signal loading conditions\n");
+               freq = max_freq_dloading;
+               while (freq < sizeof(cl_at_freq) && !cl_at_freq[freq])
+                       freq++;
        }
-#if 0
-//down speed for full load 4 rank support
-#if K8_4RANK_DIMM_SUPPORT
-       if(dimm_mask == (3|(3<<DIMM_SOCKETS)) ) {
-               int ranks = 4;
-               for(i = 0; (i < 4) && (ctrl->channel0[i]); i++) {
-                       int val;
-                       if (!(dimm_mask & (1 << i))) {
-                               continue;
-                       }
-                       val = spd_read_byte(ctrl->channel0[i], 5);
-                       if(val!=ranks) {
-                               ranks = val;
-                               break;
-                       }
-               }
-               if(ranks==4) {
-                       if(min_cycle_time <= 0x50 ) {
-                               min_cycle_time = 0x60;
-                       }
-               }
-               
+
+       /* if the next lower frequency gives a CL at least one whole cycle
+        * shorter, select that (see end of BKDG 4.1.1.1) */
+       if (freq < sizeof(cl_at_freq)-1 && cl_at_freq[freq+1] &&
+               log2f(cl_at_freq[freq]) - log2f(cl_at_freq[freq+1]) >= 2)
+                       freq++;
+
+       if (freq == sizeof(cl_at_freq))
+               goto hw_error;
+
+#if CONFIG_CPU_AMD_SOCKET_754
+       if (freq < max_freq_1t || CONFIG_K8_FORCE_2T_DRAM_TIMING) {
+               pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW,
+                       pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW) | DCL_En2T);
        }
 #endif
-#endif
-       /* Now that I know the minimum cycle time lookup the memory parameters */
-       result.param = get_mem_param(min_cycle_time);
+
+       result.param = get_mem_param(freq);
 
        /* Update DRAM Config High with our selected memory speed */
        value = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH);
        value &= ~(DCH_MEMCLK_MASK << DCH_MEMCLK_SHIFT);
 #if 0
        /* Improves DQS centering by correcting for case when core speed multiplier and MEMCLK speed result in odd clock divisor, by selecting the next lowest memory speed, required only at DDR400 and higher speeds with certain DIMM loadings ---- cheating???*/
-       if(!is_cpu_pre_e0()) {
-               if(min_cycle_time==0x50) {
+       if (!is_cpu_pre_e0()) {
+               if (min_cycle_time==0x50) {
                        value |= 1<<31;
                }
        }
@@ -1574,12 +1685,13 @@ static struct spd_set_memclk_result spd_set_memclk(const struct mem_controller *
        pci_write_config32(ctrl->f2, DRAM_CONFIG_HIGH, value);
 
        static const unsigned latencies[] = { DTL_CL_2, DTL_CL_2_5, DTL_CL_3 };
+
        /* Update DRAM Timing Low with our selected cas latency */
        value = pci_read_config32(ctrl->f2, DRAM_TIMING_LOW);
        value &= ~(DTL_TCL_MASK << DTL_TCL_SHIFT);
-       value |= latencies[min_latency - 2] << DTL_TCL_SHIFT;
+       value |= latencies[log2f(cl_at_freq[freq]) - 2] << DTL_TCL_SHIFT;
        pci_write_config32(ctrl->f2, DRAM_TIMING_LOW, value);
-       
+
        result.dimm_mask = dimm_mask;
        return result;
  hw_error:
@@ -1798,7 +1910,7 @@ static int update_dimm_x4(const struct mem_controller *ctrl, const struct mem_pa
 {
        uint32_t dcl;
        int value;
-#if K8_4RANK_DIMM_SUPPORT == 1
+#if CONFIG_QRANK_DIMM_SUPPORT
        int rank;
 #endif
        int dimm;
@@ -1807,16 +1919,16 @@ static int update_dimm_x4(const struct mem_controller *ctrl, const struct mem_pa
                return -1;
        }
 
-#if K8_4RANK_DIMM_SUPPORT == 1
-       rank = spd_read_byte(ctrl->channel0[i], 5);       /* number of physical banks */
+#if CONFIG_QRANK_DIMM_SUPPORT
+       rank = spd_read_byte(ctrl->channel0[i], 5);     /* number of physical banks */
        if (rank < 0) {
-               return -1;      
+               return -1;
        }
 #endif
 
        dimm = 1<<(DCL_x4DIMM_SHIFT+i);
-#if K8_4RANK_DIMM_SUPPORT == 1
-       if(rank==4) {
+#if CONFIG_QRANK_DIMM_SUPPORT
+       if (rank==4) {
                dimm |= 1<<(DCL_x4DIMM_SHIFT+i+2);
        }
 #endif
@@ -1850,7 +1962,7 @@ static int count_dimms(const struct mem_controller *ctrl)
        int dimms;
        unsigned index;
        dimms = 0;
-       for(index = 0; index < 8; index += 2) {
+       for (index = 0; index < 8; index += 2) {
                uint32_t csbase;
                csbase = pci_read_config32(ctrl->f2, (DRAM_CSBASE + (index << 2)));
                if (csbase & 1) {
@@ -1863,79 +1975,51 @@ static int count_dimms(const struct mem_controller *ctrl)
 static void set_Twtr(const struct mem_controller *ctrl, const struct mem_param *param)
 {
        uint32_t dth;
-       unsigned clocks;
-       clocks = 1; /* AMD says hard code this */
+
        dth = pci_read_config32(ctrl->f2, DRAM_TIMING_HIGH);
        dth &= ~(DTH_TWTR_MASK << DTH_TWTR_SHIFT);
-       dth |= ((clocks - DTH_TWTR_BASE) << DTH_TWTR_SHIFT);
+       dth |= ((param->dtl_twtr - DTH_TWTR_BASE) << DTH_TWTR_SHIFT);
        pci_write_config32(ctrl->f2, DRAM_TIMING_HIGH, dth);
 }
 
 static void set_Trwt(const struct mem_controller *ctrl, const struct mem_param *param)
 {
        uint32_t dth, dtl;
-       unsigned divisor;
        unsigned latency;
        unsigned clocks;
+       int lat, mtype;
 
        clocks = 0;
        dtl = pci_read_config32(ctrl->f2, DRAM_TIMING_LOW);
        latency = (dtl >> DTL_TCL_SHIFT) & DTL_TCL_MASK;
-       divisor = param->divisor;
 
        if (is_opteron(ctrl)) {
-               if (latency == DTL_CL_2) {
-                       if (divisor == ((6 << 0) + 0)) {
-                               /* 166Mhz */
-                               clocks = 3;
-                       }
-                       else if (divisor > ((6 << 0)+0)) {
-                               /* 100Mhz && 133Mhz */
-                               clocks = 2;
-                       }
-               }
-               else if (latency == DTL_CL_2_5) {
-                       clocks = 3;
-               }
-               else if (latency == DTL_CL_3) {
-                       if (divisor == ((6 << 0)+0)) {
-                               /* 166Mhz */
-                               clocks = 4;
-                       }
-                       else if (divisor > ((6 << 0)+0)) {
-                               /* 100Mhz && 133Mhz */
-                               clocks = 3;
-                       }
-               }
+               mtype = 0; /* dual channel */
+       } else if (is_registered(ctrl)) {
+               mtype = 1; /* registered 64bit interface */
+       } else {
+               mtype = 2; /* unbuffered 64bit interface */
        }
-       else /* Athlon64 */ {
-               if (is_registered(ctrl)) {
-                       if (latency == DTL_CL_2) {
-                               clocks = 2;
-                       }
-                       else if (latency == DTL_CL_2_5) {
-                               clocks = 3;
-                       }
-                       else if (latency == DTL_CL_3) {
-                               clocks = 3;
-                       }
-               }
-               else /* Unbuffered */{
-                       if (latency == DTL_CL_2) {
-                               clocks = 3;
-                       }
-                       else if (latency == DTL_CL_2_5) {
-                               clocks = 4;
-                       }
-                       else if (latency == DTL_CL_3) {
-                               clocks = 4;
-                       }
-               }
+
+       switch (latency) {
+               case DTL_CL_2:
+                       lat = 0;
+                       break;
+               case DTL_CL_2_5:
+                       lat = 1;
+                       break;
+               case DTL_CL_3:
+                       lat = 2;
+                       break;
+               default:
+                       die("Unknown LAT for Trwt");
        }
+
+       clocks = param->dtl_trwt[lat][mtype];
        if ((clocks < DTH_TRWT_MIN) || (clocks > DTH_TRWT_MAX)) {
-               die("Unknown Trwt\r\n");
+               die("Unknown Trwt\n");
        }
-       
+
        dth = pci_read_config32(ctrl->f2, DRAM_TIMING_HIGH);
        dth &= ~(DTH_TRWT_MASK << DTH_TRWT_SHIFT);
        dth |= ((clocks - DTH_TRWT_BASE) << DTH_TRWT_SHIFT);
@@ -1963,83 +2047,38 @@ static void set_Twcl(const struct mem_controller *ctrl, const struct mem_param *
 static void set_read_preamble(const struct mem_controller *ctrl, const struct mem_param *param)
 {
        uint32_t dch;
-       unsigned divisor;
        unsigned rdpreamble;
-       divisor = param->divisor;
-       dch = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH);
-       dch &= ~(DCH_RDPREAMBLE_MASK << DCH_RDPREAMBLE_SHIFT);
-       rdpreamble = 0;
-       if (is_registered(ctrl)) {
-               if (divisor == ((10 << 1)+0)) {
-                       /* 100Mhz, 9ns */
-                       rdpreamble = ((9 << 1)+ 0);
-               }
-               else if (divisor == ((7 << 1)+1)) {
-                       /* 133Mhz, 8ns */
-                       rdpreamble = ((8 << 1)+0);
-               }
-               else if (divisor == ((6 << 1)+0)) {
-                       /* 166Mhz, 7.5ns */
-                       rdpreamble = ((7 << 1)+1);
-               }
-               else if (divisor == ((5 << 1)+0)) {
-                       /* 200Mhz,  7ns */
-                       rdpreamble = ((7 << 1)+0);
+       int slots, i;
+
+       slots = 0;
+
+       for (i = 0; i < 4; i++) {
+               if (ctrl->channel0[i]) {
+                       slots += 1;
                }
        }
-       else {
-               int slots;
-               int i;
-               slots = 0;
-               for(i = 0; i < 4; i++) {
-                       if (ctrl->channel0[i]) {
-                               slots += 1;
-                       }
-               }
-               if (divisor == ((10 << 1)+0)) {
-                       /* 100Mhz */
-                       if (slots <= 2) {
-                               /* 9ns */
-                               rdpreamble = ((9 << 1)+0);
-                       } else {
-                               /* 14ns */
-                               rdpreamble = ((14 << 1)+0);
-                       }
-               }
-               else if (divisor == ((7 << 1)+1)) {
-                       /* 133Mhz */
-                       if (slots <= 2) {
-                               /* 7ns */
-                               rdpreamble = ((7 << 1)+0);
-                       } else {
-                               /* 11 ns */
-                               rdpreamble = ((11 << 1)+0);
-                       }
-               }
-               else if (divisor == ((6 << 1)+0)) {
-                       /* 166Mhz */
-                       if (slots <= 2) {
-                               /* 6ns */
-                               rdpreamble = ((7 << 1)+0);
-                       } else {
-                               /* 9ns */
-                               rdpreamble = ((9 << 1)+0);
-                       }
-               }
-               else if (divisor == ((5 << 1)+0)) {
-                       /* 200Mhz */
-                       if (slots <= 2) {
-                               /* 5ns */
-                               rdpreamble = ((5 << 1)+0);
-                       } else {
-                               /* 7ns */
-                               rdpreamble = ((7 << 1)+0);
-                       }
-               }
+
+       /* map to index to param.rdpreamble array */
+       if (is_registered(ctrl)) {
+               i = 0;
+       } else if (slots < 3) {
+               i = 1;
+       } else if (slots == 3) {
+               i = 2;
+       } else if (slots == 4) {
+               i = 3;
+       } else {
+               die("Unknown rdpreamble for this nr of slots");
        }
+
+       dch = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH);
+       dch &= ~(DCH_RDPREAMBLE_MASK << DCH_RDPREAMBLE_SHIFT);
+       rdpreamble = param->rdpreamble[i];
+
        if ((rdpreamble < DCH_RDPREAMBLE_MIN) || (rdpreamble > DCH_RDPREAMBLE_MAX)) {
                die("Unknown rdpreamble");
        }
+
        dch |= (rdpreamble - DCH_RDPREAMBLE_BASE) << DCH_RDPREAMBLE_SHIFT;
        pci_write_config32(ctrl->f2, DRAM_CONFIG_HIGH, dch);
 }
@@ -2059,7 +2098,7 @@ static void set_max_async_latency(const struct mem_controller *ctrl, const struc
                if (dimms == 4) {
                        /* 9ns */
                        async_lat = 9;
-               } 
+               }
                else {
                        /* 8ns */
                        async_lat = 8;
@@ -2096,9 +2135,9 @@ static void set_idle_cycle_limit(const struct mem_controller *ctrl, const struct
 static long spd_set_dram_timing(const struct mem_controller *ctrl, const struct mem_param *param, long dimm_mask)
 {
        int i;
-       
+
        init_Tref(ctrl, param);
-       for(i = 0; i < DIMM_SOCKETS; i++) {
+       for (i = 0; i < DIMM_SOCKETS; i++) {
                int rc;
                if (!(dimm_mask & (1 << i))) {
                        continue;
@@ -2113,7 +2152,7 @@ static long spd_set_dram_timing(const struct mem_controller *ctrl, const struct
 
                /* DRAM Timing High Register */
                if ((rc = update_dimm_Tref(ctrl, param, i)) <= 0) goto dimm_err;
-       
+
 
                /* DRAM Config Low */
                if ((rc = update_dimm_x4 (ctrl, param, i)) <= 0) goto dimm_err;
@@ -2140,14 +2179,18 @@ static long spd_set_dram_timing(const struct mem_controller *ctrl, const struct
        return dimm_mask;
 }
 
-static void sdram_set_spd_registers(const struct mem_controller *ctrl) 
+#if CONFIG_RAMINIT_SYSINFO
+static void sdram_set_spd_registers(const struct mem_controller *ctrl, struct sys_info *sysinfo)
+#else
+static void sdram_set_spd_registers(const struct mem_controller *ctrl)
+#endif
 {
        struct spd_set_memclk_result result;
        const struct mem_param *param;
        long dimm_mask;
 #if 1
        if (!controller_present(ctrl)) {
-//             print_debug("No memory controller present\r\n");
+//             printk(BIOS_DEBUG, "No memory controller present\n");
                return;
        }
 #endif
@@ -2155,22 +2198,22 @@ static void sdram_set_spd_registers(const struct mem_controller *ctrl)
        activate_spd_rom(ctrl);
        dimm_mask = spd_detect_dimms(ctrl);
        if (!(dimm_mask & ((1 << DIMM_SOCKETS) - 1))) {
-               print_debug("No memory for this cpu\r\n");
+               printk(BIOS_DEBUG, "No memory for this cpu\n");
                return;
        }
-       dimm_mask = spd_enable_2channels(ctrl, dimm_mask);        
-       if (dimm_mask < 0) 
+       dimm_mask = spd_enable_2channels(ctrl, dimm_mask);
+       if (dimm_mask < 0)
                goto hw_spd_err;
-       dimm_mask = spd_set_ram_size(ctrl , dimm_mask);           
-       if (dimm_mask < 0) 
+       dimm_mask = spd_set_ram_size(ctrl , dimm_mask);
+       if (dimm_mask < 0)
                goto hw_spd_err;
-       dimm_mask = spd_handle_unbuffered_dimms(ctrl, dimm_mask); 
-       if (dimm_mask < 0) 
+       dimm_mask = spd_handle_unbuffered_dimms(ctrl, dimm_mask);
+       if (dimm_mask < 0)
                goto hw_spd_err;
        result = spd_set_memclk(ctrl, dimm_mask);
        param     = result.param;
        dimm_mask = result.dimm_mask;
-       if (dimm_mask < 0) 
+       if (dimm_mask < 0)
                goto hw_spd_err;
        dimm_mask = spd_set_dram_timing(ctrl, param , dimm_mask);
        if (dimm_mask < 0)
@@ -2179,23 +2222,145 @@ static void sdram_set_spd_registers(const struct mem_controller *ctrl)
        return;
  hw_spd_err:
        /* Unrecoverable error reading SPD data */
-       print_err("SPD error - reset\r\n");
+       printk(BIOS_ERR, "SPD error - reset\n");
        hard_reset();
        return;
 }
 
+#if CONFIG_HW_MEM_HOLE_SIZEK != 0
+/*
+ * Hoist the DRAM that would sit under the memory hole [hole_startk, 4GB)
+ * so that it is mapped above the hole instead.
+ *
+ * controllers: number of entries in ctrl[]
+ * ctrl:        per-node memory controller descriptors
+ * hole_startk: start of the hole, in KB
+ * i:           index of the node whose DRAM range contains hole_startk
+ *
+ * Every node above i has its base and limit raised by the hole size, node i
+ * has its limit raised, and node i either gets its base moved to 4GB (when
+ * the hole begins exactly at its base) or has register 0xf0 on its function 1
+ * programmed with the hoist value.
+ *
+ * Returns the size of the hole in KB (the amount the memory was shifted by).
+ */
+static uint32_t hoist_memory(int controllers, const struct mem_controller *ctrl,unsigned hole_startk, int i)
+{
+       int ii;
+       uint32_t carry_over;
+       device_t dev;
+       uint32_t base, limit;
+       uint32_t basek;
+       uint32_t hoist;
+       int j;
+
+       /* 4*1024*1024 KB == 4GB, so this is the size of the hole in KB */
+       carry_over = (4*1024*1024) - hole_startk;
+
+       /* Walk the nodes above i from the top down and shift each enabled range up */
+       for (ii=controllers - 1;ii>i;ii--) {
+               base  = pci_read_config32(ctrl[0].f1, 0x40 + (ii << 3));
+               /* Skip ranges that do not have both low bits set
+                * (presumably read/write enable -- TODO confirm against the BKDG)
+                */
+               if ((base & ((1<<1)|(1<<0))) != ((1<<1)|(1<<0))) {
+                       continue;
+               }
+               limit = pci_read_config32(ctrl[0].f1, 0x44 + (ii << 3));
+               /* The same base/limit pair is written to function 1 of every node.
+                * A KB value is shifted left by 2 to land in the register's address
+                * field (the inverse of the ">> 2" used to compute basek below).
+                */
+               for (j = 0; j < controllers; j++) {
+                       pci_write_config32(ctrl[j].f1, 0x44 + (ii << 3), limit + (carry_over << 2));
+                       pci_write_config32(ctrl[j].f1, 0x40 + (ii << 3), base + (carry_over << 2));
+               }
+       }
+       /* Node i itself: only the limit grows, its base is handled below */
+       limit = pci_read_config32(ctrl[0].f1, 0x44 + (i << 3));
+       for (j = 0; j < controllers; j++) {
+               pci_write_config32(ctrl[j].f1, 0x44 + (i << 3), limit + (carry_over << 2));
+       }
+       dev = ctrl[i].f1;
+       base  = pci_read_config32(dev, 0x40 + (i << 3));
+       basek  = (base & 0xffff0000) >> 2;
+       if (basek == hole_startk) {
+               /* No need to set the memory hole register here: the hole offset
+                * would be 0 (overflow). Change the base register instead, so
+                * the new basek becomes 4*1024*1024.
+                */
+               base &= 0x0000ffff;
+               base |= (4*1024*1024)<<2;
+               for (j = 0; j < controllers; j++) {
+                       pci_write_config32(ctrl[j].f1, 0x40 + (i<<3), base);
+               }
+       }
+       else {
+               hoist = /* hole start address */
+                       ((hole_startk << 10) & 0xff000000) +
+                       /* hole address to memory controller address */
+                       (((basek + carry_over) >> 6) & 0x0000ff00) +
+                       /* enable */
+                       1;
+               /* Only node i's own function 1 gets the hoist register */
+               pci_write_config32(dev, 0xf0, hoist);
+       }
+
+       return carry_over;
+}
+
+/*
+ * Configure the hardware memory hole below 4GB.
+ *
+ * controllers: number of entries in ctrl[]
+ * ctrl:        per-node memory controller descriptors
+ *
+ * The hole starts CONFIG_HW_MEM_HOLE_SIZEK KB below 4GB. The first enabled
+ * node whose DRAM range contains that start address has its memory hoisted
+ * via hoist_memory(), after which top-of-memory is reprogrammed from the new
+ * end of DRAM. At most one node is hoisted.
+ */
+static void set_hw_mem_hole(int controllers, const struct mem_controller *ctrl)
+{
+       uint32_t hole_startk;
+       int i;
+
+       /* 4*1024*1024 KB == 4GB */
+       hole_startk = 4*1024*1024 - CONFIG_HW_MEM_HOLE_SIZEK;
+
+       printk(BIOS_SPEW, "Handling memory hole at 0x%08x (default)\n", hole_startk);
+#if CONFIG_HW_MEM_HOLE_SIZE_AUTO_INC == 1
+       /* We need to double check if hole_startk is valid.
+        * If it is equal to the dram base address in K (base_k),
+        * we need to decrease it.
+        */
+       /* Base of the previous enabled node. Starts at 0 so that a match on
+        * the very first enabled node does not read an indeterminate value.
+        */
+       uint32_t basek_pri = 0;
+       for (i=0; i<controllers; i++) {
+               uint32_t base;
+               unsigned base_k;
+               base  = pci_read_config32(ctrl[0].f1, 0x40 + (i << 3));
+               /* Skip ranges that do not have both low bits set */
+               if ((base & ((1<<1)|(1<<0))) != ((1<<1)|(1<<0))) {
+                       continue;
+               }
+               base_k = (base & 0xffff0000) >> 2;
+               if (base_k == hole_startk) {
+                       /* decrease memory hole startk to make sure it is
+                        * in the middle of the previous node
+                        */
+                       hole_startk -= (base_k - basek_pri)>>1;
+                       break; /* only one hole */
+               }
+               basek_pri = base_k;
+       }
+
+       printk(BIOS_SPEW, "Handling memory hole at 0x%08x (adjusted)\n", hole_startk);
+#endif
+       /* Find node number that needs the memory hole configured */
+       for (i=0; i<controllers; i++) {
+               uint32_t base, limit;
+               unsigned base_k, limit_k;
+               base  = pci_read_config32(ctrl[0].f1, 0x40 + (i << 3));
+               if ((base & ((1<<1)|(1<<0))) != ((1<<1)|(1<<0))) {
+                       continue;
+               }
+               limit = pci_read_config32(ctrl[0].f1, 0x44 + (i << 3));
+               base_k = (base & 0xffff0000) >> 2;
+               /* Adding one unit to the limit field gives the first KB past
+                * this node's range, so limit_k is compared as an exclusive end.
+                */
+               limit_k = ((limit + 0x00010000) & 0xffff0000) >> 2;
+               if ((base_k <= hole_startk) && (limit_k > hole_startk)) {
+                       unsigned end_k;
+                       hoist_memory(controllers, ctrl, hole_startk, i);
+                       /* DRAM now ends higher; refresh top-of-memory */
+                       end_k = memory_end_k(ctrl, controllers);
+                       set_top_mem(end_k, hole_startk);
+                       break; /* only one hole */
+               }
+       }
+
+}
+
+#endif
+
 #define TIMEOUT_LOOPS 300000
+#if CONFIG_RAMINIT_SYSINFO
+static void sdram_enable(int controllers, const struct mem_controller *ctrl, struct sys_info *sysinfo)
+#else
 static void sdram_enable(int controllers, const struct mem_controller *ctrl)
+#endif
 {
        int i;
+       u32 whatWait = 0;
+#if CONFIG_HAVE_ACPI_RESUME == 1
+       int suspend = acpi_is_wakeup_early();
+#else
+       int suspend = 0;
+#endif
 
        /* Error if I don't have memory */
        if (memory_end_k(ctrl, controllers) == 0) {
-               die("No memory\r\n");
+               die("No memory\n");
        }
 
        /* Before enabling memory start the memory clocks */
-       for(i = 0; i < controllers; i++) {
+       for (i = 0; i < controllers; i++) {
                uint32_t dch;
                if (!controller_present(ctrl + i))
                        continue;
@@ -2213,10 +2378,11 @@ static void sdram_enable(int controllers, const struct mem_controller *ctrl)
                }
        }
 
+       /* We need to wait a minimum of 20 MEMCLKS to enable the InitDram */
        /* And if necessary toggle the the reset on the dimms by hand */
        memreset(controllers, ctrl);
 
-       for(i = 0; i < controllers; i++) {
+       for (i = 0; i < controllers; i++) {
                uint32_t dcl, dch;
                if (!controller_present(ctrl + i))
                        continue;
@@ -2230,7 +2396,7 @@ static void sdram_enable(int controllers, const struct mem_controller *ctrl)
                dcl = pci_read_config32(ctrl[i].f2, DRAM_CONFIG_LOW);
                if (dcl & DCL_DimmEccEn) {
                        uint32_t mnc;
-                       print_spew("ECC enabled\r\n");
+                       printk(BIOS_SPEW, "ECC enabled\n");
                        mnc = pci_read_config32(ctrl[i].f3, MCA_NB_CONFIG);
                        mnc |= MNC_ECC_EN;
                        if (dcl & DCL_128BitEn) {
@@ -2238,17 +2404,35 @@ static void sdram_enable(int controllers, const struct mem_controller *ctrl)
                        }
                        pci_write_config32(ctrl[i].f3, MCA_NB_CONFIG, mnc);
                }
-               dcl |= DCL_DisDqsHys;
-               pci_write_config32(ctrl[i].f2, DRAM_CONFIG_LOW, dcl);
+
+               if (!suspend) {
+                       dcl |= DCL_DisDqsHys;
+                       pci_write_config32(ctrl[i].f2, DRAM_CONFIG_LOW, dcl);
+               }
                dcl &= ~DCL_DisDqsHys;
                dcl &= ~DCL_DLL_Disable;
                dcl &= ~DCL_D_DRV;
                dcl &= ~DCL_QFC_EN;
-               dcl |= DCL_DramInit;
-               pci_write_config32(ctrl[i].f2, DRAM_CONFIG_LOW, dcl);
 
+               if (suspend) {
+                       enable_lapic();
+                       init_timer();
+                       dcl |= (DCL_ESR | DCL_SRS);
+                       /* Handle errata 85 Insufficient Delay Between MEMCLK Startup
+                          and CKE Assertion During Resume From S3 */
+                       udelay(10); /* for unregistered */
+                       if (is_registered(&ctrl[i])) {
+                               udelay(100); /* 110us for registered (we wait 10us already) */
+                       }
+                       whatWait = DCL_ESR;
+               } else {
+                       dcl |= DCL_DramInit;
+                       whatWait = DCL_DramInit;
+               }
+               pci_write_config32(ctrl[i].f2, DRAM_CONFIG_LOW, dcl);
        }
-       for(i = 0; i < controllers; i++) {
+
+       for (i = 0; i < controllers; i++) {
                uint32_t dcl, dch;
                if (!controller_present(ctrl + i))
                        continue;
@@ -2258,47 +2442,43 @@ static void sdram_enable(int controllers, const struct mem_controller *ctrl)
                        continue;
                }
 
-               print_debug("Initializing memory: ");
+               printk(BIOS_DEBUG, "Initializing memory: ");
                int loops = 0;
                do {
                        dcl = pci_read_config32(ctrl[i].f2, DRAM_CONFIG_LOW);
-                       loops += 1;
+                       loops++;
                        if ((loops & 1023) == 0) {
-                               print_debug(".");
+                               printk(BIOS_DEBUG, ".");
                        }
-               } while(((dcl & DCL_DramInit) != 0) && (loops < TIMEOUT_LOOPS));
+               } while(((dcl & whatWait) != 0) && (loops < TIMEOUT_LOOPS));
                if (loops >= TIMEOUT_LOOPS) {
-                       print_debug(" failed\r\n");
+                       printk(BIOS_DEBUG, " failed\n");
                        continue;
                }
+
                if (!is_cpu_pre_c0()) {
                        /* Wait until it is safe to touch memory */
+#if 0
+                       /* the registers are marked read-only but code zeros them */
                        dcl &= ~(DCL_MemClrStatus | DCL_DramEnable);
                        pci_write_config32(ctrl[i].f2, DRAM_CONFIG_LOW, dcl);
+#endif
                        do {
                                dcl = pci_read_config32(ctrl[i].f2, DRAM_CONFIG_LOW);
-                       } while(((dcl & DCL_MemClrStatus) == 0) || ((dcl & DCL_DramEnable) == 0) );
+                       } while(((dcl & DCL_MemClrStatus) == 0) || ((dcl & DCL_DramEnable) == 0) ||
+                                       ((dcl & DCL_SRS)));
                }
 
-                       // init e0 mem hole here
-#if K8_E0_MEM_HOLE_SIZEK != 0
-               if (!is_cpu_pre_e0()) {
-                        uint32_t base, limit;
-                        unsigned base_k, limit_k;
-                        base  = pci_read_config32(ctrl->f1, 0x40 + (i << 3));
-                        limit = pci_read_config32(ctrl->f1, 0x44 + (i << 3));
-                        base_k = (base & 0xffff0000) >> 2;
-                        limit_k = ((limit + 0x00010000) & 0xffff0000) >> 2;
-                        if ((base_k <= K8_E0_MEM_HOLE_BASEK) && (limit_k > K8_E0_MEM_HOLE_BASEK)) {
-                                set_e0_mem_hole(ctrl+i, base_k);
-                        }
-                }
-        
-#endif  
-
-               print_debug(" done\r\n");
+               printk(BIOS_DEBUG, " done\n");
        }
 
+#if CONFIG_HW_MEM_HOLE_SIZEK != 0
+        // init hw mem hole here
+       /* DramHoleValid bit only can be set after MemClrStatus is set by Hardware */
+       if (!is_cpu_pre_e0())
+               set_hw_mem_hole(controllers, ctrl);
+#endif
+
        //FIXME add enable node interleaving here -- yhlu
        /*needed?
                1. check how many nodes we have , if not all has ram installed get out
@@ -2308,19 +2488,31 @@ static void sdram_enable(int controllers, const struct mem_controller *ctrl)
                5. for node interleaving we need to set mem hole to every node ( need recalcute hole offset in f0 for every node)
        */
 
-#if CONFIG_DCACHE_RAM == 0
-       /* Make certain the first 1M of memory is intialized */
-       print_debug("Clearing initial memory region: ");
-
-       /* Use write combine caching while we setup the  first 1M */
-       cache_lbmem(MTRR_TYPE_WRCOMB);
+}
 
-       /* clear memory 1meg */
-       clear_memory((void *)0, CONFIG_LB_MEM_TOPK << 10);
+/* Empty stub: the body does nothing and val is ignored. */
+static void set_sysinfo_in_ram(unsigned val)
+{
+}
 
-       /* The first 1M is now setup, use it */
-       cache_lbmem(MTRR_TYPE_WRBACK);
-       
-       print_debug(" done\r\n");
-#endif
+/*
+ * Populate one mem_controller descriptor per node.
+ *
+ * controllers: number of entries in ctrl_a[] to fill
+ * ctrl_a:      array of descriptors to initialize
+ * spd_addr:    table of SPD addresses, or a null pointer to leave the
+ *              channel0/channel1 arrays untouched. For node i, the channel 0
+ *              entries start at index (i*2+0)*DIMM_SOCKETS and the channel 1
+ *              entries at (i*2+1)*DIMM_SOCKETS.
+ */
+void fill_mem_ctrl(int controllers, struct mem_controller *ctrl_a,
+                         const uint16_t *spd_addr)
+{
+       int i;
+       int j;
+       struct mem_controller *ctrl;
+       for (i=0;i<controllers; i++) {
+               ctrl = &ctrl_a[i];
+               ctrl->node_id = i;
+               /* Node i: PCI_DEV(0, 0x18+i, fn) for fn 0..3
+                * (presumably bus, device, function -- TODO confirm)
+                */
+               ctrl->f0 = PCI_DEV(0, 0x18+i, 0);
+               ctrl->f1 = PCI_DEV(0, 0x18+i, 1);
+               ctrl->f2 = PCI_DEV(0, 0x18+i, 2);
+               ctrl->f3 = PCI_DEV(0, 0x18+i, 3);
+
+               /* No SPD table supplied: only the device handles are filled in */
+               if (spd_addr == (void *)0) continue;
+
+               for (j=0;j<DIMM_SOCKETS;j++) {
+                       ctrl->channel0[j] = spd_addr[(i*2+0)*DIMM_SOCKETS + j];
+                       ctrl->channel1[j] = spd_addr[(i*2+1)*DIMM_SOCKETS + j];
+               }
+       }
 }