Move C labels to start-of-line
[coreboot.git] / src / northbridge / amd / amdk8 / raminit.c
index 437ef2655da77b1db50cd424a3b679efa2d64a93..9cb7c60004b7d1eddefab539af5c68b695c8ec5b 100644 (file)
-#include <cpu/k8/mtrr.h>
+/*     This should be done by Eric
+       2004.11 yhlu add 4 rank DIMM support
+       2004.12 yhlu add D0 support
+       2005.02 yhlu add E0 memory hole support
+*/
+
+#include <cpu/x86/cache.h>
+#include <cpu/x86/mtrr.h>
+#include <stdlib.h>
+#include <reset.h>
 #include "raminit.h"
+#include "amdk8.h"
+#if CONFIG_HAVE_OPTION_TABLE
+#include "option_table.h"
+#endif
+
+#if (CONFIG_RAMTOP & (CONFIG_RAMTOP -1)) != 0
+# error "CONFIG_RAMTOP must be a power of 2"
+#endif
 
-#define ENABLE_IOMMU 1
-
-/* Function 2 */
-#define DRAM_CSBASE       0x40
-#define DRAM_CSMASK       0x60
-#define DRAM_BANK_ADDR_MAP 0x80
-#define DRAM_TIMING_LOW           0x88
-#define         DTL_TCL_SHIFT     0
-#define         DTL_TCL_MASK      0x7
-#define          DTL_CL_2         1
-#define          DTL_CL_3         2
-#define          DTL_CL_2_5       5
-#define         DTL_TRC_SHIFT     4
-#define         DTL_TRC_MASK      0xf
-#define          DTL_TRC_BASE     7
-#define          DTL_TRC_MIN      7
-#define          DTL_TRC_MAX      22
-#define         DTL_TRFC_SHIFT    8
-#define         DTL_TRFC_MASK     0xf
-#define          DTL_TRFC_BASE    9
-#define          DTL_TRFC_MIN     9
-#define          DTL_TRFC_MAX     24
-#define         DTL_TRCD_SHIFT    12
-#define         DTL_TRCD_MASK     0x7
-#define          DTL_TRCD_BASE    0
-#define          DTL_TRCD_MIN     2
-#define          DTL_TRCD_MAX     6
-#define         DTL_TRRD_SHIFT    16
-#define         DTL_TRRD_MASK     0x7
-#define          DTL_TRRD_BASE    0
-#define          DTL_TRRD_MIN     2
-#define          DTL_TRRD_MAX     4
-#define         DTL_TRAS_SHIFT    20
-#define         DTL_TRAS_MASK     0xf
-#define          DTL_TRAS_BASE    0
-#define          DTL_TRAS_MIN     5
-#define          DTL_TRAS_MAX     15
-#define         DTL_TRP_SHIFT     24
-#define         DTL_TRP_MASK      0x7
-#define          DTL_TRP_BASE     0
-#define          DTL_TRP_MIN      2
-#define          DTL_TRP_MAX      6
-#define         DTL_TWR_SHIFT     28
-#define         DTL_TWR_MASK      0x1
-#define          DTL_TWR_BASE     2
-#define          DTL_TWR_MIN      2
-#define          DTL_TWR_MAX      3
-#define DRAM_TIMING_HIGH   0x8c
-#define         DTH_TWTR_SHIFT    0
-#define         DTH_TWTR_MASK     0x1
-#define          DTH_TWTR_BASE    1
-#define          DTH_TWTR_MIN     1
-#define          DTH_TWTR_MAX     2
-#define         DTH_TRWT_SHIFT    4
-#define         DTH_TRWT_MASK     0x7
-#define          DTH_TRWT_BASE    1
-#define          DTH_TRWT_MIN     1
-#define          DTH_TRWT_MAX     6
-#define         DTH_TREF_SHIFT    8
-#define         DTH_TREF_MASK     0x1f
-#define          DTH_TREF_100MHZ_4K 0x00
-#define          DTH_TREF_133MHZ_4K 0x01
-#define          DTH_TREF_166MHZ_4K 0x02
-#define          DTH_TREF_200MHZ_4K 0x03
-#define          DTH_TREF_100MHZ_8K 0x08
-#define          DTH_TREF_133MHZ_8K 0x09
-#define          DTH_TREF_166MHZ_8K 0x0A
-#define          DTH_TREF_200MHZ_8K 0x0B
-#define         DTH_TWCL_SHIFT     20
-#define         DTH_TWCL_MASK      0x7
-#define          DTH_TWCL_BASE     1
-#define          DTH_TWCL_MIN      1
-#define          DTH_TWCL_MAX      2
-#define DRAM_CONFIG_LOW           0x90
-#define         DCL_DLL_Disable   (1<<0)
-#define         DCL_D_DRV         (1<<1)
-#define         DCL_QFC_EN        (1<<2)
-#define         DCL_DisDqsHys     (1<<3)
-#define         DCL_DramInit      (1<<8)
-#define         DCL_DramEnable    (1<<10)
-#define         DCL_MemClrStatus  (1<<11)
-#define         DCL_ESR           (1<<12)
-#define         DCL_SRS           (1<<13)
-#define         DCL_128BitEn      (1<<16)
-#define         DCL_DimmEccEn     (1<<17)
-#define         DCL_UnBufDimm     (1<<18)
-#define         DCL_32ByteEn      (1<<19)
-#define         DCL_x4DIMM_SHIFT  20
-#define DRAM_CONFIG_HIGH   0x94
-#define         DCH_ASYNC_LAT_SHIFT  0
-#define         DCH_ASYNC_LAT_MASK   0xf
-#define          DCH_ASYNC_LAT_BASE  0
-#define          DCH_ASYNC_LAT_MIN   0
-#define          DCH_ASYNC_LAT_MAX   15
-#define         DCH_RDPREAMBLE_SHIFT 8
-#define         DCH_RDPREAMBLE_MASK  0xf
-#define          DCH_RDPREAMBLE_BASE ((2<<1)+0) /* 2.0 ns */
-#define          DCH_RDPREAMBLE_MIN  ((2<<1)+0) /* 2.0 ns */
-#define          DCH_RDPREAMBLE_MAX  ((9<<1)+1) /* 9.5 ns */
-#define         DCH_IDLE_LIMIT_SHIFT 16
-#define         DCH_IDLE_LIMIT_MASK  0x7
-#define          DCH_IDLE_LIMIT_0    0
-#define          DCH_IDLE_LIMIT_4    1
-#define          DCH_IDLE_LIMIT_8    2
-#define          DCH_IDLE_LIMIT_16   3
-#define          DCH_IDLE_LIMIT_32   4
-#define          DCH_IDLE_LIMIT_64   5
-#define          DCH_IDLE_LIMIT_128  6
-#define          DCH_IDLE_LIMIT_256  7
-#define         DCH_DYN_IDLE_CTR_EN (1 << 19)
-#define         DCH_MEMCLK_SHIFT     20
-#define         DCH_MEMCLK_MASK      0x7
-#define          DCH_MEMCLK_100MHZ   0
-#define          DCH_MEMCLK_133MHZ   2
-#define          DCH_MEMCLK_166MHZ   5
-#define          DCH_MEMCLK_200MHZ   7
-#define         DCH_MEMCLK_VALID     (1 << 25)
-#define         DCH_MEMCLK_EN0       (1 << 26) 
-#define         DCH_MEMCLK_EN1       (1 << 27) 
-#define         DCH_MEMCLK_EN2       (1 << 28) 
-#define         DCH_MEMCLK_EN3       (1 << 29) 
-
-/* Function 3 */
-#define MCA_NB_CONFIG      0x44
-#define   MNC_ECC_EN       (1 << 22)
-#define   MNC_CHIPKILL_EN  (1 << 23)
-#define SCRUB_CONTROL     0x58
-#define          SCRUB_NONE        0
-#define          SCRUB_40ns        1
-#define          SCRUB_80ns        2
-#define          SCRUB_160ns       3
-#define          SCRUB_320ns       4
-#define          SCRUB_640ns       5
-#define          SCRUB_1_28us      6
-#define          SCRUB_2_56us      7
-#define          SCRUB_5_12us      8
-#define          SCRUB_10_2us      9
-#define          SCRUB_20_5us     10
-#define          SCRUB_41_0us     11
-#define          SCRUB_81_9us     12
-#define          SCRUB_163_8us    13
-#define          SCRUB_327_7us    14
-#define          SCRUB_655_4us    15
-#define          SCRUB_1_31ms     16
-#define          SCRUB_2_62ms     17
-#define          SCRUB_5_24ms     18 
-#define          SCRUB_10_49ms    19
-#define          SCRUB_20_97ms    20
-#define          SCRUB_42ms       21
-#define          SCRUB_84ms       22
-#define         SC_DRAM_SCRUB_RATE_SHFIT  0
-#define         SC_DRAM_SCRUB_RATE_MASK   0x1f
-#define         SC_L2_SCRUB_RATE_SHIFT    8
-#define         SC_L2_SCRUB_RATE_MASK     0x1f
-#define         SC_L1D_SCRUB_RATE_SHIFT   16
-#define         SC_L1D_SCRUB_RATE_MASK    0x1f
-#define SCRUB_ADDR_LOW    0x5C
-#define SCRUB_ADDR_HIGH           0x60
-#define NORTHBRIDGE_CAP           0xE8
-#define         NBCAP_128Bit         0x0001
-#define         NBCAP_MP             0x0002
-#define         NBCAP_BIG_MP         0x0004
-#define         NBCAP_ECC            0x0004
-#define         NBCAP_CHIPKILL_ECC   0x0010
-#define         NBCAP_MEMCLK_SHIFT   5
-#define         NBCAP_MEMCLK_MASK    3
-#define         NBCAP_MEMCLK_100MHZ  3
-#define         NBCAP_MEMCLK_133MHZ  2
-#define         NBCAP_MEMCLK_166MHZ  1
-#define         NBCAP_MEMCLK_200MHZ  0
-#define         NBCAP_MEMCTRL        0x0100
-
-
-static void setup_resource_map(const unsigned int *register_values, int max)
+void setup_resource_map(const unsigned int *register_values, int max)  /*
+ * Apply a table of PCI configuration read-modify-write operations.
+ *
+ * register_values holds max entries laid out as consecutive triples:
+ *   [i]   address: low 12 bits are the register offset, the remaining
+ *         bits are passed on as the device
+ *   [i+1] AND mask applied to the value read back
+ *   [i+2] OR value merged in before the write
+ * max counts individual entries, not triples.  Entries i+1 and i+2 are
+ * read unconditionally, so max is expected to be a multiple of 3.
+ */
 {
        int i;
-       print_debug("setting up resource map....\r\n");
-       for(i = 0; i < max; i += 3) {
+//     printk(BIOS_DEBUG, "setting up resource map....");
+       for (i = 0; i < max; i += 3) {
                device_t dev;
                unsigned where;
                unsigned long reg;
-#if 0
-               print_debug_hex32(register_values[i]);
-               print_debug(" <-");
-               print_debug_hex32(register_values[i+2]);
-               print_debug("\r\n");
-#endif
-               dev = register_values[i] & ~0xff;
-               where = register_values[i] & 0xff;
+               dev = register_values[i] & ~0xfff;      /* everything above the offset */
+               where = register_values[i] & 0xfff;     /* 12-bit register offset */
                reg = pci_read_config32(dev, where);
                reg &= register_values[i+1];    /* keep only the bits set in the AND mask */
                reg |= register_values[i+2];    /* then merge in the OR value */
                pci_write_config32(dev, where, reg);
-#if 0
-               reg = pci_read_config32(register_values[i]);
-               reg &= register_values[i+1];
-               reg |= register_values[i+2] & ~register_values[i+1];
-               pci_write_config32(register_values[i], reg);
-#endif
        }
-       print_debug("done.\r\n");
+//     printk(BIOS_DEBUG, "done.\n");
 }
 
-static void setup_default_resource_map(void)
+static int controller_present(const struct mem_controller *ctrl)       /*
+ * Return nonzero when the configuration dword at offset 0 of ctrl->f0
+ * reads 0x11001022, zero otherwise.
+ * NOTE(review): presumably this is the PCI device/vendor ID pair
+ * (device 0x1100, vendor 0x1022) of the node's function 0, so a
+ * mismatch means no node answers at that address -- confirm.
+ */
 {
-       static const unsigned int register_values[] = {
-       /* Careful set limit registers before base registers which contain the enables */
-       /* DRAM Limit i Registers
-        * F1:0x44 i = 0
-        * F1:0x4C i = 1
-        * F1:0x54 i = 2
-        * F1:0x5C i = 3
-        * F1:0x64 i = 4
-        * F1:0x6C i = 5
-        * F1:0x74 i = 6
-        * F1:0x7C i = 7
-        * [ 2: 0] Destination Node ID
-        *         000 = Node 0
-        *         001 = Node 1
-        *         010 = Node 2
-        *         011 = Node 3
-        *         100 = Node 4
-        *         101 = Node 5
-        *         110 = Node 6
-        *         111 = Node 7
-        * [ 7: 3] Reserved
-        * [10: 8] Interleave select
-        *         specifies the values of A[14:12] to use with interleave enable.
-        * [15:11] Reserved
-        * [31:16] DRAM Limit Address i Bits 39-24
-        *         This field defines the upper address bits of a 40 bit  address
-        *         that define the end of the DRAM region.
-        */
-       PCI_ADDR(0, 0x18, 1, 0x44), 0x0000f8f8, 0x00000000,
-       PCI_ADDR(0, 0x18, 1, 0x4C), 0x0000f8f8, 0x00000001,
-       PCI_ADDR(0, 0x18, 1, 0x54), 0x0000f8f8, 0x00000002,
-       PCI_ADDR(0, 0x18, 1, 0x5C), 0x0000f8f8, 0x00000003,
-       PCI_ADDR(0, 0x18, 1, 0x64), 0x0000f8f8, 0x00000004,
-       PCI_ADDR(0, 0x18, 1, 0x6C), 0x0000f8f8, 0x00000005,
-       PCI_ADDR(0, 0x18, 1, 0x74), 0x0000f8f8, 0x00000006,
-       PCI_ADDR(0, 0x18, 1, 0x7C), 0x0000f8f8, 0x00000007,
-       /* DRAM Base i Registers
-        * F1:0x40 i = 0
-        * F1:0x48 i = 1
-        * F1:0x50 i = 2
-        * F1:0x58 i = 3
-        * F1:0x60 i = 4
-        * F1:0x68 i = 5
-        * F1:0x70 i = 6
-        * F1:0x78 i = 7
-        * [ 0: 0] Read Enable
-        *         0 = Reads Disabled
-        *         1 = Reads Enabled
-        * [ 1: 1] Write Enable
-        *         0 = Writes Disabled
-        *         1 = Writes Enabled
-        * [ 7: 2] Reserved
-        * [10: 8] Interleave Enable
-        *         000 = No interleave
-        *         001 = Interleave on A[12] (2 nodes)
-        *         010 = reserved
-        *         011 = Interleave on A[12] and A[14] (4 nodes)
-        *         100 = reserved
-        *         101 = reserved
-        *         110 = reserved
-        *         111 = Interleve on A[12] and A[13] and A[14] (8 nodes)
-        * [15:11] Reserved
-        * [13:16] DRAM Base Address i Bits 39-24
-        *         This field defines the upper address bits of a 40-bit address
-        *         that define the start of the DRAM region.
-        */
-       PCI_ADDR(0, 0x18, 1, 0x40), 0x0000f8fc, 0x00000000,
-       PCI_ADDR(0, 0x18, 1, 0x48), 0x0000f8fc, 0x00000000,
-       PCI_ADDR(0, 0x18, 1, 0x50), 0x0000f8fc, 0x00000000,
-       PCI_ADDR(0, 0x18, 1, 0x58), 0x0000f8fc, 0x00000000,
-       PCI_ADDR(0, 0x18, 1, 0x60), 0x0000f8fc, 0x00000000,
-       PCI_ADDR(0, 0x18, 1, 0x68), 0x0000f8fc, 0x00000000,
-       PCI_ADDR(0, 0x18, 1, 0x70), 0x0000f8fc, 0x00000000,
-       PCI_ADDR(0, 0x18, 1, 0x78), 0x0000f8fc, 0x00000000,
-
-       /* Memory-Mapped I/O Limit i Registers
-        * F1:0x84 i = 0
-        * F1:0x8C i = 1
-        * F1:0x94 i = 2
-        * F1:0x9C i = 3
-        * F1:0xA4 i = 4
-        * F1:0xAC i = 5
-        * F1:0xB4 i = 6
-        * F1:0xBC i = 7
-        * [ 2: 0] Destination Node ID
-        *         000 = Node 0
-        *         001 = Node 1
-        *         010 = Node 2
-        *         011 = Node 3
-        *         100 = Node 4
-        *         101 = Node 5
-        *         110 = Node 6
-        *         111 = Node 7
-        * [ 3: 3] Reserved
-        * [ 5: 4] Destination Link ID
-        *         00 = Link 0
-        *         01 = Link 1
-        *         10 = Link 2
-        *         11 = Reserved
-        * [ 6: 6] Reserved
-        * [ 7: 7] Non-Posted
-        *         0 = CPU writes may be posted
-        *         1 = CPU writes must be non-posted
-        * [31: 8] Memory-Mapped I/O Limit Address i (39-16)
-        *         This field defines the upp adddress bits of a 40-bit address that
-        *         defines the end of a memory-mapped I/O region n
-        */
-       PCI_ADDR(0, 0x18, 1, 0x84), 0x00000048, 0x00000000,
-       PCI_ADDR(0, 0x18, 1, 0x8C), 0x00000048, 0x00000000,
-       PCI_ADDR(0, 0x18, 1, 0x94), 0x00000048, 0x00000000,
-       PCI_ADDR(0, 0x18, 1, 0x9C), 0x00000048, 0x00000000,
-       PCI_ADDR(0, 0x18, 1, 0xA4), 0x00000048, 0x00000000,
-       PCI_ADDR(0, 0x18, 1, 0xAC), 0x00000048, 0x00000000,
-       PCI_ADDR(0, 0x18, 1, 0xB4), 0x00000048, 0x00000000,
-       PCI_ADDR(0, 0x18, 1, 0xBC), 0x00000048, 0x00ffff00,
-
-       /* Memory-Mapped I/O Base i Registers
-        * F1:0x80 i = 0
-        * F1:0x88 i = 1
-        * F1:0x90 i = 2
-        * F1:0x98 i = 3
-        * F1:0xA0 i = 4
-        * F1:0xA8 i = 5
-        * F1:0xB0 i = 6
-        * F1:0xB8 i = 7
-        * [ 0: 0] Read Enable
-        *         0 = Reads disabled
-        *         1 = Reads Enabled
-        * [ 1: 1] Write Enable
-        *         0 = Writes disabled
-        *         1 = Writes Enabled
-        * [ 2: 2] Cpu Disable
-        *         0 = Cpu can use this I/O range
-        *         1 = Cpu requests do not use this I/O range
-        * [ 3: 3] Lock
-        *         0 = base/limit registers i are read/write
-        *         1 = base/limit registers i are read-only
-        * [ 7: 4] Reserved
-        * [31: 8] Memory-Mapped I/O Base Address i (39-16)
-        *         This field defines the upper address bits of a 40bit address 
-        *         that defines the start of memory-mapped I/O region i
-        */
-       PCI_ADDR(0, 0x18, 1, 0x80), 0x000000f0, 0x00000000,
-       PCI_ADDR(0, 0x18, 1, 0x88), 0x000000f0, 0x00000000,
-       PCI_ADDR(0, 0x18, 1, 0x90), 0x000000f0, 0x00000000,
-       PCI_ADDR(0, 0x18, 1, 0x98), 0x000000f0, 0x00000000,
-       PCI_ADDR(0, 0x18, 1, 0xA0), 0x000000f0, 0x00000000,
-       PCI_ADDR(0, 0x18, 1, 0xA8), 0x000000f0, 0x00000000,
-       PCI_ADDR(0, 0x18, 1, 0xB0), 0x000000f0, 0x00000000,
-       PCI_ADDR(0, 0x18, 1, 0xB8), 0x000000f0, 0x00fc0003,
-
-       /* PCI I/O Limit i Registers
-        * F1:0xC4 i = 0
-        * F1:0xCC i = 1
-        * F1:0xD4 i = 2
-        * F1:0xDC i = 3
-        * [ 2: 0] Destination Node ID
-        *         000 = Node 0
-        *         001 = Node 1
-        *         010 = Node 2
-        *         011 = Node 3
-        *         100 = Node 4
-        *         101 = Node 5
-        *         110 = Node 6
-        *         111 = Node 7
-        * [ 3: 3] Reserved
-        * [ 5: 4] Destination Link ID
-        *         00 = Link 0
-        *         01 = Link 1
-        *         10 = Link 2
-        *         11 = reserved
-        * [11: 6] Reserved
-        * [24:12] PCI I/O Limit Address i
-        *         This field defines the end of PCI I/O region n
-        * [31:25] Reserved
-        */
-       PCI_ADDR(0, 0x18, 1, 0xC4), 0xFE000FC8, 0x01fff000,
-       PCI_ADDR(0, 0x18, 1, 0xCC), 0xFE000FC8, 0x00000000,
-       PCI_ADDR(0, 0x18, 1, 0xD4), 0xFE000FC8, 0x00000000,
-       PCI_ADDR(0, 0x18, 1, 0xDC), 0xFE000FC8, 0x00000000,
-
-       /* PCI I/O Base i Registers
-        * F1:0xC0 i = 0
-        * F1:0xC8 i = 1
-        * F1:0xD0 i = 2
-        * F1:0xD8 i = 3
-        * [ 0: 0] Read Enable
-        *         0 = Reads Disabled
-        *         1 = Reads Enabled
-        * [ 1: 1] Write Enable
-        *         0 = Writes Disabled
-        *         1 = Writes Enabled
-        * [ 3: 2] Reserved
-        * [ 4: 4] VGA Enable
-        *         0 = VGA matches Disabled
-        *         1 = matches all address < 64K and where A[9:0] is in the 
-        *             range 3B0-3BB or 3C0-3DF independen of the base & limit registers
-        * [ 5: 5] ISA Enable
-        *         0 = ISA matches Disabled
-        *         1 = Blocks address < 64K and in the last 768 bytes of eack 1K block
-        *             from matching agains this base/limit pair
-        * [11: 6] Reserved
-        * [24:12] PCI I/O Base i
-        *         This field defines the start of PCI I/O region n 
-        * [31:25] Reserved
-        */
-       PCI_ADDR(0, 0x18, 1, 0xC0), 0xFE000FCC, 0x00000003,
-       PCI_ADDR(0, 0x18, 1, 0xC8), 0xFE000FCC, 0x00000000,
-       PCI_ADDR(0, 0x18, 1, 0xD0), 0xFE000FCC, 0x00000000,
-       PCI_ADDR(0, 0x18, 1, 0xD8), 0xFE000FCC, 0x00000000,
-
-       /* Config Base and Limit i Registers
-        * F1:0xE0 i = 0
-        * F1:0xE4 i = 1
-        * F1:0xE8 i = 2
-        * F1:0xEC i = 3
-        * [ 0: 0] Read Enable
-        *         0 = Reads Disabled
-        *         1 = Reads Enabled
-        * [ 1: 1] Write Enable
-        *         0 = Writes Disabled
-        *         1 = Writes Enabled
-        * [ 2: 2] Device Number Compare Enable
-        *         0 = The ranges are based on bus number
-        *         1 = The ranges are ranges of devices on bus 0
-        * [ 3: 3] Reserved
-        * [ 6: 4] Destination Node
-        *         000 = Node 0
-        *         001 = Node 1
-        *         010 = Node 2
-        *         011 = Node 3
-        *         100 = Node 4
-        *         101 = Node 5
-        *         110 = Node 6
-        *         111 = Node 7
-        * [ 7: 7] Reserved
-        * [ 9: 8] Destination Link
-        *         00 = Link 0
-        *         01 = Link 1
-        *         10 = Link 2
-        *         11 - Reserved
-        * [15:10] Reserved
-        * [23:16] Bus Number Base i
-        *         This field defines the lowest bus number in configuration region i
-        * [31:24] Bus Number Limit i
-        *         This field defines the highest bus number in configuration regin i
-        */
-       PCI_ADDR(0, 0x18, 1, 0xE0), 0x0000FC88, 0xff000003,
-       PCI_ADDR(0, 0x18, 1, 0xE4), 0x0000FC88, 0x00000000,
-       PCI_ADDR(0, 0x18, 1, 0xE8), 0x0000FC88, 0x00000000,
-       PCI_ADDR(0, 0x18, 1, 0xEC), 0x0000FC88, 0x00000000,
-       };
-       int max;
-       max = sizeof(register_values)/sizeof(register_values[0]);
-       setup_resource_map(register_values, max);
+       return pci_read_config32(ctrl->f0, 0) == 0x11001022;
 }
 
+#if CONFIG_RAMINIT_SYSINFO
+static void sdram_set_registers(const struct mem_controller *ctrl, struct sys_info *sysinfo)
+#else
 static void sdram_set_registers(const struct mem_controller *ctrl)
+#endif
 {
        static const unsigned int register_values[] = {
 
-       /* Careful set limit registers before base registers which contain the enables */
+       /* Careful set limit registers before base registers which
+          contain the enables */
        /* DRAM Limit i Registers
         * F1:0x44 i = 0
         * F1:0x4C i = 1
@@ -587,7 +170,7 @@ static void sdram_set_registers(const struct mem_controller *ctrl)
         * [29:21] Address Mask (33-25)
         *         The bits with an address mask of 1 are excluded from address comparison
         * [31:30] Reserved
-        * 
+        *
         */
        PCI_ADDR(0, 0x18, 2, 0x60), 0xC01f01ff, 0x00000000,
        PCI_ADDR(0, 0x18, 2, 0x64), 0xC01f01ff, 0x00000000,
@@ -600,7 +183,7 @@ static void sdram_set_registers(const struct mem_controller *ctrl)
        /* DRAM Bank Address Mapping Register
         * F2:0x80
         * Specify the memory module size
-        * [ 2: 0] CS1/0 
+        * [ 2: 0] CS1/0
         * [ 6: 4] CS3/2
         * [10: 8] CS5/4
         * [14:12] CS7/6
@@ -611,7 +194,7 @@ static void sdram_set_registers(const struct mem_controller *ctrl)
         *         100 = 512Mbyte (Rows = 13 & Col = 11)|(Rows = 14 & Col = 10)
         *         101 = 1Gbyte   (Rows = 14 & Col = 11)|(Rows = 13 & Col = 12)
         *         110 = 2Gbyte   (Rows = 14 & Col = 12)
-        *         111 = reserved 
+        *         111 = reserved
         * [ 3: 3] Reserved
         * [ 7: 7] Reserved
         * [11:11] Reserved
@@ -727,8 +310,8 @@ static void sdram_set_registers(const struct mem_controller *ctrl)
         *         0 = Disabled
         *         1 = Enabled
         * [ 3: 3] Disable DQS Hystersis  (FIXME handle this one carefully)
-        *         0 = Enable DQS input filter 
-        *         1 = Disable DQS input filtering 
+        *         0 = Enable DQS input filter
+        *         1 = Disable DQS input filtering
         * [ 7: 4] Reserved
         * [ 8: 8] DRAM_Init
         *         0 = Initialization done or not yet started.
@@ -785,12 +368,12 @@ static void sdram_set_registers(const struct mem_controller *ctrl)
         *         111 = Oldest entry in DCQ can be bypassed 7 times
         * [31:28] Reserved
         */
-       PCI_ADDR(0, 0x18, 2, 0x90), 0xf0000000, 
-       (4 << 25)|(0 << 24)| 
-       (0 << 23)|(0 << 22)|(0 << 21)|(0 << 20)| 
-       (1 << 19)|(0 << 18)|(1 << 17)|(0 << 16)| 
-       (2 << 14)|(0 << 13)|(0 << 12)| 
-       (0 << 11)|(0 << 10)|(0 << 9)|(0 << 8)| 
+       PCI_ADDR(0, 0x18, 2, 0x90), 0xf0000000,
+       (4 << 25)|(0 << 24)|
+       (0 << 23)|(0 << 22)|(0 << 21)|(0 << 20)|
+       (1 << 19)|(0 << 18)|(1 << 17)|(0 << 16)|
+       (2 << 14)|(0 << 13)|(0 << 12)|
+       (0 << 11)|(0 << 10)|(0 << 9)|(0 << 8)|
        (0 << 3) |(0 << 1) |(0 << 0),
        /* DRAM Config High Register
         * F2:0x94
@@ -875,6 +458,14 @@ static void sdram_set_registers(const struct mem_controller *ctrl)
         * [31:26] Reserved
         */
        PCI_ADDR(0, 0x18, 2, 0x98), 0xfc00ffff, 0x00000000,
+       /* MCA NB Status Low reg */
+       PCI_ADDR(0, 0x18, 3, 0x48), 0x00f00000, 0x00000000,
+       /* MCA NB Status high reg */
+       PCI_ADDR(0, 0x18, 3, 0x4c), 0x01801e8c, 0x00000000,
+       /* MCA NB address Low reg */
+       PCI_ADDR(0, 0x18, 3, 0x50), 0x00000007, 0x00000000,
+       /* MCA NB address high reg */
+       PCI_ADDR(0, 0x18, 3, 0x54), 0xffffff00, 0x00000000,
        /* DRAM Scrub Control Register
         * F3:0x58
         * [ 4: 0] DRAM Scrube Rate
@@ -924,47 +515,44 @@ static void sdram_set_registers(const struct mem_controller *ctrl)
         * [31: 8] Reserved
         */
        PCI_ADDR(0, 0x18, 3, 0x60), 0xffffff00, 0x00000000,
-
-#if ENABLE_IOMMU != 0
-       /* BY LYH  add IOMMU 64M APERTURE */
-       PCI_ADDR(0, 0x18, 3, 0x94), 0xffff8000, 0x00000f70,
-       PCI_ADDR(0, 0x18, 3, 0x90), 0xffffff80, 0x00000002,
-       PCI_ADDR(0, 0x18, 3, 0x98), 0x0000000f, 0x00068300,
-#endif
        };
        int i;
        int max;
-       print_debug("setting up CPU");
-       print_debug_hex8(ctrl->node_id);
-       print_debug(" northbridge registers\r\n");
-       max = sizeof(register_values)/sizeof(register_values[0]);
-       for(i = 0; i < max; i += 3) {
+
+       if (!controller_present(ctrl)) {
+//             printk(BIOS_DEBUG, "No memory controller present\n");
+               return;
+       }
+       printk(BIOS_SPEW, "setting up CPU%02x northbridge registers\n", ctrl->node_id);
+       max = ARRAY_SIZE(register_values);
+       for (i = 0; i < max; i += 3) {
                device_t dev;
                unsigned where;
                unsigned long reg;
-#if 0
-               print_debug_hex32(register_values[i]);
-               print_debug(" <-");
-               print_debug_hex32(register_values[i+2]);
-               print_debug("\r\n");
-#endif
-               dev = (register_values[i] & ~0xff) - PCI_DEV(0, 0x18, 0) + ctrl->f0;
-               where = register_values[i] & 0xff;
+               dev = (register_values[i] & ~0xfff) - PCI_DEV(0, 0x18, 0) + ctrl->f0;
+               where = register_values[i] & 0xfff;
                reg = pci_read_config32(dev, where);
                reg &= register_values[i+1];
                reg |= register_values[i+2];
                pci_write_config32(dev, where, reg);
-#if 0
-
-               reg = pci_read_config32(register_values[i]);
-               reg &= register_values[i+1];
-               reg |= register_values[i+2];
-               pci_write_config32(register_values[i], reg);
-#endif
        }
-       print_debug("done.\r\n");
+       printk(BIOS_SPEW, "done.\n");
 }
 
+static void hw_enable_ecc(const struct mem_controller *ctrl)   /*
+ * Decide the DCL_DimmEccEn bit of the DRAM Config Low register on
+ * ctrl->f2 and write the register back.  The bit ends up set only when
+ * the Northbridge Capabilities register on ctrl->f3 has NBCAP_ECC set
+ * and read_option(ECC_memory, 1) does not return 0; in every other
+ * case it is cleared.  All other bits of the register are preserved.
+ * NOTE(review): the second argument of read_option() is presumably
+ * the value used when the option cannot be read -- confirm.
+ */
+{
+       uint32_t dcl, nbcap;
+       nbcap = pci_read_config32(ctrl->f3, NORTHBRIDGE_CAP);
+       dcl = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
+       dcl &= ~DCL_DimmEccEn;  /* start from "ECC off" */
+       if (nbcap & NBCAP_ECC) {        /* capability bit reported: turn it on */
+               dcl |= DCL_DimmEccEn;
+       }
+       if (read_option(ECC_memory, 1) == 0) {  /* option reads 0: force it off again */
+               dcl &= ~DCL_DimmEccEn;
+       }
+       pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dcl);
+}
 
 static int is_dual_channel(const struct mem_controller *ctrl)
 {
@@ -975,11 +563,13 @@ static int is_dual_channel(const struct mem_controller *ctrl)
 
 static int is_opteron(const struct mem_controller *ctrl)
 {
-       /* Test to see if I am an Opteron.  
-        * FIXME Testing dual channel capability is correct for now
-        * but a beter test is probably required.
+       /* Test to see if I am an Opteron.  Socket 939 based Athlon64
+        * have dual channel capability, too, so we need a better test
+        * for Opterons.
+        * However, all code uses is_opteron() to find out whether to
+        * use dual channel, so if we really check for opteron here, we
+        * need to fix up all code using this function, too.
         */
-#warning "FIXME implement a better test for opterons"
        uint32_t nbcap;
        nbcap = pci_read_config32(ctrl->f3, NORTHBRIDGE_CAP);
        return !!(nbcap & NBCAP_128Bit);
@@ -993,12 +583,17 @@ static int is_registered(const struct mem_controller *ctrl)
         */
        uint32_t dcl;
        dcl = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
-       return !(dcl & DCL_UnBufDimm);
+       return !(dcl & DCL_UnBuffDimm);
 }
 
 struct dimm_size {     /* DIMM geometry as filled in by spd_get_dimm_size(); all fields are 0 after an SPD read error */
        unsigned long side1;    /* log2 of side 1 size in bits: rows + columns + log2(banks) + log2(data width) */
        unsigned long side2;    /* same measure for side 2; stays 0 when SPD byte 5 reports one physical bank */
+       unsigned long rows;     /* low nibble of SPD byte 3 */
+       unsigned long col;      /* low nibble of SPD byte 4 */
+#if CONFIG_QRANK_DIMM_SUPPORT
+       unsigned long rank;     /* SPD byte 5 when it is 2 or 4, otherwise 0 */
+#endif
 };
 
 static struct dimm_size spd_get_dimm_size(unsigned device)
@@ -1008,89 +603,111 @@ static struct dimm_size spd_get_dimm_size(unsigned device)
        int value, low;
        sz.side1 = 0;
        sz.side2 = 0;
+       sz.rows = 0;
+       sz.col = 0;
+#if CONFIG_QRANK_DIMM_SUPPORT
+       sz.rank = 0;
+#endif
 
        /* Note it might be easier to use byte 31 here, it has the DIMM size as
         * a multiple of 4MB.  The way we do it now we can size both
         * sides of an assymetric dimm.
         */
        value = spd_read_byte(device, 3);       /* rows */
-       if (value < 0) goto out;
+       if (value < 0) goto hw_err;
+       if ((value & 0xf) == 0) goto val_err;
        sz.side1 += value & 0xf;
+       sz.rows = value & 0xf;
 
        value = spd_read_byte(device, 4);       /* columns */
-       if (value < 0) goto out;
+       if (value < 0) goto hw_err;
+       if ((value & 0xf) == 0) goto val_err;
        sz.side1 += value & 0xf;
+       sz.col = value & 0xf;
 
        value = spd_read_byte(device, 17);      /* banks */
-       if (value < 0) goto out;
+       if (value < 0) goto hw_err;
+       if ((value & 0xff) == 0) goto val_err;
        sz.side1 += log2(value & 0xff);
 
        /* Get the module data width and convert it to a power of two */
        value = spd_read_byte(device, 7);       /* (high byte) */
-       if (value < 0) goto out;
+       if (value < 0) goto hw_err;
        value &= 0xff;
        value <<= 8;
-       
+
        low = spd_read_byte(device, 6); /* (low byte) */
-       if (low < 0) goto out;
+       if (low < 0) goto hw_err;
        value = value | (low & 0xff);
+       if ((value != 72) && (value != 64)) goto val_err;
        sz.side1 += log2(value);
 
        /* side 2 */
        value = spd_read_byte(device, 5);       /* number of physical banks */
-       if (value <= 1) goto out;
+       if (value < 0) goto hw_err;
+       if (value == 1) goto out;
+       if ((value != 2) && (value != 4 )) {
+               goto val_err;
+       }
+#if CONFIG_QRANK_DIMM_SUPPORT
+       sz.rank = value;
+#endif
 
        /* Start with the symmetrical case */
        sz.side2 = sz.side1;
 
        value = spd_read_byte(device, 3);       /* rows */
-       if (value < 0) goto out;
+       if (value < 0) goto hw_err;
        if ((value & 0xf0) == 0) goto out;      /* If symmetrical we are done */
        sz.side2 -= (value & 0x0f);             /* Subtract out rows on side 1 */
        sz.side2 += ((value >> 4) & 0x0f);      /* Add in rows on side 2 */
 
        value = spd_read_byte(device, 4);       /* columns */
-       if (value < 0) goto out;
+       if (value < 0) goto hw_err;
+       if ((value & 0xff) == 0) goto val_err;
        sz.side2 -= (value & 0x0f);             /* Subtract out columns on side 1 */
        sz.side2 += ((value >> 4) & 0x0f);      /* Add in columsn on side 2 */
 
- out:
+       goto out;
+
+ val_err:
+       die("Bad SPD value\n");
+       /* If an hw_error occurs report that I have no memory */
+hw_err:
+       sz.side1 = 0;
+       sz.side2 = 0;
+       sz.rows = 0;
+       sz.col = 0;
+#if CONFIG_QRANK_DIMM_SUPPORT
+       sz.rank = 0;
+#endif
+out:
        return sz;
 }
 
+
 static void set_dimm_size(const struct mem_controller *ctrl, struct dimm_size sz, unsigned index)
 {
-       uint32_t base0, base1, map;
+       uint32_t base0, base1;
        uint32_t dch;
 
-#if 0
-       print_debug("set_dimm_size: (");
-       print_debug_hex32(sz.side1);
-       print_debug_char(',');
-       print_debug_hex32(sz.side2);
-       print_debug_char(',');
-       print_debug_hex32(index);
-       print_debug(")\r\n");
-#endif
        if (sz.side1 != sz.side2) {
                sz.side2 = 0;
        }
-       map = pci_read_config32(ctrl->f2, DRAM_BANK_ADDR_MAP);
-       map &= ~(0xf << (index + 4));
 
        /* For each base register.
         * Place the dimm size in 32 MB quantities in the bits 31 - 21.
         * The initialize dimm size is in bits.
         * Set the base enable bit0.
         */
-       
+
        base0 = base1 = 0;
 
        /* Make certain side1 of the dimm is at least 32MB */
        if (sz.side1 >= (25 +3)) {
-               map |= (sz.side1 - (25 + 3)) << (index *4);
                base0 = (1 << ((sz.side1 - (25 + 3)) + 21)) | 1;
        }
+
        /* Make certain side2 of the dimm is at least 32MB */
        if (sz.side2 >= (25 + 3)) {
                base1 = (1 << ((sz.side2 - (25 + 3)) + 21)) | 1;
@@ -1109,25 +726,87 @@ static void set_dimm_size(const struct mem_controller *ctrl, struct dimm_size sz
        /* Set the appropriate DIMM base address register */
        pci_write_config32(ctrl->f2, DRAM_CSBASE + (((index << 1)+0)<<2), base0);
        pci_write_config32(ctrl->f2, DRAM_CSBASE + (((index << 1)+1)<<2), base1);
-       pci_write_config32(ctrl->f2, DRAM_BANK_ADDR_MAP, map);
-       
+#if CONFIG_QRANK_DIMM_SUPPORT
+       if (sz.rank == 4) {
+               pci_write_config32(ctrl->f2, DRAM_CSBASE + (((index << 1)+4)<<2), base0);
+               pci_write_config32(ctrl->f2, DRAM_CSBASE + (((index << 1)+5)<<2), base1);
+       }
+#endif
+
        /* Enable the memory clocks for this DIMM */
        if (base0) {
                dch = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH);
                dch |= DCH_MEMCLK_EN0 << index;
+#if CONFIG_QRANK_DIMM_SUPPORT
+               if (sz.rank == 4) {
+                       dch |= DCH_MEMCLK_EN0 << (index + 2);
+               }
+#endif
                pci_write_config32(ctrl->f2, DRAM_CONFIG_HIGH, dch);
        }
 }
 
-static void spd_set_ram_size(const struct mem_controller *ctrl)
+static void set_dimm_map(const struct mem_controller *ctrl, struct dimm_size sz, unsigned index)
+{      /* Program the bank address map field(s) for DIMM slot `index`.
+        *
+        * Reads DRAM_BANK_ADDR_MAP from ctrl->f2, clears the 4-bit field at
+        * bit offset index * 4 (and, when CONFIG_QRANK_DIMM_SUPPORT is set and
+        * sz.rank == 4, also the field at (index + 2) * 4), ORs in a mode
+        * value derived from sz, and writes the register back.
+        */
+       static const unsigned cs_map_aa[] = {
+               /* (row=12, col=8)(14, 12) ---> (0, 0) (2, 4)
+                * Three rows of five entries, looked up below with
+                * (sz.rows - 12) * 5 + (sz.col - 8).
+                * NOTE(review): that index is not range-checked here; this
+                * presumably relies on sz.rows being 12..14 and sz.col being
+                * 8..12 - confirm against spd_get_dimm_size().
+                */
+               0, 1, 3, 6, 0,
+               0, 2, 4, 7, 9,
+               0, 0, 5, 8,10,
+       };
+
+       uint32_t map;
+
+       map = pci_read_config32(ctrl->f2, DRAM_BANK_ADDR_MAP);
+       map &= ~(0xf << (index * 4));   /* drop the old mode for this slot */
+#if CONFIG_QRANK_DIMM_SUPPORT
+       if (sz.rank == 4) {
+               map &= ~(0xf << ( (index + 2) * 4));    /* second field used by a 4-rank DIMM */
+       }
+#endif
+
+
+       /* Make certain side1 of the dimm is at least 32MB.
+        * When it is smaller nothing is ORed in, so the fields cleared above
+        * are written back as zero.
+        */
+       if (sz.side1 >= (25 +3)) {
+               if (is_cpu_pre_d0()) {  /* pre-D0: the mode is side1 - (25 + 3) */
+                       map |= (sz.side1 - (25 + 3)) << (index *4);
+#if CONFIG_QRANK_DIMM_SUPPORT
+                       if (sz.rank == 4) {
+                               map |= (sz.side1 - (25 + 3)) << ( (index + 2) * 4);
+                       }
+#endif
+               }
+               else {  /* otherwise the mode is taken from cs_map_aa by rows/columns */
+                       map |= cs_map_aa[(sz.rows - 12) * 5 + (sz.col - 8) ] << (index*4);
+#if CONFIG_QRANK_DIMM_SUPPORT
+                       if (sz.rank == 4) {
+                               map |=  cs_map_aa[(sz.rows - 12) * 5 + (sz.col - 8) ] << ( (index + 2) * 4);
+                       }
+#endif
+               }
+       }
+
+       pci_write_config32(ctrl->f2, DRAM_BANK_ADDR_MAP, map);
+
+}
+
+static long spd_set_ram_size(const struct mem_controller *ctrl, long dimm_mask)
 {      /* Size and map every DIMM slot whose bit is set in dimm_mask.
+        *
+        * For each selected slot i below DIMM_SOCKETS the size is read with
+        * spd_get_dimm_size() from ctrl->channel0[i] and handed to
+        * set_dimm_size() and set_dimm_map().
+        *
+        * Returns dimm_mask unchanged, or -1 as soon as a selected slot
+        * reports sz.side1 == 0.
+        */
        int i;
-       
-       for(i = 0; (i < 4) && (ctrl->channel0[i]); i++) {
+
+       for (i = 0; i < DIMM_SOCKETS; i++) {
                struct dimm_size sz;
+               if (!(dimm_mask & (1 << i))) {  /* slot not selected in the mask */
+                       continue;
+               }
                sz = spd_get_dimm_size(ctrl->channel0[i]);
+               if (sz.side1 == 0) {    /* spd_get_dimm_size() zeroes the sizes on a read error */
+                       return -1; /* Report SPD error */
+               }
                set_dimm_size(ctrl, sz, i);
+               set_dimm_map (ctrl, sz, i);
        }
+       return dimm_mask;
 }
 
 static void route_dram_accesses(const struct mem_controller *ctrl,
@@ -1153,43 +832,46 @@ static void route_dram_accesses(const struct mem_controller *ctrl,
 
        limit_reg = 0x44 + index;
        base_reg = 0x40 + index;
-       for(device = PCI_DEV(0, 0x18, 1); device <= PCI_DEV(0, 0x1f, 1); device += PCI_DEV(0, 1, 0)) {
+       for (device = PCI_DEV(0, 0x18, 1); device <= PCI_DEV(0, 0x1f, 1); device += PCI_DEV(0, 1, 0)) {
                pci_write_config32(device, limit_reg, limit);
                pci_write_config32(device, base_reg, base);
        }
 }
 
-static void set_top_mem(unsigned tom_k)
+static void set_top_mem(unsigned tom_k, unsigned hole_startk)
 {
        /* Error if I don't have memory */
        if (!tom_k) {
-               set_bios_reset();
-               print_debug("No memory - reset");
-               /* enable cf9 */
-               pci_write_config8(PCI_DEV(0, 0x04, 3), 0x41, 0xf1);
-               /* reset */
-               outb(0x0e, 0x0cf9);
+               die("No memory?");
        }
 
-#if 1
        /* Report the amount of memory. */
-       print_debug("RAM: 0x");
-       print_debug_hex32(tom_k);
-       print_debug(" KB\r\n");
-#endif
+       printk(BIOS_DEBUG, "RAM end at 0x%08x kB\n", tom_k);
 
        /* Now set top of memory */
        msr_t msr;
-       msr.lo = (tom_k & 0x003fffff) << 10;
-       msr.hi = (tom_k & 0xffc00000) >> 22;
-       wrmsr(TOP_MEM2, msr);
+       if (tom_k > (4*1024*1024)) {
+               printk(BIOS_SPEW, "Handling memory mapped above 4 GB\n");
+               printk(BIOS_SPEW, "Upper RAM end at 0x%08x kB\n", tom_k);
+               msr.lo = (tom_k & 0x003fffff) << 10;
+               msr.hi = (tom_k & 0xffc00000) >> 22;
+               wrmsr(TOP_MEM2, msr);
+               printk(BIOS_SPEW, "Correcting memory amount mapped below 4 GB\n");
+       }
 
        /* Leave a 64M hole between TOP_MEM and TOP_MEM2
         * so I can see my rom chip and other I/O devices.
         */
        if (tom_k >= 0x003f0000) {
+#if CONFIG_HW_MEM_HOLE_SIZEK != 0
+               if (hole_startk != 0) {
+                       tom_k = hole_startk;
+               } else
+#endif
                tom_k = 0x3f0000;
+               printk(BIOS_SPEW, "Adjusting lower RAM end\n");
        }
+       printk(BIOS_SPEW, "Lower RAM end at 0x%08x kB\n", tom_k);
        msr.lo = (tom_k & 0x003fffff) << 10;
        msr.hi = (tom_k & 0xffc00000) >> 22;
        wrmsr(TOP_MEM, msr);
@@ -1198,20 +880,37 @@ static void set_top_mem(unsigned tom_k)
 static unsigned long interleave_chip_selects(const struct mem_controller *ctrl)
 {
        /* 35 - 25 */
-       static const uint32_t csbase_low[] = { 
-       /* 32MB */      (1 << (13 - 4)),
-       /* 64MB */      (1 << (14 - 4)),
-       /* 128MB */     (1 << (14 - 4)), 
-       /* 256MB */     (1 << (15 - 4)),
-       /* 512MB */     (1 << (15 - 4)),
-       /* 1GB */       (1 << (16 - 4)),
-       /* 2GB */       (1 << (16 - 4)), 
+       static const uint8_t csbase_low_shift[] = {
+       /* 32MB */      (13 - 4),
+       /* 64MB */      (14 - 4),
+       /* 128MB */     (14 - 4),
+       /* 256MB */     (15 - 4),
+       /* 512MB */     (15 - 4),
+       /* 1GB */       (16 - 4),
+       /* 2GB */       (16 - 4),
+       };
+
+       static const uint8_t csbase_low_d0_shift[] = {
+       /* 32MB */      (13 - 4),
+       /* 64MB */      (14 - 4),
+       /* 128MB */     (14 - 4),
+       /* 128MB */     (15 - 4),
+       /* 256MB */     (15 - 4),
+       /* 512MB */     (15 - 4),
+       /* 256MB */     (16 - 4),
+       /* 512MB */     (16 - 4),
+       /* 1GB */       (16 - 4),
+       /* 1GB */       (17 - 4),
+       /* 2GB */       (17 - 4),
        };
+
+       /* cs_base_high is not changed */
+
        uint32_t csbase_inc;
        int chip_selects, index;
        int bits;
-       int dual_channel;
        unsigned common_size;
+       unsigned common_cs_mode;
        uint32_t csbase, csmask;
 
        /* See if all of the memory chip selects are the same size
@@ -1219,12 +918,14 @@ static unsigned long interleave_chip_selects(const struct mem_controller *ctrl)
         */
        chip_selects = 0;
        common_size = 0;
-       for(index = 0; index < 8; index++) {
+       common_cs_mode = 0;
+       for (index = 0; index < 8; index++) {
                unsigned size;
+               unsigned cs_mode;
                uint32_t value;
-               
+
                value = pci_read_config32(ctrl->f2, DRAM_CSBASE + (index << 2));
-               
+
                /* Is it enabled? */
                if (!(value & 1)) {
                        continue;
@@ -1238,34 +939,58 @@ static unsigned long interleave_chip_selects(const struct mem_controller *ctrl)
                if (common_size != size) {
                        return 0;
                }
+
+               value = pci_read_config32(ctrl->f2, DRAM_BANK_ADDR_MAP);
+               cs_mode =( value >> ((index>>1)*4)) & 0xf;
+               if (cs_mode == 0 ) continue;
+               if (common_cs_mode == 0) {
+                       common_cs_mode = cs_mode;
+               }
+               /* The cs_mode differed: fail */
+               if (common_cs_mode != cs_mode) {
+                       return 0;
+               }
        }
+
        /* Chip selects can only be interleaved when there is
         * more than one and there is a power of two of them.
         */
        bits = log2(chip_selects);
        if (((1 << bits) != chip_selects) || (bits < 1) || (bits > 3)) {
                return 0;
-               
-       }
-       /* Also we run out of address mask bits if we try and interleave 8 4GB dimms */
-       if ((bits == 3) && (common_size == (1 << (32 - 3)))) {
-               print_debug("8 4GB chip selects cannot be interleaved\r\n");
-               return 0;
        }
+
        /* Find the bits of csbase that we need to interleave on */
-       if (is_dual_channel(ctrl)) {
-               csbase_inc = csbase_low[log2(common_size) - 1] << 1;
-       } else {
-               csbase_inc = csbase_low[log2(common_size)];
+       if (is_cpu_pre_d0()){
+               csbase_inc = 1 << csbase_low_shift[common_cs_mode];
+               if (is_dual_channel(ctrl)) {
+               /* Also we run out of address mask bits if we try and interleave 8 4GB dimms */
+                       if ((bits == 3) && (common_size == (1 << (32 - 3)))) {
+//                                     printk(BIOS_DEBUG, "8 4GB chip selects cannot be interleaved\n");
+                               return 0;
+                       }
+                       csbase_inc <<=1;
+               }
        }
-       /* Compute the initial values for csbase and csbask. 
+       else {
+               csbase_inc = 1 << csbase_low_d0_shift[common_cs_mode];
+               if (is_dual_channel(ctrl)) {
+                       if ( (bits==3) && (common_cs_mode > 8)) {
+//                             printk(BIOS_DEBUG, "8 cs_mode>8 chip selects cannot be interleaved\n");
+                               return 0;
+                       }
+                       csbase_inc <<=1;
+               }
+       }
+
+       /* Compute the initial values for csbase and csmask.
         * In csbase just set the enable bit and the base to zero.
         * In csmask set the mask bits for the size and page level interleave.
         */
        csbase = 0 | 1;
        csmask = (((common_size  << bits) - 1) << 21);
        csmask |= 0xfe00 & ~((csbase_inc << bits) - csbase_inc);
-       for(index = 0; index < 8; index++) {
+       for (index = 0; index < 8; index++) {
                uint32_t value;
 
                value = pci_read_config32(ctrl->f2, DRAM_CSBASE + (index << 2));
@@ -1277,10 +1002,9 @@ static unsigned long interleave_chip_selects(const struct mem_controller *ctrl)
                pci_write_config32(ctrl->f2, DRAM_CSMASK + (index << 2), csmask);
                csbase += csbase_inc;
        }
-       
-#if 1
-       print_debug("Interleaved\r\n");
-#endif 
+
+       printk(BIOS_SPEW, "Interleaved\n");
+
        /* Return the memory size in K */
        return common_size << (15 + bits);
 }
@@ -1288,17 +1012,17 @@ static unsigned long interleave_chip_selects(const struct mem_controller *ctrl)
 static unsigned long order_chip_selects(const struct mem_controller *ctrl)
 {
        unsigned long tom;
-       
+
        /* Remember which registers we have used in the high 8 bits of tom */
        tom = 0;
-       for(;;) {
-               /* Find the largest remaining canidate */
-               unsigned index, canidate;
+       for (;;) {
+               /* Find the largest remaining candidate */
+               unsigned index, candidate;
                uint32_t csbase, csmask;
                unsigned size;
                csbase = 0;
-               canidate = 0;
-               for(index = 0; index < 8; index++) {
+               candidate = 0;
+               for (index = 0; index < 8; index++) {
                        uint32_t value;
                        value = pci_read_config32(ctrl->f2, DRAM_CSBASE + (index << 2));
 
@@ -1306,21 +1030,22 @@ static unsigned long order_chip_selects(const struct mem_controller *ctrl)
                        if (!(value & 1)) {
                                continue;
                        }
-                       
+
                        /* Is it greater? */
                        if (value <= csbase) {
                                continue;
                        }
-                       
+
                        /* Has it already been selected */
                        if (tom & (1 << (index + 24))) {
                                continue;
                        }
-                       /* I have a new canidate */
+                       /* I have a new candidate */
                        csbase = value;
-                       canidate = index;
+                       candidate = index;
                }
-               /* See if I have found a new canidate */
+
+               /* See if I have found a new candidate */
                if (csbase == 0) {
                        break;
                }
@@ -1329,7 +1054,7 @@ static unsigned long order_chip_selects(const struct mem_controller *ctrl)
                size = csbase >> 21;
 
                /* Remember I have used this register */
-               tom |= (1 << (canidate + 24));
+               tom |= (1 << (candidate + 24));
 
                /* Recompute the cs base register value */
                csbase = (tom << 21) | 1;
@@ -1340,30 +1065,24 @@ static unsigned long order_chip_selects(const struct mem_controller *ctrl)
                /* Compute the memory mask */
                csmask = ((size -1) << 21);
                csmask |= 0xfe00;               /* For now don't optimize */
-#warning "Don't forget to optimize the DIMM size"
 
                /* Write the new base register */
-               pci_write_config32(ctrl->f2, DRAM_CSBASE + (canidate << 2), csbase);
+               pci_write_config32(ctrl->f2, DRAM_CSBASE + (candidate << 2), csbase);
                /* Write the new mask register */
-               pci_write_config32(ctrl->f2, DRAM_CSMASK + (canidate << 2), csmask);
-               
+               pci_write_config32(ctrl->f2, DRAM_CSMASK + (candidate << 2), csmask);
+
        }
        /* Return the memory size in K */
        return (tom & ~0xff000000) << 15;
 }
 
-static void order_dimms(const struct mem_controller *ctrl)
+static unsigned long memory_end_k(const struct mem_controller *ctrl, int max_node_id)
 {
-       unsigned long tom, tom_k, base_k;
        unsigned node_id;
-
-       tom_k = interleave_chip_selects(ctrl);
-       if (!tom_k) {
-               tom_k = order_chip_selects(ctrl);
-       }
-       /* Compute the memory base address */
-       base_k = 0;
-       for(node_id = 0; node_id < ctrl->node_id; node_id++) {
+       unsigned end_k;
+       /* Find the last memory address used */
+       end_k = 0;
+       for (node_id = 0; node_id < max_node_id; node_id++) {
                uint32_t limit, base;
                unsigned index;
                index = node_id << 3;
@@ -1371,50 +1090,67 @@ static void order_dimms(const struct mem_controller *ctrl)
                /* Only look at the limit if the base is enabled */
                if ((base & 3) == 3) {
                        limit = pci_read_config32(ctrl->f1, 0x44 + index);
-                       base_k = ((limit + 0x00010000) & 0xffff0000) >> 2;
+                       end_k = ((limit + 0x00010000) & 0xffff0000) >> 2;
                }
        }
-       tom_k += base_k;
-#if 0
-       print_debug("base_k: ");
-       print_debug_hex32(base_k);
-       print_debug(" tom_k: ");
-       print_debug_hex32(tom_k);
-       print_debug("\r\n");
-#endif
-       route_dram_accesses(ctrl, base_k, tom_k);
-       set_top_mem(tom_k);
+       return end_k;
 }
 
-static void disable_dimm(const struct mem_controller *ctrl, unsigned index)
+static void order_dimms(const struct mem_controller *ctrl)
 {      /* Assign addresses to this controller's chip selects and record
+        * the resulting top of memory.
+        *
+        * Interleaving is tried first when the option allows it; when it is
+        * disabled or interleave_chip_selects() returns 0, the chip selects
+        * are laid out by order_chip_selects() instead. The size in kB is
+        * then offset by memory_end_k() and passed to route_dram_accesses()
+        * and set_top_mem().
+        */
-       print_debug("disabling dimm"); 
-       print_debug_hex8(index); 
-       print_debug("\r\n");
+       unsigned long tom_k, base_k;
+       /* NOTE(review): the second argument to read_option() is presumably the default used when the option cannot be read - confirm. */
+       if (read_option(interleave_chip_selects, 1) != 0) {
+               tom_k = interleave_chip_selects(ctrl);
+       } else {
+               printk(BIOS_DEBUG, "Interleaving disabled\n");
+               tom_k = 0;      /* forces the fallback below */
+       }
+
+       if (!tom_k) {   /* interleaving was skipped or not possible */
+               tom_k = order_chip_selects(ctrl);
+       }
+
+       /* Compute the memory base address */
+       base_k = memory_end_k(ctrl, ctrl->node_id);     /* end of memory on the nodes below this one */
+       tom_k += base_k;
+       route_dram_accesses(ctrl, base_k, tom_k);
+       set_top_mem(tom_k, 0);  /* 0: no hole start is supplied here */
+}
+
+static long disable_dimm(const struct mem_controller *ctrl, unsigned index, long dimm_mask)
+{      /* Turn off DIMM slot `index` and drop it from dimm_mask.
+        *
+        * Writes 0 to the two DRAM_CSBASE registers of the slot (register
+        * numbers 2 * index and 2 * index + 1) on ctrl->f2 and returns
+        * dimm_mask with bit `index` cleared.
+        * NOTE(review): only bit `index` is cleared; a bit at
+        * index + DIMM_SOCKETS, as set by spd_detect_dimms() for channel1,
+        * is left as is - confirm that this is intended.
+        */
+       printk(BIOS_DEBUG, "disabling dimm %02x\n", index);
        pci_write_config32(ctrl->f2, DRAM_CSBASE + (((index << 1)+0)<<2), 0);
        pci_write_config32(ctrl->f2, DRAM_CSBASE + (((index << 1)+1)<<2), 0);
+       dimm_mask &= ~(1 << index);
+       return dimm_mask;
 }
 
-
-static void spd_handle_unbuffered_dimms(const struct mem_controller *ctrl)
+static long spd_handle_unbuffered_dimms(const struct mem_controller *ctrl,
+                                       long dimm_mask)
 {
        int i;
        int registered;
        int unbuffered;
+       int has_dualch = is_opteron(ctrl);
        uint32_t dcl;
        unbuffered = 0;
        registered = 0;
-       for(i = 0; (i < 4) && (ctrl->channel0[i]); i++) {
+       for (i = 0; (i < DIMM_SOCKETS); i++) {
                int value;
+               if (!(dimm_mask & (1 << i))) {
+                       continue;
+               }
                value = spd_read_byte(ctrl->channel0[i], 21);
                if (value < 0) {
-                       disable_dimm(ctrl, i);
-                       continue;
+                       return -1;
                }
+
                /* Registered dimm ? */
                if (value & (1 << 1)) {
                        registered = 1;
-               } 
+               }
                /* Otherwise it must be an unbuffered dimm */
                else {
                        unbuffered = 1;
@@ -1423,33 +1159,65 @@ static void spd_handle_unbuffered_dimms(const struct mem_controller *ctrl)
        if (unbuffered && registered) {
                die("Mixed buffered and registered dimms not supported");
        }
-       if (unbuffered && is_opteron(ctrl)) {
-               die("Unbuffered Dimms not supported on Opteron");
-       }
 
        dcl = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
-       dcl &= ~DCL_UnBufDimm;
+       dcl &= ~DCL_UnBuffDimm;
        if (unbuffered) {
-               dcl |= DCL_UnBufDimm;
+               if ((has_dualch) && (!is_cpu_pre_d0())) {
+                       dcl |= DCL_UnBuffDimm;
+#if CONFIG_CPU_AMD_SOCKET_939
+                       if ((cpuid_eax(1) & 0x30) == 0x30) {
+                               /* CS[7:4] is copy of CS[3:0], should be set for 939 socket */
+                               dcl |= DCL_UpperCSMap;
+                       }
+#endif
+               } else {
+                       dcl |= DCL_UnBuffDimm;
+               }
        }
        pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dcl);
-#if 0
+
        if (is_registered(ctrl)) {
-               print_debug("Registered\r\n");
+               printk(BIOS_SPEW, "Registered\n");
        } else {
-               print_debug("Unbuffered\r\n");
+               printk(BIOS_SPEW, "Unbuffered\n");
        }
-#endif
+
+       return dimm_mask;
+}
+
+static unsigned int spd_detect_dimms(const struct mem_controller *ctrl)
+{      /* Build a bitmask of the DIMM sockets that pass the SPD type check.
+        *
+        * Bit i stands for ctrl->channel0[i] and bit (i + DIMM_SOCKETS) for
+        * ctrl->channel1[i]. A bit is set only when the socket has a non-zero
+        * SPD device and SPD byte 2 reads back as 7; any other value,
+        * including a negative result from spd_read_byte(), leaves it clear.
+        *
+        * Returns the mask.
+        */
+       unsigned dimm_mask;
+       int i;
+       dimm_mask = 0;
+       for (i = 0; i < DIMM_SOCKETS; i++) {
+               int byte;
+               unsigned device;
+               device = ctrl->channel0[i];
+               if (device) {   /* a zero entry means no SPD device for this socket */
+                       byte = spd_read_byte(ctrl->channel0[i], 2);  /* Type */
+                       if (byte == 7) {        /* NOTE(review): 7 is presumably the DDR SDRAM type code - confirm */
+                               dimm_mask |= (1 << i);
+                       }
+               }
+               device = ctrl->channel1[i];
+               if (device) {
+                       byte = spd_read_byte(ctrl->channel1[i], 2);     /* same type check for channel 1 */
+                       if (byte == 7) {
+                               dimm_mask |= (1 << (i + DIMM_SOCKETS));  /* channel 1 uses the upper half of the mask */
+                       }
+               }
+       }
+       return dimm_mask;
 }
 
-static void spd_enable_2channels(const struct mem_controller *ctrl)
+static long spd_enable_2channels(const struct mem_controller *ctrl, long dimm_mask)
 {
        int i;
        uint32_t nbcap;
        /* SPD addresses to verify are identical */
-#warning "FINISHME review and see if these are the bytes I need"
-       /* FINISHME review and see if these are the bytes I need */
-       static const unsigned addresses[] = {
+       static const uint8_t addresses[] = {
                2,      /* Type should be DDR SDRAM */
                3,      /* *Row addresses */
                4,      /* *Column addresses */
@@ -1462,8 +1230,8 @@ static void spd_enable_2channels(const struct mem_controller *ctrl)
                17,     /* *Logical Banks */
                18,     /* *Supported CAS Latencies */
                21,     /* *SDRAM Module Attributes */
-               23,     /* *Cycle time at CAS Latnecy (CLX - 0.5) */
-               26,     /* *Cycle time at CAS Latnecy (CLX - 1.0) */
+               23,     /* *Cycle time at CAS Latency (CLX - 0.5) */
+               25,     /* *Cycle time at CAS Latency (CLX - 1.0) */
                27,     /* *tRP Row precharge time */
                28,     /* *Minimum Row Active to Row Active Delay (tRRD) */
                29,     /* *tRCD RAS to CAS */
@@ -1471,40 +1239,52 @@ static void spd_enable_2channels(const struct mem_controller *ctrl)
                41,     /* *Minimum Active to Active/Auto Refresh Time(Trc) */
                42,     /* *Minimum Auto Refresh Command Time(Trfc) */
        };
+       /* If the dimms are not in pairs do not do dual channels */
+       if ((dimm_mask & ((1 << DIMM_SOCKETS) - 1)) !=
+               ((dimm_mask >> DIMM_SOCKETS) & ((1 << DIMM_SOCKETS) - 1))) {
+               goto single_channel;
+       }
+       /* If the cpu is not capable of doing dual channels don't do dual channels */
        nbcap = pci_read_config32(ctrl->f3, NORTHBRIDGE_CAP);
        if (!(nbcap & NBCAP_128Bit)) {
-               return;
+               goto single_channel;
        }
-       for(i = 0; (i < 4) && (ctrl->channel0[i]); i++) {
+       for (i = 0; (i < 4) && (ctrl->channel0[i]); i++) {
                unsigned device0, device1;
                int value0, value1;
                int j;
+               /* If I don't have a dimm skip this one */
+               if (!(dimm_mask & (1 << i))) {
+                       continue;
+               }
                device0 = ctrl->channel0[i];
                device1 = ctrl->channel1[i];
-               if (!device1)
-                       return;
-               for(j = 0; j < sizeof(addresses)/sizeof(addresses[0]); j++) {
+               for (j = 0; j < ARRAY_SIZE(addresses); j++) {
                        unsigned addr;
                        addr = addresses[j];
                        value0 = spd_read_byte(device0, addr);
                        if (value0 < 0) {
-                               break;
+                               return -1;
                        }
                        value1 = spd_read_byte(device1, addr);
                        if (value1 < 0) {
-                               return;
+                               return -1;
                        }
                        if (value0 != value1) {
-                               return;
+                               goto single_channel;
                        }
                }
        }
-       print_debug("Enabling dual channel memory\r\n");
+       printk(BIOS_SPEW, "Enabling dual channel memory\n");
        uint32_t dcl;
        dcl = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
        dcl &= ~DCL_32ByteEn;
        dcl |= DCL_128BitEn;
        pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dcl);
+       return dimm_mask;
+ single_channel:
+       dimm_mask &= ~((1 << (DIMM_SOCKETS *2)) - (1 << DIMM_SOCKETS));
+       return dimm_mask;
 }
 
 struct mem_param {
@@ -1515,14 +1295,17 @@ struct mem_param {
        uint32_t dch_memclk;
        uint16_t dch_tref4k, dch_tref8k;
        uint8_t  dtl_twr;
+       uint8_t  dtl_twtr;
+       uint8_t  dtl_trwt[3][3]; /* first index is CAS_LAT 2/2.5/3, second index is 128/registered64/64 */
+       uint8_t  rdpreamble[4]; /* 0 is for registered, 1 for 1-2 DIMMS, 2 and 3 for 3 or 4 unreg dimm slots */
        char name[9];
 };
 
-static const struct mem_param *get_mem_param(unsigned min_cycle_time)
+static const struct mem_param *get_mem_param(int freq)
 {
        static const struct mem_param speed[] = {
-               {
-                       .name       = "100Mhz\r\n",
+               [NBCAP_MEMCLK_100MHZ] = {
+                       .name       = "100MHz",
                        .cycle_time = 0xa0,
                        .divisor    = (10 <<1),
                        .tRC        = 0x46,
@@ -1531,9 +1314,12 @@ static const struct mem_param *get_mem_param(unsigned min_cycle_time)
                        .dch_tref4k = DTH_TREF_100MHZ_4K,
                        .dch_tref8k = DTH_TREF_100MHZ_8K,
                        .dtl_twr    = 2,
+                       .dtl_twtr   = 1,
+                       .dtl_trwt   = { { 2, 2, 3 }, { 3, 3, 4 }, { 3, 3, 4 }},
+                       .rdpreamble = { ((9 << 1) + 0), ((9 << 1) + 0), ((9 << 1) + 0), ((9 << 1) + 0) }
                },
-               {
-                       .name       = "133Mhz\r\n",
+               [NBCAP_MEMCLK_133MHZ] = {
+                       .name       = "133MHz",
                        .cycle_time = 0x75,
                        .divisor    = (7<<1)+1,
                        .tRC        = 0x41,
@@ -1542,9 +1328,12 @@ static const struct mem_param *get_mem_param(unsigned min_cycle_time)
                        .dch_tref4k = DTH_TREF_133MHZ_4K,
                        .dch_tref8k = DTH_TREF_133MHZ_8K,
                        .dtl_twr    = 2,
+                       .dtl_twtr   = 1,
+                       .dtl_trwt   = { { 2, 2, 3 }, { 3, 3, 4 }, { 3, 3, 4 }},
+                       .rdpreamble = { ((8 << 1) + 0), ((7 << 1) + 0), ((7 << 1) + 1), ((7 << 1) + 0) }
                },
-               {
-                       .name       = "166Mhz\r\n",
+               [NBCAP_MEMCLK_166MHZ] = {
+                       .name       = "166MHz",
                        .cycle_time = 0x60,
                        .divisor    = (6<<1),
                        .tRC        = 0x3C,
@@ -1553,9 +1342,12 @@ static const struct mem_param *get_mem_param(unsigned min_cycle_time)
                        .dch_tref4k = DTH_TREF_166MHZ_4K,
                        .dch_tref8k = DTH_TREF_166MHZ_8K,
                        .dtl_twr    = 3,
+                       .dtl_twtr   = 1,
+                       .dtl_trwt   = { { 3, 2, 3 }, { 3, 3, 4 }, { 4, 3, 4 }},
+                       .rdpreamble = { ((7 << 1) + 1), ((6 << 1) + 0), ((6 << 1) + 1), ((6 << 1) + 0) }
                },
-               {
-                       .name       = "200Mhz\r\n",
+               [NBCAP_MEMCLK_200MHZ] = {
+                       .name       = "200MHz",
                        .cycle_time = 0x50,
                        .divisor    = (5<<1),
                        .tRC        = 0x37,
@@ -1564,191 +1356,348 @@ static const struct mem_param *get_mem_param(unsigned min_cycle_time)
                        .dch_tref4k = DTH_TREF_200MHZ_4K,
                        .dch_tref8k = DTH_TREF_200MHZ_8K,
                        .dtl_twr    = 3,
-               },
-               {
-                       .cycle_time = 0x00,
-               },
+                       .dtl_twtr   = 2,
+                       .dtl_trwt   = { { 0, 2, 3 }, { 3, 3, 4 }, { 3, 3, 4 }},
+                       .rdpreamble = { ((7 << 1) + 0), ((5 << 1) + 0), ((5 << 1) + 1), ((5 << 1) + 1) }
+               }
        };
        const struct mem_param *param;
-       for(param = &speed[0]; param->cycle_time ; param++) {
-               if (min_cycle_time > (param+1)->cycle_time) {
-                       break;
-               }
+
+       param = speed + freq;
+       printk(BIOS_SPEW, "%s\n", param->name);
+       return param;
+}
+
+struct spd_set_memclk_result {
+       const struct mem_param *param;
+       long dimm_mask;
+};
+
+static int spd_dimm_loading_socket(const struct mem_controller *ctrl, long dimm_mask, int *freq_1t)
+{
+
+#if CONFIG_CPU_AMD_SOCKET_939
+
+/* values are raised by 1 so that 0 marks an unset (invalid) table entry */
+#define DDR200 (NBCAP_MEMCLK_100MHZ + 1)
+#define DDR333 (NBCAP_MEMCLK_166MHZ + 1)
+#define DDR400 (NBCAP_MEMCLK_200MHZ + 1)
+#define DDR_2T 0x80
+#define DDR_MASK 0x7
+
+#define DDR200_2T (DDR_2T | DDR200)
+#define DDR333_2T (DDR_2T | DDR333)
+#define DDR400_2T (DDR_2T | DDR400)
+
+/*
+       Following table comes directly from BKDG (unbuffered DIMM support)
+       [Y][X] Y = ch0_0, ch1_0, ch0_1, ch1_1 1=present 0=empty
+         X uses same layout but 1 means double rank 0 is single rank/empty
+
+       The following tables come from the BKDG; ch{0_0,1_0,0_1,1_1} maps to
+       MEMCS_{1L,1H,2L,2H} in the PDF. PreE is table 45, and revE is table 46.
+*/
+
+       static const unsigned char dimm_loading_config_preE[16][16] = {
+               [0x8] = {[0x0] = DDR400,[0x8] = DDR400},
+               [0x2] = {[0x0] = DDR333,[0x2] = DDR400},
+               [0xa] = {[0x0] = DDR400_2T,[0x2] = DDR400_2T,
+                        [0x8] = DDR400_2T,[0xa] = DDR333_2T},
+               [0xc] = {[0x0] = DDR400,[0xc] = DDR400},
+               [0x3] = {[0x0] = DDR333,[0x3] = DDR400},
+               [0xf] = {[0x0] = DDR400_2T,[0x3] = DDR400_2T,
+                        [0xc] = DDR400_2T,[0xf] = DDR333_2T},
+       };
+
+       static const unsigned char dimm_loading_config_revE[16][16] = {
+               [0x8] = {[0x0] = DDR400, [0x8] = DDR400},
+               [0x2] = {[0x0] = DDR333, [0x2] = DDR400},
+               [0x4] = {[0x0] = DDR400, [0x4] = DDR400},
+               [0x1] = {[0x0] = DDR333, [0x1] = DDR400},
+               [0xa] = {[0x0] = DDR400_2T, [0x2] = DDR400_2T,
+                        [0x8] = DDR400_2T, [0xa] = DDR333_2T},
+               [0x5] = {[0x0] = DDR400_2T, [0x1] = DDR400_2T,
+                        [0x4] = DDR400_2T, [0x5] = DDR333_2T},
+               [0xc] = {[0x0] = DDR400, [0xc] = DDR400, [0x4] = DDR400, [0x8] = DDR400},
+               [0x3] = {[0x0] = DDR333, [0x1] = DDR333, [0x2] = DDR333, [0x3] = DDR400},
+               [0xe] = {[0x0] = DDR400_2T, [0x4] = DDR400_2T, [0x2] = DDR400_2T,
+                        [0x6] = DDR400_2T, [0x8] = DDR400_2T, [0xc] = DDR400_2T,
+                        [0xa] = DDR333_2T, [0xe] = DDR333_2T},
+               [0xb] = {[0x0] = DDR333, [0x1] = DDR400_2T, [0x2] = DDR333_2T,
+                        [0x3] = DDR400_2T, [0x8] = DDR333_2T, [0x9] = DDR400_2T,
+                        [0xa] = DDR333_2T, [0xb] = DDR333_2T},
+               [0xd] = {[0x0] = DDR400_2T, [0x8] = DDR400_2T, [0x1] = DDR400_2T,
+                        [0x9] = DDR333_2T, [0x4] = DDR400_2T, [0xc] = DDR400_2T,
+                        [0x5] = DDR333_2T, [0xd] = DDR333_2T},
+               [0x7] = {[0x0] = DDR333,    [0x2] = DDR400_2T, [0x1] = DDR333_2T,
+                        [0x3] = DDR400_2T, [0x4] = DDR333_2T, [0x6] = DDR400_2T,
+                        [0x5] = DDR333_2T, [0x7] = DDR333_2T},
+               [0xf] = {[0x0] = DDR400_2T, [0x1] = DDR400_2T, [0x4] = DDR400_2T,
+                        [0x5] = DDR333_2T, [0x2] = DDR400_2T, [0x3] = DDR400_2T,
+                        [0x6] = DDR400_2T, [0x7] = DDR333_2T, [0x8] = DDR400_2T,
+                        [0x9] = DDR400_2T, [0xc] = DDR400_2T, [0xd] = DDR333_2T,
+                        [0xa] = DDR333_2T, [0xb] = DDR333_2T, [0xe] = DDR333_2T,
+                        [0xf] = DDR333_2T},
+       };
+       /* dpos matches the channel positions defined in the BKDG and the arrays above.
+          rpos is a bitmask of dual rank DIMMs in the same order as dpos. */
+       unsigned int dloading = 0, i, rpos = 0, dpos = 0;
+       const unsigned char (*dimm_loading_config)[16] = dimm_loading_config_revE;
+       int rank;
+       uint32_t dcl;
+
+       if (is_cpu_pre_e0()) {
+               dimm_loading_config = dimm_loading_config_preE;
        }
-       if (!param->cycle_time) {
-               die("min_cycle_time to low");
+
+       /* only two DIMMs per channel */
+       for (i = 0; i < 2; i++) {
+               if ((dimm_mask & (1 << i))) {
+                       /* read rank channel 0 */
+                       rank = spd_read_byte(ctrl->channel0[i], 5);
+                       if (rank < 0) goto hw_error;
+                       rpos |= (rank == 2) ? (1 << (3 - (i * 2))) : 0;
+                       dpos |= (1 << (3 - (i * 2)));
+               }
+
+               if ((dimm_mask & (1 << (i+DIMM_SOCKETS)))) {
+                       /* read rank channel 1*/
+                       rank = spd_read_byte(ctrl->channel1[i], 5);
+                       if (rank < 0) goto hw_error;
+                       rpos |= (rank == 2) ? (1 << (2 - (i * 2))) : 0;
+                       dpos |= (1 << (2 - (i * 2)));
+               }
        }
-#if 1
-       print_debug(param->name);
+       /* now the lookup, decode the max speed DDR400_2T etc */
+       dloading = dimm_loading_config[dpos][rpos] & DDR_MASK;
+#if 0
+       printk(BIOS_DEBUG, "XXX %x %x dload %x 2T %x\n", dpos,rpos, dloading, dimm_loading_config[dpos][rpos] & DDR_2T);
 #endif
-       return param;
+hw_error:
+       if (dloading != 0) {
+               /* we have valid combination check the restrictions */
+               dcl = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
+               dcl |= ((dimm_loading_config[dpos][rpos] & DDR_2T) || CONFIG_K8_FORCE_2T_DRAM_TIMING) ? (DCL_En2T) : 0;
+               /* Set DualDIMMen if second channel is completely empty (revD+) */
+               if (((cpuid_eax(1) & 0xfff0f) >= 0x10f00) && ((dpos & 0x5) == 0)) {
+                       printk(BIOS_DEBUG, "Setting DualDIMMen\n");
+                       dcl |= DCL_DualDIMMen;
+               }
+               pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dcl);
+               return dloading - 1;
+       } else {
+               /* if we don't find it we set it to DDR400 */
+               printk(BIOS_WARNING, "Detected strange DIMM configuration, may not work! (or bug)\n");
+               return NBCAP_MEMCLK_200MHZ;
+       }
+
+#elif CONFIG_CPU_AMD_SOCKET_754
+
+#define CFGIDX(DIMM1,DIMM2,DIMM3) ((DIMM3)*9+(DIMM2)*3+(DIMM1))
+
+#define EMPTY 0
+#define X8S_X16 1
+#define X8D 2
+
+#define DDR200 NBCAP_MEMCLK_100MHZ
+#define DDR333 NBCAP_MEMCLK_166MHZ
+#define DDR400 NBCAP_MEMCLK_200MHZ
+
+       /* this is table 42 from the BKDG, ignoring footnote 4,
+        * with the EMPTY, EMPTY, EMPTY row added */
+       static const unsigned char cfgtable[][2] = {
+               [CFGIDX(EMPTY,          EMPTY,          EMPTY   )] = { DDR400, DDR400 },
+               [CFGIDX(X8S_X16,        EMPTY,          EMPTY   )] = { DDR400, DDR400 },
+               [CFGIDX(EMPTY,          X8S_X16,        EMPTY   )] = { DDR400, DDR400 },
+               [CFGIDX(EMPTY,          EMPTY,          X8S_X16 )] = { DDR400, DDR400 },
+               [CFGIDX(X8D,            EMPTY,          EMPTY   )] = { DDR400, DDR400 },
+               [CFGIDX(EMPTY,          X8D,            EMPTY   )] = { DDR400, DDR400 },
+               [CFGIDX(EMPTY,          EMPTY,          X8D     )] = { DDR400, DDR400 },
+               [CFGIDX(X8S_X16,        X8S_X16,        EMPTY   )] = { DDR400, DDR400 },
+               [CFGIDX(X8S_X16,        X8D,            EMPTY   )] = { DDR400, DDR400 },
+               [CFGIDX(X8S_X16,        EMPTY,          X8S_X16 )] = { DDR400, DDR400 },
+               [CFGIDX(X8S_X16,        EMPTY,          X8D     )] = { DDR400, DDR400 },
+               [CFGIDX(X8D,            X8S_X16,        EMPTY   )] = { DDR400, DDR400 },
+               [CFGIDX(X8D,            X8D,            EMPTY   )] = { DDR333, DDR333 },
+               [CFGIDX(X8D,            EMPTY,          X8S_X16 )] = { DDR400, DDR400 },
+               [CFGIDX(X8D,            EMPTY,          X8D     )] = { DDR333, DDR333 },
+               [CFGIDX(EMPTY,          X8S_X16,        X8S_X16 )] = { DDR333, DDR400 },
+               [CFGIDX(EMPTY,          X8S_X16,        X8D     )] = { DDR200, DDR400 },
+               [CFGIDX(EMPTY,          X8D,            X8S_X16 )] = { DDR200, DDR400 },
+               [CFGIDX(EMPTY,          X8D,            X8D     )] = { DDR200, DDR333 },
+               [CFGIDX(X8S_X16,        X8S_X16,        X8S_X16 )] = { DDR333, DDR400 },
+               [CFGIDX(X8S_X16,        X8S_X16,        X8D     )] = { DDR200, DDR333 },
+               [CFGIDX(X8S_X16,        X8D,            X8S_X16 )] = { DDR200, DDR333 },
+               [CFGIDX(X8S_X16,        X8D,            X8D     )] = { DDR200, DDR333 },
+               [CFGIDX(X8D,            X8S_X16,        X8S_X16 )] = { DDR333, DDR333 },
+               [CFGIDX(X8D,            X8S_X16,        X8D     )] = { DDR200, DDR333 },
+               [CFGIDX(X8D,            X8D,            X8S_X16 )] = { DDR200, DDR333 },
+               [CFGIDX(X8D,            X8D,            X8D     )] = { DDR200, DDR333 }
+       };
+
+       int i, rank, width, dimmtypes[3];
+       const unsigned char *cfg;
+
+       for (i = 0; i < 3; i++) {
+               if (dimm_mask & (1 << i)) {
+                       rank = spd_read_byte(ctrl->channel0[i], 5);
+                       width = spd_read_byte(ctrl->channel0[i], 13);
+                       if (rank < 0 || width < 0) die("failed to read SPD");
+                       width &= 0x7f;
+                       /* this is my guess as to how the criteria in the table
+                        * are to be understood:
+                        */
+                       dimmtypes[i] = width >= (rank == 1 ? 8 : 16) ? X8S_X16 : X8D;
+               } else {
+                       dimmtypes[i] = EMPTY;
+               }
+       }
+       cfg = cfgtable[CFGIDX(dimmtypes[0], dimmtypes[1], dimmtypes[2])];
+       *freq_1t = cfg[0];
+       return is_cpu_c0() ? cfg[0] : cfg[1];
+
+#else /* CONFIG_CPU_AMD_SOCKET_* */
+
+/* well, there are socket 940 boards supported which obviously fail to
+ * compile with this */
+//     #error load dependent memory clock limiting is not implemented for this socket
+
+       /* see BKDG 4.1.3--if you just want to test a setup that doesn't
+        * require limiting, you may use the following code */
+
+       *freq_1t = NBCAP_MEMCLK_200MHZ;
+       return NBCAP_MEMCLK_200MHZ;
+
+#endif /* CONFIG_CPU_AMD_SOCKET_* */
+
 }
 
-static const struct mem_param *spd_set_memclk(const struct mem_controller *ctrl)
+static struct spd_set_memclk_result spd_set_memclk(const struct mem_controller *ctrl, long dimm_mask)
 {
-       /* Compute the minimum cycle time for these dimms */
-       const struct mem_param *param;
-       unsigned min_cycle_time, min_latency;
-       int i;
+       struct spd_set_memclk_result result;
+       unsigned char cl_at_freq[NBCAP_MEMCLK_MASK + 1];
+       int dimm, freq, max_freq_bios, max_freq_dloading, max_freq_1t;
        uint32_t value;
 
-       static const int latency_indicies[] = { 26, 23, 9 };
-       static const unsigned char min_cycle_times[] = {
+       static const uint8_t spd_min_cycle_time_indices[] = { 9, 23, 25 };
+       static const unsigned char cycle_time_at_freq[] = {
                [NBCAP_MEMCLK_200MHZ] = 0x50, /* 5ns */
                [NBCAP_MEMCLK_166MHZ] = 0x60, /* 6ns */
                [NBCAP_MEMCLK_133MHZ] = 0x75, /* 7.5ns */
                [NBCAP_MEMCLK_100MHZ] = 0xa0, /* 10ns */
        };
 
-
-       value = pci_read_config32(ctrl->f3, NORTHBRIDGE_CAP);
-       min_cycle_time = min_cycle_times[(value >> NBCAP_MEMCLK_SHIFT) & NBCAP_MEMCLK_MASK];
-       min_latency = 2;
-
-#if 0
-       print_debug("min_cycle_time: "); 
-       print_debug_hex8(min_cycle_time); 
-       print_debug(" min_latency: ");
-       print_debug_hex8(min_latency);
-       print_debug("\r\n");
-#endif
-
-       /* Compute the least latency with the fastest clock supported
-        * by both the memory controller and the dimms.
+       /* BEWARE that the constants for frequencies order in reverse of what
+        * would be intuitive. 200 MHz has the lowest constant, 100 MHz the
+        * highest. Thus, all comparisons and traversal directions having to
+        * do with frequencies are/have to be the opposite of what would be
+        * intuitive.
         */
-       for(i = 0; (i < 4) && (ctrl->channel0[i]); i++) {
-               int new_cycle_time, new_latency;
-               int index;
-               int latencies;
-               int latency;
-
-               /* First find the supported CAS latencies
-                * Byte 18 for DDR SDRAM is interpreted:
+
+       /* the CLs supported by the controller: */
+       memset(cl_at_freq, 0x1c, sizeof(cl_at_freq));
+       memset(cl_at_freq, 0x00,
+               (pci_read_config32(ctrl->f3, NORTHBRIDGE_CAP) >>
+                NBCAP_MEMCLK_SHIFT) & NBCAP_MEMCLK_MASK);
+       max_freq_bios = read_option(max_mem_clock, 0);
+       if (max_freq_bios <= NBCAP_MEMCLK_100MHZ)
+               memset(cl_at_freq, 0x00, max_freq_bios);
+       for (dimm = 0; dimm < DIMM_SOCKETS; dimm++) {
+               int x,i,spd_cls,cl,spd_min_cycle_time;
+               unsigned char cl_at_freq_mask[sizeof(cl_at_freq)];
+
+               if (!(dimm_mask & (1 << dimm)))
+                       continue;
+               /* Byte 18 for DDR SDRAM is interpreted:
                 * bit 0 == CAS Latency = 1.0
                 * bit 1 == CAS Latency = 1.5
                 * bit 2 == CAS Latency = 2.0
                 * bit 3 == CAS Latency = 2.5
                 * bit 4 == CAS Latency = 3.0
                 * bit 5 == CAS Latency = 3.5
-                * bit 6 == TBD
+                * bit 6 == CAS Latency = 4.0
                 * bit 7 == TBD
                 */
-               new_cycle_time = 0xa0;
-               new_latency = 5;
-
-               latencies = spd_read_byte(ctrl->channel0[i], 18);
-               if (latencies <= 0) continue;
-
-               /* Compute the lowest cas latency supported */
-               latency = log2(latencies) -2;
-
-               /* Loop through and find a fast clock with a low latency */
-               for(index = 0; index < 3; index++, latency++) {
-                       int value;
-                       if ((latency < 2) || (latency > 4) ||
-                               (!(latencies & (1 << latency)))) {
+               spd_cls = spd_read_byte(ctrl->channel0[dimm], 18);
+               if (spd_cls <= 0)
+                       goto hw_error;
+               memset(cl_at_freq_mask, 0x00, sizeof(cl_at_freq_mask));
+               for (cl = 1 << log2(spd_cls), i = 0; i < 3; cl >>= 1, i++) {
+                       if (!(spd_cls & cl))
                                continue;
-                       }
-                       value = spd_read_byte(ctrl->channel0[i], latency_indicies[index]);
-                       if (value < 0) {
+                       spd_min_cycle_time = spd_read_byte(ctrl->channel0[dimm],
+                                       spd_min_cycle_time_indices[i]);
+                       if (spd_min_cycle_time < 0)
+                               goto hw_error;
+                       if ((!spd_min_cycle_time) || (spd_min_cycle_time & 0x0f) > 9)
                                continue;
-                       }
-
-                       /* Only increase the latency if we decreas the clock */
-                       if ((value >= min_cycle_time) && (value < new_cycle_time)) {
-                               new_cycle_time = value;
-                               new_latency = latency;
-                       }
+                       for (x = 0; x < sizeof(cl_at_freq_mask); x++)
+                               if (cycle_time_at_freq[x] >= spd_min_cycle_time)
+                                       cl_at_freq_mask[x] |= cl;
                }
-               if (new_latency > 4){
-                       continue;
-               }
-               /* Does min_latency need to be increased? */
-               if (new_cycle_time > min_cycle_time) {
-                       min_cycle_time = new_cycle_time;
-               }
-               /* Does min_cycle_time need to be increased? */
-               if (new_latency > min_latency) {
-                       min_latency = new_latency;
-               }
-#if 0
-               print_debug("i: ");
-               print_debug_hex8(i);
-               print_debug(" min_cycle_time: "); 
-               print_debug_hex8(min_cycle_time); 
-               print_debug(" min_latency: ");
-               print_debug_hex8(min_latency);
-               print_debug("\r\n");
-#endif
+               for (x = 0; x < sizeof(cl_at_freq_mask); x++)
+                       cl_at_freq[x] &= cl_at_freq_mask[x];
        }
-       /* Make a second pass through the dimms and disable
-        * any that cannot support the selected memclk and cas latency.
-        */
-       
-       for(i = 0; (i < 4) && (ctrl->channel0[i]); i++) {
-               int latencies;
-               int latency;
-               int index;
-               int value;
-               int dimm;
-               latencies = spd_read_byte(ctrl->channel0[i], 18);
-               if (latencies <= 0) {
-                       goto dimm_err;
-               }
 
-               /* Compute the lowest cas latency supported */
-               latency = log2(latencies) -2;
+       freq = NBCAP_MEMCLK_200MHZ;
+       while (freq < sizeof(cl_at_freq) && !cl_at_freq[freq])
+               freq++;
 
-               /* Walk through searching for the selected latency */
-               for(index = 0; index < 3; index++, latency++) {
-                       if (!(latencies & (1 << latency))) {
-                               continue;
-                       }
-                       if (latency == min_latency)
-                               break;
-               }
-               /* If I can't find the latency or my index is bad error */
-               if ((latency != min_latency) || (index >= 3)) {
-                       goto dimm_err;
-               }
-               
-               /* Read the min_cycle_time for this latency */
-               value = spd_read_byte(ctrl->channel0[i], latency_indicies[index]);
-               
-               /* All is good if the selected clock speed 
-                * is what I need or slower.
-                */
-               if (value <= min_cycle_time) {
-                       continue;
-               }
-               /* Otherwise I have an error, disable the dimm */
-       dimm_err:
-               disable_dimm(ctrl, i);
+       max_freq_dloading = spd_dimm_loading_socket(ctrl, dimm_mask, &max_freq_1t);
+       if (max_freq_dloading > freq) {
+               printk(BIOS_WARNING, "Memory speed reduced due to signal loading conditions\n");
+               freq = max_freq_dloading;
+               while (freq < sizeof(cl_at_freq) && !cl_at_freq[freq])
+                       freq++;
+       }
+
+       /* if the next lower frequency gives a CL at least one whole cycle
+        * shorter, select that (see end of BKDG 4.1.1.1) */
+       if (freq < sizeof(cl_at_freq)-1 && cl_at_freq[freq+1] &&
+               log2f(cl_at_freq[freq]) - log2f(cl_at_freq[freq+1]) >= 2)
+                       freq++;
+
+       if (freq == sizeof(cl_at_freq))
+               goto hw_error;
+
+#if CONFIG_CPU_AMD_SOCKET_754
+       if (freq < max_freq_1t || CONFIG_K8_FORCE_2T_DRAM_TIMING) {
+               pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW,
+                       pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW) | DCL_En2T);
        }
-#if 0
-       print_debug("min_cycle_time: "); 
-       print_debug_hex8(min_cycle_time); 
-       print_debug(" min_latency: ");
-       print_debug_hex8(min_latency);
-       print_debug("\r\n");
 #endif
-       /* Now that I know the minimum cycle time lookup the memory parameters */
-       param = get_mem_param(min_cycle_time);
+
+       result.param = get_mem_param(freq);
 
        /* Update DRAM Config High with our selected memory speed */
        value = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH);
        value &= ~(DCH_MEMCLK_MASK << DCH_MEMCLK_SHIFT);
-       value |= param->dch_memclk;
+#if 0
+       /* Improves DQS centering by correcting for case when core speed multiplier and MEMCLK speed result in odd clock divisor, by selecting the next lowest memory speed, required only at DDR400 and higher speeds with certain DIMM loadings ---- cheating???*/
+       if (!is_cpu_pre_e0()) {
+               if (min_cycle_time==0x50) {
+                       value |= 1<<31;
+               }
+       }
+#endif
+
+       value |= result.param->dch_memclk;
        pci_write_config32(ctrl->f2, DRAM_CONFIG_HIGH, value);
 
        static const unsigned latencies[] = { DTL_CL_2, DTL_CL_2_5, DTL_CL_3 };
+
        /* Update DRAM Timing Low with our selected cas latency */
        value = pci_read_config32(ctrl->f2, DRAM_TIMING_LOW);
        value &= ~(DTL_TCL_MASK << DTL_TCL_SHIFT);
-       value |= latencies[min_latency - 2] << DTL_TCL_SHIFT;
+       value |= latencies[log2f(cl_at_freq[freq]) - 2] << DTL_TCL_SHIFT;
        pci_write_config32(ctrl->f2, DRAM_TIMING_LOW, value);
-       
-       return param;
+
+       result.dimm_mask = dimm_mask;
+       return result;
+ hw_error:
+       result.param = (const struct mem_param *)0;
+       result.dimm_mask = -1;
+       return result;
 }
 
 
@@ -1767,7 +1716,7 @@ static int update_dimm_Trc(const struct mem_controller *ctrl, const struct mem_p
                clocks = DTL_TRC_MIN;
        }
        if (clocks > DTL_TRC_MAX) {
-               return -1;
+               return 0;
        }
 
        dtl = pci_read_config32(ctrl->f2, DRAM_TIMING_LOW);
@@ -1778,7 +1727,7 @@ static int update_dimm_Trc(const struct mem_controller *ctrl, const struct mem_p
        dtl &= ~(DTL_TRC_MASK << DTL_TRC_SHIFT);
        dtl |=  ((clocks - DTL_TRC_BASE) << DTL_TRC_SHIFT);
        pci_write_config32(ctrl->f2, DRAM_TIMING_LOW, dtl);
-       return 0;
+       return 1;
 }
 
 static int update_dimm_Trfc(const struct mem_controller *ctrl, const struct mem_param *param, int i)
@@ -1796,7 +1745,7 @@ static int update_dimm_Trfc(const struct mem_controller *ctrl, const struct mem_
                clocks = DTL_TRFC_MIN;
        }
        if (clocks > DTL_TRFC_MAX) {
-               return -1;
+               return 0;
        }
        dtl = pci_read_config32(ctrl->f2, DRAM_TIMING_LOW);
        old_clocks = ((dtl >> DTL_TRFC_SHIFT) & DTL_TRFC_MASK) + DTL_TRFC_BASE;
@@ -1806,7 +1755,7 @@ static int update_dimm_Trfc(const struct mem_controller *ctrl, const struct mem_
        dtl &= ~(DTL_TRFC_MASK << DTL_TRFC_SHIFT);
        dtl |= ((clocks - DTL_TRFC_BASE) << DTL_TRFC_SHIFT);
        pci_write_config32(ctrl->f2, DRAM_TIMING_LOW, dtl);
-       return 0;
+       return 1;
 }
 
 
@@ -1817,16 +1766,12 @@ static int update_dimm_Trcd(const struct mem_controller *ctrl, const struct mem_
        int value;
        value = spd_read_byte(ctrl->channel0[i], 29);
        if (value < 0) return -1;
-#if 0
        clocks = (value + (param->divisor << 1) -1)/(param->divisor << 1);
-#else
-       clocks = (value + ((param->divisor & 0xff) << 1) -1)/((param->divisor & 0xff) << 1);
-#endif
        if (clocks < DTL_TRCD_MIN) {
                clocks = DTL_TRCD_MIN;
        }
        if (clocks > DTL_TRCD_MAX) {
-               return -1;
+               return 0;
        }
        dtl = pci_read_config32(ctrl->f2, DRAM_TIMING_LOW);
        old_clocks = ((dtl >> DTL_TRCD_SHIFT) & DTL_TRCD_MASK) + DTL_TRCD_BASE;
@@ -1836,7 +1781,7 @@ static int update_dimm_Trcd(const struct mem_controller *ctrl, const struct mem_
        dtl &= ~(DTL_TRCD_MASK << DTL_TRCD_SHIFT);
        dtl |= ((clocks - DTL_TRCD_BASE) << DTL_TRCD_SHIFT);
        pci_write_config32(ctrl->f2, DRAM_TIMING_LOW, dtl);
-       return 0;
+       return 1;
 }
 
 static int update_dimm_Trrd(const struct mem_controller *ctrl, const struct mem_param *param, int i)
@@ -1846,12 +1791,12 @@ static int update_dimm_Trrd(const struct mem_controller *ctrl, const struct mem_
        int value;
        value = spd_read_byte(ctrl->channel0[i], 28);
        if (value < 0) return -1;
-       clocks = (value + ((param->divisor & 0xff) << 1) -1)/((param->divisor & 0xff) << 1);
+       clocks = (value + (param->divisor << 1) -1)/(param->divisor << 1);
        if (clocks < DTL_TRRD_MIN) {
                clocks = DTL_TRRD_MIN;
        }
        if (clocks > DTL_TRRD_MAX) {
-               return -1;
+               return 0;
        }
        dtl = pci_read_config32(ctrl->f2, DRAM_TIMING_LOW);
        old_clocks = ((dtl >> DTL_TRRD_SHIFT) & DTL_TRRD_MASK) + DTL_TRRD_BASE;
@@ -1861,7 +1806,7 @@ static int update_dimm_Trrd(const struct mem_controller *ctrl, const struct mem_
        dtl &= ~(DTL_TRRD_MASK << DTL_TRRD_SHIFT);
        dtl |= ((clocks - DTL_TRRD_BASE) << DTL_TRRD_SHIFT);
        pci_write_config32(ctrl->f2, DRAM_TIMING_LOW, dtl);
-       return 0;
+       return 1;
 }
 
 static int update_dimm_Tras(const struct mem_controller *ctrl, const struct mem_param *param, int i)
@@ -1876,7 +1821,7 @@ static int update_dimm_Tras(const struct mem_controller *ctrl, const struct mem_
                clocks = DTL_TRAS_MIN;
        }
        if (clocks > DTL_TRAS_MAX) {
-               return -1;
+               return 0;
        }
        dtl = pci_read_config32(ctrl->f2, DRAM_TIMING_LOW);
        old_clocks = ((dtl >> DTL_TRAS_SHIFT) & DTL_TRAS_MASK) + DTL_TRAS_BASE;
@@ -1886,7 +1831,7 @@ static int update_dimm_Tras(const struct mem_controller *ctrl, const struct mem_
        dtl &= ~(DTL_TRAS_MASK << DTL_TRAS_SHIFT);
        dtl |= ((clocks - DTL_TRAS_BASE) << DTL_TRAS_SHIFT);
        pci_write_config32(ctrl->f2, DRAM_TIMING_LOW, dtl);
-       return 0;
+       return 1;
 }
 
 static int update_dimm_Trp(const struct mem_controller *ctrl, const struct mem_param *param, int i)
@@ -1896,25 +1841,12 @@ static int update_dimm_Trp(const struct mem_controller *ctrl, const struct mem_p
        int value;
        value = spd_read_byte(ctrl->channel0[i], 27);
        if (value < 0) return -1;
-#if 0
        clocks = (value + (param->divisor << 1) - 1)/(param->divisor << 1);
-#else
-       clocks = (value + ((param->divisor & 0xff) << 1) - 1)/((param->divisor & 0xff) << 1);
-#endif
-#if 0
-       print_debug("Trp: ");
-       print_debug_hex8(clocks);
-       print_debug(" spd value: ");
-       print_debug_hex8(value);
-       print_debug(" divisor: ");
-       print_debug_hex8(param->divisor);
-       print_debug("\r\n");
-#endif
        if (clocks < DTL_TRP_MIN) {
                clocks = DTL_TRP_MIN;
        }
        if (clocks > DTL_TRP_MAX) {
-               return -1;
+               return 0;
        }
        dtl = pci_read_config32(ctrl->f2, DRAM_TIMING_LOW);
        old_clocks = ((dtl >> DTL_TRP_SHIFT) & DTL_TRP_MASK) + DTL_TRP_BASE;
@@ -1924,7 +1856,7 @@ static int update_dimm_Trp(const struct mem_controller *ctrl, const struct mem_p
        dtl &= ~(DTL_TRP_MASK << DTL_TRP_SHIFT);
        dtl |= ((clocks - DTL_TRP_BASE) << DTL_TRP_SHIFT);
        pci_write_config32(ctrl->f2, DRAM_TIMING_LOW, dtl);
-       return 0;
+       return 1;
 }
 
 static void set_Twr(const struct mem_controller *ctrl, const struct mem_param *param)
@@ -1970,7 +1902,7 @@ static int update_dimm_Tref(const struct mem_controller *ctrl, const struct mem_
        dth &= ~(DTH_TREF_MASK << DTH_TREF_SHIFT);
        dth |= (tref << DTH_TREF_SHIFT);
        pci_write_config32(ctrl->f2, DRAM_TIMING_HIGH, dth);
-       return 0;
+       return 1;
 }
 
 
@@ -1978,20 +1910,35 @@ static int update_dimm_x4(const struct mem_controller *ctrl, const struct mem_pa
 {
        uint32_t dcl;
        int value;
+#if CONFIG_QRANK_DIMM_SUPPORT
+       int rank;
+#endif
        int dimm;
        value = spd_read_byte(ctrl->channel0[i], 13);
        if (value < 0) {
                return -1;
        }
-       dimm = i;
-       dimm += DCL_x4DIMM_SHIFT;
+
+#if CONFIG_QRANK_DIMM_SUPPORT
+       rank = spd_read_byte(ctrl->channel0[i], 5);     /* number of physical banks */
+       if (rank < 0) {
+               return -1;
+       }
+#endif
+
+       dimm = 1<<(DCL_x4DIMM_SHIFT+i);
+#if CONFIG_QRANK_DIMM_SUPPORT
+       if (rank==4) {
+               dimm |= 1<<(DCL_x4DIMM_SHIFT+i+2);
+       }
+#endif
        dcl = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
-       dcl &= ~(1 << dimm);
+       dcl &= ~dimm;
        if (value == 4) {
-               dcl |= (1 << dimm);
+               dcl |= dimm;
        }
        pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dcl);
-       return 0;
+       return 1;
 }
 
 static int update_dimm_ecc(const struct mem_controller *ctrl, const struct mem_param *param, int i)
@@ -2007,7 +1954,7 @@ static int update_dimm_ecc(const struct mem_controller *ctrl, const struct mem_p
                dcl &= ~DCL_DimmEccEn;
                pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dcl);
        }
-       return 0;
+       return 1;
 }
 
 static int count_dimms(const struct mem_controller *ctrl)
@@ -2015,9 +1962,9 @@ static int count_dimms(const struct mem_controller *ctrl)
        int dimms;
        unsigned index;
        dimms = 0;
-       for(index = 0; index < 8; index += 2) {
+       for (index = 0; index < 8; index += 2) {
                uint32_t csbase;
-               csbase = pci_read_config32(ctrl->f2, (DRAM_CSBASE + index << 2));
+               csbase = pci_read_config32(ctrl->f2, (DRAM_CSBASE + (index << 2)));
                if (csbase & 1) {
                        dimms += 1;
                }
@@ -2028,79 +1975,51 @@ static int count_dimms(const struct mem_controller *ctrl)
 static void set_Twtr(const struct mem_controller *ctrl, const struct mem_param *param)
 {
        uint32_t dth;
-       unsigned clocks;
-       clocks = 1; /* AMD says hard code this */
+
        dth = pci_read_config32(ctrl->f2, DRAM_TIMING_HIGH);
        dth &= ~(DTH_TWTR_MASK << DTH_TWTR_SHIFT);
-       dth |= ((clocks - DTH_TWTR_BASE) << DTH_TWTR_SHIFT);
+       dth |= ((param->dtl_twtr - DTH_TWTR_BASE) << DTH_TWTR_SHIFT);
        pci_write_config32(ctrl->f2, DRAM_TIMING_HIGH, dth);
 }
 
 static void set_Trwt(const struct mem_controller *ctrl, const struct mem_param *param)
 {
        uint32_t dth, dtl;
-       unsigned divisor;
        unsigned latency;
        unsigned clocks;
+       int lat, mtype;
 
        clocks = 0;
        dtl = pci_read_config32(ctrl->f2, DRAM_TIMING_LOW);
        latency = (dtl >> DTL_TCL_SHIFT) & DTL_TCL_MASK;
-       divisor = param->divisor;
 
        if (is_opteron(ctrl)) {
-               if (latency == DTL_CL_2) {
-                       if (divisor == ((6 << 0) + 0)) {
-                               /* 166Mhz */
-                               clocks = 3;
-                       }
-                       else if (divisor > ((6 << 0)+0)) {
-                               /* 100Mhz && 133Mhz */
-                               clocks = 2;
-                       }
-               }
-               else if (latency == DTL_CL_2_5) {
-                       clocks = 3;
-               }
-               else if (latency == DTL_CL_3) {
-                       if (divisor == ((6 << 0)+0)) {
-                               /* 166Mhz */
-                               clocks = 4;
-                       }
-                       else if (divisor > ((6 << 0)+0)) {
-                               /* 100Mhz && 133Mhz */
-                               clocks = 3;
-                       }
-               }
+               mtype = 0; /* dual channel */
+       } else if (is_registered(ctrl)) {
+               mtype = 1; /* registered 64bit interface */
+       } else {
+               mtype = 2; /* unbuffered 64bit interface */
        }
-       else /* Athlon64 */ {
-               if (is_registered(ctrl)) {
-                       if (latency == DTL_CL_2) {
-                               clocks = 2;
-                       }
-                       else if (latency == DTL_CL_2_5) {
-                               clocks = 3;
-                       }
-                       else if (latency == DTL_CL_3) {
-                               clocks = 3;
-                       }
-               }
-               else /* Unbuffered */{
-                       if (latency == DTL_CL_2) {
-                               clocks = 3;
-                       }
-                       else if (latency == DTL_CL_2_5) {
-                               clocks = 4;
-                       }
-                       else if (latency == DTL_CL_3) {
-                               clocks = 4;
-                       }
-               }
+
+       switch (latency) {
+               case DTL_CL_2:
+                       lat = 0;
+                       break;
+               case DTL_CL_2_5:
+                       lat = 1;
+                       break;
+               case DTL_CL_3:
+                       lat = 2;
+                       break;
+               default:
+                       die("Unknown LAT for Trwt");
        }
+
+       clocks = param->dtl_trwt[lat][mtype];
        if ((clocks < DTH_TRWT_MIN) || (clocks > DTH_TRWT_MAX)) {
-               die("Unknown Trwt");
+               die("Unknown Trwt\n");
        }
-       
+
        dth = pci_read_config32(ctrl->f2, DRAM_TIMING_HIGH);
        dth &= ~(DTH_TRWT_MASK << DTH_TRWT_SHIFT);
        dth |= ((clocks - DTH_TRWT_BASE) << DTH_TRWT_SHIFT);
@@ -2128,83 +2047,38 @@ static void set_Twcl(const struct mem_controller *ctrl, const struct mem_param *
 static void set_read_preamble(const struct mem_controller *ctrl, const struct mem_param *param)
 {
        uint32_t dch;
-       unsigned divisor;
        unsigned rdpreamble;
-       divisor = param->divisor;
-       dch = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH);
-       dch &= ~(DCH_RDPREAMBLE_MASK << DCH_RDPREAMBLE_SHIFT);
-       rdpreamble = 0;
-       if (is_registered(ctrl)) {
-               if (divisor == ((10 << 1)+0)) {
-                       /* 100Mhz, 9ns */
-                       rdpreamble = ((9 << 1)+ 0);
-               }
-               else if (divisor == ((7 << 1)+1)) {
-                       /* 133Mhz, 8ns */
-                       rdpreamble = ((8 << 1)+0);
-               }
-               else if (divisor == ((6 << 1)+0)) {
-                       /* 166Mhz, 7.5ns */
-                       rdpreamble = ((7 << 1)+1);
-               }
-               else if (divisor == ((5 << 1)+0)) {
-                       /* 200Mhz,  7ns */
-                       rdpreamble = ((7 << 1)+0);
+       int slots, i;
+
+       slots = 0;
+
+       for (i = 0; i < 4; i++) {
+               if (ctrl->channel0[i]) {
+                       slots += 1;
                }
        }
-       else {
-               int slots;
-               int i;
-               slots = 0;
-               for(i = 0; i < 4; i++) {
-                       if (ctrl->channel0[i]) {
-                               slots += 1;
-                       }
-               }
-               if (divisor == ((10 << 1)+0)) {
-                       /* 100Mhz */
-                       if (slots <= 2) {
-                               /* 9ns */
-                               rdpreamble = ((9 << 1)+0);
-                       } else {
-                               /* 14ns */
-                               rdpreamble = ((14 << 1)+0);
-                       }
-               }
-               else if (divisor == ((7 << 1)+1)) {
-                       /* 133Mhz */
-                       if (slots <= 2) {
-                               /* 7ns */
-                               rdpreamble = ((7 << 1)+0);
-                       } else {
-                               /* 11 ns */
-                               rdpreamble = ((11 << 1)+0);
-                       }
-               }
-               else if (divisor == ((6 << 1)+0)) {
-                       /* 166Mhz */
-                       if (slots <= 2) {
-                               /* 6ns */
-                               rdpreamble = ((7 << 1)+0);
-                       } else {
-                               /* 9ns */
-                               rdpreamble = ((9 << 1)+0);
-                       }
-               }
-               else if (divisor == ((5 << 1)+0)) {
-                       /* 200Mhz */
-                       if (slots <= 2) {
-                               /* 5ns */
-                               rdpreamble = ((5 << 1)+0);
-                       } else {
-                               /* 7ns */
-                               rdpreamble = ((7 << 1)+0);
-                       }
-               }
+
+       /* map the DIMM configuration to an index into the param->rdpreamble array */
+       if (is_registered(ctrl)) {
+               i = 0;
+       } else if (slots < 3) {
+               i = 1;
+       } else if (slots == 3) {
+               i = 2;
+       } else if (slots == 4) {
+               i = 3;
+       } else {
+               die("Unknown rdpreamble for this nr of slots");
        }
+
+       dch = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH);
+       dch &= ~(DCH_RDPREAMBLE_MASK << DCH_RDPREAMBLE_SHIFT);
+       rdpreamble = param->rdpreamble[i];
+
        if ((rdpreamble < DCH_RDPREAMBLE_MIN) || (rdpreamble > DCH_RDPREAMBLE_MAX)) {
                die("Unknown rdpreamble");
        }
+
        dch |= (rdpreamble - DCH_RDPREAMBLE_BASE) << DCH_RDPREAMBLE_SHIFT;
        pci_write_config32(ctrl->f2, DRAM_CONFIG_HIGH, dch);
 }
@@ -2212,7 +2086,6 @@ static void set_read_preamble(const struct mem_controller *ctrl, const struct me
 static void set_max_async_latency(const struct mem_controller *ctrl, const struct mem_param *param)
 {
        uint32_t dch;
-       int i;
        unsigned async_lat;
        int dimms;
 
@@ -2225,7 +2098,7 @@ static void set_max_async_latency(const struct mem_controller *ctrl, const struc
                if (dimms == 4) {
                        /* 9ns */
                        async_lat = 9;
-               } 
+               }
                else {
                        /* 8ns */
                        async_lat = 8;
@@ -2259,33 +2132,37 @@ static void set_idle_cycle_limit(const struct mem_controller *ctrl, const struct
        pci_write_config32(ctrl->f2, DRAM_CONFIG_HIGH, dch);
 }
 
-static void spd_set_dram_timing(const struct mem_controller *ctrl, const struct mem_param *param)
+static long spd_set_dram_timing(const struct mem_controller *ctrl, const struct mem_param *param, long dimm_mask)
 {
-       int dimms;
        int i;
-       int rc;
-       
+
        init_Tref(ctrl, param);
-       for(i = 0; (i < 4) && ctrl->channel0[i]; i++) {
+       for (i = 0; i < DIMM_SOCKETS; i++) {
                int rc;
+               if (!(dimm_mask & (1 << i))) {
+                       continue;
+               }
                /* DRAM Timing Low Register */
-               if (update_dimm_Trc (ctrl, param, i) < 0) goto dimm_err;
-               if (update_dimm_Trfc(ctrl, param, i) < 0) goto dimm_err;
-               if (update_dimm_Trcd(ctrl, param, i) < 0) goto dimm_err;
-               if (update_dimm_Trrd(ctrl, param, i) < 0) goto dimm_err;
-               if (update_dimm_Tras(ctrl, param, i) < 0) goto dimm_err;
-               if (update_dimm_Trp (ctrl, param, i) < 0) goto dimm_err;
+               if ((rc = update_dimm_Trc (ctrl, param, i)) <= 0) goto dimm_err;
+               if ((rc = update_dimm_Trfc(ctrl, param, i)) <= 0) goto dimm_err;
+               if ((rc = update_dimm_Trcd(ctrl, param, i)) <= 0) goto dimm_err;
+               if ((rc = update_dimm_Trrd(ctrl, param, i)) <= 0) goto dimm_err;
+               if ((rc = update_dimm_Tras(ctrl, param, i)) <= 0) goto dimm_err;
+               if ((rc = update_dimm_Trp (ctrl, param, i)) <= 0) goto dimm_err;
 
                /* DRAM Timing High Register */
-               if (update_dimm_Tref(ctrl, param, i) < 0) goto dimm_err;
+               if ((rc = update_dimm_Tref(ctrl, param, i)) <= 0) goto dimm_err;
+
 
                /* DRAM Config Low */
-               if (update_dimm_x4 (ctrl, param, i) < 0) goto dimm_err;
-               if (update_dimm_ecc(ctrl, param, i) < 0) goto dimm_err;
+               if ((rc = update_dimm_x4 (ctrl, param, i)) <= 0) goto dimm_err;
+               if ((rc = update_dimm_ecc(ctrl, param, i)) <= 0) goto dimm_err;
                continue;
        dimm_err:
-               disable_dimm(ctrl, i);
-               
+               if (rc < 0) {
+                       return -1;
+               }
+               dimm_mask = disable_dimm(ctrl, i, dimm_mask);
        }
        /* DRAM Timing Low Register */
        set_Twr(ctrl, param);
@@ -2299,47 +2176,227 @@ static void spd_set_dram_timing(const struct mem_controller *ctrl, const struct
        set_read_preamble(ctrl, param);
        set_max_async_latency(ctrl, param);
        set_idle_cycle_limit(ctrl, param);
+       return dimm_mask;
 }
 
-static void sdram_set_spd_registers(const struct mem_controller *ctrl) 
+#if CONFIG_RAMINIT_SYSINFO
+static void sdram_set_spd_registers(const struct mem_controller *ctrl, struct sys_info *sysinfo)
+#else
+static void sdram_set_spd_registers(const struct mem_controller *ctrl)
+#endif
 {
+       struct spd_set_memclk_result result;
        const struct mem_param *param;
-       spd_enable_2channels(ctrl);
-       spd_set_ram_size(ctrl);
-       spd_handle_unbuffered_dimms(ctrl);
-       param = spd_set_memclk(ctrl);
-       spd_set_dram_timing(ctrl, param);
+       long dimm_mask;
+#if 1
+       if (!controller_present(ctrl)) {
+//             printk(BIOS_DEBUG, "No memory controller present\n");
+               return;
+       }
+#endif
+       hw_enable_ecc(ctrl);
+       activate_spd_rom(ctrl);
+       dimm_mask = spd_detect_dimms(ctrl);
+       if (!(dimm_mask & ((1 << DIMM_SOCKETS) - 1))) {
+               printk(BIOS_DEBUG, "No memory for this cpu\n");
+               return;
+       }
+       dimm_mask = spd_enable_2channels(ctrl, dimm_mask);
+       if (dimm_mask < 0)
+               goto hw_spd_err;
+       dimm_mask = spd_set_ram_size(ctrl , dimm_mask);
+       if (dimm_mask < 0)
+               goto hw_spd_err;
+       dimm_mask = spd_handle_unbuffered_dimms(ctrl, dimm_mask);
+       if (dimm_mask < 0)
+               goto hw_spd_err;
+       result = spd_set_memclk(ctrl, dimm_mask);
+       param     = result.param;
+       dimm_mask = result.dimm_mask;
+       if (dimm_mask < 0)
+               goto hw_spd_err;
+       dimm_mask = spd_set_dram_timing(ctrl, param , dimm_mask);
+       if (dimm_mask < 0)
+               goto hw_spd_err;
        order_dimms(ctrl);
+       return;
+ hw_spd_err:
+       /* Unrecoverable error reading SPD data */
+       printk(BIOS_ERR, "SPD error - reset\n");
+       hard_reset();
+       return;
+}
+
+#if CONFIG_HW_MEM_HOLE_SIZEK != 0
+static uint32_t hoist_memory(int controllers, const struct mem_controller *ctrl,unsigned hole_startk, int i)
+{
+       int ii;
+       uint32_t carry_over;
+       device_t dev;
+       uint32_t base, limit;
+       uint32_t basek;
+       uint32_t hoist;
+       int j;
+
+       carry_over = (4*1024*1024) - hole_startk;
+
+       for (ii=controllers - 1;ii>i;ii--) {
+               base  = pci_read_config32(ctrl[0].f1, 0x40 + (ii << 3));
+               if ((base & ((1<<1)|(1<<0))) != ((1<<1)|(1<<0))) {
+                       continue;
+               }
+               limit = pci_read_config32(ctrl[0].f1, 0x44 + (ii << 3));
+               for (j = 0; j < controllers; j++) {
+                       pci_write_config32(ctrl[j].f1, 0x44 + (ii << 3), limit + (carry_over << 2));
+                       pci_write_config32(ctrl[j].f1, 0x40 + (ii << 3), base + (carry_over << 2));
+               }
+       }
+       limit = pci_read_config32(ctrl[0].f1, 0x44 + (i << 3));
+       for (j = 0; j < controllers; j++) {
+               pci_write_config32(ctrl[j].f1, 0x44 + (i << 3), limit + (carry_over << 2));
+       }
+       dev = ctrl[i].f1;
+       base  = pci_read_config32(dev, 0x40 + (i << 3));
+       basek  = (base & 0xffff0000) >> 2;
+       if (basek == hole_startk) {
+               // No need to set the memory hole here: the hole offset would overflow to 0,
+               // so change the base register instead; the new basek will be 4*1024*1024.
+               base &= 0x0000ffff;
+               base |= (4*1024*1024)<<2;
+               for (j = 0; j < controllers; j++) {
+                       pci_write_config32(ctrl[j].f1, 0x40 + (i<<3), base);
+               }
+       }
+       else {
+               hoist = /* hole start address */
+                       ((hole_startk << 10) & 0xff000000) +
+                       /* hole address to memory controller address */
+                       (((basek + carry_over) >> 6) & 0x0000ff00) +
+                       /* enable */
+                       1;
+               pci_write_config32(dev, 0xf0, hoist);
+       }
+
+       return carry_over;
+}
+
+static void set_hw_mem_hole(int controllers, const struct mem_controller *ctrl)
+{
+
+       uint32_t hole_startk;
+       int i;
+
+       hole_startk = 4*1024*1024 - CONFIG_HW_MEM_HOLE_SIZEK;
+
+       printk(BIOS_SPEW, "Handling memory hole at 0x%08x (default)\n", hole_startk);
+#if CONFIG_HW_MEM_HOLE_SIZE_AUTO_INC == 1
+       /* We need to double check if hole_startk is valid.
+        * If it is equal to the dram base address in K (base_k),
+        * we need to decrease it.
+        */
+       uint32_t basek_pri;
+       for (i=0; i<controllers; i++) {
+                       uint32_t base;
+                       unsigned base_k;
+                       base  = pci_read_config32(ctrl[0].f1, 0x40 + (i << 3));
+                       if ((base & ((1<<1)|(1<<0))) != ((1<<1)|(1<<0))) {
+                               continue;
+                       }
+                       base_k = (base & 0xffff0000) >> 2;
+                       if (base_k == hole_startk) {
+                               /* decrease memory hole startk to make sure it is
+                                * in the middle of the previous node
+                                */
+                               hole_startk -= (base_k - basek_pri)>>1;
+                               break; /* only one hole */
+                       }
+                       basek_pri = base_k;
+       }
+
+       printk(BIOS_SPEW, "Handling memory hole at 0x%08x (adjusted)\n", hole_startk);
+#endif
+       /* Find node number that needs the memory hole configured */
+       for (i=0; i<controllers; i++) {
+                       uint32_t base, limit;
+                       unsigned base_k, limit_k;
+                       base  = pci_read_config32(ctrl[0].f1, 0x40 + (i << 3));
+                       if ((base & ((1<<1)|(1<<0))) != ((1<<1)|(1<<0))) {
+                               continue;
+                       }
+                       limit = pci_read_config32(ctrl[0].f1, 0x44 + (i << 3));
+                       base_k = (base & 0xffff0000) >> 2;
+                       limit_k = ((limit + 0x00010000) & 0xffff0000) >> 2;
+                       if ((base_k <= hole_startk) && (limit_k > hole_startk)) {
+                               unsigned end_k;
+                               hoist_memory(controllers, ctrl, hole_startk, i);
+                               end_k = memory_end_k(ctrl, controllers);
+                               set_top_mem(end_k, hole_startk);
+                               break; /* only one hole */
+                       }
+       }
+
 }
 
+#endif
+
 #define TIMEOUT_LOOPS 300000
+#if CONFIG_RAMINIT_SYSINFO
+static void sdram_enable(int controllers, const struct mem_controller *ctrl, struct sys_info *sysinfo)
+#else
 static void sdram_enable(int controllers, const struct mem_controller *ctrl)
+#endif
 {
        int i;
+       u32 whatWait = 0;
+#if CONFIG_HAVE_ACPI_RESUME == 1
+       int suspend = acpi_is_wakeup_early();
+#else
+       int suspend = 0;
+#endif
+
+       /* Error if I don't have memory */
+       if (memory_end_k(ctrl, controllers) == 0) {
+               die("No memory\n");
+       }
 
        /* Before enabling memory start the memory clocks */
-       for(i = 0; i < controllers; i++) {
+       for (i = 0; i < controllers; i++) {
                uint32_t dch;
+               if (!controller_present(ctrl + i))
+                       continue;
                dch = pci_read_config32(ctrl[i].f2, DRAM_CONFIG_HIGH);
-               dch |= DCH_MEMCLK_VALID;
-               pci_write_config32(ctrl[i].f2, DRAM_CONFIG_HIGH, dch);
+               if (dch & (DCH_MEMCLK_EN0|DCH_MEMCLK_EN1|DCH_MEMCLK_EN2|DCH_MEMCLK_EN3)) {
+                       dch |= DCH_MEMCLK_VALID;
+                       pci_write_config32(ctrl[i].f2, DRAM_CONFIG_HIGH, dch);
+               }
+               else {
+                       /* Disable dram receivers */
+                       uint32_t dcl;
+                       dcl = pci_read_config32(ctrl[i].f2, DRAM_CONFIG_LOW);
+                       dcl |= DCL_DisInRcvrs;
+                       pci_write_config32(ctrl[i].f2, DRAM_CONFIG_LOW, dcl);
+               }
        }
 
+       /* We need to wait a minimum of 20 MEMCLKS to enable the InitDram */
        /* And if necessary toggle the the reset on the dimms by hand */
        memreset(controllers, ctrl);
 
-       for(i = 0; i < controllers; i++) {
-               uint32_t dcl;
+       for (i = 0; i < controllers; i++) {
+               uint32_t dcl, dch;
+               if (!controller_present(ctrl + i))
+                       continue;
+               /* Skip everything if I don't have any memory on this controller */
+               dch = pci_read_config32(ctrl[i].f2, DRAM_CONFIG_HIGH);
+               if (!(dch & DCH_MEMCLK_VALID)) {
+                       continue;
+               }
+
                /* Toggle DisDqsHys to get it working */
                dcl = pci_read_config32(ctrl[i].f2, DRAM_CONFIG_LOW);
-#if 0
-               print_debug("dcl: ");
-               print_debug_hex32(dcl);
-               print_debug("\r\n");
-#endif
                if (dcl & DCL_DimmEccEn) {
                        uint32_t mnc;
-                       print_debug("ECC enabled\r\n");
+                       printk(BIOS_SPEW, "ECC enabled\n");
                        mnc = pci_read_config32(ctrl[i].f3, MCA_NB_CONFIG);
                        mnc |= MNC_ECC_EN;
                        if (dcl & DCL_128BitEn) {
@@ -2347,174 +2404,115 @@ static void sdram_enable(int controllers, const struct mem_controller *ctrl)
                        }
                        pci_write_config32(ctrl[i].f3, MCA_NB_CONFIG, mnc);
                }
-               dcl |= DCL_DisDqsHys;
-               pci_write_config32(ctrl[i].f2, DRAM_CONFIG_LOW, dcl);
+
+               if (!suspend) {
+                       dcl |= DCL_DisDqsHys;
+                       pci_write_config32(ctrl[i].f2, DRAM_CONFIG_LOW, dcl);
+               }
                dcl &= ~DCL_DisDqsHys;
                dcl &= ~DCL_DLL_Disable;
                dcl &= ~DCL_D_DRV;
                dcl &= ~DCL_QFC_EN;
-               dcl |= DCL_DramInit;
-               pci_write_config32(ctrl[i].f2, DRAM_CONFIG_LOW, dcl);
 
+               if (suspend) {
+                       enable_lapic();
+                       init_timer();
+                       dcl |= (DCL_ESR | DCL_SRS);
+                       /* Handle errata 85 Insufficient Delay Between MEMCLK Startup
+                          and CKE Assertion During Resume From S3 */
+                       udelay(10); /* for unregistered */
+                       if (is_registered(&ctrl[i])) {
+                               udelay(100); /* 110us for registered (we wait 10us already) */
+                       }
+                       whatWait = DCL_ESR;
+               } else {
+                       dcl |= DCL_DramInit;
+                       whatWait = DCL_DramInit;
+               }
+               pci_write_config32(ctrl[i].f2, DRAM_CONFIG_LOW, dcl);
        }
-       for(i = 0; i < controllers; i++) {
-               uint32_t dcl;
-               print_debug("Initializing memory: ");
+
+       for (i = 0; i < controllers; i++) {
+               uint32_t dcl, dch;
+               if (!controller_present(ctrl + i))
+                       continue;
+               /* Skip everything if I don't have any memory on this controller */
+               dch = pci_read_config32(ctrl[i].f2, DRAM_CONFIG_HIGH);
+               if (!(dch & DCH_MEMCLK_VALID)) {
+                       continue;
+               }
+
+               printk(BIOS_DEBUG, "Initializing memory: ");
                int loops = 0;
                do {
                        dcl = pci_read_config32(ctrl[i].f2, DRAM_CONFIG_LOW);
-                       loops += 1;
+                       loops++;
                        if ((loops & 1023) == 0) {
-                               print_debug(".");
+                               printk(BIOS_DEBUG, ".");
                        }
-               } while(((dcl & DCL_DramInit) != 0) && (loops < TIMEOUT_LOOPS));
+               } while(((dcl & whatWait) != 0) && (loops < TIMEOUT_LOOPS));
                if (loops >= TIMEOUT_LOOPS) {
-                       print_debug(" failed\r\n");
-               } else {
-                       print_debug(" done\r\n");
+                       printk(BIOS_DEBUG, " failed\n");
+                       continue;
                }
-               if (dcl & DCL_DimmEccEn) {
-                       print_debug("Clearing memory: ");
-                       if (!is_cpu_pre_c0()) {
-                               /* Wait until the automatic ram scrubber is finished */
-                               dcl &= ~(DCL_MemClrStatus | DCL_DramEnable);
-                               pci_write_config32(ctrl[i].f2, DRAM_CONFIG_LOW, dcl);
-                               do {
-                                       dcl = pci_read_config32(ctrl[i].f2, DRAM_CONFIG_LOW);
-                               } while(((dcl & DCL_MemClrStatus) == 0) || ((dcl & DCL_DramEnable) == 0) );
-                       }
-                       uint32_t base, last_scrub_k, scrub_k;
-                       uint32_t cnt,zstart,zend;
-                       msr_t msr,msr_201;
-
-                       /* First make certain the scrubber is disabled */
-                       pci_write_config32(ctrl[i].f3, SCRUB_CONTROL,
-                               (SCRUB_NONE << 16) | (SCRUB_NONE << 8) | (SCRUB_NONE << 0));
-
-                       /* load the start and end for the memory block to clear */
-                       msr_201 = rdmsr(0x201);
-                       zstart = pci_read_config32(ctrl[0].f1, 0x40 + (i*8));
-                       zend = pci_read_config32(ctrl[0].f1, 0x44 + (i*8));
-                       zstart >>= 16;
-                       zend >>=16;
-#if 1
-                       print_debug("addr ");
-                       print_debug_hex32(zstart);
-                       print_debug("-");
-                       print_debug_hex32(zend);
-                       print_debug("\r\n");
+
+               if (!is_cpu_pre_c0()) {
+                       /* Wait until it is safe to touch memory */
+#if 0
+                       /* the registers are marked read-only but code zeros them */
+                       dcl &= ~(DCL_MemClrStatus | DCL_DramEnable);
+                       pci_write_config32(ctrl[i].f2, DRAM_CONFIG_LOW, dcl);
 #endif
-                       
-                       /* Disable fixed mtrrs */
-                       msr = rdmsr(MTRRdefType_MSR);
-                       msr.lo &= ~(1<<10);
-                       wrmsr(MTRRdefType_MSR, msr);
-
-                       /* turn on the wrap 32 disable */
-                       msr = rdmsr(0xc0010015);
-                       msr.lo |= (1<<17);
-                       wrmsr(0xc0010015,msr);
-
-                       for(;zstart<zend;zstart+=4) {
-
-                               /* test for the last 64 meg of 4 gig space */
-                               if(zstart == 0x0fc)
-                                       continue;
-                               
-                               /* disable cache */
-                               __asm__ volatile(
-                                       "movl  %%cr0, %0\n\t"
-                                       "orl  $0x40000000, %0\n\t"
-                                       "movl  %0, %%cr0\n\t"
-                                       :"=r" (cnt)
-                                       );
-                               
-                               /* Set the variable mtrrs to write combine */
-                               msr.lo = 1 + ((zstart&0x0ff)<<24);
-                               msr.hi = (zstart&0x0ff00)>>8;
-                               wrmsr(0x200,msr);
-
-                               /* Set the limit to 64 meg of ram */
-                               msr.hi = 0x000000ff;
-                               msr.lo = 0xfc000800;
-                               wrmsr(0x201,msr);
-
-                               /* enable cache */
-                               __asm__ volatile(
-                                       "movl  %%cr0, %0\n\t"
-                                       "andl  $0x9fffffff, %0\n\t"
-                                       "movl  %0, %%cr0\n\t"   
-                                       :"=r" (cnt)     
-                                       );
-                               /* Set fs base address */
-                               msr.lo = (zstart&0xff) << 24;
-                               msr.hi = (zstart&0xff00) >> 8;
-                               wrmsr(0xc0000100,msr);
-
-                               print_debug_char((zstart > 0x0ff)?'+':'-');     
-                                       
-                               /* clear memory 64meg */
-                               __asm__ volatile(
-                                       "1: \n\t"
-                                       "movl %0, %%fs:(%1)\n\t"
-                                       "addl $4,%1\n\t"
-                                       "subl $1,%2\n\t"
-                                       "jnz 1b\n\t"
-                                       :
-                                       : "a" (0), "D" (0), "c" (0x01000000)
-                                       );                      
-                       }
-                       
-                       /* disable cache */
-                       __asm__ volatile(
-                               "movl  %%cr0, %0\n\t"
-                               "orl  $0x40000000, %0\n\t"
-                               "movl  %0, %%cr0\n\t"
-                               :"=r" (cnt)     
-                               );
-               
-                       /* restore msr registers */     
-                       msr = rdmsr(MTRRdefType_MSR);
-                       msr.lo |= 0x0400;
-                       wrmsr(MTRRdefType_MSR, msr);
-
-                       /* Restore the variable mtrrs */
-                       msr.lo = 6;
-                       msr.hi = 0;
-                       wrmsr(0x200,msr);
-                       wrmsr(0x201,msr_201);
-
-                       /* Set fs base to 0 */
-                       msr.lo = 0;
-                       msr.hi = 0;
-                       wrmsr(0xc0000100,msr);
-
-                       /* enable cache */
-                       __asm__ volatile(
-                               "movl  %%cr0, %0\n\t"
-                               "andl  $0x9fffffff, %0\n\t"
-                               "movl  %0, %%cr0\n\t"   
-                               :"=r" (cnt)     
-                               );
-                       
-                       /* turn off the wrap 32 disable */
-                       msr = rdmsr(0xc0010015);
-                       msr.lo &= ~(1<<17);
-                       wrmsr(0xc0010015,msr);
-
-                       /* Find the Srub base address for this cpu */
-                       base = pci_read_config32(ctrl[i].f1, 0x40 + (ctrl[i].node_id << 3));
-                       base &= 0xffff0000;
-
-                       /* Set the scrub base address registers */
-                       pci_write_config32(ctrl[i].f3, SCRUB_ADDR_LOW, base << 8);
-                       pci_write_config32(ctrl[i].f3, SCRUB_ADDR_HIGH, base >> 24);
-
-                       /* Enable scrubbing at the lowest possible rate */
-                       pci_write_config32(ctrl[i].f3, SCRUB_CONTROL, 
-                               (SCRUB_84ms << 16) | (SCRUB_84ms << 8) | (SCRUB_84ms << 0));
-
-                       print_debug("done\r\n");
+                       do {
+                               dcl = pci_read_config32(ctrl[i].f2, DRAM_CONFIG_LOW);
+                       } while(((dcl & DCL_MemClrStatus) == 0) || ((dcl & DCL_DramEnable) == 0) ||
+                                       ((dcl & DCL_SRS)));
+               }
+
+               printk(BIOS_DEBUG, " done\n");
+       }
+
+#if CONFIG_HW_MEM_HOLE_SIZEK != 0
+        // init hw mem hole here
+       /* DramHoleValid bit only can be set after MemClrStatus is set by Hardware */
+       if (!is_cpu_pre_e0())
+               set_hw_mem_hole(controllers, ctrl);
+#endif
+
+       //FIXME add enable node interleaving here -- yhlu
+       /* needed?
+               1. check how many nodes we have; if not all have ram installed, get out
+               2. check cs_base lo is 0, node 0 f2 0x40,,,,, if any one is not using lo is CS_BASE, get out
+               3. check if every other node is the same as node 0 about f2 0x40,,,,, otherwise get out
+               4. if all are ready, enable node_interleaving in f1 0x40..... of every node
+               5. for node interleaving we need to set mem hole on every node (need to recalculate hole offset in f0 for every node)
+       */
+
+}
+
+static void set_sysinfo_in_ram(unsigned val)
+{
+}
+
+void fill_mem_ctrl(int controllers, struct mem_controller *ctrl_a,
+                         const uint16_t *spd_addr)
+{
+       int i;
+       int j;
+       struct mem_controller *ctrl;
+       for (i=0;i<controllers; i++) {
+               ctrl = &ctrl_a[i];
+               ctrl->node_id = i;
+               ctrl->f0 = PCI_DEV(0, 0x18+i, 0);
+               ctrl->f1 = PCI_DEV(0, 0x18+i, 1);
+               ctrl->f2 = PCI_DEV(0, 0x18+i, 2);
+               ctrl->f3 = PCI_DEV(0, 0x18+i, 3);
+
+               if (spd_addr == (void *)0) continue;
+
+               for (j=0;j<DIMM_SOCKETS;j++) {
+                       ctrl->channel0[j] = spd_addr[(i*2+0)*DIMM_SOCKETS + j];
+                       ctrl->channel1[j] = spd_addr[(i*2+1)*DIMM_SOCKETS + j];
                }
        }
 }