1 /* This should be done by Eric
2 2004.11 yhlu add 4 rank DIMM support
3 2004.12 yhlu add D0 support
4 2005.02 yhlu add E0 memory hole support
7 #include <cpu/x86/mem.h>
8 #include <cpu/x86/cache.h>
9 #include <cpu/x86/mtrr.h>
14 #if (CONFIG_LB_MEM_TOPK & (CONFIG_LB_MEM_TOPK -1)) != 0
15 # error "CONFIG_LB_MEM_TOPK must be a power of 2"
18 #ifndef QRANK_DIMM_SUPPORT
19 #define QRANK_DIMM_SUPPORT 0
22 #if defined (__GNUC__)
23 static void hard_reset(void);
/* Walk a table of (PCI address, AND-mask, OR-bits) triples and apply a
 * read-modify-write to each config-space dword: preserve the bits set in
 * the AND-mask, then OR in the new bits.
 * NOTE(review): this extract has lines elided (loop-variable declarations
 * and braces are not visible); code left byte-identical. */
26 static void setup_resource_map(const unsigned int *register_values, int max)
29 // printk_debug("setting up resource map....");
30 for (i = 0; i < max; i += 3) {
34 dev = register_values[i] & ~0xfff;
35 where = register_values[i] & 0xfff;
36 reg = pci_read_config32(dev, where);
37 reg &= register_values[i+1];
38 reg |= register_values[i+2];
39 pci_write_config32(dev, where, reg);
41 // printk_debug("done.\n");
/* Probe for a K8 memory controller on this node: function 0 of the node's
 * PCI device must report device/vendor id 0x1100/0x1022 (AMD K8 HT config). */
44 static int controller_present(const struct mem_controller *ctrl)
46 return pci_read_config32(ctrl->f0, 0) == 0x11001022;
/* Program the static northbridge DRAM register defaults for one node.
 * The register_values[] table is consumed in (PCI address, AND-mask,
 * OR-bits) triples by the read-modify-write loop at the bottom; the
 * PCI_DEV(0,0x18,x) addresses in the table are rebased onto this node's
 * actual functions via ctrl->f0.
 * NOTE(review): many lines of this extract are elided (declarations,
 * braces, #else/#endif arms); visible code left byte-identical. */
49 #if RAMINIT_SYSINFO==1
50 static void sdram_set_registers(const struct mem_controller *ctrl, struct sys_info *sysinfo)
52 static void sdram_set_registers(const struct mem_controller *ctrl)
55 static const unsigned int register_values[] = {
57 /* Careful set limit registers before base registers which
58 contain the enables */
59 /* DRAM Limit i Registers
68 * [ 2: 0] Destination Node ID
78 * [10: 8] Interleave select
79 * specifies the values of A[14:12] to use with interleave enable.
81 * [31:16] DRAM Limit Address i Bits 39-24
82 * This field defines the upper address bits of a 40 bit address
83 * that define the end of the DRAM region.
85 PCI_ADDR(0, 0x18, 1, 0x44), 0x0000f8f8, 0x00000000,
86 PCI_ADDR(0, 0x18, 1, 0x4C), 0x0000f8f8, 0x00000001,
87 PCI_ADDR(0, 0x18, 1, 0x54), 0x0000f8f8, 0x00000002,
88 PCI_ADDR(0, 0x18, 1, 0x5C), 0x0000f8f8, 0x00000003,
89 PCI_ADDR(0, 0x18, 1, 0x64), 0x0000f8f8, 0x00000004,
90 PCI_ADDR(0, 0x18, 1, 0x6C), 0x0000f8f8, 0x00000005,
91 PCI_ADDR(0, 0x18, 1, 0x74), 0x0000f8f8, 0x00000006,
92 PCI_ADDR(0, 0x18, 1, 0x7C), 0x0000f8f8, 0x00000007,
93 /* DRAM Base i Registers
102 * [ 0: 0] Read Enable
105 * [ 1: 1] Write Enable
106 * 0 = Writes Disabled
109 * [10: 8] Interleave Enable
110 * 000 = No interleave
111 * 001 = Interleave on A[12] (2 nodes)
113 * 011 = Interleave on A[12] and A[14] (4 nodes)
117 * 111 = Interleve on A[12] and A[13] and A[14] (8 nodes)
119 * [13:16] DRAM Base Address i Bits 39-24
120 * This field defines the upper address bits of a 40-bit address
121 * that define the start of the DRAM region.
123 PCI_ADDR(0, 0x18, 1, 0x40), 0x0000f8fc, 0x00000000,
124 PCI_ADDR(0, 0x18, 1, 0x48), 0x0000f8fc, 0x00000000,
125 PCI_ADDR(0, 0x18, 1, 0x50), 0x0000f8fc, 0x00000000,
126 PCI_ADDR(0, 0x18, 1, 0x58), 0x0000f8fc, 0x00000000,
127 PCI_ADDR(0, 0x18, 1, 0x60), 0x0000f8fc, 0x00000000,
128 PCI_ADDR(0, 0x18, 1, 0x68), 0x0000f8fc, 0x00000000,
129 PCI_ADDR(0, 0x18, 1, 0x70), 0x0000f8fc, 0x00000000,
130 PCI_ADDR(0, 0x18, 1, 0x78), 0x0000f8fc, 0x00000000,
132 /* DRAM CS Base Address i Registers
141 * [ 0: 0] Chip-Select Bank Enable
145 * [15: 9] Base Address (19-13)
146 * An optimization used when all DIMM are the same size...
148 * [31:21] Base Address (35-25)
149 * This field defines the top 11 addresses bit of a 40-bit
150 * address that define the memory address space. These
151 * bits decode 32-MByte blocks of memory.
153 PCI_ADDR(0, 0x18, 2, 0x40), 0x001f01fe, 0x00000000,
154 PCI_ADDR(0, 0x18, 2, 0x44), 0x001f01fe, 0x00000000,
155 PCI_ADDR(0, 0x18, 2, 0x48), 0x001f01fe, 0x00000000,
156 PCI_ADDR(0, 0x18, 2, 0x4C), 0x001f01fe, 0x00000000,
157 PCI_ADDR(0, 0x18, 2, 0x50), 0x001f01fe, 0x00000000,
158 PCI_ADDR(0, 0x18, 2, 0x54), 0x001f01fe, 0x00000000,
159 PCI_ADDR(0, 0x18, 2, 0x58), 0x001f01fe, 0x00000000,
160 PCI_ADDR(0, 0x18, 2, 0x5C), 0x001f01fe, 0x00000000,
161 /* DRAM CS Mask Address i Registers
170 * Select bits to exclude from comparison with the DRAM Base address register.
172 * [15: 9] Address Mask (19-13)
173 * Address to be excluded from the optimized case
175 * [29:21] Address Mask (33-25)
176 * The bits with an address mask of 1 are excluded from address comparison
180 PCI_ADDR(0, 0x18, 2, 0x60), 0xC01f01ff, 0x00000000,
181 PCI_ADDR(0, 0x18, 2, 0x64), 0xC01f01ff, 0x00000000,
182 PCI_ADDR(0, 0x18, 2, 0x68), 0xC01f01ff, 0x00000000,
183 PCI_ADDR(0, 0x18, 2, 0x6C), 0xC01f01ff, 0x00000000,
184 PCI_ADDR(0, 0x18, 2, 0x70), 0xC01f01ff, 0x00000000,
185 PCI_ADDR(0, 0x18, 2, 0x74), 0xC01f01ff, 0x00000000,
186 PCI_ADDR(0, 0x18, 2, 0x78), 0xC01f01ff, 0x00000000,
187 PCI_ADDR(0, 0x18, 2, 0x7C), 0xC01f01ff, 0x00000000,
188 /* DRAM Bank Address Mapping Register
190 * Specify the memory module size
195 * 000 = 32Mbyte (Rows = 12 & Col = 8)
196 * 001 = 64Mbyte (Rows = 12 & Col = 9)
197 * 010 = 128Mbyte (Rows = 13 & Col = 9)|(Rows = 12 & Col = 10)
198 * 011 = 256Mbyte (Rows = 13 & Col = 10)|(Rows = 12 & Col = 11)
199 * 100 = 512Mbyte (Rows = 13 & Col = 11)|(Rows = 14 & Col = 10)
200 * 101 = 1Gbyte (Rows = 14 & Col = 11)|(Rows = 13 & Col = 12)
201 * 110 = 2Gbyte (Rows = 14 & Col = 12)
208 PCI_ADDR(0, 0x18, 2, 0x80), 0xffff8888, 0x00000000,
209 /* DRAM Timing Low Register
211 * [ 2: 0] Tcl (Cas# Latency, Cas# to read-data-valid)
221 * [ 7: 4] Trc (Row Cycle Time, Ras#-active to Ras#-active/bank auto refresh)
222 * 0000 = 7 bus clocks
223 * 0001 = 8 bus clocks
225 * 1110 = 21 bus clocks
226 * 1111 = 22 bus clocks
227 * [11: 8] Trfc (Row refresh Cycle time, Auto-refresh-active to RAS#-active or RAS#auto-refresh)
228 * 0000 = 9 bus clocks
229 * 0010 = 10 bus clocks
231 * 1110 = 23 bus clocks
232 * 1111 = 24 bus clocks
233 * [14:12] Trcd (Ras#-active to Case#-read/write Delay)
243 * [18:16] Trrd (Ras# to Ras# Delay)
253 * [23:20] Tras (Minmum Ras# Active Time)
254 * 0000 to 0100 = reserved
255 * 0101 = 5 bus clocks
257 * 1111 = 15 bus clocks
258 * [26:24] Trp (Row Precharge Time)
268 * [28:28] Twr (Write Recovery Time)
273 PCI_ADDR(0, 0x18, 2, 0x88), 0xe8088008, 0x02522001 /* 0x03623125 */ ,
274 /* DRAM Timing High Register
276 * [ 0: 0] Twtr (Write to Read Delay)
280 * [ 6: 4] Trwt (Read to Write Delay)
290 * [12: 8] Tref (Refresh Rate)
291 * 00000 = 100Mhz 4K rows
292 * 00001 = 133Mhz 4K rows
293 * 00010 = 166Mhz 4K rows
294 * 00011 = 200Mhz 4K rows
295 * 01000 = 100Mhz 8K/16K rows
296 * 01001 = 133Mhz 8K/16K rows
297 * 01010 = 166Mhz 8K/16K rows
298 * 01011 = 200Mhz 8K/16K rows
300 * [22:20] Twcl (Write CAS Latency)
301 * 000 = 1 Mem clock after CAS# (Unbuffered Dimms)
302 * 001 = 2 Mem clocks after CAS# (Registered Dimms)
305 PCI_ADDR(0, 0x18, 2, 0x8c), 0xff8fe08e, (0 << 20)|(0 << 8)|(0 << 4)|(0 << 0),
306 /* DRAM Config Low Register
308 * [ 0: 0] DLL Disable
317 * [ 3: 3] Disable DQS Hystersis (FIXME handle this one carefully)
318 * 0 = Enable DQS input filter
319 * 1 = Disable DQS input filtering
322 * 0 = Initialization done or not yet started.
323 * 1 = Initiate DRAM intialization sequence
324 * [ 9: 9] SO-Dimm Enable
326 * 1 = SO-Dimms present
328 * 0 = DRAM not enabled
329 * 1 = DRAM initialized and enabled
330 * [11:11] Memory Clear Status
331 * 0 = Memory Clear function has not completed
332 * 1 = Memory Clear function has completed
333 * [12:12] Exit Self-Refresh
334 * 0 = Exit from self-refresh done or not yet started
335 * 1 = DRAM exiting from self refresh
336 * [13:13] Self-Refresh Status
337 * 0 = Normal Operation
338 * 1 = Self-refresh mode active
339 * [15:14] Read/Write Queue Bypass Count
344 * [16:16] 128-bit/64-Bit
345 * 0 = 64bit Interface to DRAM
346 * 1 = 128bit Interface to DRAM
347 * [17:17] DIMM ECC Enable
348 * 0 = Some DIMMs do not have ECC
349 * 1 = ALL DIMMS have ECC bits
350 * [18:18] UnBuffered DIMMs
352 * 1 = Unbuffered DIMMS
353 * [19:19] Enable 32-Byte Granularity
354 * 0 = Optimize for 64byte bursts
355 * 1 = Optimize for 32byte bursts
356 * [20:20] DIMM 0 is x4
357 * [21:21] DIMM 1 is x4
358 * [22:22] DIMM 2 is x4
359 * [23:23] DIMM 3 is x4
361 * 1 = x4 DIMM present
362 * [24:24] Disable DRAM Receivers
363 * 0 = Receivers enabled
364 * 1 = Receivers disabled
366 * 000 = Arbiters chois is always respected
367 * 001 = Oldest entry in DCQ can be bypassed 1 time
368 * 010 = Oldest entry in DCQ can be bypassed 2 times
369 * 011 = Oldest entry in DCQ can be bypassed 3 times
370 * 100 = Oldest entry in DCQ can be bypassed 4 times
371 * 101 = Oldest entry in DCQ can be bypassed 5 times
372 * 110 = Oldest entry in DCQ can be bypassed 6 times
373 * 111 = Oldest entry in DCQ can be bypassed 7 times
376 PCI_ADDR(0, 0x18, 2, 0x90), 0xf0000000,
378 (0 << 23)|(0 << 22)|(0 << 21)|(0 << 20)|
379 (1 << 19)|(0 << 18)|(1 << 17)|(0 << 16)|
380 (2 << 14)|(0 << 13)|(0 << 12)|
381 (0 << 11)|(0 << 10)|(0 << 9)|(0 << 8)|
382 (0 << 3) |(0 << 1) |(0 << 0),
383 /* DRAM Config High Register
385 * [ 0: 3] Maximum Asynchronous Latency
390 * [11: 8] Read Preamble
408 * [18:16] Idle Cycle Limit
417 * [19:19] Dynamic Idle Cycle Center Enable
418 * 0 = Use Idle Cycle Limit
419 * 1 = Generate a dynamic Idle cycle limit
420 * [22:20] DRAM MEMCLK Frequency
430 * [25:25] Memory Clock Ratio Valid (FIXME carefully enable memclk)
431 * 0 = Disable MemClks
433 * [26:26] Memory Clock 0 Enable
436 * [27:27] Memory Clock 1 Enable
439 * [28:28] Memory Clock 2 Enable
442 * [29:29] Memory Clock 3 Enable
447 PCI_ADDR(0, 0x18, 2, 0x94), 0xc180f0f0,
448 (0 << 29)|(0 << 28)|(0 << 27)|(0 << 26)|(0 << 25)|
449 (0 << 20)|(0 << 19)|(DCH_IDLE_LIMIT_16 << 16)|(0 << 8)|(0 << 0),
450 /* DRAM Delay Line Register
452 * Adjust the skew of the input DQS strobe relative to DATA
454 * [23:16] Delay Line Adjust
455 * Adjusts the DLL derived PDL delay by one or more delay stages
456 * in either the faster or slower direction.
457 * [24:24} Adjust Slower
459 * 1 = Adj is used to increase the PDL delay
460 * [25:25] Adjust Faster
462 * 1 = Adj is used to decrease the PDL delay
465 PCI_ADDR(0, 0x18, 2, 0x98), 0xfc00ffff, 0x00000000,
466 /* MCA NB Status Low reg */
467 PCI_ADDR(0, 0x18, 3, 0x48), 0x00f00000, 0x00000000,
468 /* MCA NB Status high reg */
469 PCI_ADDR(0, 0x18, 3, 0x4c), 0x01801e8c, 0x00000000,
470 /* MCA NB address Low reg */
471 PCI_ADDR(0, 0x18, 3, 0x50), 0x00000007, 0x00000000,
472 /* MCA NB address high reg */
473 PCI_ADDR(0, 0x18, 3, 0x54), 0xffffff00, 0x00000000,
474 /* DRAM Scrub Control Register
476 * [ 4: 0] DRAM Scrube Rate
478 * [12: 8] L2 Scrub Rate
480 * [20:16] Dcache Scrub
483 * 00000 = Do not scrub
505 * All Others = Reserved
507 PCI_ADDR(0, 0x18, 3, 0x58), 0xffe0e0e0, 0x00000000,
508 /* DRAM Scrub Address Low Register
510 * [ 0: 0] DRAM Scrubber Redirect Enable
512 * 1 = Scrubber Corrects errors found in normal operation
514 * [31: 6] DRAM Scrub Address 31-6
516 PCI_ADDR(0, 0x18, 3, 0x5C), 0x0000003e, 0x00000000,
517 /* DRAM Scrub Address High Register
519 * [ 7: 0] DRAM Scrubb Address 39-32
522 PCI_ADDR(0, 0x18, 3, 0x60), 0xffffff00, 0x00000000,
/* Skip nodes without a populated memory controller. */
527 if (!controller_present(ctrl)) {
528 // printk_debug("No memory controller present\n");
531 printk_spew("setting up CPU%02x northbridge registers\n", ctrl->node_id);
532 max = ARRAY_SIZE(register_values);
533 for (i = 0; i < max; i += 3) {
/* Rebase the table's bus-0/dev-0x18 address onto this node's function. */
537 dev = (register_values[i] & ~0xfff) - PCI_DEV(0, 0x18, 0) + ctrl->f0;
538 where = register_values[i] & 0xfff;
539 reg = pci_read_config32(dev, where);
540 reg &= register_values[i+1];
541 reg |= register_values[i+2];
542 pci_write_config32(dev, where, reg);
544 printk_spew("done.\n");
/* Enable DIMM ECC in DRAM Config Low if the northbridge advertises ECC
 * capability, unless the CMOS option table explicitly disables it. */
547 static void hw_enable_ecc(const struct mem_controller *ctrl)
550 nbcap = pci_read_config32(ctrl->f3, NORTHBRIDGE_CAP);
551 dcl = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
552 dcl &= ~DCL_DimmEccEn;
553 if (nbcap & NBCAP_ECC) {
554 dcl |= DCL_DimmEccEn;
/* CMOS override: ECC_memory == 0 forces ECC off even when capable. */
556 if (HAVE_OPTION_TABLE &&
557 read_option(CMOS_VSTART_ECC_memory, CMOS_VLEN_ECC_memory, 1) == 0) {
558 dcl &= ~DCL_DimmEccEn;
560 pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dcl);
/* Nonzero when the controller is running the 128-bit (dual-channel)
 * DRAM interface, per the DCL_128BitEn bit of DRAM Config Low. */
563 static int is_dual_channel(const struct mem_controller *ctrl)
566 dcl = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
567 return dcl & DCL_128BitEn;
/* Heuristic Opteron test: checks the 128-bit capability bit in the
 * northbridge capability register.  Known-imperfect (see FIXME below):
 * socket-939 Athlon64 parts are also dual-channel capable. */
570 static int is_opteron(const struct mem_controller *ctrl)
572 /* Test to see if I am an Opteron.
573 * FIXME Socket 939 based Athlon64 have dual channel capability,
574 * too, so we need a better test for Opterons
576 #warning "FIXME: Implement a better test for Opterons"
578 nbcap = pci_read_config32(ctrl->f3, NORTHBRIDGE_CAP);
579 return !!(nbcap & NBCAP_128Bit);
582 static int is_registered(const struct mem_controller *ctrl)
584 /* Test to see if we are dealing with registered SDRAM.
585 * If we are not registered we are unbuffered.
586 * This function must be called after spd_handle_unbuffered_dimms.
589 dcl = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
590 return !(dcl & DCL_UnBuffDimm);
598 #if QRANK_DIMM_SUPPORT == 1
/* Read a DIMM's SPD EEPROM and compute the log2 size (in bits) of each
 * side: rows (byte 3) + columns (byte 4) + log2(logical banks, byte 17)
 * + log2(data width, bytes 6/7).  Side 2 of an asymmetric DIMM is derived
 * from the high nibbles of bytes 3 and 4.  Error paths: val_err dies on a
 * bad SPD value; hw_err reports the DIMM as empty.
 * NOTE(review): several lines of this extract are elided (sz init,
 * QRANK arms, labels, return); visible code left byte-identical. */
603 static struct dimm_size spd_get_dimm_size(unsigned device)
605 /* Calculate the log base 2 size of a DIMM in bits */
612 #if QRANK_DIMM_SUPPORT == 1
616 /* Note it might be easier to use byte 31 here, it has the DIMM size as
617 * a multiple of 4MB. The way we do it now we can size both
618 * sides of an assymetric dimm.
620 value = spd_read_byte(device, 3); /* rows */
621 if (value < 0) goto hw_err;
622 if ((value & 0xf) == 0) goto val_err;
623 sz.side1 += value & 0xf;
624 sz.rows = value & 0xf;
626 value = spd_read_byte(device, 4); /* columns */
627 if (value < 0) goto hw_err;
628 if ((value & 0xf) == 0) goto val_err;
629 sz.side1 += value & 0xf;
630 sz.col = value & 0xf;
632 value = spd_read_byte(device, 17); /* banks */
633 if (value < 0) goto hw_err;
634 if ((value & 0xff) == 0) goto val_err;
635 sz.side1 += log2(value & 0xff);
637 /* Get the module data width and convert it to a power of two */
638 value = spd_read_byte(device, 7); /* (high byte) */
639 if (value < 0) goto hw_err;
643 low = spd_read_byte(device, 6); /* (low byte) */
644 if (low < 0) goto hw_err;
645 value = value | (low & 0xff);
646 if ((value != 72) && (value != 64)) goto val_err;
647 sz.side1 += log2(value);
650 value = spd_read_byte(device, 5); /* number of physical banks */
651 if (value < 0) goto hw_err;
652 if (value == 1) goto out;
653 if ((value != 2) && (value != 4 )) {
656 #if QRANK_DIMM_SUPPORT == 1
660 /* Start with the symmetrical case */
663 value = spd_read_byte(device, 3); /* rows */
664 if (value < 0) goto hw_err;
665 if ((value & 0xf0) == 0) goto out; /* If symmetrical we are done */
666 sz.side2 -= (value & 0x0f); /* Subtract out rows on side 1 */
667 sz.side2 += ((value >> 4) & 0x0f); /* Add in rows on side 2 */
669 value = spd_read_byte(device, 4); /* columns */
670 if (value < 0) goto hw_err;
671 if ((value & 0xff) == 0) goto val_err;
672 sz.side2 -= (value & 0x0f); /* Subtract out columns on side 1 */
673 sz.side2 += ((value >> 4) & 0x0f); /* Add in columsn on side 2 */
678 die("Bad SPD value\n");
679 /* If an hw_error occurs report that I have no memory */
685 #if QRANK_DIMM_SUPPORT == 1
/* Program the DRAM CS base registers for one DIMM socket from its
 * computed log2 size (sz.side1/sz.side2 are in bits; 25+3 == log2 of
 * 32MB in bits, the minimum mappable chip-select size), then enable the
 * socket's memory clock(s) in DRAM Config High.
 * NOTE(review): lines elided in this extract (csbase-high handling,
 * QRANK rank-count checks); visible code left byte-identical. */
693 static void set_dimm_size(const struct mem_controller *ctrl, struct dimm_size sz, unsigned index)
695 uint32_t base0, base1;
698 if (sz.side1 != sz.side2) {
702 /* For each base register.
703 * Place the dimm size in 32 MB quantities in the bits 31 - 21.
704 * The initialize dimm size is in bits.
705 * Set the base enable bit0.
710 /* Make certain side1 of the dimm is at least 32MB */
711 if (sz.side1 >= (25 +3)) {
712 base0 = (1 << ((sz.side1 - (25 + 3)) + 21)) | 1;
715 /* Make certain side2 of the dimm is at least 32MB */
716 if (sz.side2 >= (25 + 3)) {
717 base1 = (1 << ((sz.side2 - (25 + 3)) + 21)) | 1;
720 /* Double the size if we are using dual channel memory */
721 if (is_dual_channel(ctrl)) {
722 base0 = (base0 << 1) | (base0 & 1);
723 base1 = (base1 << 1) | (base1 & 1);
726 /* Clear the reserved bits */
727 base0 &= ~0x001ffffe;
728 base1 &= ~0x001ffffe;
730 /* Set the appropriate DIMM base address register */
731 pci_write_config32(ctrl->f2, DRAM_CSBASE + (((index << 1)+0)<<2), base0);
732 pci_write_config32(ctrl->f2, DRAM_CSBASE + (((index << 1)+1)<<2), base1);
733 #if QRANK_DIMM_SUPPORT == 1
735 pci_write_config32(ctrl->f2, DRAM_CSBASE + (((index << 1)+4)<<2), base0);
736 pci_write_config32(ctrl->f2, DRAM_CSBASE + (((index << 1)+5)<<2), base1);
740 /* Enable the memory clocks for this DIMM */
742 dch = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH);
743 dch |= DCH_MEMCLK_EN0 << index;
744 #if QRANK_DIMM_SUPPORT == 1
746 dch |= DCH_MEMCLK_EN0 << (index + 2);
749 pci_write_config32(ctrl->f2, DRAM_CONFIG_HIGH, dch);
/* Program this DIMM's nibble in the DRAM Bank Address Mapping register.
 * Pre-D0 CPUs encode the raw size; D0+ uses the cs_map_aa lookup keyed
 * by (rows - 12, cols - 8).  QRANK builds mirror the value into the
 * upper chip-select nibble.
 * NOTE(review): lines elided (cs_map_aa table body, #else/#endif arms);
 * visible code left byte-identical. */
753 static void set_dimm_map(const struct mem_controller *ctrl, struct dimm_size sz, unsigned index)
755 static const unsigned cs_map_aa[] = {
756 /* (row=12, col=8)(14, 12) ---> (0, 0) (2, 4) */
764 map = pci_read_config32(ctrl->f2, DRAM_BANK_ADDR_MAP);
765 map &= ~(0xf << (index * 4));
766 #if QRANK_DIMM_SUPPORT == 1
768 map &= ~(0xf << ( (index + 2) * 4));
773 /* Make certain side1 of the dimm is at least 32MB */
774 if (sz.side1 >= (25 +3)) {
775 if (is_cpu_pre_d0()) {
776 map |= (sz.side1 - (25 + 3)) << (index *4);
777 #if QRANK_DIMM_SUPPORT == 1
779 map |= (sz.side1 - (25 + 3)) << ( (index + 2) * 4);
784 map |= cs_map_aa[(sz.rows - 12) * 5 + (sz.col - 8) ] << (index*4);
785 #if QRANK_DIMM_SUPPORT == 1
787 map |= cs_map_aa[(sz.rows - 12) * 5 + (sz.col - 8) ] << ( (index + 2) * 4);
793 pci_write_config32(ctrl->f2, DRAM_BANK_ADDR_MAP, map);
/* For each populated socket in dimm_mask, size the DIMM via SPD and
 * program its chip-select base and bank-address-map entries.
 * Returns the (possibly updated) dimm_mask, or -1 on an SPD read error. */
797 static long spd_set_ram_size(const struct mem_controller *ctrl, long dimm_mask)
801 for (i = 0; i < DIMM_SOCKETS; i++) {
803 if (!(dimm_mask & (1 << i))) {
806 sz = spd_get_dimm_size(ctrl->channel0[i]);
808 return -1; /* Report SPD error */
810 set_dimm_size(ctrl, sz, i);
811 set_dimm_map (ctrl, sz, i);
/* Program the DRAM base/limit routing pair for this node's address
 * window [base_k, limit_k) into function 1 of every node (0x18..0x1f),
 * so all CPUs route that range to ctrl->node_id.  Addresses are in KB;
 * << 2 converts KB to the register's 16KB-ish granularity field. */
816 static void route_dram_accesses(const struct mem_controller *ctrl,
817 unsigned long base_k, unsigned long limit_k)
819 /* Route the addresses to the controller node */
824 unsigned limit_reg, base_reg;
827 node_id = ctrl->node_id;
828 index = (node_id << 3);
829 limit = (limit_k << 2);
832 limit |= ( 0 << 8) | (node_id << 0);
833 base = (base_k << 2);
/* bit0 = read enable, bit1 = write enable for the DRAM base register */
835 base |= (0 << 8) | (1<<1) | (1<<0);
837 limit_reg = 0x44 + index;
838 base_reg = 0x40 + index;
839 for (device = PCI_DEV(0, 0x18, 1); device <= PCI_DEV(0, 0x1f, 1); device += PCI_DEV(0, 1, 0)) {
840 pci_write_config32(device, limit_reg, limit);
841 pci_write_config32(device, base_reg, base);
/* Set the TOP_MEM/TOP_MEM2 MSRs from the total memory size (tom_k, in
 * KB).  Memory above 4GB goes in TOP_MEM2; below-4GB TOM is clamped to
 * leave a hole for the ROM and MMIO.
 * NOTE(review): lines elided (the die-on-no-memory path, the clamp to
 * 0x003f0000, the final wrmsr, hole_startk handling);
 * visible code left byte-identical. */
845 static void set_top_mem(unsigned tom_k, unsigned hole_startk)
847 /* Error if I don't have memory */
852 /* Report the amount of memory. */
853 printk_spew("RAM: 0x%08x kB\n", tom_k);
855 /* Now set top of memory */
857 if (tom_k > (4*1024*1024)) {
858 msr.lo = (tom_k & 0x003fffff) << 10;
859 msr.hi = (tom_k & 0xffc00000) >> 22;
860 wrmsr(TOP_MEM2, msr);
863 /* Leave a 64M hole between TOP_MEM and TOP_MEM2
864 * so I can see my rom chip and other I/O devices.
866 if (tom_k >= 0x003f0000) {
867 #if HW_MEM_HOLE_SIZEK != 0
868 if (hole_startk != 0) {
874 msr.lo = (tom_k & 0x003fffff) << 10;
875 msr.hi = (tom_k & 0xffc00000) >> 22;
/* Try to interleave all enabled chip selects on this node.  Succeeds
 * only when every enabled CS has the same size and bank-address mode
 * and the count is a power of two (2/4/8); otherwise the (elided)
 * early-return paths fall back to order_chip_selects.
 * Returns the node's memory size in KB on success.
 * NOTE(review): lines elided throughout (shift-table entries, early
 * returns, csbase init); visible code left byte-identical. */
879 static unsigned long interleave_chip_selects(const struct mem_controller *ctrl)
882 static const uint8_t csbase_low_shift[] = {
885 /* 128MB */ (14 - 4),
886 /* 256MB */ (15 - 4),
887 /* 512MB */ (15 - 4),
892 static const uint8_t csbase_low_d0_shift[] = {
895 /* 128MB */ (14 - 4),
896 /* 128MB */ (15 - 4),
897 /* 256MB */ (15 - 4),
898 /* 512MB */ (15 - 4),
899 /* 256MB */ (16 - 4),
900 /* 512MB */ (16 - 4),
906 /* cs_base_high is not changed */
909 int chip_selects, index;
911 unsigned common_size;
912 unsigned common_cs_mode;
913 uint32_t csbase, csmask;
915 /* See if all of the memory chip selects are the same size
916 * and if so count them.
921 for (index = 0; index < 8; index++) {
926 value = pci_read_config32(ctrl->f2, DRAM_CSBASE + (index << 2));
934 if (common_size == 0) {
937 /* The size differed fail */
938 if (common_size != size) {
942 value = pci_read_config32(ctrl->f2, DRAM_BANK_ADDR_MAP);
943 cs_mode =( value >> ((index>>1)*4)) & 0xf;
944 if (cs_mode == 0 ) continue;
945 if (common_cs_mode == 0) {
946 common_cs_mode = cs_mode;
948 /* The cs_mode differed fail */
949 if (common_cs_mode != cs_mode) {
954 /* Chip selects can only be interleaved when there is
955 * more than one and their is a power of two of them.
957 bits = log2(chip_selects);
958 if (((1 << bits) != chip_selects) || (bits < 1) || (bits > 3)) {
962 /* Find the bits of csbase that we need to interleave on */
963 if (is_cpu_pre_d0()){
964 csbase_inc = 1 << csbase_low_shift[common_cs_mode];
965 if (is_dual_channel(ctrl)) {
966 /* Also we run out of address mask bits if we try and interleave 8 4GB dimms */
967 if ((bits == 3) && (common_size == (1 << (32 - 3)))) {
968 // printk_debug("8 4GB chip selects cannot be interleaved\n");
975 csbase_inc = 1 << csbase_low_d0_shift[common_cs_mode];
976 if (is_dual_channel(ctrl)) {
977 if ( (bits==3) && (common_cs_mode > 8)) {
978 // printk_debug("8 cs_mode>8 chip selects cannot be interleaved\n");
985 /* Compute the initial values for csbase and csbask.
986 * In csbase just set the enable bit and the base to zero.
987 * In csmask set the mask bits for the size and page level interleave.
990 csmask = (((common_size << bits) - 1) << 21);
991 csmask |= 0xfe00 & ~((csbase_inc << bits) - csbase_inc);
992 for (index = 0; index < 8; index++) {
995 value = pci_read_config32(ctrl->f2, DRAM_CSBASE + (index << 2));
1000 pci_write_config32(ctrl->f2, DRAM_CSBASE + (index << 2), csbase);
1001 pci_write_config32(ctrl->f2, DRAM_CSMASK + (index << 2), csmask);
1002 csbase += csbase_inc;
1005 printk_spew("Interleaved\n");
1007 /* Return the memory size in K */
1008 return common_size << (15 + bits);
/* Non-interleaved fallback: repeatedly pick the largest not-yet-placed
 * enabled chip select and stack it at the current top of memory,
 * rewriting its base/mask registers.  The high 8 bits of tom are reused
 * as a "already placed" bitmap.  Returns the node's memory size in KB.
 * NOTE(review): lines elided (tom init, outer loop, candidate updates,
 * loop exit); visible code left byte-identical. */
1011 static unsigned long order_chip_selects(const struct mem_controller *ctrl)
1015 /* Remember which registers we have used in the high 8 bits of tom */
1018 /* Find the largest remaining candidate */
1019 unsigned index, candidate;
1020 uint32_t csbase, csmask;
1024 for (index = 0; index < 8; index++) {
1026 value = pci_read_config32(ctrl->f2, DRAM_CSBASE + (index << 2));
1028 /* Is it enabled? */
1033 /* Is it greater? */
1034 if (value <= csbase) {
1038 /* Has it already been selected */
1039 if (tom & (1 << (index + 24))) {
1042 /* I have a new candidate */
1047 /* See if I have found a new candidate */
1052 /* Remember the dimm size */
1053 size = csbase >> 21;
1055 /* Remember I have used this register */
1056 tom |= (1 << (candidate + 24));
1058 /* Recompute the cs base register value */
1059 csbase = (tom << 21) | 1;
1061 /* Increment the top of memory */
1064 /* Compute the memory mask */
1065 csmask = ((size -1) << 21);
1066 csmask |= 0xfe00; /* For now don't optimize */
1068 /* Write the new base register */
1069 pci_write_config32(ctrl->f2, DRAM_CSBASE + (candidate << 2), csbase);
1070 /* Write the new mask register */
1071 pci_write_config32(ctrl->f2, DRAM_CSMASK + (candidate << 2), csmask);
1074 /* Return the memory size in K */
1075 return (tom & ~0xff000000) << 15;
/* Scan the DRAM base/limit routing registers of nodes 0..max_node_id-1
 * and return the highest mapped memory address in KB (the end of the
 * last enabled region).  Only regions with both read and write enable
 * set (base & 3 == 3) are counted. */
1078 unsigned long memory_end_k(const struct mem_controller *ctrl, int max_node_id)
1082 /* Find the last memory address used */
1084 for (node_id = 0; node_id < max_node_id; node_id++) {
1085 uint32_t limit, base;
1087 index = node_id << 3;
1088 base = pci_read_config32(ctrl->f1, 0x40 + index);
1089 /* Only look at the limit if the base is enabled */
1090 if ((base & 3) == 3) {
1091 limit = pci_read_config32(ctrl->f1, 0x44 + index);
1092 end_k = ((limit + 0x00010000) & 0xffff0000) >> 2;
/* Lay out this node's chip selects (interleaved if the CMOS option
 * allows and interleave_chip_selects succeeds, otherwise stacked),
 * then route the node's address window and update top-of-memory. */
1098 static void order_dimms(const struct mem_controller *ctrl)
1100 unsigned long tom_k, base_k;
1102 if ((!HAVE_OPTION_TABLE) ||
1103 read_option(CMOS_VSTART_interleave_chip_selects, CMOS_VLEN_interleave_chip_selects, 1) != 0) {
1104 tom_k = interleave_chip_selects(ctrl);
1106 printk_debug("Interleaving disabled\n");
1111 tom_k = order_chip_selects(ctrl);
1114 /* Compute the memory base address */
1115 base_k = memory_end_k(ctrl, ctrl->node_id);
1117 route_dram_accesses(ctrl, base_k, tom_k);
1118 set_top_mem(tom_k, 0);
/* Disable a DIMM socket: zero both of its chip-select base registers
 * (clearing the enable bits) and drop it from dimm_mask.
 * Returns the updated mask. */
1121 static long disable_dimm(const struct mem_controller *ctrl, unsigned index, long dimm_mask)
1123 printk_debug("disabling dimm %02x\n", index);
1124 pci_write_config32(ctrl->f2, DRAM_CSBASE + (((index << 1)+0)<<2), 0);
1125 pci_write_config32(ctrl->f2, DRAM_CSBASE + (((index << 1)+1)<<2), 0);
1126 dimm_mask &= ~(1 << index);
/* Classify each populated DIMM as registered or unbuffered from SPD
 * byte 21 bit 1, refuse mixed configurations, and program
 * DCL_UnBuffDimm (plus DCL_UpperCSMap on socket-939 parts) accordingly.
 * Returns dimm_mask.
 * NOTE(review): lines elided (unbuffered/registered counters, braces,
 * return); visible code left byte-identical. */
1130 static long spd_handle_unbuffered_dimms(const struct mem_controller *ctrl,
1136 int has_dualch = is_opteron(ctrl);
1140 for (i = 0; (i < DIMM_SOCKETS); i++) {
1142 if (!(dimm_mask & (1 << i))) {
1145 value = spd_read_byte(ctrl->channel0[i], 21);
1150 /* Registered dimm ? */
1151 if (value & (1 << 1)) {
1154 /* Otherwise it must be an unbuffered dimm */
1159 if (unbuffered && registered) {
1160 die("Mixed buffered and registered dimms not supported");
1163 dcl = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
1164 dcl &= ~DCL_UnBuffDimm;
1166 if ((has_dualch) && (!is_cpu_pre_d0())) {
1167 dcl |= DCL_UnBuffDimm; /* set DCL_DualDIMMen too? */
1169 /* set DCL_En2T if you have non-equal DDR mem types! */
1171 if ((cpuid_eax(1) & 0x30) == 0x30) {
1172 /* CS[7:4] is copy of CS[3:0], should be set for 939 socket */
1173 dcl |= DCL_UpperCSMap;
1176 dcl |= DCL_UnBuffDimm;
1179 pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dcl);
1181 if (is_registered(ctrl)) {
1182 printk_spew("Registered\n");
1184 printk_spew("Unbuffered\n");
/* Probe both channels' SPD devices (byte 2 = memory type) and build a
 * presence bitmask: bit i for channel 0 socket i, bit i+DIMM_SOCKETS
 * for channel 1.  The (elided) checks validate the byte before setting
 * each bit. */
1190 static unsigned int spd_detect_dimms(const struct mem_controller *ctrl)
1195 for (i = 0; i < DIMM_SOCKETS; i++) {
1198 device = ctrl->channel0[i];
1200 byte = spd_read_byte(ctrl->channel0[i], 2); /* Type */
1202 dimm_mask |= (1 << i);
1205 device = ctrl->channel1[i];
1207 byte = spd_read_byte(ctrl->channel1[i], 2);
1209 dimm_mask |= (1 << (i + DIMM_SOCKETS));
/* Enable 128-bit dual-channel mode when every populated channel-0 DIMM
 * has an identical partner on channel 1 (all listed SPD bytes equal)
 * and the northbridge is 128-bit capable; otherwise fall back to the
 * (elided) single_channel path.  On success, clears the channel-1 bits
 * from dimm_mask and sets DCL_128BitEn.
 * NOTE(review): lines elided (single_channel label, SPD error handling,
 * return); visible code left byte-identical. */
1216 static long spd_enable_2channels(const struct mem_controller *ctrl, long dimm_mask)
1220 /* SPD addresses to verify are identical */
1221 static const uint8_t addresses[] = {
1222 2, /* Type should be DDR SDRAM */
1223 3, /* *Row addresses */
1224 4, /* *Column addresses */
1225 5, /* *Physical Banks */
1226 6, /* *Module Data Width low */
1227 7, /* *Module Data Width high */
1228 9, /* *Cycle time at highest CAS Latency CL=X */
1229 11, /* *SDRAM Type */
1230 13, /* *SDRAM Width */
1231 17, /* *Logical Banks */
1232 18, /* *Supported CAS Latencies */
1233 21, /* *SDRAM Module Attributes */
1234 23, /* *Cycle time at CAS Latnecy (CLX - 0.5) */
1235 26, /* *Cycle time at CAS Latnecy (CLX - 1.0) */
1236 27, /* *tRP Row precharge time */
1237 28, /* *Minimum Row Active to Row Active Delay (tRRD) */
1238 29, /* *tRCD RAS to CAS */
1239 30, /* *tRAS Activate to Precharge */
1240 41, /* *Minimum Active to Active/Auto Refresh Time(Trc) */
1241 42, /* *Minimum Auto Refresh Command Time(Trfc) */
1243 /* If the dimms are not in pairs do not do dual channels */
1244 if ((dimm_mask & ((1 << DIMM_SOCKETS) - 1)) !=
1245 ((dimm_mask >> DIMM_SOCKETS) & ((1 << DIMM_SOCKETS) - 1))) {
1246 goto single_channel;
1248 /* If the cpu is not capable of doing dual channels don't do dual channels */
1249 nbcap = pci_read_config32(ctrl->f3, NORTHBRIDGE_CAP);
1250 if (!(nbcap & NBCAP_128Bit)) {
1251 goto single_channel;
1253 for (i = 0; (i < 4) && (ctrl->channel0[i]); i++) {
1254 unsigned device0, device1;
1257 /* If I don't have a dimm skip this one */
1258 if (!(dimm_mask & (1 << i))) {
1261 device0 = ctrl->channel0[i];
1262 device1 = ctrl->channel1[i];
1263 for (j = 0; j < ARRAY_SIZE(addresses); j++) {
1265 addr = addresses[j];
1266 value0 = spd_read_byte(device0, addr);
1270 value1 = spd_read_byte(device1, addr);
1274 if (value0 != value1) {
1275 goto single_channel;
1279 printk_spew("Enabling dual channel memory\n");
1281 dcl = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
1282 dcl &= ~DCL_32ByteEn;
1283 dcl |= DCL_128BitEn;
1284 pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dcl);
1287 dimm_mask &= ~((1 << (DIMM_SOCKETS *2)) - (1 << DIMM_SOCKETS));
1293 uint8_t divisor; /* In 1/2 ns increments */
1296 uint32_t dch_memclk;
1297 uint16_t dch_tref4k, dch_tref8k;
1300 uint8_t dtl_trwt[3][3]; /* first index is CAS_LAT 2/2.5/3 and 128/registered64/64 */
1301 uint8_t rdpreamble[4]; /* 0 is for registered, 1 for 1-2 DIMMS, 2 and 3 for 3 or 4 unreg dimm slots */
/* Map a minimum SPD cycle time (BCD-ish fixed point, e.g. 0x75 = 7.5ns)
 * to the matching mem_param timing set.  The speed[] table is ordered
 * fastest-last; the loop stops at the first entry whose successor is
 * too fast for min_cycle_time, and dies if nothing matches.
 * NOTE(review): lines elided (per-entry .name/.cycle_time fields, table
 * terminator, return); visible code left byte-identical. */
1305 static const struct mem_param *get_mem_param(unsigned min_cycle_time)
1307 static const struct mem_param speed[] = {
1311 .divisor = (10 <<1),
1314 .dch_memclk = DCH_MEMCLK_100MHZ << DCH_MEMCLK_SHIFT,
1315 .dch_tref4k = DTH_TREF_100MHZ_4K,
1316 .dch_tref8k = DTH_TREF_100MHZ_8K,
1319 .dtl_trwt = { { 2, 2, 3 }, { 3, 3, 4 }, { 3, 3, 4 }},
1320 .rdpreamble = { ((9 << 1) + 0), ((9 << 1) + 0), ((9 << 1) + 0), ((9 << 1) + 0) }
1325 .divisor = (7<<1)+1,
1328 .dch_memclk = DCH_MEMCLK_133MHZ << DCH_MEMCLK_SHIFT,
1329 .dch_tref4k = DTH_TREF_133MHZ_4K,
1330 .dch_tref8k = DTH_TREF_133MHZ_8K,
1333 .dtl_trwt = { { 2, 2, 3 }, { 3, 3, 4 }, { 3, 3, 4 }},
1334 .rdpreamble = { ((8 << 1) + 0), ((7 << 1) + 0), ((7 << 1) + 1), ((7 << 1) + 0) }
1342 .dch_memclk = DCH_MEMCLK_166MHZ << DCH_MEMCLK_SHIFT,
1343 .dch_tref4k = DTH_TREF_166MHZ_4K,
1344 .dch_tref8k = DTH_TREF_166MHZ_8K,
1347 .dtl_trwt = { { 3, 2, 3 }, { 3, 3, 4 }, { 4, 3, 4 }},
1348 .rdpreamble = { ((7 << 1) + 1), ((6 << 1) + 0), ((6 << 1) + 1), ((6 << 1) + 0) }
1356 .dch_memclk = DCH_MEMCLK_200MHZ << DCH_MEMCLK_SHIFT,
1357 .dch_tref4k = DTH_TREF_200MHZ_4K,
1358 .dch_tref8k = DTH_TREF_200MHZ_8K,
1361 .dtl_trwt = { { 0, 2, 3 }, { 3, 3, 4 }, { 3, 3, 4 }},
1362 .rdpreamble = { ((7 << 1) + 0), ((5 << 1) + 0), ((5 << 1) + 1), ((5 << 1) + 1) }
1368 const struct mem_param *param;
1369 for (param = &speed[0]; param->cycle_time ; param++) {
1370 if (min_cycle_time > (param+1)->cycle_time) {
1374 if (!param->cycle_time) {
1375 die("min_cycle_time to low");
1377 printk_spew("%s\n", param->name);
/* Result bundle for spd_set_memclk: the chosen timing parameter set
 * (plus, in the elided remainder, the surviving dimm_mask). */
1381 struct spd_set_memclk_result {
1382 const struct mem_param *param;
1385 static struct spd_set_memclk_result spd_set_memclk(const struct mem_controller *ctrl, long dimm_mask)
1387 /* Compute the minimum cycle time for these dimms */
1388 struct spd_set_memclk_result result;
1389 unsigned min_cycle_time, min_latency, bios_cycle_time;
1393 static const uint8_t latency_indicies[] = { 26, 23, 9 };
1394 static const unsigned char min_cycle_times[] = {
1395 [NBCAP_MEMCLK_200MHZ] = 0x50, /* 5ns */
1396 [NBCAP_MEMCLK_166MHZ] = 0x60, /* 6ns */
1397 [NBCAP_MEMCLK_133MHZ] = 0x75, /* 7.5ns */
1398 [NBCAP_MEMCLK_100MHZ] = 0xa0, /* 10ns */
1401 value = pci_read_config32(ctrl->f3, NORTHBRIDGE_CAP);
1403 min_cycle_time = min_cycle_times[(value >> NBCAP_MEMCLK_SHIFT) & NBCAP_MEMCLK_MASK];
1404 bios_cycle_time = min_cycle_times[
1405 read_option(CMOS_VSTART_max_mem_clock, CMOS_VLEN_max_mem_clock, 0)];
1406 if (HAVE_OPTION_TABLE && bios_cycle_time > min_cycle_time) {
1407 min_cycle_time = bios_cycle_time;
1411 /* Compute the least latency with the fastest clock supported
1412 * by both the memory controller and the dimms.
1414 for (i = 0; i < DIMM_SOCKETS; i++) {
1415 int new_cycle_time, new_latency;
1420 if (!(dimm_mask & (1 << i))) {
1424 /* First find the supported CAS latencies
1425 * Byte 18 for DDR SDRAM is interpreted:
1426 * bit 0 == CAS Latency = 1.0
1427 * bit 1 == CAS Latency = 1.5
1428 * bit 2 == CAS Latency = 2.0
1429 * bit 3 == CAS Latency = 2.5
1430 * bit 4 == CAS Latency = 3.0
1431 * bit 5 == CAS Latency = 3.5
1435 new_cycle_time = 0xa0;
1438 latencies = spd_read_byte(ctrl->channel0[i], 18);
1439 if (latencies <= 0) continue;
1441 /* Compute the lowest cas latency supported */
1442 latency = log2(latencies) -2;
1444 /* Loop through and find a fast clock with a low latency */
1445 for (index = 0; index < 3; index++, latency++) {
1447 if ((latency < 2) || (latency > 4) ||
1448 (!(latencies & (1 << latency)))) {
1451 spd_value = spd_read_byte(ctrl->channel0[i], latency_indicies[index]);
1452 if (spd_value < 0) {
1456 /* Only increase the latency if we decreas the clock */
1457 if ((spd_value >= min_cycle_time) && (spd_value < new_cycle_time)) {
1458 new_cycle_time = spd_value;
1459 new_latency = latency;
1462 if (new_latency > 4){
1465 /* Does min_latency need to be increased? */
1466 if (new_cycle_time > min_cycle_time) {
1467 min_cycle_time = new_cycle_time;
1469 /* Does min_latency need to be increased? */
1470 if (new_latency > min_latency) {
1471 min_latency = new_latency;
1474 /* Make a second pass through the dimms and disable
1475 * any that cannot support the selected memclk and cas latency.
1478 for (i = 0; (i < 4) && (ctrl->channel0[i]); i++) {
1483 if (!(dimm_mask & (1 << i))) {
1487 latencies = spd_read_byte(ctrl->channel0[i], 18);
1488 if (latencies < 0) goto hw_error;
1489 if (latencies == 0) {
1493 /* Compute the lowest cas latency supported */
1494 latency = log2(latencies) -2;
1496 /* Walk through searching for the selected latency */
1497 for (index = 0; index < 3; index++, latency++) {
1498 if (!(latencies & (1 << latency))) {
1501 if (latency == min_latency)
1504 /* If I can't find the latency or my index is bad error */
1505 if ((latency != min_latency) || (index >= 3)) {
1509 /* Read the min_cycle_time for this latency */
1510 spd_value = spd_read_byte(ctrl->channel0[i], latency_indicies[index]);
1511 if (spd_value < 0) goto hw_error;
1513 /* All is good if the selected clock speed
1514 * is what I need or slower.
1516 if (spd_value <= min_cycle_time) {
1519 /* Otherwise I have an error, disable the dimm */
1521 dimm_mask = disable_dimm(ctrl, i, dimm_mask);
1524 //down speed for full load 4 rank support
1525 #if QRANK_DIMM_SUPPORT
1526 if (dimm_mask == (3|(3<<DIMM_SOCKETS)) ) {
1528 for (i = 0; (i < 4) && (ctrl->channel0[i]); i++) {
1530 if (!(dimm_mask & (1 << i))) {
1533 val = spd_read_byte(ctrl->channel0[i], 5);
1540 if (min_cycle_time <= 0x50 ) {
1541 min_cycle_time = 0x60;
1548 /* Now that I know the minimum cycle time lookup the memory parameters */
1549 result.param = get_mem_param(min_cycle_time);
1551 /* Update DRAM Config High with our selected memory speed */
1552 value = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH);
1553 value &= ~(DCH_MEMCLK_MASK << DCH_MEMCLK_SHIFT);
1555 /* Improves DQS centering by correcting for case when core speed multiplier and MEMCLK speed result in odd clock divisor, by selecting the next lowest memory speed, required only at DDR400 and higher speeds with certain DIMM loadings ---- cheating???*/
1556 if (!is_cpu_pre_e0()) {
1557 if (min_cycle_time==0x50) {
1563 value |= result.param->dch_memclk;
1564 pci_write_config32(ctrl->f2, DRAM_CONFIG_HIGH, value);
1566 static const unsigned latencies[] = { DTL_CL_2, DTL_CL_2_5, DTL_CL_3 };
1568 /* Update DRAM Timing Low with our selected cas latency */
1569 value = pci_read_config32(ctrl->f2, DRAM_TIMING_LOW);
1570 value &= ~(DTL_TCL_MASK << DTL_TCL_SHIFT);
1571 value |= latencies[min_latency - 2] << DTL_TCL_SHIFT;
1572 pci_write_config32(ctrl->f2, DRAM_TIMING_LOW, value);
1574 result.dimm_mask = dimm_mask;
1577 result.param = (const struct mem_param *)0;
1578 result.dimm_mask = -1;
/*
 * Program tRC (active-to-active/refresh, SPD byte 41) for DIMM i into the
 * DRAM Timing Low register. The SPD nanosecond value is converted to MEMCLK
 * cycles with param->divisor, clamped to DTL_TRC_MIN, and never reduced
 * below the value already programmed (maximum of old and new wins).
 * Returns -1 on SPD read failure.
 * NOTE(review): this listing is non-contiguous (original line numbers skip);
 * local declarations and several branch bodies are elided here.
 */
1583 static int update_dimm_Trc(const struct mem_controller *ctrl, const struct mem_param *param, int i)
1585 unsigned clocks, old_clocks;
1588 value = spd_read_byte(ctrl->channel0[i], 41);
1589 if (value < 0) return -1;
/* SPD 0 / 0xff means "undefined"; the fallback path is elided in this listing */
1590 if ((value == 0) || (value == 0xff)) {
/* value is in whole ns; (value << 1) works in half-ns units to match divisor */
1593 clocks = ((value << 1) + param->divisor - 1)/param->divisor;
1594 if (clocks < DTL_TRC_MIN) {
1595 clocks = DTL_TRC_MIN;
1597 if (clocks > DTL_TRC_MAX) {
/* Merge with the currently programmed tRC: keep the slower (larger) value */
1601 dtl = pci_read_config32(ctrl->f2, DRAM_TIMING_LOW);
1602 old_clocks = ((dtl >> DTL_TRC_SHIFT) & DTL_TRC_MASK) + DTL_TRC_BASE;
1603 if (old_clocks > clocks) {
1604 clocks = old_clocks;
1606 dtl &= ~(DTL_TRC_MASK << DTL_TRC_SHIFT);
1607 dtl |= ((clocks - DTL_TRC_BASE) << DTL_TRC_SHIFT);
1608 pci_write_config32(ctrl->f2, DRAM_TIMING_LOW, dtl);
/*
 * Program tRFC (auto-refresh to active, SPD byte 42) for DIMM i into
 * DRAM Timing Low. Same pattern as update_dimm_Trc: ns -> MEMCLK clocks
 * via param->divisor, clamp at DTL_TRFC_MIN, and keep the larger of the
 * new and currently programmed value. Returns -1 on SPD read failure.
 * NOTE(review): listing is non-contiguous; some lines are elided.
 */
1612 static int update_dimm_Trfc(const struct mem_controller *ctrl, const struct mem_param *param, int i)
1614 unsigned clocks, old_clocks;
1617 value = spd_read_byte(ctrl->channel0[i], 42);
1618 if (value < 0) return -1;
/* SPD 0 / 0xff: undefined -- substitute the per-speed default from param */
1619 if ((value == 0) || (value == 0xff)) {
1620 value = param->tRFC;
1622 clocks = ((value << 1) + param->divisor - 1)/param->divisor;
1623 if (clocks < DTL_TRFC_MIN) {
1624 clocks = DTL_TRFC_MIN;
1626 if (clocks > DTL_TRFC_MAX) {
/* Never lower an already-programmed (slower) tRFC */
1629 dtl = pci_read_config32(ctrl->f2, DRAM_TIMING_LOW);
1630 old_clocks = ((dtl >> DTL_TRFC_SHIFT) & DTL_TRFC_MASK) + DTL_TRFC_BASE;
1631 if (old_clocks > clocks) {
1632 clocks = old_clocks;
1634 dtl &= ~(DTL_TRFC_MASK << DTL_TRFC_SHIFT);
1635 dtl |= ((clocks - DTL_TRFC_BASE) << DTL_TRFC_SHIFT);
1636 pci_write_config32(ctrl->f2, DRAM_TIMING_LOW, dtl);
/*
 * Program tRCD (RAS-to-CAS delay, SPD byte 29) for DIMM i into DRAM Timing
 * Low. SPD byte 29 is encoded in quarter-ns, hence the doubled divisor in
 * the rounding-up conversion. Clamps at DTL_TRCD_MIN and keeps the larger
 * of new vs. already-programmed value. Returns -1 on SPD read failure.
 * NOTE(review): listing is non-contiguous; some lines are elided.
 */
1641 static int update_dimm_Trcd(const struct mem_controller *ctrl, const struct mem_param *param, int i)
1643 unsigned clocks, old_clocks;
1646 value = spd_read_byte(ctrl->channel0[i], 29);
1647 if (value < 0) return -1;
/* quarter-ns SPD units: divide by (divisor * 2) with round-up */
1648 clocks = (value + (param->divisor << 1) -1)/(param->divisor << 1);
1649 if (clocks < DTL_TRCD_MIN) {
1650 clocks = DTL_TRCD_MIN;
1652 if (clocks > DTL_TRCD_MAX) {
1655 dtl = pci_read_config32(ctrl->f2, DRAM_TIMING_LOW);
1656 old_clocks = ((dtl >> DTL_TRCD_SHIFT) & DTL_TRCD_MASK) + DTL_TRCD_BASE;
1657 if (old_clocks > clocks) {
1658 clocks = old_clocks;
1660 dtl &= ~(DTL_TRCD_MASK << DTL_TRCD_SHIFT);
1661 dtl |= ((clocks - DTL_TRCD_BASE) << DTL_TRCD_SHIFT);
1662 pci_write_config32(ctrl->f2, DRAM_TIMING_LOW, dtl);
/*
 * Program tRRD (active-to-active for different banks, SPD byte 28) for
 * DIMM i into DRAM Timing Low. SPD byte 28 is in quarter-ns, so the
 * conversion uses a doubled divisor. Clamps at DTL_TRRD_MIN and keeps the
 * larger of new vs. programmed value. Returns -1 on SPD read failure.
 * NOTE(review): listing is non-contiguous; some lines are elided.
 */
1666 static int update_dimm_Trrd(const struct mem_controller *ctrl, const struct mem_param *param, int i)
1668 unsigned clocks, old_clocks;
1671 value = spd_read_byte(ctrl->channel0[i], 28);
1672 if (value < 0) return -1;
1673 clocks = (value + (param->divisor << 1) -1)/(param->divisor << 1);
1674 if (clocks < DTL_TRRD_MIN) {
1675 clocks = DTL_TRRD_MIN;
1677 if (clocks > DTL_TRRD_MAX) {
1680 dtl = pci_read_config32(ctrl->f2, DRAM_TIMING_LOW);
1681 old_clocks = ((dtl >> DTL_TRRD_SHIFT) & DTL_TRRD_MASK) + DTL_TRRD_BASE;
1682 if (old_clocks > clocks) {
1683 clocks = old_clocks;
1685 dtl &= ~(DTL_TRRD_MASK << DTL_TRRD_SHIFT);
1686 dtl |= ((clocks - DTL_TRRD_BASE) << DTL_TRRD_SHIFT);
1687 pci_write_config32(ctrl->f2, DRAM_TIMING_LOW, dtl);
/*
 * Program tRAS (minimum active time, SPD byte 30) for DIMM i into DRAM
 * Timing Low. SPD byte 30 is in whole ns, so the half-ns conversion
 * ((value << 1) / divisor) is used. Clamps at DTL_TRAS_MIN and keeps the
 * larger of new vs. programmed value. Returns -1 on SPD read failure.
 * NOTE(review): listing is non-contiguous; some lines are elided.
 */
1691 static int update_dimm_Tras(const struct mem_controller *ctrl, const struct mem_param *param, int i)
1693 unsigned clocks, old_clocks;
1696 value = spd_read_byte(ctrl->channel0[i], 30);
1697 if (value < 0) return -1;
1698 clocks = ((value << 1) + param->divisor - 1)/param->divisor;
1699 if (clocks < DTL_TRAS_MIN) {
1700 clocks = DTL_TRAS_MIN;
1702 if (clocks > DTL_TRAS_MAX) {
1705 dtl = pci_read_config32(ctrl->f2, DRAM_TIMING_LOW);
1706 old_clocks = ((dtl >> DTL_TRAS_SHIFT) & DTL_TRAS_MASK) + DTL_TRAS_BASE;
1707 if (old_clocks > clocks) {
1708 clocks = old_clocks;
1710 dtl &= ~(DTL_TRAS_MASK << DTL_TRAS_SHIFT);
1711 dtl |= ((clocks - DTL_TRAS_BASE) << DTL_TRAS_SHIFT);
1712 pci_write_config32(ctrl->f2, DRAM_TIMING_LOW, dtl);
/*
 * Program tRP (row precharge time, SPD byte 27) for DIMM i into DRAM
 * Timing Low. SPD byte 27 is in quarter-ns, so the conversion uses the
 * doubled divisor. Clamps at DTL_TRP_MIN and keeps the larger of new vs.
 * programmed value. Returns -1 on SPD read failure.
 * NOTE(review): listing is non-contiguous; some lines are elided.
 */
1716 static int update_dimm_Trp(const struct mem_controller *ctrl, const struct mem_param *param, int i)
1718 unsigned clocks, old_clocks;
1721 value = spd_read_byte(ctrl->channel0[i], 27);
1722 if (value < 0) return -1;
1723 clocks = (value + (param->divisor << 1) - 1)/(param->divisor << 1);
1724 if (clocks < DTL_TRP_MIN) {
1725 clocks = DTL_TRP_MIN;
1727 if (clocks > DTL_TRP_MAX) {
1730 dtl = pci_read_config32(ctrl->f2, DRAM_TIMING_LOW);
1731 old_clocks = ((dtl >> DTL_TRP_SHIFT) & DTL_TRP_MASK) + DTL_TRP_BASE;
1732 if (old_clocks > clocks) {
1733 clocks = old_clocks;
1735 dtl &= ~(DTL_TRP_MASK << DTL_TRP_SHIFT);
1736 dtl |= ((clocks - DTL_TRP_BASE) << DTL_TRP_SHIFT);
1737 pci_write_config32(ctrl->f2, DRAM_TIMING_LOW, dtl);
/*
 * Program tWR (write recovery) into DRAM Timing Low from the selected
 * mem_param table entry; no per-DIMM SPD input is consulted.
 * NOTE(review): listing is non-contiguous; declarations/braces elided.
 */
1741 static void set_Twr(const struct mem_controller *ctrl, const struct mem_param *param)
1744 dtl = pci_read_config32(ctrl->f2, DRAM_TIMING_LOW);
1745 dtl &= ~(DTL_TWR_MASK << DTL_TWR_SHIFT);
1746 dtl |= (param->dtl_twr - DTL_TWR_BASE) << DTL_TWR_SHIFT;
1747 pci_write_config32(ctrl->f2, DRAM_TIMING_LOW, dtl);
/*
 * Initialize the refresh rate field (Tref) in DRAM Timing High to the
 * 4K-row default for the selected memory speed; update_dimm_Tref() may
 * later relax it to the 8K-row rate per DIMM.
 * NOTE(review): listing is non-contiguous; declarations/braces elided.
 */
1751 static void init_Tref(const struct mem_controller *ctrl, const struct mem_param *param)
1754 dth = pci_read_config32(ctrl->f2, DRAM_TIMING_HIGH);
1755 dth &= ~(DTH_TREF_MASK << DTH_TREF_SHIFT);
1756 dth |= (param->dch_tref4k << DTH_TREF_SHIFT);
1757 pci_write_config32(ctrl->f2, DRAM_TIMING_HIGH, dth);
/*
 * Select the refresh rate (Tref) for DIMM i based on SPD byte 3 (number of
 * row address bits): pick the 8K- or 4K-row rate, then reconcile with the
 * rate already programmed in DRAM Timing High -- a 12-row-bit DIMM keeps
 * the faster 4K rate only if that is what is already set; otherwise the
 * slower 8K rate wins. Returns -1 on SPD read failure.
 * NOTE(review): listing is non-contiguous; the condition selecting
 * tref8k vs tref4k before line 1769/1771 is elided here.
 */
1760 static int update_dimm_Tref(const struct mem_controller *ctrl, const struct mem_param *param, int i)
1764 unsigned tref, old_tref;
1765 value = spd_read_byte(ctrl->channel0[i], 3);
1766 if (value < 0) return -1;
1769 tref = param->dch_tref8k;
1771 tref = param->dch_tref4k;
1774 dth = pci_read_config32(ctrl->f2, DRAM_TIMING_HIGH);
1775 old_tref = (dth >> DTH_TREF_SHIFT) & DTH_TREF_MASK;
1776 if ((value == 12) && (old_tref == param->dch_tref4k)) {
1777 tref = param->dch_tref4k;
1779 tref = param->dch_tref8k;
1781 dth &= ~(DTH_TREF_MASK << DTH_TREF_SHIFT);
1782 dth |= (tref << DTH_TREF_SHIFT);
1783 pci_write_config32(ctrl->f2, DRAM_TIMING_HIGH, dth);
/*
 * Flag DIMM i as x4-organized in DRAM Config Low when SPD byte 13
 * (primary SDRAM device width) indicates x4 parts. With quad-rank DIMM
 * support compiled in, SPD byte 5 (number of physical banks/ranks) is
 * also read and the bit for the upper chip-select pair (i+2) is set too.
 * NOTE(review): listing is non-contiguous; the width test and the
 * set/clear of the dcl bit are elided here -- confirm against full source.
 */
1788 static int update_dimm_x4(const struct mem_controller *ctrl, const struct mem_param *param, int i)
1792 #if QRANK_DIMM_SUPPORT == 1
1796 value = spd_read_byte(ctrl->channel0[i], 13);
1801 #if QRANK_DIMM_SUPPORT == 1
1802 rank = spd_read_byte(ctrl->channel0[i], 5); /* number of physical banks */
1808 dimm = 1<<(DCL_x4DIMM_SHIFT+i);
1809 #if QRANK_DIMM_SUPPORT == 1
1811 dimm |= 1<<(DCL_x4DIMM_SHIFT+i+2);
1814 dcl = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
1819 pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dcl);
/*
 * Check DIMM i's module configuration type (SPD byte 11) and clear the
 * controller-wide DimmEccEn bit in DRAM Config Low on the visible path --
 * presumably when the DIMM does not provide ECC, so one non-ECC DIMM
 * disables ECC for the whole controller (the guarding condition is elided
 * in this listing; confirm against the full source).
 */
1823 static int update_dimm_ecc(const struct mem_controller *ctrl, const struct mem_param *param, int i)
1827 value = spd_read_byte(ctrl->channel0[i], 11);
1832 dcl = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
1833 dcl &= ~DCL_DimmEccEn;
1834 pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dcl);
/*
 * Count populated DIMMs by scanning the even chip-select base registers
 * (one per DIMM pair: indices 0,2,4,6 -> DRAM_CSBASE + index*4).
 * The enable test on csbase, the counter, and the return are elided in
 * this non-contiguous listing.
 */
1839 static int count_dimms(const struct mem_controller *ctrl)
1844 for (index = 0; index < 8; index += 2) {
1846 csbase = pci_read_config32(ctrl->f2, (DRAM_CSBASE + (index << 2)));
/*
 * Program tWTR (write-to-read delay) into DRAM Timing High from the
 * selected mem_param table entry.
 * NOTE(review): listing is non-contiguous; declarations/braces elided.
 */
1854 static void set_Twtr(const struct mem_controller *ctrl, const struct mem_param *param)
1858 dth = pci_read_config32(ctrl->f2, DRAM_TIMING_HIGH);
1859 dth &= ~(DTH_TWTR_MASK << DTH_TWTR_SHIFT);
1860 dth |= ((param->dtl_twtr - DTH_TWTR_BASE) << DTH_TWTR_SHIFT);
1861 pci_write_config32(ctrl->f2, DRAM_TIMING_HIGH, dth);
/*
 * Program tRWT (read-to-write turnaround) into DRAM Timing High. The
 * value is looked up in param->dtl_trwt[lat][mtype], where lat is derived
 * from the programmed CAS latency (DRAM Timing Low) and mtype classifies
 * the memory interface: 0 = dual channel (Opteron), 1 = registered 64-bit,
 * 2 = unbuffered 64-bit. Dies on an unrecognized latency or an
 * out-of-range table value.
 * NOTE(review): listing is non-contiguous; the latency->lat mapping
 * switch is elided here.
 */
1864 static void set_Trwt(const struct mem_controller *ctrl, const struct mem_param *param)
1872 dtl = pci_read_config32(ctrl->f2, DRAM_TIMING_LOW);
1873 latency = (dtl >> DTL_TCL_SHIFT) & DTL_TCL_MASK;
1875 if (is_opteron(ctrl)) {
1876 mtype = 0; /* dual channel */
1877 } else if (is_registered(ctrl)) {
1878 mtype = 1; /* registered 64bit interface */
1880 mtype = 2; /* unbuffered 64bit interface */
1894 die("Unknown LAT for Trwt");
1897 clocks = param->dtl_trwt[lat][mtype];
1898 if ((clocks < DTH_TRWT_MIN) || (clocks > DTH_TRWT_MAX)) {
1899 die("Unknown Trwt\n");
1902 dth = pci_read_config32(ctrl->f2, DRAM_TIMING_HIGH);
1903 dth &= ~(DTH_TRWT_MASK << DTH_TRWT_SHIFT);
1904 dth |= ((clocks - DTH_TRWT_BASE) << DTH_TRWT_SHIFT);
1905 pci_write_config32(ctrl->f2, DRAM_TIMING_HIGH, dth);
/*
 * Program tWCL (write CAS latency -- memory clocks after CAS#) into DRAM
 * Timing High; the clock count depends on whether the DIMMs are
 * registered (the assignments to `clocks` are elided in this listing).
 */
1909 static void set_Twcl(const struct mem_controller *ctrl, const struct mem_param *param)
1911 /* Memory Clocks after CAS# */
1914 if (is_registered(ctrl)) {
1919 dth = pci_read_config32(ctrl->f2, DRAM_TIMING_HIGH);
1920 dth &= ~(DTH_TWCL_MASK << DTH_TWCL_SHIFT);
1921 dth |= ((clocks - DTH_TWCL_BASE) << DTH_TWCL_SHIFT);
1922 pci_write_config32(ctrl->f2, DRAM_TIMING_HIGH, dth);
/*
 * Program the read preamble into DRAM Config High. Counts how many of the
 * four channel-0 sockets exist (`slots`), maps registered/slot-count to an
 * index into param->rdpreamble[], validates the table value against
 * DCH_RDPREAMBLE_MIN/MAX, and dies on an unmappable configuration.
 * NOTE(review): listing is non-contiguous; the index assignments inside
 * each branch and the slot-count increment are elided here.
 */
1926 static void set_read_preamble(const struct mem_controller *ctrl, const struct mem_param *param)
1929 unsigned rdpreamble;
1934 for (i = 0; i < 4; i++) {
1935 if (ctrl->channel0[i]) {
1940 /* map to index to param.rdpreamble array */
1941 if (is_registered(ctrl)) {
1943 } else if (slots < 3) {
1945 } else if (slots == 3) {
1947 } else if (slots == 4) {
1950 die("Unknown rdpreamble for this nr of slots");
1953 dch = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH);
1954 dch &= ~(DCH_RDPREAMBLE_MASK << DCH_RDPREAMBLE_SHIFT);
1955 rdpreamble = param->rdpreamble[i];
1957 if ((rdpreamble < DCH_RDPREAMBLE_MIN) || (rdpreamble > DCH_RDPREAMBLE_MAX)) {
1958 die("Unknown rdpreamble");
1961 dch |= (rdpreamble - DCH_RDPREAMBLE_BASE) << DCH_RDPREAMBLE_SHIFT;
1962 pci_write_config32(ctrl->f2, DRAM_CONFIG_HIGH, dch);
/*
 * Program the maximum asynchronous latency into DRAM Config High based on
 * the number of populated DIMMs (count_dimms) and whether the interface
 * is registered; dies when too many unbuffered DIMMs are installed.
 * NOTE(review): listing is non-contiguous; the async_lat assignments per
 * dimm-count branch are elided here.
 */
1965 static void set_max_async_latency(const struct mem_controller *ctrl, const struct mem_param *param)
1971 dimms = count_dimms(ctrl);
1973 dch = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH);
1974 dch &= ~(DCH_ASYNC_LAT_MASK << DCH_ASYNC_LAT_SHIFT);
1976 if (is_registered(ctrl)) {
1988 die("Too many unbuffered dimms");
1990 else if (dimms == 3) {
1999 dch |= ((async_lat - DCH_ASYNC_LAT_BASE) << DCH_ASYNC_LAT_SHIFT);
2000 pci_write_config32(ctrl->f2, DRAM_CONFIG_HIGH, dch);
/*
 * Program the dynamic idle cycle limit in DRAM Config High to the
 * AMD-recommended fixed value (16 cycles) and enable the dynamic idle
 * cycle counter.
 * NOTE(review): listing is non-contiguous; declarations/braces elided.
 */
2003 static void set_idle_cycle_limit(const struct mem_controller *ctrl, const struct mem_param *param)
2006 /* AMD says to Hardcode this */
2007 dch = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH);
2008 dch &= ~(DCH_IDLE_LIMIT_MASK << DCH_IDLE_LIMIT_SHIFT);
2009 dch |= DCH_IDLE_LIMIT_16 << DCH_IDLE_LIMIT_SHIFT;
2010 dch |= DCH_DYN_IDLE_CTR_EN;
2011 pci_write_config32(ctrl->f2, DRAM_CONFIG_HIGH, dch);
/*
 * Apply all SPD-derived timing parameters for every enabled DIMM, then the
 * per-speed (non-SPD) parameters. For each DIMM still in dimm_mask the
 * update_dimm_* helpers adjust DRAM Timing Low/High and DRAM Config Low;
 * any helper returning <= 0 sends control to dimm_err, where the DIMM is
 * disabled and dropped from the mask. Finally the table-driven registers
 * (tWR, tWTR, tRWT, tWCL, read preamble, async latency, idle cycle limit)
 * are programmed. Returns the (possibly reduced) dimm_mask.
 * NOTE(review): listing is non-contiguous; loop braces, the dimm_err
 * label, and the final return are elided here.
 */
2014 static long spd_set_dram_timing(const struct mem_controller *ctrl, const struct mem_param *param, long dimm_mask)
2018 init_Tref(ctrl, param);
2019 for (i = 0; i < DIMM_SOCKETS; i++) {
2021 if (!(dimm_mask & (1 << i))) {
2024 /* DRAM Timing Low Register */
2025 if ((rc = update_dimm_Trc (ctrl, param, i)) <= 0) goto dimm_err;
2026 if ((rc = update_dimm_Trfc(ctrl, param, i)) <= 0) goto dimm_err;
2027 if ((rc = update_dimm_Trcd(ctrl, param, i)) <= 0) goto dimm_err;
2028 if ((rc = update_dimm_Trrd(ctrl, param, i)) <= 0) goto dimm_err;
2029 if ((rc = update_dimm_Tras(ctrl, param, i)) <= 0) goto dimm_err;
2030 if ((rc = update_dimm_Trp (ctrl, param, i)) <= 0) goto dimm_err;
2032 /* DRAM Timing High Register */
2033 if ((rc = update_dimm_Tref(ctrl, param, i)) <= 0) goto dimm_err;
2036 /* DRAM Config Low */
2037 if ((rc = update_dimm_x4 (ctrl, param, i)) <= 0) goto dimm_err;
2038 if ((rc = update_dimm_ecc(ctrl, param, i)) <= 0) goto dimm_err;
/* dimm_err path: a DIMM that failed any timing update is disabled */
2044 dimm_mask = disable_dimm(ctrl, i, dimm_mask);
2046 /* DRAM Timing Low Register */
2047 set_Twr(ctrl, param);
2049 /* DRAM Timing High Register */
2050 set_Twtr(ctrl, param);
2051 set_Trwt(ctrl, param);
2052 set_Twcl(ctrl, param);
2054 /* DRAM Config High */
2055 set_read_preamble(ctrl, param);
2056 set_max_async_latency(ctrl, param);
2057 set_idle_cycle_limit(ctrl, param);
/*
 * Top-level SPD-driven configuration for one memory controller:
 * enable ECC hardware, activate the SPD ROM mux, detect DIMMs, then run
 * the pipeline: spd_enable_2channels -> spd_set_ram_size ->
 * spd_handle_unbuffered_dimms -> spd_set_memclk -> spd_set_dram_timing,
 * threading dimm_mask through each stage. Bails out early when no
 * controller or no memory is present; an unrecoverable SPD error reaches
 * the hard-reset path at the bottom.
 * The signature gains a sys_info parameter when RAMINIT_SYSINFO is set.
 * NOTE(review): listing is non-contiguous; the error checks between
 * stages and the reset call are elided here.
 */
2061 #if RAMINIT_SYSINFO==1
2062 static void sdram_set_spd_registers(const struct mem_controller *ctrl, struct sys_info *sysinfo)
2064 static void sdram_set_spd_registers(const struct mem_controller *ctrl)
2067 struct spd_set_memclk_result result;
2068 const struct mem_param *param;
2071 if (!controller_present(ctrl)) {
2072 // printk_debug("No memory controller present\n");
2076 hw_enable_ecc(ctrl);
2077 activate_spd_rom(ctrl);
2078 dimm_mask = spd_detect_dimms(ctrl);
2079 if (!(dimm_mask & ((1 << DIMM_SOCKETS) - 1))) {
2080 printk_debug("No memory for this cpu\n");
2083 dimm_mask = spd_enable_2channels(ctrl, dimm_mask);
2086 dimm_mask = spd_set_ram_size(ctrl , dimm_mask);
2089 dimm_mask = spd_handle_unbuffered_dimms(ctrl, dimm_mask);
2092 result = spd_set_memclk(ctrl, dimm_mask);
2093 param = result.param;
2094 dimm_mask = result.dimm_mask;
2097 dimm_mask = spd_set_dram_timing(ctrl, param , dimm_mask);
2103 /* Unrecoverable error reading SPD data */
2104 print_err("SPD error - reset\n");
/*
 * Hoist the DRAM that would sit under the hardware memory hole above 4GB
 * for node i. carry_over is the amount of memory (in K) between
 * hole_startk and 4GB. Every node above i has its DRAM base/limit
 * registers (F1 0x40/0x44) shifted up by carry_over on all controllers;
 * node i's limit is raised likewise. If node i's base coincides exactly
 * with hole_startk, the hole offset would be 0 (which overflows), so the
 * base register itself is moved to 4GB instead; otherwise the DRAM Hole
 * Address Register (F0 0xf0) is programmed with the hole start and the
 * hoist offset. Returns a uint32_t (the visible lines do not show the
 * return value -- confirm against full source).
 * NOTE(review): listing is non-contiguous; `dev`, `hoist`, the final
 * register field and returns are partially elided.
 */
2109 #if HW_MEM_HOLE_SIZEK != 0
2110 static uint32_t hoist_memory(int controllers, const struct mem_controller *ctrl,unsigned hole_startk, int i)
2113 uint32_t carry_over;
2115 uint32_t base, limit;
/* memory displaced by the hole: from hole_startk up to the 4GB boundary */
2120 carry_over = (4*1024*1024) - hole_startk;
/* shift every node above i up by carry_over (limits before bases) */
2122 for (ii=controllers - 1;ii>i;ii--) {
2123 base = pci_read_config32(ctrl[0].f1, 0x40 + (ii << 3));
2124 if ((base & ((1<<1)|(1<<0))) != ((1<<1)|(1<<0))) {
2127 limit = pci_read_config32(ctrl[0].f1, 0x44 + (ii << 3));
2128 for (j = 0; j < controllers; j++) {
2129 pci_write_config32(ctrl[j].f1, 0x44 + (ii << 3), limit + (carry_over << 2));
2130 pci_write_config32(ctrl[j].f1, 0x40 + (ii << 3), base + (carry_over << 2));
2133 limit = pci_read_config32(ctrl[0].f1, 0x44 + (i << 3));
2134 for (j = 0; j < controllers; j++) {
2135 pci_write_config32(ctrl[j].f1, 0x44 + (i << 3), limit + (carry_over << 2));
2138 base = pci_read_config32(dev, 0x40 + (i << 3));
2139 basek = (base & 0xffff0000) >> 2;
2140 if (basek == hole_startk) {
2141 //don't need set memhole here, because hole off set will be 0, overflow
2142 //so need to change base reg instead, new basek will be 4*1024*1024
2144 base |= (4*1024*1024)<<2;
2145 for (j = 0; j < controllers; j++) {
2146 pci_write_config32(ctrl[j].f1, 0x40 + (i<<3), base);
2150 hoist = /* hole start address */
2151 ((hole_startk << 10) & 0xff000000) +
2152 /* hole address to memory controller address */
2153 (((basek + carry_over) >> 6) & 0x0000ff00) +
2156 pci_write_config32(dev, 0xf0, hoist);
/*
 * Configure the hardware memory hole below 4GB (size HW_MEM_HOLE_SIZEK).
 * With HW_MEM_HOLE_SIZE_AUTO_INC, first nudge hole_startk downward if it
 * lands exactly on a node's DRAM base (hoist_memory cannot express a zero
 * offset there). Then find the single node whose [base_k, limit_k) range
 * contains hole_startk, call hoist_memory() on it, and reprogram TOP_MEM /
 * TOP_MEM2 from the new end of memory. Only one hole is ever created.
 * NOTE(review): listing is non-contiguous; basek_pri tracking and several
 * braces are elided here.
 */
2162 static void set_hw_mem_hole(int controllers, const struct mem_controller *ctrl)
2165 uint32_t hole_startk;
2168 hole_startk = 4*1024*1024 - HW_MEM_HOLE_SIZEK;
2170 #if HW_MEM_HOLE_SIZE_AUTO_INC == 1
2171 /* We need to double check if hole_startk is valid.
2172 * If it is equal to the dram base address in K (base_k),
2173 * we need to decrease it.
2176 for (i=0; i<controllers; i++) {
2179 base = pci_read_config32(ctrl[0].f1, 0x40 + (i << 3));
2180 if ((base & ((1<<1)|(1<<0))) != ((1<<1)|(1<<0))) {
2183 base_k = (base & 0xffff0000) >> 2;
2184 if (base_k == hole_startk) {
2185 /* decrease memory hole startk to make sure it is
2186 * in the middle of the previous node
2188 hole_startk -= (base_k - basek_pri)>>1;
2189 break; /* only one hole */
2195 /* Find node number that needs the memory hole configured */
2196 for (i=0; i<controllers; i++) {
2197 uint32_t base, limit;
2198 unsigned base_k, limit_k;
2199 base = pci_read_config32(ctrl[0].f1, 0x40 + (i << 3));
/* skip nodes whose DRAM range is not read/write enabled (bits 1:0) */
2200 if ((base & ((1<<1)|(1<<0))) != ((1<<1)|(1<<0))) {
2203 limit = pci_read_config32(ctrl[0].f1, 0x44 + (i << 3));
2204 base_k = (base & 0xffff0000) >> 2;
2205 limit_k = ((limit + 0x00010000) & 0xffff0000) >> 2;
2206 if ((base_k <= hole_startk) && (limit_k > hole_startk)) {
2208 hoist_memory(controllers, ctrl, hole_startk, i);
2209 end_k = memory_end_k(ctrl, controllers);
2210 set_top_mem(end_k, hole_startk);
2211 break; /* only one hole */
/*
 * Bring DRAM out of reset and initialize it on every controller.
 * Sequence per the K8 BKDG:
 *  1. Mark MEMCLK valid on controllers that have any MEMCLK enabled and
 *     disable the DRAM input receivers.
 *  2. memreset() -- wait >= 20 MEMCLKs / toggle DIMM reset as needed.
 *  3. Per controller with valid MEMCLK: enable chipkill in MCA NB Config
 *     when ECC + 128-bit mode are on, pulse DisDqsHys, clear DLL disable,
 *     and set DramInit to start hardware initialization.
 *  4. Poll DramInit with a TIMEOUT_LOOPS bound; on post-C0 parts also
 *     wait for MemClrStatus/DramEnable before memory is touched.
 *  5. On E0 and later, configure the hardware memory hole (DramHoleValid
 *     may only be set after MemClrStatus).
 * The signature gains a sys_info parameter when RAMINIT_SYSINFO is set.
 * NOTE(review): listing is non-contiguous; loop braces, continue
 * statements and the hard_reset/error paths are elided here.
 */
2219 #define TIMEOUT_LOOPS 300000
2220 #if RAMINIT_SYSINFO == 1
2221 static void sdram_enable(int controllers, const struct mem_controller *ctrl, struct sys_info *sysinfo)
2223 static void sdram_enable(int controllers, const struct mem_controller *ctrl)
2228 /* Error if I don't have memory */
2229 if (memory_end_k(ctrl, controllers) == 0) {
2233 /* Before enabling memory start the memory clocks */
2234 for (i = 0; i < controllers; i++) {
2236 if (!controller_present(ctrl + i))
2238 dch = pci_read_config32(ctrl[i].f2, DRAM_CONFIG_HIGH);
2239 if (dch & (DCH_MEMCLK_EN0|DCH_MEMCLK_EN1|DCH_MEMCLK_EN2|DCH_MEMCLK_EN3)) {
2240 dch |= DCH_MEMCLK_VALID;
2241 pci_write_config32(ctrl[i].f2, DRAM_CONFIG_HIGH, dch);
2244 /* Disable dram receivers */
2246 dcl = pci_read_config32(ctrl[i].f2, DRAM_CONFIG_LOW);
2247 dcl |= DCL_DisInRcvrs;
2248 pci_write_config32(ctrl[i].f2, DRAM_CONFIG_LOW, dcl);
2252 /* We need to wait a minimum of 20 MEMCLKS to enable the InitDram */
2253 /* And if necessary toggle the reset on the dimms by hand */
2254 memreset(controllers, ctrl);
2256 for (i = 0; i < controllers; i++) {
2258 if (!controller_present(ctrl + i))
2260 /* Skip everything if I don't have any memory on this controller */
2261 dch = pci_read_config32(ctrl[i].f2, DRAM_CONFIG_HIGH);
2262 if (!(dch & DCH_MEMCLK_VALID)) {
2266 /* Toggle DisDqsHys to get it working */
2267 dcl = pci_read_config32(ctrl[i].f2, DRAM_CONFIG_LOW);
2268 if (dcl & DCL_DimmEccEn) {
2270 printk_spew("ECC enabled\n");
2271 mnc = pci_read_config32(ctrl[i].f3, MCA_NB_CONFIG);
/* chipkill requires 128-bit (dual channel) mode in addition to ECC */
2273 if (dcl & DCL_128BitEn) {
2274 mnc |= MNC_CHIPKILL_EN;
2276 pci_write_config32(ctrl[i].f3, MCA_NB_CONFIG, mnc);
2278 dcl |= DCL_DisDqsHys;
2279 pci_write_config32(ctrl[i].f2, DRAM_CONFIG_LOW, dcl);
2280 dcl &= ~DCL_DisDqsHys;
2281 dcl &= ~DCL_DLL_Disable;
2284 dcl |= DCL_DramInit;
2285 pci_write_config32(ctrl[i].f2, DRAM_CONFIG_LOW, dcl);
2288 for (i = 0; i < controllers; i++) {
2290 if (!controller_present(ctrl + i))
2292 /* Skip everything if I don't have any memory on this controller */
2293 dch = pci_read_config32(ctrl[i].f2, DRAM_CONFIG_HIGH);
2294 if (!(dch & DCH_MEMCLK_VALID)) {
2298 printk_debug("Initializing memory: ");
2301 dcl = pci_read_config32(ctrl[i].f2, DRAM_CONFIG_LOW);
2303 if ((loops & 1023) == 0) {
/* hardware clears DramInit when initialization completes */
2306 } while(((dcl & DCL_DramInit) != 0) && (loops < TIMEOUT_LOOPS));
2307 if (loops >= TIMEOUT_LOOPS) {
2308 printk_debug(" failed\n");
2312 if (!is_cpu_pre_c0()) {
2313 /* Wait until it is safe to touch memory */
2314 dcl &= ~(DCL_MemClrStatus | DCL_DramEnable);
2315 pci_write_config32(ctrl[i].f2, DRAM_CONFIG_LOW, dcl);
2317 dcl = pci_read_config32(ctrl[i].f2, DRAM_CONFIG_LOW);
2318 } while(((dcl & DCL_MemClrStatus) == 0) || ((dcl & DCL_DramEnable) == 0) );
2321 printk_debug(" done\n");
2324 #if HW_MEM_HOLE_SIZEK != 0
2325 // init hw mem hole here
2326 /* DramHoleValid bit only can be set after MemClrStatus is set by Hardware */
2327 if (!is_cpu_pre_e0())
2328 set_hw_mem_hole(controllers, ctrl);
2331 //FIXME add enable node interleaving here -- yhlu
/* Plan for node interleaving (not implemented):
2333 1. check how many nodes we have , if not all has ram installed get out
2334 2. check cs_base lo is 0, node 0 f2 0x40,,,,, if any one is not using lo is CS_BASE, get out
2335 3. check if other node is the same as node 0 about f2 0x40,,,,, otherwise get out
2336 4. if all ready enable node_interleaving in f1 0x40..... of every node
2337 5. for node interleaving we need to set mem hole to every node ( need recalcute hole offset in f0 for every node)
*/
/*
 * Record `val` (sysinfo-in-RAM flag) somewhere the later boot stages can
 * see it -- the function body is entirely elided in this non-contiguous
 * listing; confirm semantics against the full source.
 */
2342 static void set_sysinfo_in_ram(unsigned val)
2346 static void fill_mem_ctrl(int controllers, struct mem_controller *ctrl_a,
2347 const uint16_t *spd_addr)
2351 struct mem_controller *ctrl;
2352 for (i=0;i<controllers; i++) {
2355 ctrl->f0 = PCI_DEV(0, 0x18+i, 0);
2356 ctrl->f1 = PCI_DEV(0, 0x18+i, 1);
2357 ctrl->f2 = PCI_DEV(0, 0x18+i, 2);
2358 ctrl->f3 = PCI_DEV(0, 0x18+i, 3);
2360 if (spd_addr == (void *)0) continue;
2362 for (j=0;j<DIMM_SOCKETS;j++) {
2363 ctrl->channel0[j] = spd_addr[(i*2+0)*DIMM_SOCKETS + j];
2364 ctrl->channel1[j] = spd_addr[(i*2+1)*DIMM_SOCKETS + j];