/* This should be done by Eric
	2004.11 yhlu add 4 rank DIMM support
	2004.12 yhlu add D0 support
	2005.02 yhlu add E0 memory hole support
*/
#include <cpu/x86/mem.h>
#include <cpu/x86/cache.h>
#include <cpu/x86/mtrr.h>

#if (CONFIG_LB_MEM_TOPK & (CONFIG_LB_MEM_TOPK -1)) != 0
# error "CONFIG_LB_MEM_TOPK must be a power of 2"
#endif

#ifndef K8_4RANK_DIMM_SUPPORT
#define K8_4RANK_DIMM_SUPPORT 0
#endif

static void setup_resource_map(const unsigned int *register_values, int max)
{
	int i;
//	print_debug("setting up resource map....");
	for(i = 0; i < max; i += 3) {
		device_t dev;
		unsigned where;
		unsigned long reg;
#if CONFIG_USE_INIT
		printk_debug("%08x <- %08x\r\n", register_values[i], register_values[i+2]);
#else
		print_debug_hex32(register_values[i]);
		print_debug(" <- ");
		print_debug_hex32(register_values[i+2]);
		print_debug("\r\n");
#endif
		dev = register_values[i] & ~0xff;
		where = register_values[i] & 0xff;
		reg = pci_read_config32(dev, where);
		reg &= register_values[i+1];
		reg |= register_values[i+2];
		pci_write_config32(dev, where, reg);
#if 0
		reg = pci_read_config32(register_values[i]);
		reg &= register_values[i+1];
		reg |= register_values[i+2] & ~register_values[i+1];
		pci_write_config32(register_values[i], reg);
#endif
	}
//	print_debug("done.\r\n");
}
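#if 0
/* Editor's illustration (hypothetical values, not part of the original
 * table): register_values[] is consumed in {address, preserve-mask,
 * or-value} triplets.  An entry like the one below reads F1:0x44 on
 * node 0, keeps only the bits set in 0x0000f8f8, ORs in 0x00000002
 * (a destination node ID of 2), and writes the result back:
 */
static const unsigned int example_map[] = {
	PCI_ADDR(0, 0x18, 1, 0x44), 0x0000f8f8, 0x00000002,
};
/* setup_resource_map(example_map, sizeof(example_map)/sizeof(example_map[0])); */
#endif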
static int controller_present(const struct mem_controller *ctrl)
{
	return pci_read_config32(ctrl->f0, 0) == 0x11001022;
}

static void sdram_set_registers(const struct mem_controller *ctrl)
{
	static const unsigned int register_values[] = {

	/* Careful: set the limit registers before the base registers, which contain the enables */
	/* DRAM Limit i Registers
	 * [ 2: 0] Destination Node ID
	 * [10: 8] Interleave select
	 *	   Specifies the values of A[14:12] to use with interleave enable.
	 * [31:16] DRAM Limit Address i Bits 39-24
	 *	   This field defines the upper address bits of a 40-bit address
	 *	   that define the end of the DRAM region.
	 */
	PCI_ADDR(0, 0x18, 1, 0x44), 0x0000f8f8, 0x00000000,
	PCI_ADDR(0, 0x18, 1, 0x4C), 0x0000f8f8, 0x00000001,
	PCI_ADDR(0, 0x18, 1, 0x54), 0x0000f8f8, 0x00000002,
	PCI_ADDR(0, 0x18, 1, 0x5C), 0x0000f8f8, 0x00000003,
	PCI_ADDR(0, 0x18, 1, 0x64), 0x0000f8f8, 0x00000004,
	PCI_ADDR(0, 0x18, 1, 0x6C), 0x0000f8f8, 0x00000005,
	PCI_ADDR(0, 0x18, 1, 0x74), 0x0000f8f8, 0x00000006,
	PCI_ADDR(0, 0x18, 1, 0x7C), 0x0000f8f8, 0x00000007,
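	/* Worked example (editor's note, assuming the field layout above):
	 * a node whose DRAM region ends at 4GB has last address
	 * 0x00_ffff_ffff, so address bits 39-24 are 0x00ff and the [31:16]
	 * field of its limit register is written as 0x00ff0000, together
	 * with that node's ID in [2:0].
	 */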
	/* DRAM Base i Registers
	 * [ 0: 0] Read Enable
	 * [ 1: 1] Write Enable
	 *	   0 = Writes Disabled
	 * [10: 8] Interleave Enable
	 *	   000 = No interleave
	 *	   001 = Interleave on A[12] (2 nodes)
	 *	   011 = Interleave on A[12] and A[14] (4 nodes)
	 *	   111 = Interleave on A[12] and A[13] and A[14] (8 nodes)
	 * [31:16] DRAM Base Address i Bits 39-24
	 *	   This field defines the upper address bits of a 40-bit address
	 *	   that define the start of the DRAM region.
	 */
	PCI_ADDR(0, 0x18, 1, 0x40), 0x0000f8fc, 0x00000000,
	PCI_ADDR(0, 0x18, 1, 0x48), 0x0000f8fc, 0x00000000,
	PCI_ADDR(0, 0x18, 1, 0x50), 0x0000f8fc, 0x00000000,
	PCI_ADDR(0, 0x18, 1, 0x58), 0x0000f8fc, 0x00000000,
	PCI_ADDR(0, 0x18, 1, 0x60), 0x0000f8fc, 0x00000000,
	PCI_ADDR(0, 0x18, 1, 0x68), 0x0000f8fc, 0x00000000,
	PCI_ADDR(0, 0x18, 1, 0x70), 0x0000f8fc, 0x00000000,
	PCI_ADDR(0, 0x18, 1, 0x78), 0x0000f8fc, 0x00000000,

	/* DRAM CS Base Address i Registers
	 * [ 0: 0] Chip-Select Bank Enable
	 * [15: 9] Base Address (19-13)
	 *	   An optimization used when all DIMMs are the same size...
	 * [31:21] Base Address (35-25)
	 *	   This field defines the top 11 address bits of a 40-bit
	 *	   address that define the memory address space.  These
	 *	   bits decode 32-MByte blocks of memory.
	 */
	PCI_ADDR(0, 0x18, 2, 0x40), 0x001f01fe, 0x00000000,
	PCI_ADDR(0, 0x18, 2, 0x44), 0x001f01fe, 0x00000000,
	PCI_ADDR(0, 0x18, 2, 0x48), 0x001f01fe, 0x00000000,
	PCI_ADDR(0, 0x18, 2, 0x4C), 0x001f01fe, 0x00000000,
	PCI_ADDR(0, 0x18, 2, 0x50), 0x001f01fe, 0x00000000,
	PCI_ADDR(0, 0x18, 2, 0x54), 0x001f01fe, 0x00000000,
	PCI_ADDR(0, 0x18, 2, 0x58), 0x001f01fe, 0x00000000,
	PCI_ADDR(0, 0x18, 2, 0x5C), 0x001f01fe, 0x00000000,
	/* DRAM CS Mask Address i Registers
	 * Select bits to exclude from comparison with the DRAM Base address register.
	 * [15: 9] Address Mask (19-13)
	 *	   Address to be excluded from the optimized case
	 * [29:21] Address Mask (33-25)
	 *	   The bits with an address mask of 1 are excluded from address comparison
	 */
	PCI_ADDR(0, 0x18, 2, 0x60), 0xC01f01ff, 0x00000000,
	PCI_ADDR(0, 0x18, 2, 0x64), 0xC01f01ff, 0x00000000,
	PCI_ADDR(0, 0x18, 2, 0x68), 0xC01f01ff, 0x00000000,
	PCI_ADDR(0, 0x18, 2, 0x6C), 0xC01f01ff, 0x00000000,
	PCI_ADDR(0, 0x18, 2, 0x70), 0xC01f01ff, 0x00000000,
	PCI_ADDR(0, 0x18, 2, 0x74), 0xC01f01ff, 0x00000000,
	PCI_ADDR(0, 0x18, 2, 0x78), 0xC01f01ff, 0x00000000,
	PCI_ADDR(0, 0x18, 2, 0x7C), 0xC01f01ff, 0x00000000,
	/* DRAM Bank Address Mapping Register
	 * Specify the memory module size
	 *	   000 = 32Mbyte  (Rows = 12 & Col = 8)
	 *	   001 = 64Mbyte  (Rows = 12 & Col = 9)
	 *	   010 = 128Mbyte (Rows = 13 & Col = 9)|(Rows = 12 & Col = 10)
	 *	   011 = 256Mbyte (Rows = 13 & Col = 10)|(Rows = 12 & Col = 11)
	 *	   100 = 512Mbyte (Rows = 13 & Col = 11)|(Rows = 14 & Col = 10)
	 *	   101 = 1Gbyte   (Rows = 14 & Col = 11)|(Rows = 13 & Col = 12)
	 *	   110 = 2Gbyte   (Rows = 14 & Col = 12)
	 */
	PCI_ADDR(0, 0x18, 2, 0x80), 0xffff8888, 0x00000000,
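	/* Worked example (editor's note): a chip select with 13 row and 10
	 * column address bits is 256MB by the table above, so its 4-bit
	 * field in this register is 011b.  One field is packed per DIMM
	 * (chip-select pair), which is why set_dimm_size() below shifts by
	 * (index * 4).
	 */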
	/* DRAM Timing Low Register
	 * [ 2: 0] Tcl (Cas# Latency, Cas# to read-data-valid)
	 * [ 7: 4] Trc (Row Cycle Time, Ras#-active to Ras#-active/bank auto refresh)
	 *	   0000 = 7 bus clocks
	 *	   0001 = 8 bus clocks
	 *	   ...
	 *	   1110 = 21 bus clocks
	 *	   1111 = 22 bus clocks
	 * [11: 8] Trfc (Row refresh Cycle time, Auto-refresh-active to RAS#-active or RAS#auto-refresh)
	 *	   0000 = 9 bus clocks
	 *	   0001 = 10 bus clocks
	 *	   ...
	 *	   1110 = 23 bus clocks
	 *	   1111 = 24 bus clocks
	 * [14:12] Trcd (Ras#-active to Cas#-read/write Delay)
	 * [18:16] Trrd (Ras# to Ras# Delay)
	 * [23:20] Tras (Minimum Ras# Active Time)
	 *	   0000 to 0100 = reserved
	 *	   0101 = 5 bus clocks
	 *	   ...
	 *	   1111 = 15 bus clocks
	 * [26:24] Trp (Row Precharge Time)
	 * [28:28] Twr (Write Recovery Time)
	 */
	PCI_ADDR(0, 0x18, 2, 0x88), 0xe8088008, 0x02522001 /* 0x03623125 */ ,
	/* DRAM Timing High Register
	 * [ 0: 0] Twtr (Write to Read Delay)
	 * [ 6: 4] Trwt (Read to Write Delay)
	 * [12: 8] Tref (Refresh Rate)
	 *	   00000 = 100Mhz 4K rows
	 *	   00001 = 133Mhz 4K rows
	 *	   00010 = 166Mhz 4K rows
	 *	   00011 = 200Mhz 4K rows
	 *	   01000 = 100Mhz 8K/16K rows
	 *	   01001 = 133Mhz 8K/16K rows
	 *	   01010 = 166Mhz 8K/16K rows
	 *	   01011 = 200Mhz 8K/16K rows
	 * [22:20] Twcl (Write CAS Latency)
	 *	   000 = 1 Mem clock after CAS# (Unbuffered DIMMs)
	 *	   001 = 2 Mem clocks after CAS# (Registered DIMMs)
	 */
	PCI_ADDR(0, 0x18, 2, 0x8c), 0xff8fe08e, (0 << 20)|(0 << 8)|(0 << 4)|(0 << 0),
	/* DRAM Config Low Register
	 * [ 0: 0] DLL Disable
	 * [ 3: 3] Disable DQS Hysteresis (FIXME handle this one carefully)
	 *	   0 = Enable DQS input filter
	 *	   1 = Disable DQS input filtering
	 * [ 8: 8] DRAM Initialize
	 *	   0 = Initialization done or not yet started.
	 *	   1 = Initiate DRAM initialization sequence
	 * [ 9: 9] SO-Dimm Enable
	 *	   1 = SO-Dimms present
	 * [10:10] DRAM Enable
	 *	   0 = DRAM not enabled
	 *	   1 = DRAM initialized and enabled
	 * [11:11] Memory Clear Status
	 *	   0 = Memory Clear function has not completed
	 *	   1 = Memory Clear function has completed
	 * [12:12] Exit Self-Refresh
	 *	   0 = Exit from self-refresh done or not yet started
	 *	   1 = DRAM exiting from self refresh
	 * [13:13] Self-Refresh Status
	 *	   0 = Normal Operation
	 *	   1 = Self-refresh mode active
	 * [15:14] Read/Write Queue Bypass Count
	 * [16:16] 128-bit/64-Bit
	 *	   0 = 64bit Interface to DRAM
	 *	   1 = 128bit Interface to DRAM
	 * [17:17] DIMM ECC Enable
	 *	   0 = Some DIMMs do not have ECC
	 *	   1 = All DIMMs have ECC bits
	 * [18:18] UnBuffered DIMMs
	 *	   1 = Unbuffered DIMMs
	 * [19:19] Enable 32-Byte Granularity
	 *	   0 = Optimize for 64byte bursts
	 *	   1 = Optimize for 32byte bursts
	 * [20:20] DIMM 0 is x4
	 * [21:21] DIMM 1 is x4
	 * [22:22] DIMM 2 is x4
	 * [23:23] DIMM 3 is x4
	 *	   1 = x4 DIMM present
	 * [24:24] Disable DRAM Receivers
	 *	   0 = Receivers enabled
	 *	   1 = Receivers disabled
	 * [27:25] DCQ Bypass Maximum
	 *	   000 = Arbiter's choice is always respected
	 *	   001 = Oldest entry in DCQ can be bypassed 1 time
	 *	   010 = Oldest entry in DCQ can be bypassed 2 times
	 *	   011 = Oldest entry in DCQ can be bypassed 3 times
	 *	   100 = Oldest entry in DCQ can be bypassed 4 times
	 *	   101 = Oldest entry in DCQ can be bypassed 5 times
	 *	   110 = Oldest entry in DCQ can be bypassed 6 times
	 *	   111 = Oldest entry in DCQ can be bypassed 7 times
	 */
	PCI_ADDR(0, 0x18, 2, 0x90), 0xf0000000,
	(0 << 23)|(0 << 22)|(0 << 21)|(0 << 20)|
	(1 << 19)|(0 << 18)|(1 << 17)|(0 << 16)|
	(2 << 14)|(0 << 13)|(0 << 12)|
	(0 << 11)|(0 << 10)|(0 << 9)|(0 << 8)|
	(0 << 3) |(0 << 1) |(0 << 0),
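	/* Decoding the value composed above (editor's note): (1 << 19)
	 * optimizes for 32-byte bursts, (1 << 17) enables DIMM ECC, and
	 * (2 << 14) sets a read/write queue bypass count of 2; all other
	 * fields start cleared and are programmed later from SPD data.
	 */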
	/* DRAM Config High Register
	 * [ 3: 0] Maximum Asynchronous Latency
	 * [11: 8] Read Preamble
	 * [18:16] Idle Cycle Limit
	 * [19:19] Dynamic Idle Cycle Center Enable
	 *	   0 = Use Idle Cycle Limit
	 *	   1 = Generate a dynamic Idle cycle limit
	 * [22:20] DRAM MEMCLK Frequency
	 * [25:25] Memory Clock Ratio Valid (FIXME carefully enable memclk)
	 *	   0 = Disable MemClks
	 * [26:26] Memory Clock 0 Enable
	 * [27:27] Memory Clock 1 Enable
	 * [28:28] Memory Clock 2 Enable
	 * [29:29] Memory Clock 3 Enable
	 */
	PCI_ADDR(0, 0x18, 2, 0x94), 0xc180f0f0,
	(0 << 29)|(0 << 28)|(0 << 27)|(0 << 26)|(0 << 25)|
	(0 << 20)|(0 << 19)|(DCH_IDLE_LIMIT_16 << 16)|(0 << 8)|(0 << 0),
	/* DRAM Delay Line Register
	 * Adjust the skew of the input DQS strobe relative to DATA
	 * [23:16] Delay Line Adjust
	 *	   Adjusts the DLL derived PDL delay by one or more delay stages
	 *	   in either the faster or slower direction.
	 * [24:24] Adjust Slower
	 *	   1 = Adj is used to increase the PDL delay
	 * [25:25] Adjust Faster
	 *	   1 = Adj is used to decrease the PDL delay
	 */
	PCI_ADDR(0, 0x18, 2, 0x98), 0xfc00ffff, 0x00000000,
	/* DRAM Scrub Control Register
	 * [ 4: 0] DRAM Scrub Rate
	 * [12: 8] L2 Scrub Rate
	 * [20:16] Dcache Scrub
	 *	   00000 = Do not scrub
	 *	   All Others = Reserved
	 */
	PCI_ADDR(0, 0x18, 3, 0x58), 0xffe0e0e0, 0x00000000,
	/* DRAM Scrub Address Low Register
	 * [ 0: 0] DRAM Scrubber Redirect Enable
	 *	   1 = Scrubber corrects errors found in normal operation
	 * [31: 6] DRAM Scrub Address 31-6
	 */
	PCI_ADDR(0, 0x18, 3, 0x5C), 0x0000003e, 0x00000000,
	/* DRAM Scrub Address High Register
	 * [ 7: 0] DRAM Scrub Address 39-32
	 */
	PCI_ADDR(0, 0x18, 3, 0x60), 0xffffff00, 0x00000000,
	};
	int i;
	int max;

	if (!controller_present(ctrl)) {
//		print_debug("No memory controller present\r\n");
		return;
	}
	print_spew("setting up CPU");
	print_spew_hex8(ctrl->node_id);
	print_spew(" northbridge registers\r\n");
	max = sizeof(register_values)/sizeof(register_values[0]);
	for(i = 0; i < max; i += 3) {
		device_t dev;
		unsigned where;
		unsigned long reg;
#if CONFIG_USE_INIT
		printk_debug("%08x <- %08x\r\n", register_values[i], register_values[i+2]);
#else
		print_spew_hex32(register_values[i]);
		print_spew(" <- ");
		print_spew_hex32(register_values[i+2]);
		print_spew("\r\n");
#endif
		dev = (register_values[i] & ~0xff) - PCI_DEV(0, 0x18, 0) + ctrl->f0;
		where = register_values[i] & 0xff;
		reg = pci_read_config32(dev, where);
		reg &= register_values[i+1];
		reg |= register_values[i+2];
		pci_write_config32(dev, where, reg);
#if 0
		reg = pci_read_config32(register_values[i]);
		reg &= register_values[i+1];
		reg |= register_values[i+2];
		pci_write_config32(register_values[i], reg);
#endif
	}
	print_spew("done.\r\n");
}
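/* Editor's note: the PCI_ADDR() entries above are encoded for node 0
 * (device 0x18).  Subtracting PCI_DEV(0, 0x18, 0) and adding ctrl->f0
 * rebases each address onto this controller's node, so a single table
 * serves every node in the system.
 */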
static void hw_enable_ecc(const struct mem_controller *ctrl)
{
	uint32_t dcl, nbcap;
	nbcap = pci_read_config32(ctrl->f3, NORTHBRIDGE_CAP);
	dcl = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
	dcl &= ~DCL_DimmEccEn;
	if (nbcap & NBCAP_ECC) {
		dcl |= DCL_DimmEccEn;
	}
	if (read_option(CMOS_VSTART_ECC_memory, CMOS_VLEN_ECC_memory, 1) == 0) {
		dcl &= ~DCL_DimmEccEn;
	}
	pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dcl);
}

static int is_dual_channel(const struct mem_controller *ctrl)
{
	uint32_t dcl;
	dcl = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
	return dcl & DCL_128BitEn;
}

static int is_opteron(const struct mem_controller *ctrl)
{
	/* Test to see if I am an Opteron.
	 * FIXME Testing dual channel capability is correct for now
	 * but a better test is probably required.
	 */
#warning "FIXME implement a better test for opterons"
	uint32_t nbcap;
	nbcap = pci_read_config32(ctrl->f3, NORTHBRIDGE_CAP);
	return !!(nbcap & NBCAP_128Bit);
}

static int is_registered(const struct mem_controller *ctrl)
{
	/* Test to see if we are dealing with registered SDRAM.
	 * If we are not registered we are unbuffered.
	 * This function must be called after spd_handle_unbuffered_dimms.
	 */
	uint32_t dcl;
	dcl = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
	return !(dcl & DCL_UnBufDimm);
}

struct dimm_size {
	unsigned long side1;
	unsigned long side2;
	unsigned long rows;
	unsigned long col;
#if K8_4RANK_DIMM_SUPPORT == 1
	unsigned long rank;
#endif
};

static struct dimm_size spd_get_dimm_size(unsigned device)
{
	/* Calculate the log base 2 size of a DIMM in bits */
	struct dimm_size sz;
	int value, low;
	sz.side1 = 0;
	sz.side2 = 0;
	sz.rows = 0;
	sz.col = 0;
#if K8_4RANK_DIMM_SUPPORT == 1
	sz.rank = 0;
#endif

	/* Note it might be easier to use byte 31 here, it has the DIMM size as
	 * a multiple of 4MB.  The way we do it now we can size both
	 * sides of an asymmetric dimm.
	 */
	value = spd_read_byte(device, 3);	/* rows */
	if (value < 0) goto hw_err;
	if ((value & 0xf) == 0) goto val_err;
	sz.side1 += value & 0xf;
	sz.rows = value & 0xf;

	value = spd_read_byte(device, 4);	/* columns */
	if (value < 0) goto hw_err;
	if ((value & 0xf) == 0) goto val_err;
	sz.side1 += value & 0xf;
	sz.col = value & 0xf;

	value = spd_read_byte(device, 17);	/* banks */
	if (value < 0) goto hw_err;
	if ((value & 0xff) == 0) goto val_err;
	sz.side1 += log2(value & 0xff);

	/* Get the module data width and convert it to a power of two */
	value = spd_read_byte(device, 7);	/* (high byte) */
	if (value < 0) goto hw_err;
	value &= 0xff;
	value <<= 8;
	low = spd_read_byte(device, 6);	/* (low byte) */
	if (low < 0) goto hw_err;
	value = value | (low & 0xff);
	if ((value != 72) && (value != 64)) goto val_err;
	sz.side1 += log2(value);

	/* side 2 */
	value = spd_read_byte(device, 5);	/* number of physical banks */
	if (value < 0) goto hw_err;
	if (value == 1) goto out;
	if ((value != 2) && (value != 4 )) {
		goto val_err;
	}
#if K8_4RANK_DIMM_SUPPORT == 1
	sz.rank = value;
#endif

	/* Start with the symmetrical case */
	sz.side2 = sz.side1;

	value = spd_read_byte(device, 3);	/* rows */
	if (value < 0) goto hw_err;
	if ((value & 0xf0) == 0) goto out;	/* If symmetrical we are done */
	sz.side2 -= (value & 0x0f);		/* Subtract out rows on side 1 */
	sz.side2 += ((value >> 4) & 0x0f);	/* Add in rows on side 2 */

	value = spd_read_byte(device, 4);	/* columns */
	if (value < 0) goto hw_err;
	if ((value & 0xff) == 0) goto val_err;
	sz.side2 -= (value & 0x0f);		/* Subtract out columns on side 1 */
	sz.side2 += ((value >> 4) & 0x0f);	/* Add in columns on side 2 */
	goto out;

 val_err:
	die("Bad SPD value\r\n");
	/* If an hw_error occurs report that I have no memory */
 hw_err:
	sz.side1 = 0;
	sz.side2 = 0;
	sz.rows = 0;
	sz.col = 0;
#if K8_4RANK_DIMM_SUPPORT == 1
	sz.rank = 0;
#endif
 out:
	return sz;
}

static const unsigned cs_map_aa[15] = {
	/* (row=12, col=8)(14, 12) ---> (0, 0) (2, 4) */
	0, 1, 3, 6, 0,
	0, 2, 4, 7, 9,
	0, 5, 8, 10, 11,
};

static void set_dimm_size(const struct mem_controller *ctrl, struct dimm_size sz, unsigned index)
{
	uint32_t base0, base1, map;
	uint32_t dch;

	if (sz.side1 != sz.side2) {
		sz.side2 = 0;
	}
	map = pci_read_config32(ctrl->f2, DRAM_BANK_ADDR_MAP);
	map &= ~(0xf << (index * 4));
#if K8_4RANK_DIMM_SUPPORT == 1
	if (sz.rank == 4) {
		map &= ~(0xf << ( (index + 2) * 4));
	}
#endif

	/* For each base register.
	 * Place the dimm size in 32 MB quantities in the bits 31 - 21.
	 * The initial dimm size is in bits.
	 * Set the base enable bit0.
	 */
	base0 = base1 = 0;

	/* Make certain side1 of the dimm is at least 32MB */
	if (sz.side1 >= (25 +3)) {
		if (is_cpu_pre_d0()) {
			map |= (sz.side1 - (25 + 3)) << (index *4);
#if K8_4RANK_DIMM_SUPPORT == 1
			if (sz.rank == 4) {
				map |= (sz.side1 - (25 + 3)) << ( (index + 2) * 4);
			}
#endif
		}
		else {
			map |= cs_map_aa[(sz.rows - 12) * 5 + (sz.col - 8) ] << (index*4);
#if K8_4RANK_DIMM_SUPPORT == 1
			if (sz.rank == 4) {
				map |= cs_map_aa[(sz.rows - 12) * 5 + (sz.col - 8) ] << ( (index + 2) * 4);
			}
#endif
		}
		base0 = (1 << ((sz.side1 - (25 + 3)) + 21)) | 1;
	}

	/* Make certain side2 of the dimm is at least 32MB */
	if (sz.side2 >= (25 + 3)) {
		base1 = (1 << ((sz.side2 - (25 + 3)) + 21)) | 1;
	}

	/* Double the size if we are using dual channel memory */
	if (is_dual_channel(ctrl)) {
		base0 = (base0 << 1) | (base0 & 1);
		base1 = (base1 << 1) | (base1 & 1);
	}

	/* Clear the reserved bits */
	base0 &= ~0x001ffffe;
	base1 &= ~0x001ffffe;

	/* Set the appropriate DIMM base address register */
	pci_write_config32(ctrl->f2, DRAM_CSBASE + (((index << 1)+0)<<2), base0);
	pci_write_config32(ctrl->f2, DRAM_CSBASE + (((index << 1)+1)<<2), base1);
#if K8_4RANK_DIMM_SUPPORT == 1
	if (sz.rank == 4) {
		pci_write_config32(ctrl->f2, DRAM_CSBASE + (((index << 1)+4)<<2), base0);
		pci_write_config32(ctrl->f2, DRAM_CSBASE + (((index << 1)+5)<<2), base1);
	}
#endif

	pci_write_config32(ctrl->f2, DRAM_BANK_ADDR_MAP, map);

	/* Enable the memory clocks for this DIMM */
	if (base0) {
		dch = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH);
		dch |= DCH_MEMCLK_EN0 << index;
#if K8_4RANK_DIMM_SUPPORT == 1
		if (sz.rank == 4) {
			dch |= DCH_MEMCLK_EN0 << (index + 2);
		}
#endif
		pci_write_config32(ctrl->f2, DRAM_CONFIG_HIGH, dch);
	}
}
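/* Worked example (editor's illustration): a single-sided DIMM with 13 row
 * bits, 10 column bits, 4 internal banks and a 64-bit width gives
 * sz.side1 = 13 + 10 + log2(4) + log2(64) = 31, i.e. 2^31 bits = 256MB.
 * set_dimm_size() stores 31 - 28 = 3 in this DIMM's bank mapping field
 * (011 = 256MB, matching the table at F2:0x80) and computes
 * base0 = (1 << (3 + 21)) | 1: the size in 32MB units lands in bits
 * [31:21] and bit 0 enables the chip select.
 */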
static long spd_set_ram_size(const struct mem_controller *ctrl, long dimm_mask)
{
	int i;
	struct dimm_size sz;

	for(i = 0; i < DIMM_SOCKETS; i++) {
		if (!(dimm_mask & (1 << i))) {
			continue;
		}
		sz = spd_get_dimm_size(ctrl->channel0[i]);
		if (sz.side1 == 0) {
			return -1; /* Report SPD error */
		}
		set_dimm_size(ctrl, sz, i);
	}
	return dimm_mask;
}

static void route_dram_accesses(const struct mem_controller *ctrl,
	unsigned long base_k, unsigned long limit_k)
{
	/* Route the addresses to the controller node */
	unsigned node_id;
	unsigned limit;
	unsigned base;
	unsigned index;
	unsigned limit_reg, base_reg;
	device_t device;

	node_id = ctrl->node_id;
	index = (node_id << 3);
	limit = (limit_k << 2);
	limit &= 0xffff0000;
	limit -= 0x00010000;
	limit |= ( 0 << 8) | (node_id << 0);
	base = (base_k << 2);
	base &= 0xffff0000;
	base |= (0 << 8) | (1<<1) | (1<<0);

	limit_reg = 0x44 + index;
	base_reg = 0x40 + index;
	for(device = PCI_DEV(0, 0x18, 1); device <= PCI_DEV(0, 0x1f, 1); device += PCI_DEV(0, 1, 0)) {
		pci_write_config32(device, limit_reg, limit);
		pci_write_config32(device, base_reg, base);
	}
}
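/* Editor's note on the shifts above: base_k and limit_k are in KB, so
 * the byte address is (x_k << 10), and the register wants address bits
 * 39-24 placed at bit 16: ((x_k << 10) >> 24) << 16 == (x_k << 2),
 * which is exactly what (base_k << 2) and (limit_k << 2) compute.
 */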
static void set_top_mem(unsigned tom_k)
{
	/* Error if I don't have memory */
	if (!tom_k) {
		die("No memory\r\n");
	}

	/* Report the amount of memory. */
	print_spew("RAM: 0x");
	print_spew_hex32(tom_k);
	print_spew(" KB\r\n");

	/* Now set top of memory */
	msr_t msr;
	msr.lo = (tom_k & 0x003fffff) << 10;
	msr.hi = (tom_k & 0xffc00000) >> 22;
	wrmsr(TOP_MEM2, msr);

	/* Leave a 64M hole between TOP_MEM and TOP_MEM2
	 * so I can see my rom chip and other I/O devices.
	 */
	if (tom_k >= 0x003f0000) {
		tom_k = 0x003f0000;
	}
	msr.lo = (tom_k & 0x003fffff) << 10;
	msr.hi = (tom_k & 0xffc00000) >> 22;
	wrmsr(TOP_MEM, msr);
}
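/* Worked example (editor's illustration): for tom_k = 0x100000 (1GB),
 * the byte address is 0x100000 << 10 = 0x40000000, so msr.lo receives
 * (tom_k & 0x003fffff) << 10 = 0x40000000 and msr.hi receives address
 * bits 39-32: (tom_k & 0xffc00000) >> 22 = 0.
 */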
static unsigned long interleave_chip_selects(const struct mem_controller *ctrl)
{
	static const uint32_t csbase_low[] = {
	/* 32MB */	(1 << (13 - 4)),
	/* 64MB */	(1 << (14 - 4)),
	/* 128MB */	(1 << (14 - 4)),
	/* 256MB */	(1 << (15 - 4)),
	/* 512MB */	(1 << (15 - 4)),
	/* 1GB */	(1 << (16 - 4)),
	/* 2GB */	(1 << (16 - 4)),
	};

	static const uint32_t csbase_low_d0[] = {
	/* 32MB */	(1 << (13 - 4)),
	/* 64MB */	(1 << (14 - 4)),
	/* 128MB */	(1 << (14 - 4)),
	/* 128MB */	(1 << (15 - 4)),
	/* 256MB */	(1 << (15 - 4)),
	/* 512MB */	(1 << (15 - 4)),
	/* 256MB */	(1 << (16 - 4)),
	/* 512MB */	(1 << (16 - 4)),
	/* 1GB */	(1 << (16 - 4)),
	/* 1GB */	(1 << (17 - 4)),
	/* 2GB */	(1 << (17 - 4)),
	};
	/* cs_base_high is not changed */

	uint32_t csbase_inc;
	int chip_selects, index;
	int bits;
	unsigned common_size;
	unsigned common_cs_mode;
	uint32_t csbase, csmask;

	/* See if all of the memory chip selects are the same size
	 * and if so count them.
	 */
	chip_selects = 0;
	common_size = 0;
	common_cs_mode = 0;
	for(index = 0; index < 8; index++) {
		unsigned size;
		unsigned cs_mode;
		uint32_t value;

		value = pci_read_config32(ctrl->f2, DRAM_CSBASE + (index << 2));

		/* Is it enabled? */
		if (!(value & 1)) {
			continue;
		}
		chip_selects++;
		size = value >> 21;
		if (common_size == 0) {
			common_size = size;
		}
		/* The size differed, fail */
		if (common_size != size) {
			return 0;
		}

		value = pci_read_config32(ctrl->f2, DRAM_BANK_ADDR_MAP);
		cs_mode = ( value >> ((index>>1)*4)) & 0xf;
		if (cs_mode == 0 ) continue;
		if (common_cs_mode == 0) {
			common_cs_mode = cs_mode;
		}
		/* The cs_mode differed, fail */
		if (common_cs_mode != cs_mode) {
			return 0;
		}
	}

	/* Chip selects can only be interleaved when there is
	 * more than one and there is a power of two of them.
	 */
	bits = log2(chip_selects);
	if (((1 << bits) != chip_selects) || (bits < 1) || (bits > 3)) {
		return 0;
	}

	/* Find the bits of csbase that we need to interleave on */
	if (is_cpu_pre_d0()) {
		csbase_inc = csbase_low[common_cs_mode];
		if (is_dual_channel(ctrl)) {
			/* We also run out of address mask bits if we try to interleave 8 4GB dimms */
			if ((bits == 3) && (common_size == (1 << (32 - 3)))) {
//				print_debug("8 4GB chip selects cannot be interleaved\r\n");
				return 0;
			}
			csbase_inc <<= 1;
		}
	}
	else {
		csbase_inc = csbase_low_d0[common_cs_mode];
		if (is_dual_channel(ctrl)) {
			if ( (bits==3) && (common_cs_mode > 8)) {
//				print_debug("8 cs_mode>8 chip selects cannot be interleaved\r\n");
				return 0;
			}
			csbase_inc <<= 1;
		}
	}

	/* Compute the initial values for csbase and csmask.
	 * In csbase just set the enable bit and the base to zero.
	 * In csmask set the mask bits for the size and page level interleave.
	 */
	csbase = 0 | 1;
	csmask = (((common_size << bits) - 1) << 21);
	csmask |= 0xfe00 & ~((csbase_inc << bits) - csbase_inc);
	for(index = 0; index < 8; index++) {
		uint32_t value;

		value = pci_read_config32(ctrl->f2, DRAM_CSBASE + (index << 2));
		/* Is it enabled? */
		if (!(value & 1)) {
			continue;
		}
		pci_write_config32(ctrl->f2, DRAM_CSBASE + (index << 2), csbase);
		pci_write_config32(ctrl->f2, DRAM_CSMASK + (index << 2), csmask);
		csbase += csbase_inc;
	}

	print_spew("Interleaved\r\n");

	/* Return the memory size in K */
	return common_size << (15 + bits);
}
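/* Worked example (editor's illustration): four 256MB chip selects give
 * common_size = 8 (csbase sizes are in 32MB = 2^15 KB units) and
 * bits = 2, so the function returns 8 << (15 + 2) = 0x100000 KB = 1GB,
 * the total interleaved region.
 */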
static unsigned long order_chip_selects(const struct mem_controller *ctrl)
{
	unsigned long tom;

	/* Remember which registers we have used in the high 8 bits of tom */
	tom = 0;
	for(;;) {
		/* Find the largest remaining candidate */
		unsigned index, candidate;
		uint32_t csbase, csmask;
		unsigned size;
		csbase = 0;
		candidate = 0;
		for(index = 0; index < 8; index++) {
			uint32_t value;
			value = pci_read_config32(ctrl->f2, DRAM_CSBASE + (index << 2));

			/* Is it enabled? */
			if (!(value & 1)) {
				continue;
			}

			/* Is it greater? */
			if (value <= csbase) {
				continue;
			}

			/* Has it already been selected */
			if (tom & (1 << (index + 24))) {
				continue;
			}

			/* I have a new candidate */
			csbase = value;
			candidate = index;
		}
		/* See if I have found a new candidate */
		if (csbase == 0) {
			break;
		}

		/* Remember the dimm size */
		size = csbase >> 21;

		/* Remember I have used this register */
		tom |= (1 << (candidate + 24));

		/* Recompute the cs base register value */
		csbase = (tom << 21) | 1;

		/* Increment the top of memory */
		tom += size;

		/* Compute the memory mask */
		csmask = ((size -1) << 21);
		csmask |= 0xfe00;	/* For now don't optimize */

		/* Write the new base register */
		pci_write_config32(ctrl->f2, DRAM_CSBASE + (candidate << 2), csbase);
		/* Write the new mask register */
		pci_write_config32(ctrl->f2, DRAM_CSMASK + (candidate << 2), csmask);
	}
	/* Return the memory size in K */
	return (tom & ~0xff000000) << 15;
}

unsigned long memory_end_k(const struct mem_controller *ctrl, int max_node_id)
{
	unsigned long end_k;
	int node_id;

	/* Find the last memory address used */
	end_k = 0;
	for(node_id = 0; node_id < max_node_id; node_id++) {
		uint32_t limit, base;
		unsigned index;
		index = node_id << 3;
		base = pci_read_config32(ctrl->f1, 0x40 + index);
		/* Only look at the limit if the base is enabled */
		if ((base & 3) == 3) {
			limit = pci_read_config32(ctrl->f1, 0x44 + index);
			end_k = ((limit + 0x00010000) & 0xffff0000) >> 2;
		}
	}
	return end_k;
}
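/* Editor's note: the limit register keeps address bits 39-24 of the
 * region's last byte in bits [31:16].  Adding 0x00010000 steps to the
 * first byte past the region, and >> 2 is the net of << 8 (field to
 * byte address) and >> 10 (bytes to KB).
 */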
#if K8_E0_MEM_HOLE_SIZEK != 0
#define K8_E0_MEM_HOLE_LIMITK 4*1024*1024
#define K8_E0_MEM_HOLE_BASEK (K8_E0_MEM_HOLE_LIMITK - K8_E0_MEM_HOLE_SIZEK )

static void set_e0_mem_hole(const struct mem_controller *ctrl, unsigned base_k)
{
	/* Route the addresses to the controller node */
	uint32_t val;

	val = pci_read_config32(ctrl->f1,0xf0);
	val = (K8_E0_MEM_HOLE_BASEK << 10) | ((K8_E0_MEM_HOLE_SIZEK+base_k)>>(16-10)) | 1;
	pci_write_config32(ctrl->f1, 0xf0, val);
}
#endif

static void order_dimms(const struct mem_controller *ctrl)
{
	unsigned long tom_k, base_k;

	if (read_option(CMOS_VSTART_interleave_chip_selects, CMOS_VLEN_interleave_chip_selects, 1) != 0) {
		tom_k = interleave_chip_selects(ctrl);
	}
	else {
		print_debug("Interleaving disabled\r\n");
		tom_k = 0;
	}
	if (!tom_k) {
		tom_k = order_chip_selects(ctrl);
	}
	/* Compute the memory base address */
	base_k = memory_end_k(ctrl, ctrl->node_id);
	tom_k += base_k;
#if K8_E0_MEM_HOLE_SIZEK != 0
	if (!is_cpu_pre_e0()) {
		/* See if this range covers the E0 memory hole */
		if ((base_k <= K8_E0_MEM_HOLE_BASEK) && (tom_k > K8_E0_MEM_HOLE_BASEK)) {
			tom_k += K8_E0_MEM_HOLE_SIZEK;
		}
	}
#endif
	route_dram_accesses(ctrl, base_k, tom_k);
	set_top_mem(tom_k);
}

static long disable_dimm(const struct mem_controller *ctrl, unsigned index, long dimm_mask)
{
	print_debug("disabling dimm");
	print_debug_hex8(index);
	print_debug("\r\n");
	pci_write_config32(ctrl->f2, DRAM_CSBASE + (((index << 1)+0)<<2), 0);
	pci_write_config32(ctrl->f2, DRAM_CSBASE + (((index << 1)+1)<<2), 0);
	dimm_mask &= ~(1 << index);
	return dimm_mask;
}

static long spd_handle_unbuffered_dimms(const struct mem_controller *ctrl, long dimm_mask)
{
	int i;
	int registered;
	int unbuffered;
	uint32_t dcl;
	unbuffered = 0;
	registered = 0;
	for(i = 0; (i < DIMM_SOCKETS); i++) {
		int value;
		if (!(dimm_mask & (1 << i))) {
			continue;
		}
		value = spd_read_byte(ctrl->channel0[i], 21);
		if (value < 0) {
			return -1;
		}
		/* Registered dimm ? */
		if (value & (1 << 1)) {
			registered = 1;
		}
		/* Otherwise it must be an unbuffered dimm */
		else {
			unbuffered = 1;
		}
	}
	if (unbuffered && registered) {
		die("Mixed buffered and registered dimms not supported");
	}

	// By yhlu: for debug; the Athlon64 socket 939 can do dual channel, but it uses unbuffered DIMMs
	if (unbuffered && is_opteron(ctrl)) {
		die("Unbuffered Dimms not supported on Opteron");
	}

	dcl = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
	dcl &= ~DCL_UnBufDimm;
	if (unbuffered) {
		dcl |= DCL_UnBufDimm;
	}
	pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dcl);

	if (is_registered(ctrl)) {
		print_debug("Registered\r\n");
	}
	else {
		print_debug("Unbuffered\r\n");
	}
	return dimm_mask;
}

static unsigned int spd_detect_dimms(const struct mem_controller *ctrl)
{
	unsigned dimm_mask;
	int i;
	dimm_mask = 0;
	for(i = 0; i < DIMM_SOCKETS; i++) {
		int byte;
		unsigned device;
		device = ctrl->channel0[i];
		if (device) {
			byte = spd_read_byte(ctrl->channel0[i], 2);	/* Type */
			if (byte == 7) {	/* 7 == DDR SDRAM */
				dimm_mask |= (1 << i);
			}
		}
		device = ctrl->channel1[i];
		if (device) {
			byte = spd_read_byte(ctrl->channel1[i], 2);
			if (byte == 7) {
				dimm_mask |= (1 << (i + DIMM_SOCKETS));
			}
		}
	}
	return dimm_mask;
}

static long spd_enable_2channels(const struct mem_controller *ctrl, long dimm_mask)
{
	int i;
	uint32_t nbcap;
	uint32_t dcl;
	/* SPD addresses to verify are identical */
	static const unsigned addresses[] = {
		2,	/* Type should be DDR SDRAM */
		3,	/* *Row addresses */
		4,	/* *Column addresses */
		5,	/* *Physical Banks */
		6,	/* *Module Data Width low */
		7,	/* *Module Data Width high */
		9,	/* *Cycle time at highest CAS Latency CL=X */
		11,	/* *SDRAM Type */
		13,	/* *SDRAM Width */
		17,	/* *Logical Banks */
		18,	/* *Supported CAS Latencies */
		21,	/* *SDRAM Module Attributes */
		23,	/* *Cycle time at CAS Latency (CLX - 0.5) */
		26,	/* *Cycle time at CAS Latency (CLX - 1.0) */
		27,	/* *tRP Row precharge time */
		28,	/* *Minimum Row Active to Row Active Delay (tRRD) */
		29,	/* *tRCD RAS to CAS */
		30,	/* *tRAS Activate to Precharge */
		41,	/* *Minimum Active to Active/Auto Refresh Time(Trc) */
		42,	/* *Minimum Auto Refresh Command Time(Trfc) */
	};
	/* If the dimms are not in pairs do not do dual channels */
	if ((dimm_mask & ((1 << DIMM_SOCKETS) - 1)) !=
		((dimm_mask >> DIMM_SOCKETS) & ((1 << DIMM_SOCKETS) - 1))) {
		goto single_channel;
	}
	/* If the cpu is not capable of doing dual channels don't do dual channels */
	nbcap = pci_read_config32(ctrl->f3, NORTHBRIDGE_CAP);
	if (!(nbcap & NBCAP_128Bit)) {
		goto single_channel;
	}
	for(i = 0; (i < 4) && (ctrl->channel0[i]); i++) {
		unsigned device0, device1;
		unsigned j;
		/* If I don't have a dimm skip this one */
		if (!(dimm_mask & (1 << i))) {
			continue;
		}
		device0 = ctrl->channel0[i];
		device1 = ctrl->channel1[i];
		for(j = 0; j < sizeof(addresses)/sizeof(addresses[0]); j++) {
			unsigned addr;
			int value0, value1;
			addr = addresses[j];
			value0 = spd_read_byte(device0, addr);
			if (value0 < 0) {
				return -1;
			}
			value1 = spd_read_byte(device1, addr);
			if (value1 < 0) {
				return -1;
			}
			if (value0 != value1) {
				goto single_channel;
			}
		}
	}
	print_spew("Enabling dual channel memory\r\n");
	dcl = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
	dcl &= ~DCL_32ByteEn;
	dcl |= DCL_128BitEn;
	pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dcl);
	return dimm_mask;
 single_channel:
	dimm_mask &= ~((1 << (DIMM_SOCKETS *2)) - (1 << DIMM_SOCKETS));
	return dimm_mask;
}

struct mem_param {
	uint8_t cycle_time;
	uint8_t divisor;	/* In 1/2 ns increments */
	uint8_t tRC;
	uint8_t tRFC;
	uint32_t dch_memclk;
	uint16_t dch_tref4k, dch_tref8k;
	uint8_t	 dtl_twr;
	const char *name;
};

static const struct mem_param *get_mem_param(unsigned min_cycle_time)
{
	static const struct mem_param speed[] = {
		{
			.name	    = "100Mhz\r\n",
			.cycle_time = 0xa0,
			.divisor    = (10 <<1),
			.tRC	    = 0x46,
			.tRFC	    = 0x50,
			.dch_memclk = DCH_MEMCLK_100MHZ << DCH_MEMCLK_SHIFT,
			.dch_tref4k = DTH_TREF_100MHZ_4K,
			.dch_tref8k = DTH_TREF_100MHZ_8K,
			.dtl_twr    = 2,
		},
		{
			.name	    = "133Mhz\r\n",
			.cycle_time = 0x75,
			.divisor    = (7<<1)+1,
			.tRC	    = 0x41,
			.tRFC	    = 0x4B,
			.dch_memclk = DCH_MEMCLK_133MHZ << DCH_MEMCLK_SHIFT,
			.dch_tref4k = DTH_TREF_133MHZ_4K,
			.dch_tref8k = DTH_TREF_133MHZ_8K,
			.dtl_twr    = 2,
		},
		{
			.name	    = "166Mhz\r\n",
			.cycle_time = 0x60,
			.divisor    = (6<<1),
			.tRC	    = 0x3C,
			.tRFC	    = 0x48,
			.dch_memclk = DCH_MEMCLK_166MHZ << DCH_MEMCLK_SHIFT,
			.dch_tref4k = DTH_TREF_166MHZ_4K,
			.dch_tref8k = DTH_TREF_166MHZ_8K,
			.dtl_twr    = 3,
		},
		{
			.name	    = "200Mhz\r\n",
			.cycle_time = 0x50,
			.divisor    = (5<<1),
			.tRC	    = 0x37,
			.tRFC	    = 0x46,
			.dch_memclk = DCH_MEMCLK_200MHZ << DCH_MEMCLK_SHIFT,
			.dch_tref4k = DTH_TREF_200MHZ_4K,
			.dch_tref8k = DTH_TREF_200MHZ_8K,
			.dtl_twr    = 3,
		},
		{
			.cycle_time = 0x00,
		},
	};
	const struct mem_param *param;
	for(param = &speed[0]; param->cycle_time ; param++) {
		if (min_cycle_time > (param+1)->cycle_time) {
			break;
		}
	}
	if (!param->cycle_time) {
		die("min_cycle_time too low");
	}
	print_spew(param->name);
#ifdef DRAM_MIN_CYCLE_TIME
	print_debug(param->name);
#endif
	return param;
}

struct spd_set_memclk_result {
	const struct mem_param *param;
	long dimm_mask;
};

static struct spd_set_memclk_result spd_set_memclk(const struct mem_controller *ctrl, long dimm_mask)
{
	/* Compute the minimum cycle time for these dimms */
	struct spd_set_memclk_result result;
	unsigned min_cycle_time, min_latency, bios_cycle_time;
	int i;
	int value;

	static const int latency_indicies[] = { 26, 23, 9 };
	static const unsigned char min_cycle_times[] = {
		[NBCAP_MEMCLK_200MHZ] = 0x50, /* 5ns */
		[NBCAP_MEMCLK_166MHZ] = 0x60, /* 6ns */
		[NBCAP_MEMCLK_133MHZ] = 0x75, /* 7.5ns */
		[NBCAP_MEMCLK_100MHZ] = 0xa0, /* 10ns */
	};
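	/* Editor's note: SPD cycle times encode nanoseconds in the upper
	 * nibble and tenths of ns in the lower one, so 0x50 = 5.0ns
	 * (200MHz), 0x75 = 7.5ns (133MHz) and 0xa0 = 10.0ns (100MHz).
	 * That is why these constants compare directly against SPD bytes
	 * 9, 23 and 26.
	 */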
	value = pci_read_config32(ctrl->f3, NORTHBRIDGE_CAP);
	min_cycle_time = min_cycle_times[(value >> NBCAP_MEMCLK_SHIFT) & NBCAP_MEMCLK_MASK];
	bios_cycle_time = min_cycle_times[
		read_option(CMOS_VSTART_max_mem_clock, CMOS_VLEN_max_mem_clock, 0)];
	if (bios_cycle_time > min_cycle_time) {
		min_cycle_time = bios_cycle_time;
	}
	min_latency = 2;

	/* Compute the least latency with the fastest clock supported
	 * by both the memory controller and the dimms.
	 */
	for(i = 0; i < DIMM_SOCKETS; i++) {
		int new_cycle_time, new_latency;
		int index;
		int latencies;
		int latency;

		if (!(dimm_mask & (1 << i))) {
			continue;
		}

		/* First find the supported CAS latencies
		 * Byte 18 for DDR SDRAM is interpreted:
		 * bit 0 == CAS Latency = 1.0
		 * bit 1 == CAS Latency = 1.5
		 * bit 2 == CAS Latency = 2.0
		 * bit 3 == CAS Latency = 2.5
		 * bit 4 == CAS Latency = 3.0
		 * bit 5 == CAS Latency = 3.5
		 */
		new_cycle_time = 0xa0;
		new_latency = 5;

		latencies = spd_read_byte(ctrl->channel0[i], 18);
		if (latencies <= 0) continue;

		/* Compute the lowest cas latency supported */
		latency = log2(latencies) - 2;

		/* Loop through and find a fast clock with a low latency */
		for(index = 0; index < 3; index++, latency++) {
			if ((latency < 2) || (latency > 4) ||
				(!(latencies & (1 << latency)))) {
				continue;
			}
			value = spd_read_byte(ctrl->channel0[i], latency_indicies[index]);
			if (value < 0) {
				goto hw_error;
			}

			/* Only increase the latency if we decrease the clock */
			if ((value >= min_cycle_time) && (value < new_cycle_time)) {
				new_cycle_time = value;
				new_latency = latency;
			}
		}
		if (new_latency > 4){
			continue;
		}
		/* Does min_cycle_time need to be increased? */
		if (new_cycle_time > min_cycle_time) {
			min_cycle_time = new_cycle_time;
		}
		/* Does min_latency need to be increased? */
		if (new_latency > min_latency) {
			min_latency = new_latency;
		}
	}

	/* Make a second pass through the dimms and disable
	 * any that cannot support the selected memclk and cas latency.
	 */
	for(i = 0; (i < 4) && (ctrl->channel0[i]); i++) {
		int latencies;
		int latency;
		int index;
		if (!(dimm_mask & (1 << i))) {
			continue;
		}
		latencies = spd_read_byte(ctrl->channel0[i], 18);
		if (latencies < 0) goto hw_error;
		if (latencies == 0) {
			continue;
		}

		/* Compute the lowest cas latency supported */
		latency = log2(latencies) - 2;

		/* Walk through searching for the selected latency */
		for(index = 0; index < 3; index++, latency++) {
			if (!(latencies & (1 << latency))) {
				continue;
			}
			if (latency == min_latency)
				break;
		}
		/* If I can't find the latency or my index is bad error */
		if ((latency != min_latency) || (index >= 3)) {
			goto dimm_err;
		}

		/* Read the min_cycle_time for this latency */
		value = spd_read_byte(ctrl->channel0[i], latency_indicies[index]);
		if (value < 0) goto hw_error;

		/* All is good if the selected clock speed
		 * is what I need or slower.
		 */
		if (value <= min_cycle_time) {
			continue;
		}
		/* Otherwise I have an error, disable the dimm */
 dimm_err:
		dimm_mask = disable_dimm(ctrl, i, dimm_mask);
	}

	// Reduce the speed under full load for 4-rank DIMM support
#if K8_4RANK_DIMM_SUPPORT
	if (dimm_mask == (3|(3<<DIMM_SOCKETS)) ) {
		int ranks = 4;
		for(i = 0; (i < 4) && (ctrl->channel0[i]); i++) {
			int val;
			if (!(dimm_mask & (1 << i))) {
				continue;
			}
			val = spd_read_byte(ctrl->channel0[i], 5);
			if (val != ranks) {
				ranks = val;
				break;
			}
		}
		if (ranks == 4) {
			if (min_cycle_time <= 0x50 ) {
				min_cycle_time = 0x60;
			}
		}
	}
#endif

	/* Now that I know the minimum cycle time lookup the memory parameters */
	result.param = get_mem_param(min_cycle_time);

	/* Update DRAM Config High with our selected memory speed */
	value = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH);
	value &= ~(DCH_MEMCLK_MASK << DCH_MEMCLK_SHIFT);
	/* Improve DQS centering: when the core speed multiplier and the
	 * MEMCLK speed result in an odd clock divisor, select the next
	 * lower memory speed.  Required only at DDR400 and higher speeds
	 * with certain DIMM loadings ---- cheating???
	 */
	if (!is_cpu_pre_e0()) {
		if (min_cycle_time == 0x50) {
			value |= 1<<31;
		}
	}

	value |= result.param->dch_memclk;
	pci_write_config32(ctrl->f2, DRAM_CONFIG_HIGH, value);

	static const unsigned latencies[] = { DTL_CL_2, DTL_CL_2_5, DTL_CL_3 };
	/* Update DRAM Timing Low with our selected cas latency */
	value = pci_read_config32(ctrl->f2, DRAM_TIMING_LOW);
	value &= ~(DTL_TCL_MASK << DTL_TCL_SHIFT);
	value |= latencies[min_latency - 2] << DTL_TCL_SHIFT;
	pci_write_config32(ctrl->f2, DRAM_TIMING_LOW, value);

	result.dimm_mask = dimm_mask;
	return result;
 hw_error:
	result.param = (const struct mem_param *)0;
	result.dimm_mask = -1;
	return result;
}

static int update_dimm_Trc(const struct mem_controller *ctrl, const struct mem_param *param, int i)
{
	unsigned clocks, old_clocks;
	uint32_t dtl;
	int value;
	value = spd_read_byte(ctrl->channel0[i], 41);
	if (value < 0) return -1;
	if ((value == 0) || (value == 0xff)) {
		value = param->tRC;
	}
	clocks = ((value << 1) + param->divisor - 1)/param->divisor;
	if (clocks < DTL_TRC_MIN) {
		clocks = DTL_TRC_MIN;
	}
	if (clocks > DTL_TRC_MAX) {
		return 0;
	}

	dtl = pci_read_config32(ctrl->f2, DRAM_TIMING_LOW);
	old_clocks = ((dtl >> DTL_TRC_SHIFT) & DTL_TRC_MASK) + DTL_TRC_BASE;
	if (old_clocks > clocks) {
		clocks = old_clocks;
	}
	dtl &= ~(DTL_TRC_MASK << DTL_TRC_SHIFT);
	dtl |= ((clocks - DTL_TRC_BASE) << DTL_TRC_SHIFT);
	pci_write_config32(ctrl->f2, DRAM_TIMING_LOW, dtl);
	return 1;
}
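/* Worked example (editor's illustration): SPD byte 41 gives Trc in ns
 * and param->divisor is the MEMCLK period in half-ns, so the expression
 * above is clocks = ceil((2 * ns) / divisor).  At 200MHz (divisor 10)
 * an SPD value of 55ns yields ceil(110 / 10) = 11 clocks, then clamped
 * to [DTL_TRC_MIN, DTL_TRC_MAX].
 */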
static int update_dimm_Trfc(const struct mem_controller *ctrl, const struct mem_param *param, int i)
{
	unsigned clocks, old_clocks;
	uint32_t dtl;
	int value;
	value = spd_read_byte(ctrl->channel0[i], 42);
	if (value < 0) return -1;
	if ((value == 0) || (value == 0xff)) {
		value = param->tRFC;
	}
	clocks = ((value << 1) + param->divisor - 1)/param->divisor;
	if (clocks < DTL_TRFC_MIN) {
		clocks = DTL_TRFC_MIN;
	}
	if (clocks > DTL_TRFC_MAX) {
		return 0;
	}
	dtl = pci_read_config32(ctrl->f2, DRAM_TIMING_LOW);
	old_clocks = ((dtl >> DTL_TRFC_SHIFT) & DTL_TRFC_MASK) + DTL_TRFC_BASE;
	if (old_clocks > clocks) {
		clocks = old_clocks;
	}
	dtl &= ~(DTL_TRFC_MASK << DTL_TRFC_SHIFT);
	dtl |= ((clocks - DTL_TRFC_BASE) << DTL_TRFC_SHIFT);
	pci_write_config32(ctrl->f2, DRAM_TIMING_LOW, dtl);
	return 1;
}

static int update_dimm_Trcd(const struct mem_controller *ctrl, const struct mem_param *param, int i)
{
	unsigned clocks, old_clocks;
	uint32_t dtl;
	int value;
	value = spd_read_byte(ctrl->channel0[i], 29);
	if (value < 0) return -1;
	clocks = (value + (param->divisor << 1) -1)/(param->divisor << 1);
	if (clocks < DTL_TRCD_MIN) {
		clocks = DTL_TRCD_MIN;
	}
	if (clocks > DTL_TRCD_MAX) {
		return 0;
	}
	dtl = pci_read_config32(ctrl->f2, DRAM_TIMING_LOW);
	old_clocks = ((dtl >> DTL_TRCD_SHIFT) & DTL_TRCD_MASK) + DTL_TRCD_BASE;
	if (old_clocks > clocks) {
		clocks = old_clocks;
	}
	dtl &= ~(DTL_TRCD_MASK << DTL_TRCD_SHIFT);
	dtl |= ((clocks - DTL_TRCD_BASE) << DTL_TRCD_SHIFT);
	pci_write_config32(ctrl->f2, DRAM_TIMING_LOW, dtl);
	return 1;
}

static int update_dimm_Trrd(const struct mem_controller *ctrl, const struct mem_param *param, int i)
{
	unsigned clocks, old_clocks;
	uint32_t dtl;
	int value;
	value = spd_read_byte(ctrl->channel0[i], 28);
	if (value < 0) return -1;
	clocks = (value + (param->divisor << 1) -1)/(param->divisor << 1);
	if (clocks < DTL_TRRD_MIN) {
		clocks = DTL_TRRD_MIN;
	}
	if (clocks > DTL_TRRD_MAX) {
		return 0;
	}
	dtl = pci_read_config32(ctrl->f2, DRAM_TIMING_LOW);
	old_clocks = ((dtl >> DTL_TRRD_SHIFT) & DTL_TRRD_MASK) + DTL_TRRD_BASE;
	if (old_clocks > clocks) {
		clocks = old_clocks;
	}
	dtl &= ~(DTL_TRRD_MASK << DTL_TRRD_SHIFT);
	dtl |= ((clocks - DTL_TRRD_BASE) << DTL_TRRD_SHIFT);
	pci_write_config32(ctrl->f2, DRAM_TIMING_LOW, dtl);
	return 1;
}

static int update_dimm_Tras(const struct mem_controller *ctrl, const struct mem_param *param, int i)
{
	unsigned clocks, old_clocks;
	uint32_t dtl;
	int value;
	value = spd_read_byte(ctrl->channel0[i], 30);
	if (value < 0) return -1;
	clocks = ((value << 1) + param->divisor - 1)/param->divisor;
	if (clocks < DTL_TRAS_MIN) {
		clocks = DTL_TRAS_MIN;
	}
	if (clocks > DTL_TRAS_MAX) {
		return 0;
	}
	dtl = pci_read_config32(ctrl->f2, DRAM_TIMING_LOW);
	old_clocks = ((dtl >> DTL_TRAS_SHIFT) & DTL_TRAS_MASK) + DTL_TRAS_BASE;
	if (old_clocks > clocks) {
		clocks = old_clocks;
	}
	dtl &= ~(DTL_TRAS_MASK << DTL_TRAS_SHIFT);
	dtl |= ((clocks - DTL_TRAS_BASE) << DTL_TRAS_SHIFT);
	pci_write_config32(ctrl->f2, DRAM_TIMING_LOW, dtl);
	return 1;
}

static int update_dimm_Trp(const struct mem_controller *ctrl, const struct mem_param *param, int i)
{
	unsigned clocks, old_clocks;
	uint32_t dtl;
	int value;
	value = spd_read_byte(ctrl->channel0[i], 27);
	if (value < 0) return -1;
	clocks = (value + (param->divisor << 1) - 1)/(param->divisor << 1);
	if (clocks < DTL_TRP_MIN) {
		clocks = DTL_TRP_MIN;
	}
	if (clocks > DTL_TRP_MAX) {
		return 0;
	}
	dtl = pci_read_config32(ctrl->f2, DRAM_TIMING_LOW);
	old_clocks = ((dtl >> DTL_TRP_SHIFT) & DTL_TRP_MASK) + DTL_TRP_BASE;
	if (old_clocks > clocks) {
		clocks = old_clocks;
	}
	dtl &= ~(DTL_TRP_MASK << DTL_TRP_SHIFT);
	dtl |= ((clocks - DTL_TRP_BASE) << DTL_TRP_SHIFT);
	pci_write_config32(ctrl->f2, DRAM_TIMING_LOW, dtl);
	return 1;
}
static void set_Twr(const struct mem_controller *ctrl, const struct mem_param *param)
{
	uint32_t dtl;
	dtl = pci_read_config32(ctrl->f2, DRAM_TIMING_LOW);
	dtl &= ~(DTL_TWR_MASK << DTL_TWR_SHIFT);
	dtl |= (param->dtl_twr - DTL_TWR_BASE) << DTL_TWR_SHIFT;
	pci_write_config32(ctrl->f2, DRAM_TIMING_LOW, dtl);
}

static void init_Tref(const struct mem_controller *ctrl, const struct mem_param *param)
{
	uint32_t dth;
	dth = pci_read_config32(ctrl->f2, DRAM_TIMING_HIGH);
	dth &= ~(DTH_TREF_MASK << DTH_TREF_SHIFT);
	dth |= (param->dch_tref4k << DTH_TREF_SHIFT);
	pci_write_config32(ctrl->f2, DRAM_TIMING_HIGH, dth);
}

static int update_dimm_Tref(const struct mem_controller *ctrl, const struct mem_param *param, int i)
{
	uint32_t dth;
	int value;
	unsigned tref, old_tref;
	value = spd_read_byte(ctrl->channel0[i], 3);	/* rows */
	if (value < 0) return -1;
	if (value != 12) {
		tref = param->dch_tref8k;
	}
	else {
		tref = param->dch_tref4k;
	}

	dth = pci_read_config32(ctrl->f2, DRAM_TIMING_HIGH);
	old_tref = (dth >> DTH_TREF_SHIFT) & DTH_TREF_MASK;
	if ((value == 12) && (old_tref == param->dch_tref4k)) {
		tref = param->dch_tref4k;
	}
	else {
		tref = param->dch_tref8k;
	}
	dth &= ~(DTH_TREF_MASK << DTH_TREF_SHIFT);
	dth |= (tref << DTH_TREF_SHIFT);
	pci_write_config32(ctrl->f2, DRAM_TIMING_HIGH, dth);
	return 1;
}

static int update_dimm_x4(const struct mem_controller *ctrl, const struct mem_param *param, int i)
{
	uint32_t dcl;
	int value;
#if K8_4RANK_DIMM_SUPPORT == 1
	int rank;
#endif
	unsigned dimm;
	value = spd_read_byte(ctrl->channel0[i], 13);	/* SDRAM width */
	if (value < 0) return -1;

#if K8_4RANK_DIMM_SUPPORT == 1
	rank = spd_read_byte(ctrl->channel0[i], 5);	/* number of physical banks */
	if (rank < 0) return -1;
#endif

	dimm = 1<<(DCL_x4DIMM_SHIFT+i);
#if K8_4RANK_DIMM_SUPPORT == 1
	if (rank == 4) {
		dimm |= 1<<(DCL_x4DIMM_SHIFT+i+2);
	}
#endif
	dcl = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
	dcl &= ~dimm;
	if (value == 4) {
		dcl |= dimm;
	}
	pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dcl);
	return 1;
}

static int update_dimm_ecc(const struct mem_controller *ctrl, const struct mem_param *param, int i)
{
	uint32_t dcl;
	int value;
	value = spd_read_byte(ctrl->channel0[i], 11);	/* SDRAM error handling (ECC) */
	if (value < 0) return -1;
	if (value != 2) {
		/* This dimm does not support ECC, disable it for the controller */
		dcl = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
		dcl &= ~DCL_DimmEccEn;
		pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dcl);
	}
	return 1;
}

static int count_dimms(const struct mem_controller *ctrl)
{
	int dimms;
	unsigned index;
	dimms = 0;
	for(index = 0; index < 8; index += 2) {
		uint32_t csbase;
		csbase = pci_read_config32(ctrl->f2, (DRAM_CSBASE + (index << 2)));
		if (csbase & 1) {
			dimms += 1;
		}
	}
	return dimms;
}

static void set_Twtr(const struct mem_controller *ctrl, const struct mem_param *param)
{
	uint32_t dth;
	unsigned clocks;
	clocks = 1; /* AMD says hard code this */
	dth = pci_read_config32(ctrl->f2, DRAM_TIMING_HIGH);
	dth &= ~(DTH_TWTR_MASK << DTH_TWTR_SHIFT);
	dth |= ((clocks - DTH_TWTR_BASE) << DTH_TWTR_SHIFT);
	pci_write_config32(ctrl->f2, DRAM_TIMING_HIGH, dth);
}
static void set_Trwt(const struct mem_controller *ctrl, const struct mem_param *param)
{
	uint32_t dth, dtl;
	unsigned divisor;
	unsigned latency;
	unsigned clocks;

	clocks = 0;
	dtl = pci_read_config32(ctrl->f2, DRAM_TIMING_LOW);
	latency = (dtl >> DTL_TCL_SHIFT) & DTL_TCL_MASK;
	divisor = param->divisor;

	if (is_opteron(ctrl)) {
		if (latency == DTL_CL_2) {
			if (divisor == ((6 << 0) + 0)) {
				/* 166Mhz */
				clocks = 3;
			}
			else if (divisor > ((6 << 0)+0)) {
				/* 100Mhz && 133Mhz */
				clocks = 2;
			}
		}
		else if (latency == DTL_CL_2_5) {
			clocks = 3;
		}
		else if (latency == DTL_CL_3) {
			if (divisor == ((6 << 0)+0)) {
				/* 166Mhz */
				clocks = 4;
			}
			else if (divisor > ((6 << 0)+0)) {
				/* 100Mhz && 133Mhz */
				clocks = 3;
			}
		}
	}
	else /* Athlon64 */ {
		if (is_registered(ctrl)) {
			if (latency == DTL_CL_2) {
				clocks = 2;
			}
			else if (latency == DTL_CL_2_5) {
				clocks = 3;
			}
			else if (latency == DTL_CL_3) {
				clocks = 3;
			}
		}
		else /* Unbuffered */ {
			if (latency == DTL_CL_2) {
				clocks = 3;
			}
			else if (latency == DTL_CL_2_5) {
				clocks = 4;
			}
			else if (latency == DTL_CL_3) {
				clocks = 4;
			}
		}
	}
	if ((clocks < DTH_TRWT_MIN) || (clocks > DTH_TRWT_MAX)) {
		die("Unknown Trwt\r\n");
	}

	dth = pci_read_config32(ctrl->f2, DRAM_TIMING_HIGH);
	dth &= ~(DTH_TRWT_MASK << DTH_TRWT_SHIFT);
	dth |= ((clocks - DTH_TRWT_BASE) << DTH_TRWT_SHIFT);
	pci_write_config32(ctrl->f2, DRAM_TIMING_HIGH, dth);
}

static void set_Twcl(const struct mem_controller *ctrl, const struct mem_param *param)
{
	/* Memory Clocks after CAS# */
	uint32_t dth;
	unsigned clocks;
	if (is_registered(ctrl)) {
		clocks = 2;
	}
	else {
		clocks = 1;
	}
	dth = pci_read_config32(ctrl->f2, DRAM_TIMING_HIGH);
	dth &= ~(DTH_TWCL_MASK << DTH_TWCL_SHIFT);
	dth |= ((clocks - DTH_TWCL_BASE) << DTH_TWCL_SHIFT);
	pci_write_config32(ctrl->f2, DRAM_TIMING_HIGH, dth);
}
static void set_read_preamble(const struct mem_controller *ctrl, const struct mem_param *param)
{
	uint32_t dch;
	unsigned divisor;
	unsigned rdpreamble;
	divisor = param->divisor;
	dch = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH);
	dch &= ~(DCH_RDPREAMBLE_MASK << DCH_RDPREAMBLE_SHIFT);
	rdpreamble = 0;
	if (is_registered(ctrl)) {
		if (divisor == ((10 << 1)+0)) {
			/* 100Mhz, 9ns */
			rdpreamble = ((9 << 1)+ 0);
		}
		else if (divisor == ((7 << 1)+1)) {
			/* 133Mhz, 8ns */
			rdpreamble = ((8 << 1)+0);
		}
		else if (divisor == ((6 << 1)+0)) {
			/* 166Mhz, 7.5ns */
			rdpreamble = ((7 << 1)+1);
		}
		else if (divisor == ((5 << 1)+0)) {
			/* 200Mhz, 7ns */
			rdpreamble = ((7 << 1)+0);
		}
	}
	else {
		int slots;
		int i;
		slots = 0;
		for(i = 0; i < 4; i++) {
			if (ctrl->channel0[i]) {
				slots += 1;
			}
		}
		if (divisor == ((10 << 1)+0)) {
			/* 100Mhz */
			if (slots <= 2) {
				/* 9ns */
				rdpreamble = ((9 << 1)+0);
			}
			else {
				/* 14ns */
				rdpreamble = ((14 << 1)+0);
			}
		}
		else if (divisor == ((7 << 1)+1)) {
			/* 133Mhz */
			if (slots <= 2) {
				/* 7ns */
				rdpreamble = ((7 << 1)+0);
			}
			else {
				/* 11ns */
				rdpreamble = ((11 << 1)+0);
			}
		}
		else if (divisor == ((6 << 1)+0)) {
			/* 166Mhz */
			if (slots <= 2) {
				/* 7ns */
				rdpreamble = ((7 << 1)+0);
			}
			else {
				/* 9ns */
				rdpreamble = ((9 << 1)+0);
			}
		}
		else if (divisor == ((5 << 1)+0)) {
			/* 200Mhz */
			if (slots <= 2) {
				/* 5ns */
				rdpreamble = ((5 << 1)+0);
			}
			else {
				/* 7ns */
				rdpreamble = ((7 << 1)+0);
			}
		}
	}
	if ((rdpreamble < DCH_RDPREAMBLE_MIN) || (rdpreamble > DCH_RDPREAMBLE_MAX)) {
		die("Unknown rdpreamble");
	}
	dch |= (rdpreamble - DCH_RDPREAMBLE_BASE) << DCH_RDPREAMBLE_SHIFT;
	pci_write_config32(ctrl->f2, DRAM_CONFIG_HIGH, dch);
}

static void set_max_async_latency(const struct mem_controller *ctrl, const struct mem_param *param)
{
	uint32_t dch;
	unsigned async_lat;
	int dimms;

	dimms = count_dimms(ctrl);

	dch = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH);
	dch &= ~(DCH_ASYNC_LAT_MASK << DCH_ASYNC_LAT_SHIFT);
	async_lat = 0;
	if (is_registered(ctrl)) {
		if (dimms == 4) {
			/* 9ns */
			async_lat = 9;
		}
		else {
			/* 8ns */
			async_lat = 8;
		}
	}
	else {
		if (dimms > 3) {
			die("Too many unbuffered dimms");
		}
		else if (dimms == 3) {
			/* 7ns */
			async_lat = 7;
		}
		else {
			/* 6ns */
			async_lat = 6;
		}
	}
	dch |= ((async_lat - DCH_ASYNC_LAT_BASE) << DCH_ASYNC_LAT_SHIFT);
	pci_write_config32(ctrl->f2, DRAM_CONFIG_HIGH, dch);
}

static void set_idle_cycle_limit(const struct mem_controller *ctrl, const struct mem_param *param)
{
	uint32_t dch;
	/* AMD says to hardcode this */
	dch = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH);
	dch &= ~(DCH_IDLE_LIMIT_MASK << DCH_IDLE_LIMIT_SHIFT);
	dch |= DCH_IDLE_LIMIT_16 << DCH_IDLE_LIMIT_SHIFT;
	dch |= DCH_DYN_IDLE_CTR_EN;
	pci_write_config32(ctrl->f2, DRAM_CONFIG_HIGH, dch);
}
static long spd_set_dram_timing(const struct mem_controller *ctrl, const struct mem_param *param, long dimm_mask)
{
	int i;

	init_Tref(ctrl, param);
	for(i = 0; i < DIMM_SOCKETS; i++) {
		int rc;
		if (!(dimm_mask & (1 << i))) {
			continue;
		}
		/* DRAM Timing Low Register */
		if ((rc = update_dimm_Trc (ctrl, param, i)) <= 0) goto dimm_err;
		if ((rc = update_dimm_Trfc(ctrl, param, i)) <= 0) goto dimm_err;
		if ((rc = update_dimm_Trcd(ctrl, param, i)) <= 0) goto dimm_err;
		if ((rc = update_dimm_Trrd(ctrl, param, i)) <= 0) goto dimm_err;
		if ((rc = update_dimm_Tras(ctrl, param, i)) <= 0) goto dimm_err;
		if ((rc = update_dimm_Trp (ctrl, param, i)) <= 0) goto dimm_err;

		/* DRAM Timing High Register */
		if ((rc = update_dimm_Tref(ctrl, param, i)) <= 0) goto dimm_err;

		/* DRAM Config Low */
		if ((rc = update_dimm_x4 (ctrl, param, i)) <= 0) goto dimm_err;
		if ((rc = update_dimm_ecc(ctrl, param, i)) <= 0) goto dimm_err;
		continue;
 dimm_err:
		if (rc < 0) {
			return -1;
		}
		dimm_mask = disable_dimm(ctrl, i, dimm_mask);
	}
	/* DRAM Timing Low Register */
	set_Twr(ctrl, param);

	/* DRAM Timing High Register */
	set_Twtr(ctrl, param);
	set_Trwt(ctrl, param);
	set_Twcl(ctrl, param);

	/* DRAM Config High */
	set_read_preamble(ctrl, param);
	set_max_async_latency(ctrl, param);
	set_idle_cycle_limit(ctrl, param);
	return dimm_mask;
}

static void sdram_set_spd_registers(const struct mem_controller *ctrl)
{
	struct spd_set_memclk_result result;
	const struct mem_param *param;
	long dimm_mask;

	if (!controller_present(ctrl)) {
//		print_debug("No memory controller present\r\n");
		return;
	}
	hw_enable_ecc(ctrl);
	activate_spd_rom(ctrl);
	dimm_mask = spd_detect_dimms(ctrl);
	if (!(dimm_mask & ((1 << DIMM_SOCKETS) - 1))) {
		print_debug("No memory for this cpu\r\n");
		return;
	}
	dimm_mask = spd_enable_2channels(ctrl, dimm_mask);
	if (dimm_mask < 0)
		goto hw_spd_err;
	dimm_mask = spd_set_ram_size(ctrl , dimm_mask);
	if (dimm_mask < 0)
		goto hw_spd_err;
	dimm_mask = spd_handle_unbuffered_dimms(ctrl, dimm_mask);
	if (dimm_mask < 0)
		goto hw_spd_err;
	result = spd_set_memclk(ctrl, dimm_mask);
	param = result.param;
	dimm_mask = result.dimm_mask;
	if (dimm_mask < 0)
		goto hw_spd_err;
	dimm_mask = spd_set_dram_timing(ctrl, param , dimm_mask);
	if (dimm_mask < 0)
		goto hw_spd_err;
	order_dimms(ctrl);
	return;
 hw_spd_err:
	/* Unrecoverable error reading SPD data */
	print_err("SPD error - reset\r\n");
	hard_reset();
	return;
}

#define TIMEOUT_LOOPS 300000
static void sdram_enable(int controllers, const struct mem_controller *ctrl)
{
	int i;

	/* Error if I don't have memory */
	if (memory_end_k(ctrl, controllers) == 0) {
		die("No memory\r\n");
	}

	/* Before enabling memory start the memory clocks */
	for(i = 0; i < controllers; i++) {
		uint32_t dch;
		if (!controller_present(ctrl + i))
			continue;
		dch = pci_read_config32(ctrl[i].f2, DRAM_CONFIG_HIGH);
		if (dch & (DCH_MEMCLK_EN0|DCH_MEMCLK_EN1|DCH_MEMCLK_EN2|DCH_MEMCLK_EN3)) {
			dch |= DCH_MEMCLK_VALID;
			pci_write_config32(ctrl[i].f2, DRAM_CONFIG_HIGH, dch);
		}
		else {
			/* Disable dram receivers */
			uint32_t dcl;
			dcl = pci_read_config32(ctrl[i].f2, DRAM_CONFIG_LOW);
			dcl |= DCL_DisInRcvrs;
			pci_write_config32(ctrl[i].f2, DRAM_CONFIG_LOW, dcl);
		}
	}

	/* And if necessary toggle the reset on the dimms by hand */
	memreset(controllers, ctrl);

	for(i = 0; i < controllers; i++) {
		uint32_t dcl, dch;
		if (!controller_present(ctrl + i))
			continue;
		/* Skip everything if I don't have any memory on this controller */
		dch = pci_read_config32(ctrl[i].f2, DRAM_CONFIG_HIGH);
		if (!(dch & DCH_MEMCLK_VALID)) {
			continue;
		}

		/* Toggle DisDqsHys to get it working */
		dcl = pci_read_config32(ctrl[i].f2, DRAM_CONFIG_LOW);
		if (dcl & DCL_DimmEccEn) {
			uint32_t mnc;
			print_spew("ECC enabled\r\n");
			mnc = pci_read_config32(ctrl[i].f3, MCA_NB_CONFIG);
			mnc |= MNC_ECC_EN;
			if (dcl & DCL_128BitEn) {
				mnc |= MNC_CHIPKILL_EN;
			}
			pci_write_config32(ctrl[i].f3, MCA_NB_CONFIG, mnc);
		}
		dcl |= DCL_DisDqsHys;
		pci_write_config32(ctrl[i].f2, DRAM_CONFIG_LOW, dcl);
		dcl &= ~DCL_DisDqsHys;
		dcl &= ~DCL_DLL_Disable;
		dcl |= DCL_DramInit;
		pci_write_config32(ctrl[i].f2, DRAM_CONFIG_LOW, dcl);
	}

	for(i = 0; i < controllers; i++) {
		uint32_t dcl, dch;
		int loops;
		if (!controller_present(ctrl + i))
			continue;
		/* Skip everything if I don't have any memory on this controller */
		dch = pci_read_config32(ctrl[i].f2, DRAM_CONFIG_HIGH);
		if (!(dch & DCH_MEMCLK_VALID)) {
			continue;
		}

		print_debug("Initializing memory: ");
		loops = 0;
		do {
			dcl = pci_read_config32(ctrl[i].f2, DRAM_CONFIG_LOW);
			loops += 1;
			if ((loops & 1023) == 0) {
				print_debug(".");
			}
		} while(((dcl & DCL_DramInit) != 0) && (loops < TIMEOUT_LOOPS));
		if (loops >= TIMEOUT_LOOPS) {
			print_debug(" failed\r\n");
			continue;
		}
		if (!is_cpu_pre_c0()) {
			/* Wait until it is safe to touch memory */
			dcl &= ~(DCL_MemClrStatus | DCL_DramEnable);
			pci_write_config32(ctrl[i].f2, DRAM_CONFIG_LOW, dcl);
			do {
				dcl = pci_read_config32(ctrl[i].f2, DRAM_CONFIG_LOW);
			} while(((dcl & DCL_MemClrStatus) == 0) || ((dcl & DCL_DramEnable) == 0) );
		}

		// init the E0 memory hole here
#if K8_E0_MEM_HOLE_SIZEK != 0
		if (!is_cpu_pre_e0()) {
			uint32_t base, limit;
			unsigned base_k, limit_k;
			base = pci_read_config32(ctrl->f1, 0x40 + (i << 3));
			limit = pci_read_config32(ctrl->f1, 0x44 + (i << 3));
			base_k = (base & 0xffff0000) >> 2;
			limit_k = ((limit + 0x00010000) & 0xffff0000) >> 2;
			if ((base_k <= K8_E0_MEM_HOLE_BASEK) && (limit_k > K8_E0_MEM_HOLE_BASEK)) {
				set_e0_mem_hole(ctrl+i, base_k);
			}
		}
#endif
		print_debug(" done\r\n");
	}

	// FIXME: add enable node interleaving here -- yhlu
	/*
	 * 1. Check how many nodes we have; if not all have RAM installed, get out.
	 * 2. Check that node 0 uses the low range as CS_BASE (f2 0x40...); if any
	 *    chip select does not, get out.
	 * 3. Check that the other nodes match node 0 in f2 0x40...; otherwise get out.
	 * 4. If all are ready, enable node interleaving in f1 0x40... of every node.
	 * 5. For node interleaving we need to set the mem hole on every node (need to
	 *    recalculate the hole offset in f0 for every node).
	 */

#if CONFIG_DCACHE_RAM == 0
	/* Make certain the first 1M of memory is initialized */
	print_debug("Clearing initial memory region: ");

	/* Use write combine caching while we setup the first 1M */
	cache_lbmem(MTRR_TYPE_WRCOMB);

	/* Clear the first 1M of memory */
	clear_memory((void *)0, CONFIG_LB_MEM_TOPK << 10);

	/* The first 1M is now setup, use it */
	cache_lbmem(MTRR_TYPE_WRBACK);

	print_debug(" done\r\n");
#endif
}