/* This should be done by Eric
 * 2004.11 yhlu add 4 rank DIMM support
 * 2004.12 yhlu add D0 support
 * 2005.02 yhlu add E0 memory hole support
#include <cpu/x86/mem.h>
#include <cpu/x86/cache.h>
#include <cpu/x86/mtrr.h>
#if (CONFIG_LB_MEM_TOPK & (CONFIG_LB_MEM_TOPK - 1)) != 0
# error "CONFIG_LB_MEM_TOPK must be a power of 2"
#ifndef K8_4RANK_DIMM_SUPPORT
#define K8_4RANK_DIMM_SUPPORT 0
static void setup_resource_map(const unsigned int *register_values, int max)
//	print_debug("setting up resource map....");
	for(i = 0; i < max; i += 3) {
		print_debug_hex32(register_values[i]);
		print_debug_hex32(register_values[i+2]);
		dev = register_values[i] & ~0xff;
		where = register_values[i] & 0xff;
		reg = pci_read_config32(dev, where);
		reg &= register_values[i+1];
		reg |= register_values[i+2];
		pci_write_config32(dev, where, reg);
		reg = pci_read_config32(register_values[i]);
		reg &= register_values[i+1];
		reg |= register_values[i+2] & ~register_values[i+1];
		pci_write_config32(register_values[i], reg);
//	print_debug("done.\r\n");
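
/* A minimal sketch (illustration only, not part of the original flow) of
 * how the register_values triples are consumed: each entry is
 * { PCI address, AND mask of bits to preserve, OR value of bits to set }.
 * The hypothetical table below would clear the low nibble of register
 * 0x44 on node 0, function 1, then set it to 5.
 */
#if 0
static const unsigned int example_map[] = {
	PCI_ADDR(0, 0x18, 1, 0x44), 0xfffffff0, 0x00000005,
};
/* setup_resource_map(example_map, sizeof(example_map)/sizeof(example_map[0])); */
#endif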
static int controller_present(const struct mem_controller *ctrl)
	/* Config reg 0 holds the device/vendor ID: device 0x1100 (K8 HT config), vendor 0x1022 (AMD) */
	return pci_read_config32(ctrl->f0, 0) == 0x11001022;
static void sdram_set_registers(const struct mem_controller *ctrl)
	static const unsigned int register_values[] = {
	/* Be careful to set the limit registers before the base registers, which contain the enables */
	/* DRAM Limit i Registers
	 * [ 2: 0] Destination Node ID
	 * [10: 8] Interleave select
	 *         Specifies the values of A[14:12] to use with interleave enable.
	 * [31:16] DRAM Limit Address i Bits 39-24
	 *         This field defines the upper address bits of a 40-bit address
	 *         that defines the end of the DRAM region.
	 */
	PCI_ADDR(0, 0x18, 1, 0x44), 0x0000f8f8, 0x00000000,
	PCI_ADDR(0, 0x18, 1, 0x4C), 0x0000f8f8, 0x00000001,
	PCI_ADDR(0, 0x18, 1, 0x54), 0x0000f8f8, 0x00000002,
	PCI_ADDR(0, 0x18, 1, 0x5C), 0x0000f8f8, 0x00000003,
	PCI_ADDR(0, 0x18, 1, 0x64), 0x0000f8f8, 0x00000004,
	PCI_ADDR(0, 0x18, 1, 0x6C), 0x0000f8f8, 0x00000005,
	PCI_ADDR(0, 0x18, 1, 0x74), 0x0000f8f8, 0x00000006,
	PCI_ADDR(0, 0x18, 1, 0x7C), 0x0000f8f8, 0x00000007,
	/* DRAM Base i Registers
	 * [ 0: 0] Read Enable
	 * [ 1: 1] Write Enable
	 *         0 = Writes Disabled
	 * [10: 8] Interleave Enable
	 *         000 = No interleave
	 *         001 = Interleave on A[12] (2 nodes)
	 *         011 = Interleave on A[12] and A[14] (4 nodes)
	 *         111 = Interleave on A[12] and A[13] and A[14] (8 nodes)
	 * [31:16] DRAM Base Address i Bits 39-24
	 *         This field defines the upper address bits of a 40-bit address
	 *         that defines the start of the DRAM region.
	 */
	PCI_ADDR(0, 0x18, 1, 0x40), 0x0000f8fc, 0x00000000,
	PCI_ADDR(0, 0x18, 1, 0x48), 0x0000f8fc, 0x00000000,
	PCI_ADDR(0, 0x18, 1, 0x50), 0x0000f8fc, 0x00000000,
	PCI_ADDR(0, 0x18, 1, 0x58), 0x0000f8fc, 0x00000000,
	PCI_ADDR(0, 0x18, 1, 0x60), 0x0000f8fc, 0x00000000,
	PCI_ADDR(0, 0x18, 1, 0x68), 0x0000f8fc, 0x00000000,
	PCI_ADDR(0, 0x18, 1, 0x70), 0x0000f8fc, 0x00000000,
	PCI_ADDR(0, 0x18, 1, 0x78), 0x0000f8fc, 0x00000000,
	/* DRAM CS Base Address i Registers
	 * [ 0: 0] Chip-Select Bank Enable
	 * [15: 9] Base Address (19-13)
	 *         An optimization used when all DIMMs are the same size...
	 * [31:21] Base Address (35-25)
	 *         This field defines the top 11 address bits of a 40-bit
	 *         address that defines the memory address space. These
	 *         bits decode 32-MByte blocks of memory.
	 */
	PCI_ADDR(0, 0x18, 2, 0x40), 0x001f01fe, 0x00000000,
	PCI_ADDR(0, 0x18, 2, 0x44), 0x001f01fe, 0x00000000,
	PCI_ADDR(0, 0x18, 2, 0x48), 0x001f01fe, 0x00000000,
	PCI_ADDR(0, 0x18, 2, 0x4C), 0x001f01fe, 0x00000000,
	PCI_ADDR(0, 0x18, 2, 0x50), 0x001f01fe, 0x00000000,
	PCI_ADDR(0, 0x18, 2, 0x54), 0x001f01fe, 0x00000000,
	PCI_ADDR(0, 0x18, 2, 0x58), 0x001f01fe, 0x00000000,
	PCI_ADDR(0, 0x18, 2, 0x5C), 0x001f01fe, 0x00000000,
	/* DRAM CS Mask Address i Registers
	 * Select the bits to exclude from comparison with the DRAM Base address register.
	 * [15: 9] Address Mask (19-13)
	 *         Addresses to be excluded from the optimized case
	 * [29:21] Address Mask (33-25)
	 *         Bits with an address mask of 1 are excluded from the address comparison
	 */
	PCI_ADDR(0, 0x18, 2, 0x60), 0xC01f01ff, 0x00000000,
	PCI_ADDR(0, 0x18, 2, 0x64), 0xC01f01ff, 0x00000000,
	PCI_ADDR(0, 0x18, 2, 0x68), 0xC01f01ff, 0x00000000,
	PCI_ADDR(0, 0x18, 2, 0x6C), 0xC01f01ff, 0x00000000,
	PCI_ADDR(0, 0x18, 2, 0x70), 0xC01f01ff, 0x00000000,
	PCI_ADDR(0, 0x18, 2, 0x74), 0xC01f01ff, 0x00000000,
	PCI_ADDR(0, 0x18, 2, 0x78), 0xC01f01ff, 0x00000000,
	PCI_ADDR(0, 0x18, 2, 0x7C), 0xC01f01ff, 0x00000000,
	/* DRAM Bank Address Mapping Register
	 * Specifies the memory module size
	 * 000 = 32Mbyte  (Rows = 12 & Col = 8)
	 * 001 = 64Mbyte  (Rows = 12 & Col = 9)
	 * 010 = 128Mbyte (Rows = 13 & Col = 9)|(Rows = 12 & Col = 10)
	 * 011 = 256Mbyte (Rows = 13 & Col = 10)|(Rows = 12 & Col = 11)
	 * 100 = 512Mbyte (Rows = 13 & Col = 11)|(Rows = 14 & Col = 10)
	 * 101 = 1Gbyte   (Rows = 14 & Col = 11)|(Rows = 13 & Col = 12)
	 * 110 = 2Gbyte   (Rows = 14 & Col = 12)
	 */
	PCI_ADDR(0, 0x18, 2, 0x80), 0xffff8888, 0x00000000,
	/* DRAM Timing Low Register
	 * [ 2: 0] Tcl (Cas# Latency, Cas# to read-data-valid)
	 * [ 7: 4] Trc (Row Cycle Time, Ras#-active to Ras#-active/bank auto refresh)
	 *         0000 = 7 bus clocks
	 *         0001 = 8 bus clocks
	 *         1110 = 21 bus clocks
	 *         1111 = 22 bus clocks
	 * [11: 8] Trfc (Row refresh Cycle time, Auto-refresh-active to Ras#-active or Ras#-auto-refresh)
	 *         0000 = 9 bus clocks
	 *         0001 = 10 bus clocks
	 *         1110 = 23 bus clocks
	 *         1111 = 24 bus clocks
	 * [14:12] Trcd (Ras#-active to Cas#-read/write Delay)
	 * [18:16] Trrd (Ras# to Ras# Delay)
	 * [23:20] Tras (Minimum Ras# Active Time)
	 *         0000 to 0100 = reserved
	 *         0101 = 5 bus clocks
	 *         1111 = 15 bus clocks
	 * [26:24] Trp (Row Precharge Time)
	 * [28:28] Twr (Write Recovery Time)
	 */
	PCI_ADDR(0, 0x18, 2, 0x88), 0xe8088008, 0x02522001 /* 0x03623125 */,
	/* DRAM Timing High Register
	 * [ 0: 0] Twtr (Write to Read Delay)
	 * [ 6: 4] Trwt (Read to Write Delay)
	 * [12: 8] Tref (Refresh Rate)
	 *         00000 = 100MHz 4K rows
	 *         00001 = 133MHz 4K rows
	 *         00010 = 166MHz 4K rows
	 *         00011 = 200MHz 4K rows
	 *         01000 = 100MHz 8K/16K rows
	 *         01001 = 133MHz 8K/16K rows
	 *         01010 = 166MHz 8K/16K rows
	 *         01011 = 200MHz 8K/16K rows
	 * [22:20] Twcl (Write CAS Latency)
	 *         000 = 1 Mem clock after CAS# (Unbuffered DIMMs)
	 *         001 = 2 Mem clocks after CAS# (Registered DIMMs)
	 */
	PCI_ADDR(0, 0x18, 2, 0x8c), 0xff8fe08e, (0 << 20)|(0 << 8)|(0 << 4)|(0 << 0),
	/* DRAM Config Low Register
	 * [ 0: 0] DLL Disable
	 * [ 3: 3] Disable DQS Hysteresis (FIXME handle this one carefully)
	 *         0 = Enable DQS input filter
	 *         1 = Disable DQS input filtering
	 * [ 8: 8] DRAM Init
	 *         0 = Initialization done or not yet started.
	 *         1 = Initiate DRAM initialization sequence
	 * [ 9: 9] SO-DIMM Enable
	 *         1 = SO-DIMMs present
	 * [10:10] DRAM Enable
	 *         0 = DRAM not enabled
	 *         1 = DRAM initialized and enabled
	 * [11:11] Memory Clear Status
	 *         0 = Memory Clear function has not completed
	 *         1 = Memory Clear function has completed
	 * [12:12] Exit Self-Refresh
	 *         0 = Exit from self-refresh done or not yet started
	 *         1 = DRAM exiting from self refresh
	 * [13:13] Self-Refresh Status
	 *         0 = Normal Operation
	 *         1 = Self-refresh mode active
	 * [15:14] Read/Write Queue Bypass Count
	 * [16:16] 128-bit/64-bit
	 *         0 = 64-bit Interface to DRAM
	 *         1 = 128-bit Interface to DRAM
	 * [17:17] DIMM ECC Enable
	 *         0 = Some DIMMs do not have ECC
	 *         1 = All DIMMs have ECC bits
	 * [18:18] Unbuffered DIMMs
	 *         1 = Unbuffered DIMMs
	 * [19:19] Enable 32-Byte Granularity
	 *         0 = Optimize for 64-byte bursts
	 *         1 = Optimize for 32-byte bursts
	 * [20:20] DIMM 0 is x4
	 * [21:21] DIMM 1 is x4
	 * [22:22] DIMM 2 is x4
	 * [23:23] DIMM 3 is x4
	 *         1 = x4 DIMM present
	 * [24:24] Disable DRAM Receivers
	 *         0 = Receivers enabled
	 *         1 = Receivers disabled
	 * [27:25] DCQ Bypass Maximum
	 *         000 = Arbiter's choice is always respected
	 *         001 = Oldest entry in DCQ can be bypassed 1 time
	 *         010 = Oldest entry in DCQ can be bypassed 2 times
	 *         011 = Oldest entry in DCQ can be bypassed 3 times
	 *         100 = Oldest entry in DCQ can be bypassed 4 times
	 *         101 = Oldest entry in DCQ can be bypassed 5 times
	 *         110 = Oldest entry in DCQ can be bypassed 6 times
	 *         111 = Oldest entry in DCQ can be bypassed 7 times
	 */
	PCI_ADDR(0, 0x18, 2, 0x90), 0xf0000000,
	(0 << 23)|(0 << 22)|(0 << 21)|(0 << 20)|
	(1 << 19)|(0 << 18)|(1 << 17)|(0 << 16)|
	(2 << 14)|(0 << 13)|(0 << 12)|
	(0 << 11)|(0 << 10)|(0 << 9)|(0 << 8)|
	(0 << 3) |(0 << 1) |(0 << 0),
	/* DRAM Config High Register
	 * [ 3: 0] Maximum Asynchronous Latency
	 * [11: 8] Read Preamble
	 * [18:16] Idle Cycle Limit
	 * [19:19] Dynamic Idle Cycle Center Enable
	 *         0 = Use Idle Cycle Limit
	 *         1 = Generate a dynamic Idle cycle limit
	 * [22:20] DRAM MEMCLK Frequency
	 * [25:25] Memory Clock Ratio Valid (FIXME carefully enable memclk)
	 *         0 = Disable MemClks
	 * [26:26] Memory Clock 0 Enable
	 * [27:27] Memory Clock 1 Enable
	 * [28:28] Memory Clock 2 Enable
	 * [29:29] Memory Clock 3 Enable
	 */
	PCI_ADDR(0, 0x18, 2, 0x94), 0xc180f0f0,
	(0 << 29)|(0 << 28)|(0 << 27)|(0 << 26)|(0 << 25)|
	(0 << 20)|(0 << 19)|(DCH_IDLE_LIMIT_16 << 16)|(0 << 8)|(0 << 0),
	/* DRAM Delay Line Register
	 * Adjusts the skew of the input DQS strobe relative to DATA
	 * [23:16] Delay Line Adjust
	 *         Adjusts the DLL derived PDL delay by one or more delay stages
	 *         in either the faster or slower direction.
	 * [24:24] Adjust Slower
	 *         1 = Adj is used to increase the PDL delay
	 * [25:25] Adjust Faster
	 *         1 = Adj is used to decrease the PDL delay
	 */
	PCI_ADDR(0, 0x18, 2, 0x98), 0xfc00ffff, 0x00000000,
	/* DRAM Scrub Control Register
	 * [ 4: 0] DRAM Scrub Rate
	 * [12: 8] L2 Scrub Rate
	 * [20:16] Dcache Scrub
	 *         00000 = Do not scrub
	 *         All Others = Reserved
	 */
	PCI_ADDR(0, 0x18, 3, 0x58), 0xffe0e0e0, 0x00000000,
	/* DRAM Scrub Address Low Register
	 * [ 0: 0] DRAM Scrubber Redirect Enable
	 *         1 = Scrubber corrects errors found in normal operation
	 * [31: 6] DRAM Scrub Address 31-6
	 */
	PCI_ADDR(0, 0x18, 3, 0x5C), 0x0000003e, 0x00000000,
	/* DRAM Scrub Address High Register
	 * [ 7: 0] DRAM Scrub Address 39-32
	 */
	PCI_ADDR(0, 0x18, 3, 0x60), 0xffffff00, 0x00000000,
	if (!controller_present(ctrl)) {
//		print_debug("No memory controller present\r\n");
	print_spew("setting up CPU");
	print_spew_hex8(ctrl->node_id);
	print_spew(" northbridge registers\r\n");
	max = sizeof(register_values)/sizeof(register_values[0]);
	for(i = 0; i < max; i += 3) {
		print_spew_hex32(register_values[i]);
		print_spew_hex32(register_values[i+2]);
		dev = (register_values[i] & ~0xff) - PCI_DEV(0, 0x18, 0) + ctrl->f0;
		where = register_values[i] & 0xff;
		reg = pci_read_config32(dev, where);
		reg &= register_values[i+1];
		reg |= register_values[i+2];
		pci_write_config32(dev, where, reg);
		reg = pci_read_config32(register_values[i]);
		reg &= register_values[i+1];
		reg |= register_values[i+2];
		pci_write_config32(register_values[i], reg);
	print_spew("done.\r\n");
static void hw_enable_ecc(const struct mem_controller *ctrl)
	nbcap = pci_read_config32(ctrl->f3, NORTHBRIDGE_CAP);
	dcl = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
	dcl &= ~DCL_DimmEccEn;
	if (nbcap & NBCAP_ECC) {
		dcl |= DCL_DimmEccEn;
	if (read_option(CMOS_VSTART_ECC_memory, CMOS_VLEN_ECC_memory, 1) == 0) {
		dcl &= ~DCL_DimmEccEn;
	pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dcl);
static int is_dual_channel(const struct mem_controller *ctrl)
	dcl = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
	return dcl & DCL_128BitEn;

static int is_opteron(const struct mem_controller *ctrl)
	/* Test to see if I am an Opteron.
	 * FIXME Testing dual channel capability is correct for now
	 * but a better test is probably required.
	 */
#warning "FIXME implement a better test for opterons"
	nbcap = pci_read_config32(ctrl->f3, NORTHBRIDGE_CAP);
	return !!(nbcap & NBCAP_128Bit);

static int is_registered(const struct mem_controller *ctrl)
	/* Test to see if we are dealing with registered SDRAM.
	 * If we are not registered we are unbuffered.
	 * This function must be called after spd_handle_unbuffered_dimms.
	 */
	dcl = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
	return !(dcl & DCL_UnBufDimm);
#if K8_4RANK_DIMM_SUPPORT == 1

static struct dimm_size spd_get_dimm_size(unsigned device)
	/* Calculate the log base 2 size of a DIMM in bits */
#if K8_4RANK_DIMM_SUPPORT == 1
	/* Note it might be easier to use byte 31 here; it has the DIMM size as
	 * a multiple of 4MB. The way we do it now we can size both
	 * sides of an asymmetric DIMM.
	 */
	value = spd_read_byte(device, 3);	/* rows */
	if (value < 0) goto hw_err;
	if ((value & 0xf) == 0) goto val_err;
	sz.side1 += value & 0xf;
	sz.rows = value & 0xf;
	value = spd_read_byte(device, 4);	/* columns */
	if (value < 0) goto hw_err;
	if ((value & 0xf) == 0) goto val_err;
	sz.side1 += value & 0xf;
	sz.col = value & 0xf;
	value = spd_read_byte(device, 17);	/* banks */
	if (value < 0) goto hw_err;
	if ((value & 0xff) == 0) goto val_err;
	sz.side1 += log2(value & 0xff);
	/* Get the module data width and convert it to a power of two */
	value = spd_read_byte(device, 7);	/* (high byte) */
	if (value < 0) goto hw_err;
	low = spd_read_byte(device, 6);		/* (low byte) */
	if (low < 0) goto hw_err;
	value = value | (low & 0xff);
	if ((value != 72) && (value != 64)) goto val_err;
	sz.side1 += log2(value);
	value = spd_read_byte(device, 5);	/* number of physical banks */
	if (value < 0) goto hw_err;
	if (value == 1) goto out;
	if ((value != 2) && (value != 4)) {
#if K8_4RANK_DIMM_SUPPORT == 1
	/* Start with the symmetrical case */
	value = spd_read_byte(device, 3);	/* rows */
	if (value < 0) goto hw_err;
	if ((value & 0xf0) == 0) goto out;	/* If symmetrical we are done */
	sz.side2 -= (value & 0x0f);		/* Subtract out rows on side 1 */
	sz.side2 += ((value >> 4) & 0x0f);	/* Add in rows on side 2 */
	value = spd_read_byte(device, 4);	/* columns */
	if (value < 0) goto hw_err;
	if ((value & 0xff) == 0) goto val_err;
	sz.side2 -= (value & 0x0f);		/* Subtract out columns on side 1 */
	sz.side2 += ((value >> 4) & 0x0f);	/* Add in columns on side 2 */
	die("Bad SPD value\r\n");
	/* If a hw_err occurs, report that I have no memory */
#if K8_4RANK_DIMM_SUPPORT == 1

static const unsigned cs_map_aa[15] = {
	/* (row=12, col=8)(14, 12) ---> (0, 0) (2, 4) */
static void set_dimm_size(const struct mem_controller *ctrl, struct dimm_size sz, unsigned index)
	uint32_t base0, base1, map;
	if (sz.side1 != sz.side2) {
	map = pci_read_config32(ctrl->f2, DRAM_BANK_ADDR_MAP);
	map &= ~(0xf << (index * 4));
#if K8_4RANK_DIMM_SUPPORT == 1
	map &= ~(0xf << ((index + 2) * 4));
	/* For each base register:
	 * Place the DIMM size, in 32MB quantities, in bits 31 - 21.
	 * The initial DIMM size is a log2 bit count.
	 * Set the base enable, bit 0.
	 */
	/* Make certain side1 of the DIMM is at least 32MB */
	if (sz.side1 >= (25 + 3)) {
		if (is_cpu_pre_d0()) {
			map |= (sz.side1 - (25 + 3)) << (index * 4);
#if K8_4RANK_DIMM_SUPPORT == 1
			map |= (sz.side1 - (25 + 3)) << ((index + 2) * 4);
			map |= cs_map_aa[(sz.rows - 12) * 5 + (sz.col - 8)] << (index * 4);
#if K8_4RANK_DIMM_SUPPORT == 1
			map |= cs_map_aa[(sz.rows - 12) * 5 + (sz.col - 8)] << ((index + 2) * 4);
		base0 = (1 << ((sz.side1 - (25 + 3)) + 21)) | 1;
	/* Make certain side2 of the DIMM is at least 32MB */
	if (sz.side2 >= (25 + 3)) {
		base1 = (1 << ((sz.side2 - (25 + 3)) + 21)) | 1;
	/* Double the size if we are using dual channel memory */
	if (is_dual_channel(ctrl)) {
		base0 = (base0 << 1) | (base0 & 1);
		base1 = (base1 << 1) | (base1 & 1);
	/* Clear the reserved bits */
	base0 &= ~0x001ffffe;
	base1 &= ~0x001ffffe;
	/* Set the appropriate DIMM base address registers */
	pci_write_config32(ctrl->f2, DRAM_CSBASE + (((index << 1) + 0) << 2), base0);
	pci_write_config32(ctrl->f2, DRAM_CSBASE + (((index << 1) + 1) << 2), base1);
#if K8_4RANK_DIMM_SUPPORT == 1
	pci_write_config32(ctrl->f2, DRAM_CSBASE + (((index << 1) + 4) << 2), base0);
	pci_write_config32(ctrl->f2, DRAM_CSBASE + (((index << 1) + 5) << 2), base1);
	pci_write_config32(ctrl->f2, DRAM_BANK_ADDR_MAP, map);
	/* Enable the memory clocks for this DIMM */
	dch = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH);
	dch |= DCH_MEMCLK_EN0 << index;
#if K8_4RANK_DIMM_SUPPORT == 1
	dch |= DCH_MEMCLK_EN0 << (index + 2);
	pci_write_config32(ctrl->f2, DRAM_CONFIG_HIGH, dch);
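
/* Worked example (illustration only): a 512MB side is 2^32 bits, so
 * sz.side1 == 32; 32MB is 2^28 bits, hence the (25 + 3) threshold.
 * The size is parked in the chip-select base register:
 *   base0 = (1 << ((32 - 28) + 21)) | 1 = 0x02000001
 * i.e. bits [31:21] hold 16 (16 * 32MB = 512MB) and bit 0 enables the
 * bank; order_chip_selects()/interleave_chip_selects() later rewrite
 * these size-encoded values into real base addresses.
 */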
static long spd_set_ram_size(const struct mem_controller *ctrl, long dimm_mask)
	for(i = 0; i < DIMM_SOCKETS; i++) {
		if (!(dimm_mask & (1 << i))) {
		sz = spd_get_dimm_size(ctrl->channel0[i]);
			return -1; /* Report SPD error */
		set_dimm_size(ctrl, sz, i);
static void route_dram_accesses(const struct mem_controller *ctrl,
	unsigned long base_k, unsigned long limit_k)
	/* Route the addresses to the controller node */
	unsigned limit_reg, base_reg;
	node_id = ctrl->node_id;
	index = (node_id << 3);
	limit = (limit_k << 2);
	limit |= (0 << 8) | (node_id << 0);
	base = (base_k << 2);
	base |= (0 << 8) | (1 << 1) | (1 << 0);
	limit_reg = 0x44 + index;
	base_reg = 0x40 + index;
	for(device = PCI_DEV(0, 0x18, 1); device <= PCI_DEV(0, 0x1f, 1); device += PCI_DEV(0, 1, 0)) {
		pci_write_config32(device, limit_reg, limit);
		pci_write_config32(device, base_reg, base);
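
/* Encoding sketch: base_k and limit_k are in KB, and the base/limit
 * registers carry addr[39:24] in bits [31:16], so (x_k << 2) lines
 * register bit 16 up with 16MB (2^24 bytes).  E.g. limit_k = 0x400000
 * (4GB) yields limit = 0x01000000 before the node id is OR'd in.
 */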
static void set_top_mem(unsigned tom_k)
	/* Error if I don't have memory */
	/* Report the amount of memory. */
	print_spew("RAM: 0x");
	print_spew_hex32(tom_k);
	print_spew(" KB\r\n");
	/* Now set the top of memory */
	msr.lo = (tom_k & 0x003fffff) << 10;
	msr.hi = (tom_k & 0xffc00000) >> 22;
	wrmsr(TOP_MEM2, msr);
	/* Leave a 64M hole between TOP_MEM and TOP_MEM2
	 * so I can see my ROM chip and other I/O devices.
	 */
	if (tom_k >= 0x003f0000) {
	msr.lo = (tom_k & 0x003fffff) << 10;
	msr.hi = (tom_k & 0xffc00000) >> 22;
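
/* Split sketch: tom_k is in KB and the TOP_MEM MSRs take a 40-bit
 * byte address in edx:eax.  For tom_k = 0x100000 (1GB):
 *   msr.lo = (0x100000 & 0x003fffff) << 10 = 0x40000000
 *   msr.hi = (0x100000 & 0xffc00000) >> 22 = 0
 */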
static unsigned long interleave_chip_selects(const struct mem_controller *ctrl)
	static const uint32_t csbase_low[] = {
	/*  32MB */ (1 << (13 - 4)),
	/*  64MB */ (1 << (14 - 4)),
	/* 128MB */ (1 << (14 - 4)),
	/* 256MB */ (1 << (15 - 4)),
	/* 512MB */ (1 << (15 - 4)),
	/*   1GB */ (1 << (16 - 4)),
	/*   2GB */ (1 << (16 - 4)),
	static const uint32_t csbase_low_d0[] = {
	/*  32MB */ (1 << (13 - 4)),
	/*  64MB */ (1 << (14 - 4)),
	/* 128MB */ (1 << (14 - 4)),
	/* 128MB */ (1 << (15 - 4)),
	/* 256MB */ (1 << (15 - 4)),
	/* 512MB */ (1 << (15 - 4)),
	/* 256MB */ (1 << (16 - 4)),
	/* 512MB */ (1 << (16 - 4)),
	/*   1GB */ (1 << (16 - 4)),
	/*   1GB */ (1 << (17 - 4)),
	/*   2GB */ (1 << (17 - 4)),
	/* cs_base_high is not changed */
	int chip_selects, index;
	unsigned common_size;
	unsigned common_cs_mode;
	uint32_t csbase, csmask;
	/* See if all of the memory chip selects are the same size
	 * and if so count them.
	 */
	for(index = 0; index < 8; index++) {
		value = pci_read_config32(ctrl->f2, DRAM_CSBASE + (index << 2));
		if (common_size == 0) {
		/* The sizes differed; fail */
		if (common_size != size) {
		value = pci_read_config32(ctrl->f2, DRAM_BANK_ADDR_MAP);
		cs_mode = (value >> ((index >> 1) * 4)) & 0xf;
		if (cs_mode == 0) continue;
		if (common_cs_mode == 0) {
			common_cs_mode = cs_mode;
		/* The cs_modes differed; fail */
		if (common_cs_mode != cs_mode) {
	/* Chip selects can only be interleaved when there is
	 * more than one and there is a power of two of them.
	 */
	bits = log2(chip_selects);
	if (((1 << bits) != chip_selects) || (bits < 1) || (bits > 3)) {
	/* Find the bits of csbase that we need to interleave on */
	csbase_inc = csbase_low[common_cs_mode];
	if (is_dual_channel(ctrl)) {
		/* Also, we run out of address mask bits if we try to interleave eight 4GB DIMMs */
		if ((bits == 3) && (common_size == (1 << (32 - 3)))) {
//			print_debug("8 4GB chip selects cannot be interleaved\r\n");
	csbase_inc = csbase_low_d0[common_cs_mode];
	if (is_dual_channel(ctrl)) {
		if ((bits == 3) && (common_cs_mode > 8)) {
//			print_debug("8 cs_mode>8 chip selects cannot be interleaved\r\n");
	/* Compute the initial values for csbase and csmask.
	 * In csbase just set the enable bit and the base to zero.
	 * In csmask set the mask bits for the size and page level interleave.
	 */
	csmask = (((common_size << bits) - 1) << 21);
	csmask |= 0xfe00 & ~((csbase_inc << bits) - csbase_inc);
	for(index = 0; index < 8; index++) {
		value = pci_read_config32(ctrl->f2, DRAM_CSBASE + (index << 2));
		pci_write_config32(ctrl->f2, DRAM_CSBASE + (index << 2), csbase);
		pci_write_config32(ctrl->f2, DRAM_CSMASK + (index << 2), csmask);
		csbase += csbase_inc;
	print_spew("Interleaved\r\n");
	/* Return the memory size in K */
	return common_size << (15 + bits);
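
/* Return-value sketch, under the assumption that common_size holds the
 * per-chip-select size in 32MB units (bits [31:21] of the base
 * register): 32MB is 2^15 KB, so common_size << 15 is one chip select
 * in KB, and the extra << bits multiplies by the number of interleaved
 * chip selects.
 */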
static unsigned long order_chip_selects(const struct mem_controller *ctrl)
	/* Remember which registers we have used in the high 8 bits of tom */
	/* Find the largest remaining candidate */
	unsigned index, candidate;
	uint32_t csbase, csmask;
	for(index = 0; index < 8; index++) {
		value = pci_read_config32(ctrl->f2, DRAM_CSBASE + (index << 2));
		/* Is it enabled? */
		/* Is it greater? */
		if (value <= csbase) {
		/* Has it already been selected? */
		if (tom & (1 << (index + 24))) {
		/* I have a new candidate */
	/* See if I have found a new candidate */
	/* Remember the dimm size */
	size = csbase >> 21;
	/* Remember I have used this register */
	tom |= (1 << (candidate + 24));
	/* Recompute the cs base register value */
	csbase = (tom << 21) | 1;
	/* Increment the top of memory */
	/* Compute the memory mask */
	csmask = ((size - 1) << 21);
	csmask |= 0xfe00;	/* For now don't optimize */
	/* Write the new base register */
	pci_write_config32(ctrl->f2, DRAM_CSBASE + (candidate << 2), csbase);
	/* Write the new mask register */
	pci_write_config32(ctrl->f2, DRAM_CSMASK + (candidate << 2), csmask);
	/* Return the memory size in K */
	return (tom & ~0xff000000) << 15;
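
/* tom bookkeeping sketch: bits [23:0] of tom accumulate the top of
 * memory in 32MB units while bits [31:24] remember which chip-select
 * registers have already been placed, so the final size in KB is
 * (tom & ~0xff000000) << 15 (32MB = 2^15 KB).
 */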
unsigned long memory_end_k(const struct mem_controller *ctrl, int max_node_id)
	/* Find the last memory address used */
	for(node_id = 0; node_id < max_node_id; node_id++) {
		uint32_t limit, base;
		index = node_id << 3;
		base = pci_read_config32(ctrl->f1, 0x40 + index);
		/* Only look at the limit if the base is enabled */
		if ((base & 3) == 3) {
			limit = pci_read_config32(ctrl->f1, 0x44 + index);
			end_k = ((limit + 0x00010000) & 0xffff0000) >> 2;
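
/* Decoding sketch: the limit register's bits [31:16] hold addr[39:24]
 * of the last 16MB-aligned block, so adding 0x00010000, masking, and
 * shifting right by 2 yields the exclusive end of the range in KB;
 * this is the inverse of the (x_k << 2) encoding written by
 * route_dram_accesses().
 */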
#if K8_E0_MEM_HOLE_SIZEK != 0
#define K8_E0_MEM_HOLE_LIMITK (4 * 1024 * 1024)
#define K8_E0_MEM_HOLE_BASEK (K8_E0_MEM_HOLE_LIMITK - K8_E0_MEM_HOLE_SIZEK)

static void set_e0_mem_hole(const struct mem_controller *ctrl, unsigned base_k)
	/* Route the addresses to the controller node */
	val = pci_read_config32(ctrl->f1, 0xf0);
	val = (K8_E0_MEM_HOLE_BASEK << 10) | ((K8_E0_MEM_HOLE_SIZEK + base_k) >> (16 - 10)) | 1;
	pci_write_config32(ctrl->f1, 0xf0, val);
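
/* Encoding sketch for the rev E DRAM Hole Address register (f1 0xf0),
 * as this code assumes it: bits [31:24] take addr[31:24] of the hole
 * base (base_k << 10 turns KB into a byte address), bits [15:8] take
 * the offset remapped above the hole in 16MB units (x_k >> 14 is the
 * 16MB-unit count, and >> 6 is that count already shifted into
 * position at bit 8), and bit 0 is the valid bit.
 */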
static void order_dimms(const struct mem_controller *ctrl)
	unsigned long tom_k, base_k;
	if (read_option(CMOS_VSTART_interleave_chip_selects, CMOS_VLEN_interleave_chip_selects, 1) != 0) {
		tom_k = interleave_chip_selects(ctrl);
		print_debug("Interleaving disabled\r\n");
		tom_k = order_chip_selects(ctrl);
	/* Compute the memory base address */
	base_k = memory_end_k(ctrl, ctrl->node_id);
#if K8_E0_MEM_HOLE_SIZEK != 0
	if (!is_cpu_pre_e0()) {
		/* See if this node's range covers the hole and needs adjusting */
		if ((base_k <= K8_E0_MEM_HOLE_BASEK) && (tom_k > K8_E0_MEM_HOLE_BASEK)) {
			tom_k += K8_E0_MEM_HOLE_SIZEK;
	route_dram_accesses(ctrl, base_k, tom_k);
static long disable_dimm(const struct mem_controller *ctrl, unsigned index, long dimm_mask)
	print_debug("disabling dimm");
	print_debug_hex8(index);
	print_debug("\r\n");
	pci_write_config32(ctrl->f2, DRAM_CSBASE + (((index << 1) + 0) << 2), 0);
	pci_write_config32(ctrl->f2, DRAM_CSBASE + (((index << 1) + 1) << 2), 0);
	dimm_mask &= ~(1 << index);
static long spd_handle_unbuffered_dimms(const struct mem_controller *ctrl, long dimm_mask)
	for(i = 0; i < DIMM_SOCKETS; i++) {
		if (!(dimm_mask & (1 << i))) {
		value = spd_read_byte(ctrl->channel0[i], 21);
		/* Registered dimm? */
		if (value & (1 << 1)) {
		/* Otherwise it must be an unbuffered dimm */
	if (unbuffered && registered) {
		die("Mixed buffered and registered dimms not supported");
	/* By yhlu, for debugging: a socket 939 Athlon64 can do dual channel, but it uses unbuffered DIMMs */
	if (unbuffered && is_opteron(ctrl)) {
		die("Unbuffered Dimms not supported on Opteron");
	dcl = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
	dcl &= ~DCL_UnBufDimm;
		dcl |= DCL_UnBufDimm;
	pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dcl);
	if (is_registered(ctrl)) {
		print_debug("Registered\r\n");
		print_debug("Unbuffered\r\n");
static unsigned int spd_detect_dimms(const struct mem_controller *ctrl)
	for(i = 0; i < DIMM_SOCKETS; i++) {
		device = ctrl->channel0[i];
		byte = spd_read_byte(ctrl->channel0[i], 2);	/* Type */
		dimm_mask |= (1 << i);
		device = ctrl->channel1[i];
		byte = spd_read_byte(ctrl->channel1[i], 2);
		dimm_mask |= (1 << (i + DIMM_SOCKETS));
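
/* dimm_mask layout sketch: bit i marks a populated socket on channel
 * 0 and bit (i + DIMM_SOCKETS) the matching socket on channel 1.
 * With DIMM_SOCKETS == 4, a mask of 0x11 means socket 0 is populated
 * on both channels.
 */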
static long spd_enable_2channels(const struct mem_controller *ctrl, long dimm_mask)
	/* SPD addresses to verify are identical */
	static const unsigned addresses[] = {
		2,	/* Type should be DDR SDRAM */
		3,	/* *Row addresses */
		4,	/* *Column addresses */
		5,	/* *Physical Banks */
		6,	/* *Module Data Width low */
		7,	/* *Module Data Width high */
		9,	/* *Cycle time at highest CAS Latency CL=X */
		11,	/* *SDRAM Type */
		13,	/* *SDRAM Width */
		17,	/* *Logical Banks */
		18,	/* *Supported CAS Latencies */
		21,	/* *SDRAM Module Attributes */
		23,	/* *Cycle time at CAS Latency (CLX - 0.5) */
		26,	/* *Cycle time at CAS Latency (CLX - 1.0) */
		27,	/* *tRP Row precharge time */
		28,	/* *Minimum Row Active to Row Active Delay (tRRD) */
		29,	/* *tRCD RAS to CAS */
		30,	/* *tRAS Activate to Precharge */
		41,	/* *Minimum Active to Active/Auto Refresh Time (Trc) */
		42,	/* *Minimum Auto Refresh Command Time (Trfc) */
	/* If the dimms are not in pairs, do not do dual channels */
	if ((dimm_mask & ((1 << DIMM_SOCKETS) - 1)) !=
		((dimm_mask >> DIMM_SOCKETS) & ((1 << DIMM_SOCKETS) - 1))) {
		goto single_channel;
	/* If the cpu is not capable of doing dual channels, don't do dual channels */
	nbcap = pci_read_config32(ctrl->f3, NORTHBRIDGE_CAP);
	if (!(nbcap & NBCAP_128Bit)) {
		goto single_channel;
	for(i = 0; (i < 4) && (ctrl->channel0[i]); i++) {
		unsigned device0, device1;
		/* If I don't have a dimm skip this one */
		if (!(dimm_mask & (1 << i))) {
		device0 = ctrl->channel0[i];
		device1 = ctrl->channel1[i];
		for(j = 0; j < sizeof(addresses)/sizeof(addresses[0]); j++) {
			addr = addresses[j];
			value0 = spd_read_byte(device0, addr);
			value1 = spd_read_byte(device1, addr);
			if (value0 != value1) {
				goto single_channel;
	print_spew("Enabling dual channel memory\r\n");
	dcl = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
	dcl &= ~DCL_32ByteEn;
	dcl |= DCL_128BitEn;
	pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dcl);
	dimm_mask &= ~((1 << (DIMM_SOCKETS * 2)) - (1 << DIMM_SOCKETS));
	uint8_t divisor;	/* In 1/2 ns increments */
	uint32_t dch_memclk;
	uint16_t dch_tref4k, dch_tref8k;

static const struct mem_param *get_mem_param(unsigned min_cycle_time)
	static const struct mem_param speed[] = {
		.name = "100Mhz\r\n",
		.divisor = (10 << 1),
		.dch_memclk = DCH_MEMCLK_100MHZ << DCH_MEMCLK_SHIFT,
		.dch_tref4k = DTH_TREF_100MHZ_4K,
		.dch_tref8k = DTH_TREF_100MHZ_8K,
		.name = "133Mhz\r\n",
		.divisor = (7 << 1) + 1,
		.dch_memclk = DCH_MEMCLK_133MHZ << DCH_MEMCLK_SHIFT,
		.dch_tref4k = DTH_TREF_133MHZ_4K,
		.dch_tref8k = DTH_TREF_133MHZ_8K,
		.name = "166Mhz\r\n",
		.dch_memclk = DCH_MEMCLK_166MHZ << DCH_MEMCLK_SHIFT,
		.dch_tref4k = DTH_TREF_166MHZ_4K,
		.dch_tref8k = DTH_TREF_166MHZ_8K,
		.name = "200Mhz\r\n",
		.dch_memclk = DCH_MEMCLK_200MHZ << DCH_MEMCLK_SHIFT,
		.dch_tref4k = DTH_TREF_200MHZ_4K,
		.dch_tref8k = DTH_TREF_200MHZ_8K,
	const struct mem_param *param;
	for(param = &speed[0]; param->cycle_time; param++) {
		if (min_cycle_time > (param + 1)->cycle_time) {
	if (!param->cycle_time) {
		die("min_cycle_time too low");
	print_spew(param->name);
#ifdef DRAM_MIN_CYCLE_TIME
	print_debug(param->name);
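
/* divisor encoding sketch: cycle times are carried in half-ns units,
 * so 133MHz (7.5ns) has divisor (7 << 1) + 1 == 15 and 100MHz (10ns)
 * has (10 << 1) == 20.  The update_dimm_T* helpers below divide SPD
 * nanosecond values by this to obtain bus clocks.
 */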
struct spd_set_memclk_result {
	const struct mem_param *param;

static struct spd_set_memclk_result spd_set_memclk(const struct mem_controller *ctrl, long dimm_mask)
	/* Compute the minimum cycle time for these dimms */
	struct spd_set_memclk_result result;
	unsigned min_cycle_time, min_latency, bios_cycle_time;
	static const int latency_indices[] = { 26, 23, 9 };
	static const unsigned char min_cycle_times[] = {
		[NBCAP_MEMCLK_200MHZ] = 0x50,	/* 5ns */
		[NBCAP_MEMCLK_166MHZ] = 0x60,	/* 6ns */
		[NBCAP_MEMCLK_133MHZ] = 0x75,	/* 7.5ns */
		[NBCAP_MEMCLK_100MHZ] = 0xa0,	/* 10ns */
	value = pci_read_config32(ctrl->f3, NORTHBRIDGE_CAP);
	min_cycle_time = min_cycle_times[(value >> NBCAP_MEMCLK_SHIFT) & NBCAP_MEMCLK_MASK];
	bios_cycle_time = min_cycle_times[
		read_option(CMOS_VSTART_max_mem_clock, CMOS_VLEN_max_mem_clock, 0)];
	if (bios_cycle_time > min_cycle_time) {
		min_cycle_time = bios_cycle_time;
	/* Compute the least latency with the fastest clock supported
	 * by both the memory controller and the dimms.
	 */
	for(i = 0; i < DIMM_SOCKETS; i++) {
		int new_cycle_time, new_latency;
		if (!(dimm_mask & (1 << i))) {
		/* First find the supported CAS latencies.
		 * Byte 18 for DDR SDRAM is interpreted:
		 * bit 0 == CAS Latency = 1.0
		 * bit 1 == CAS Latency = 1.5
		 * bit 2 == CAS Latency = 2.0
		 * bit 3 == CAS Latency = 2.5
		 * bit 4 == CAS Latency = 3.0
		 * bit 5 == CAS Latency = 3.5
		 */
		new_cycle_time = 0xa0;
		latencies = spd_read_byte(ctrl->channel0[i], 18);
		if (latencies <= 0) continue;
		/* Compute the lowest cas latency supported */
		latency = log2(latencies) - 2;
		/* Loop through and find a fast clock with a low latency */
		for(index = 0; index < 3; index++, latency++) {
			if ((latency < 2) || (latency > 4) ||
				(!(latencies & (1 << latency)))) {
			value = spd_read_byte(ctrl->channel0[i], latency_indices[index]);
			/* Only increase the latency if we decrease the clock */
			if ((value >= min_cycle_time) && (value < new_cycle_time)) {
				new_cycle_time = value;
				new_latency = latency;
		if (new_latency > 4) {
		/* Does min_cycle_time need to be increased? */
		if (new_cycle_time > min_cycle_time) {
			min_cycle_time = new_cycle_time;
		/* Does min_latency need to be increased? */
		if (new_latency > min_latency) {
			min_latency = new_latency;
	/* Make a second pass through the dimms and disable
	 * any that cannot support the selected memclk and cas latency.
	 */
	for(i = 0; (i < 4) && (ctrl->channel0[i]); i++) {
		if (!(dimm_mask & (1 << i))) {
		latencies = spd_read_byte(ctrl->channel0[i], 18);
		if (latencies < 0) goto hw_error;
		if (latencies == 0) {
		/* Compute the lowest cas latency supported */
		latency = log2(latencies) - 2;
		/* Walk through searching for the selected latency */
		for(index = 0; index < 3; index++, latency++) {
			if (!(latencies & (1 << latency))) {
			if (latency == min_latency)
		/* If I can't find the latency or my index is bad, error */
		if ((latency != min_latency) || (index >= 3)) {
		/* Read the min_cycle_time for this latency */
		value = spd_read_byte(ctrl->channel0[i], latency_indices[index]);
		if (value < 0) goto hw_error;
		/* All is good if the selected clock speed
		 * is what I need or slower.
		 */
		if (value <= min_cycle_time) {
		/* Otherwise I have an error; disable the dimm */
		dimm_mask = disable_dimm(ctrl, i, dimm_mask);
	/* Reduce the speed under full load for 4 rank DIMM support */
#if K8_4RANK_DIMM_SUPPORT
	if (dimm_mask == (3 | (3 << DIMM_SOCKETS))) {
		for(i = 0; (i < 4) && (ctrl->channel0[i]); i++) {
			if (!(dimm_mask & (1 << i))) {
			val = spd_read_byte(ctrl->channel0[i], 5);
		if (min_cycle_time <= 0x50) {
			min_cycle_time = 0x60;
	/* Now that I know the minimum cycle time, look up the memory parameters */
	result.param = get_mem_param(min_cycle_time);
	/* Update DRAM Config High with our selected memory speed */
	value = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH);
	value &= ~(DCH_MEMCLK_MASK << DCH_MEMCLK_SHIFT);
	/* Improve DQS centering by correcting for the case where the core
	 * speed multiplier and the MEMCLK speed result in an odd clock
	 * divisor, by selecting the next lowest memory speed.  Required
	 * only at DDR400 and higher speeds with certain DIMM loadings.
	 * FIXME: cheating???
	 */
	if (!is_cpu_pre_e0()) {
		if (min_cycle_time == 0x50) {
	value |= result.param->dch_memclk;
	pci_write_config32(ctrl->f2, DRAM_CONFIG_HIGH, value);
	static const unsigned latencies[] = { DTL_CL_2, DTL_CL_2_5, DTL_CL_3 };
	/* Update DRAM Timing Low with our selected cas latency */
	value = pci_read_config32(ctrl->f2, DRAM_TIMING_LOW);
	value &= ~(DTL_TCL_MASK << DTL_TCL_SHIFT);
	value |= latencies[min_latency - 2] << DTL_TCL_SHIFT;
	pci_write_config32(ctrl->f2, DRAM_TIMING_LOW, value);
	result.dimm_mask = dimm_mask;
	result.param = (const struct mem_param *)0;
	result.dimm_mask = -1;
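
/* CAS walk sketch (illustration only): with SPD byte 18 = 0x1c
 * (CL2, CL2.5, CL3 supported), log2(0x1c) - 2 == 2, so the loop pairs
 * latencies 2, 2.5, and 3 (bits 2..4) with SPD bytes 26, 23, and 9,
 * the cycle times at CL X-1.0, X-0.5, and X, and keeps the fastest
 * clock that does not violate min_cycle_time.
 */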
static int update_dimm_Trc(const struct mem_controller *ctrl, const struct mem_param *param, int i)
	unsigned clocks, old_clocks;
	value = spd_read_byte(ctrl->channel0[i], 41);	/* Trc, in whole ns */
	if (value < 0) return -1;
	if ((value == 0) || (value == 0xff)) {
	clocks = ((value << 1) + param->divisor - 1)/param->divisor;
	if (clocks < DTL_TRC_MIN) {
		clocks = DTL_TRC_MIN;
	if (clocks > DTL_TRC_MAX) {
	dtl = pci_read_config32(ctrl->f2, DRAM_TIMING_LOW);
	old_clocks = ((dtl >> DTL_TRC_SHIFT) & DTL_TRC_MASK) + DTL_TRC_BASE;
	if (old_clocks > clocks) {
		clocks = old_clocks;
	dtl &= ~(DTL_TRC_MASK << DTL_TRC_SHIFT);
	dtl |= ((clocks - DTL_TRC_BASE) << DTL_TRC_SHIFT);
	pci_write_config32(ctrl->f2, DRAM_TIMING_LOW, dtl);
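
/* Rounding sketch: (value << 1) converts the SPD nanosecond value to
 * half-ns so the division by the half-ns divisor rounds up.  E.g.
 * Trc = 60ns at 133MHz (divisor 15): (120 + 14) / 15 = 8 bus clocks,
 * then clamped into [DTL_TRC_MIN, DTL_TRC_MAX].
 */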
static int update_dimm_Trfc(const struct mem_controller *ctrl, const struct mem_param *param, int i)
	unsigned clocks, old_clocks;
	value = spd_read_byte(ctrl->channel0[i], 42);	/* Trfc, in whole ns */
	if (value < 0) return -1;
	if ((value == 0) || (value == 0xff)) {
		value = param->tRFC;
	clocks = ((value << 1) + param->divisor - 1)/param->divisor;
	if (clocks < DTL_TRFC_MIN) {
		clocks = DTL_TRFC_MIN;
	if (clocks > DTL_TRFC_MAX) {
	dtl = pci_read_config32(ctrl->f2, DRAM_TIMING_LOW);
	old_clocks = ((dtl >> DTL_TRFC_SHIFT) & DTL_TRFC_MASK) + DTL_TRFC_BASE;
	if (old_clocks > clocks) {
		clocks = old_clocks;
	dtl &= ~(DTL_TRFC_MASK << DTL_TRFC_SHIFT);
	dtl |= ((clocks - DTL_TRFC_BASE) << DTL_TRFC_SHIFT);
	pci_write_config32(ctrl->f2, DRAM_TIMING_LOW, dtl);
static int update_dimm_Trcd(const struct mem_controller *ctrl, const struct mem_param *param, int i)
	unsigned clocks, old_clocks;
	value = spd_read_byte(ctrl->channel0[i], 29);	/* Trcd, in 1/4 ns units */
	if (value < 0) return -1;
	clocks = (value + (param->divisor << 1) - 1)/(param->divisor << 1);
	if (clocks < DTL_TRCD_MIN) {
		clocks = DTL_TRCD_MIN;
	if (clocks > DTL_TRCD_MAX) {
	dtl = pci_read_config32(ctrl->f2, DRAM_TIMING_LOW);
	old_clocks = ((dtl >> DTL_TRCD_SHIFT) & DTL_TRCD_MASK) + DTL_TRCD_BASE;
	if (old_clocks > clocks) {
		clocks = old_clocks;
	dtl &= ~(DTL_TRCD_MASK << DTL_TRCD_SHIFT);
	dtl |= ((clocks - DTL_TRCD_BASE) << DTL_TRCD_SHIFT);
	pci_write_config32(ctrl->f2, DRAM_TIMING_LOW, dtl);
static int update_dimm_Trrd(const struct mem_controller *ctrl, const struct mem_param *param, int i)
	unsigned clocks, old_clocks;
	value = spd_read_byte(ctrl->channel0[i], 28);	/* Trrd, in 1/4 ns units */
	if (value < 0) return -1;
	clocks = (value + (param->divisor << 1) - 1)/(param->divisor << 1);
	if (clocks < DTL_TRRD_MIN) {
		clocks = DTL_TRRD_MIN;
	if (clocks > DTL_TRRD_MAX) {
	dtl = pci_read_config32(ctrl->f2, DRAM_TIMING_LOW);
	old_clocks = ((dtl >> DTL_TRRD_SHIFT) & DTL_TRRD_MASK) + DTL_TRRD_BASE;
	if (old_clocks > clocks) {
		clocks = old_clocks;
	dtl &= ~(DTL_TRRD_MASK << DTL_TRRD_SHIFT);
	dtl |= ((clocks - DTL_TRRD_BASE) << DTL_TRRD_SHIFT);
	pci_write_config32(ctrl->f2, DRAM_TIMING_LOW, dtl);
static int update_dimm_Tras(const struct mem_controller *ctrl, const struct mem_param *param, int i)
	unsigned clocks, old_clocks;
	value = spd_read_byte(ctrl->channel0[i], 30);	/* Tras, in whole ns */
	if (value < 0) return -1;
	clocks = ((value << 1) + param->divisor - 1)/param->divisor;
	if (clocks < DTL_TRAS_MIN) {
		clocks = DTL_TRAS_MIN;
	if (clocks > DTL_TRAS_MAX) {
	dtl = pci_read_config32(ctrl->f2, DRAM_TIMING_LOW);
	old_clocks = ((dtl >> DTL_TRAS_SHIFT) & DTL_TRAS_MASK) + DTL_TRAS_BASE;
	if (old_clocks > clocks) {
		clocks = old_clocks;
	dtl &= ~(DTL_TRAS_MASK << DTL_TRAS_SHIFT);
	dtl |= ((clocks - DTL_TRAS_BASE) << DTL_TRAS_SHIFT);
	pci_write_config32(ctrl->f2, DRAM_TIMING_LOW, dtl);
static int update_dimm_Trp(const struct mem_controller *ctrl, const struct mem_param *param, int i)
	unsigned clocks, old_clocks;
	value = spd_read_byte(ctrl->channel0[i], 27);	/* Trp, in 1/4 ns units */
	if (value < 0) return -1;
	clocks = (value + (param->divisor << 1) - 1)/(param->divisor << 1);
	if (clocks < DTL_TRP_MIN) {
		clocks = DTL_TRP_MIN;
	if (clocks > DTL_TRP_MAX) {
	dtl = pci_read_config32(ctrl->f2, DRAM_TIMING_LOW);
	old_clocks = ((dtl >> DTL_TRP_SHIFT) & DTL_TRP_MASK) + DTL_TRP_BASE;
	if (old_clocks > clocks) {
		clocks = old_clocks;
	dtl &= ~(DTL_TRP_MASK << DTL_TRP_SHIFT);
	dtl |= ((clocks - DTL_TRP_BASE) << DTL_TRP_SHIFT);
	pci_write_config32(ctrl->f2, DRAM_TIMING_LOW, dtl);
static void set_Twr(const struct mem_controller *ctrl, const struct mem_param *param)
	dtl = pci_read_config32(ctrl->f2, DRAM_TIMING_LOW);
	dtl &= ~(DTL_TWR_MASK << DTL_TWR_SHIFT);
	dtl |= (param->dtl_twr - DTL_TWR_BASE) << DTL_TWR_SHIFT;
	pci_write_config32(ctrl->f2, DRAM_TIMING_LOW, dtl);

static void init_Tref(const struct mem_controller *ctrl, const struct mem_param *param)
	dth = pci_read_config32(ctrl->f2, DRAM_TIMING_HIGH);
	dth &= ~(DTH_TREF_MASK << DTH_TREF_SHIFT);
	dth |= (param->dch_tref4k << DTH_TREF_SHIFT);
	pci_write_config32(ctrl->f2, DRAM_TIMING_HIGH, dth);
static int update_dimm_Tref(const struct mem_controller *ctrl, const struct mem_param *param, int i)
	unsigned tref, old_tref;
	value = spd_read_byte(ctrl->channel0[i], 3);	/* rows */
	if (value < 0) return -1;
		tref = param->dch_tref8k;
		tref = param->dch_tref4k;
	dth = pci_read_config32(ctrl->f2, DRAM_TIMING_HIGH);
	old_tref = (dth >> DTH_TREF_SHIFT) & DTH_TREF_MASK;
	if ((value == 12) && (old_tref == param->dch_tref4k)) {
		tref = param->dch_tref4k;
		tref = param->dch_tref8k;
	dth &= ~(DTH_TREF_MASK << DTH_TREF_SHIFT);
	dth |= (tref << DTH_TREF_SHIFT);
	pci_write_config32(ctrl->f2, DRAM_TIMING_HIGH, dth);
static int update_dimm_x4(const struct mem_controller *ctrl, const struct mem_param *param, int i)
#if K8_4RANK_DIMM_SUPPORT == 1
	value = spd_read_byte(ctrl->channel0[i], 13);	/* primary SDRAM width */
#if K8_4RANK_DIMM_SUPPORT == 1
	rank = spd_read_byte(ctrl->channel0[i], 5);	/* number of physical banks */
	dimm = 1 << (DCL_x4DIMM_SHIFT + i);
#if K8_4RANK_DIMM_SUPPORT == 1
	dimm |= 1 << (DCL_x4DIMM_SHIFT + i + 2);
	dcl = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
	pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dcl);

static int update_dimm_ecc(const struct mem_controller *ctrl, const struct mem_param *param, int i)
	value = spd_read_byte(ctrl->channel0[i], 11);	/* DIMM configuration type (ECC) */
	dcl = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
	dcl &= ~DCL_DimmEccEn;
	pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dcl);
static int count_dimms(const struct mem_controller *ctrl)
	for(index = 0; index < 8; index += 2) {
		csbase = pci_read_config32(ctrl->f2, (DRAM_CSBASE + (index << 2)));

static void set_Twtr(const struct mem_controller *ctrl, const struct mem_param *param)
	clocks = 1;	/* AMD says hard code this */
	dth = pci_read_config32(ctrl->f2, DRAM_TIMING_HIGH);
	dth &= ~(DTH_TWTR_MASK << DTH_TWTR_SHIFT);
	dth |= ((clocks - DTH_TWTR_BASE) << DTH_TWTR_SHIFT);
	pci_write_config32(ctrl->f2, DRAM_TIMING_HIGH, dth);
static void set_Trwt(const struct mem_controller *ctrl, const struct mem_param *param)
	dtl = pci_read_config32(ctrl->f2, DRAM_TIMING_LOW);
	latency = (dtl >> DTL_TCL_SHIFT) & DTL_TCL_MASK;
	divisor = param->divisor;
	if (is_opteron(ctrl)) {
		if (latency == DTL_CL_2) {
			if (divisor == ((6 << 0) + 0)) {
			else if (divisor > ((6 << 0) + 0)) {
				/* 100MHz && 133MHz */
		else if (latency == DTL_CL_2_5) {
		else if (latency == DTL_CL_3) {
			if (divisor == ((6 << 0) + 0)) {
			else if (divisor > ((6 << 0) + 0)) {
				/* 100MHz && 133MHz */
	else /* Athlon64 */ {
		if (is_registered(ctrl)) {
			if (latency == DTL_CL_2) {
			else if (latency == DTL_CL_2_5) {
			else if (latency == DTL_CL_3) {
		else /* Unbuffered */ {
			if (latency == DTL_CL_2) {
			else if (latency == DTL_CL_2_5) {
			else if (latency == DTL_CL_3) {
	if ((clocks < DTH_TRWT_MIN) || (clocks > DTH_TRWT_MAX)) {
		die("Unknown Trwt\r\n");
	dth = pci_read_config32(ctrl->f2, DRAM_TIMING_HIGH);
	dth &= ~(DTH_TRWT_MASK << DTH_TRWT_SHIFT);
	dth |= ((clocks - DTH_TRWT_BASE) << DTH_TRWT_SHIFT);
	pci_write_config32(ctrl->f2, DRAM_TIMING_HIGH, dth);

static void set_Twcl(const struct mem_controller *ctrl, const struct mem_param *param)
	/* Memory Clocks after CAS# */
	if (is_registered(ctrl)) {
	dth = pci_read_config32(ctrl->f2, DRAM_TIMING_HIGH);
	dth &= ~(DTH_TWCL_MASK << DTH_TWCL_SHIFT);
	dth |= ((clocks - DTH_TWCL_BASE) << DTH_TWCL_SHIFT);
	pci_write_config32(ctrl->f2, DRAM_TIMING_HIGH, dth);
static void set_read_preamble(const struct mem_controller *ctrl, const struct mem_param *param)
	unsigned rdpreamble;
	divisor = param->divisor;
	dch = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH);
	dch &= ~(DCH_RDPREAMBLE_MASK << DCH_RDPREAMBLE_SHIFT);
	if (is_registered(ctrl)) {
		if (divisor == ((10 << 1) + 0)) {
			rdpreamble = ((9 << 1) + 0);
		else if (divisor == ((7 << 1) + 1)) {
			rdpreamble = ((8 << 1) + 0);
		else if (divisor == ((6 << 1) + 0)) {
			rdpreamble = ((7 << 1) + 1);
		else if (divisor == ((5 << 1) + 0)) {
			rdpreamble = ((7 << 1) + 0);
	for(i = 0; i < 4; i++) {
		if (ctrl->channel0[i]) {
	if (divisor == ((10 << 1) + 0)) {
		rdpreamble = ((9 << 1) + 0);
		rdpreamble = ((14 << 1) + 0);
	else if (divisor == ((7 << 1) + 1)) {
		rdpreamble = ((7 << 1) + 0);
		rdpreamble = ((11 << 1) + 0);
	else if (divisor == ((6 << 1) + 0)) {
		rdpreamble = ((7 << 1) + 0);
		rdpreamble = ((9 << 1) + 0);
	else if (divisor == ((5 << 1) + 0)) {
		rdpreamble = ((5 << 1) + 0);
		rdpreamble = ((7 << 1) + 0);
	if ((rdpreamble < DCH_RDPREAMBLE_MIN) || (rdpreamble > DCH_RDPREAMBLE_MAX)) {
		die("Unknown rdpreamble");
	dch |= (rdpreamble - DCH_RDPREAMBLE_BASE) << DCH_RDPREAMBLE_SHIFT;
	pci_write_config32(ctrl->f2, DRAM_CONFIG_HIGH, dch);
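
/* Encoding sketch (an assumption, inferred from the divisor style
 * above): the rdpreamble constants appear to use the same half-ns
 * convention, so ((9 << 1) + 0) would be 9.0ns and ((7 << 1) + 1)
 * would be 7.5ns, with DCH_RDPREAMBLE_BASE subtracted before the
 * value is shifted into DRAM Config High.
 */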
static void set_max_async_latency(const struct mem_controller *ctrl, const struct mem_param *param)
	dimms = count_dimms(ctrl);
	dch = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH);
	dch &= ~(DCH_ASYNC_LAT_MASK << DCH_ASYNC_LAT_SHIFT);
	if (is_registered(ctrl)) {
		die("Too many unbuffered dimms");
	else if (dimms == 3) {
	dch |= ((async_lat - DCH_ASYNC_LAT_BASE) << DCH_ASYNC_LAT_SHIFT);
	pci_write_config32(ctrl->f2, DRAM_CONFIG_HIGH, dch);

static void set_idle_cycle_limit(const struct mem_controller *ctrl, const struct mem_param *param)
	/* AMD says to hardcode this */
	dch = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH);
	dch &= ~(DCH_IDLE_LIMIT_MASK << DCH_IDLE_LIMIT_SHIFT);
	dch |= DCH_IDLE_LIMIT_16 << DCH_IDLE_LIMIT_SHIFT;
	dch |= DCH_DYN_IDLE_CTR_EN;
	pci_write_config32(ctrl->f2, DRAM_CONFIG_HIGH, dch);
static long spd_set_dram_timing(const struct mem_controller *ctrl, const struct mem_param *param, long dimm_mask)
	init_Tref(ctrl, param);
	for(i = 0; i < DIMM_SOCKETS; i++) {
		if (!(dimm_mask & (1 << i))) {
		/* DRAM Timing Low Register */
		if ((rc = update_dimm_Trc (ctrl, param, i)) <= 0) goto dimm_err;
		if ((rc = update_dimm_Trfc(ctrl, param, i)) <= 0) goto dimm_err;
		if ((rc = update_dimm_Trcd(ctrl, param, i)) <= 0) goto dimm_err;
		if ((rc = update_dimm_Trrd(ctrl, param, i)) <= 0) goto dimm_err;
		if ((rc = update_dimm_Tras(ctrl, param, i)) <= 0) goto dimm_err;
		if ((rc = update_dimm_Trp (ctrl, param, i)) <= 0) goto dimm_err;
		/* DRAM Timing High Register */
		if ((rc = update_dimm_Tref(ctrl, param, i)) <= 0) goto dimm_err;
		/* DRAM Config Low */
		if ((rc = update_dimm_x4 (ctrl, param, i)) <= 0) goto dimm_err;
		if ((rc = update_dimm_ecc(ctrl, param, i)) <= 0) goto dimm_err;
		dimm_mask = disable_dimm(ctrl, i, dimm_mask);
	/* DRAM Timing Low Register */
	set_Twr(ctrl, param);
	/* DRAM Timing High Register */
	set_Twtr(ctrl, param);
	set_Trwt(ctrl, param);
	set_Twcl(ctrl, param);
	/* DRAM Config High */
	set_read_preamble(ctrl, param);
	set_max_async_latency(ctrl, param);
	set_idle_cycle_limit(ctrl, param);
static void sdram_set_spd_registers(const struct mem_controller *ctrl)
	struct spd_set_memclk_result result;
	const struct mem_param *param;
	if (!controller_present(ctrl)) {
//		print_debug("No memory controller present\r\n");
	hw_enable_ecc(ctrl);
	activate_spd_rom(ctrl);
	dimm_mask = spd_detect_dimms(ctrl);
	if (!(dimm_mask & ((1 << DIMM_SOCKETS) - 1))) {
		print_debug("No memory for this cpu\r\n");
	dimm_mask = spd_enable_2channels(ctrl, dimm_mask);
	dimm_mask = spd_set_ram_size(ctrl, dimm_mask);
	dimm_mask = spd_handle_unbuffered_dimms(ctrl, dimm_mask);
	result = spd_set_memclk(ctrl, dimm_mask);
	param = result.param;
	dimm_mask = result.dimm_mask;
	dimm_mask = spd_set_dram_timing(ctrl, param, dimm_mask);
	/* Unrecoverable error reading SPD data */
	print_err("SPD error - reset\r\n");
#define TIMEOUT_LOOPS 300000
static void sdram_enable(int controllers, const struct mem_controller *ctrl)
	/* Error if I don't have memory */
	if (memory_end_k(ctrl, controllers) == 0) {
		die("No memory\r\n");
	/* Before enabling memory, start the memory clocks */
	for(i = 0; i < controllers; i++) {
		if (!controller_present(ctrl + i))
		dch = pci_read_config32(ctrl[i].f2, DRAM_CONFIG_HIGH);
		if (dch & (DCH_MEMCLK_EN0|DCH_MEMCLK_EN1|DCH_MEMCLK_EN2|DCH_MEMCLK_EN3)) {
			dch |= DCH_MEMCLK_VALID;
			pci_write_config32(ctrl[i].f2, DRAM_CONFIG_HIGH, dch);
		/* Disable DRAM receivers */
		dcl = pci_read_config32(ctrl[i].f2, DRAM_CONFIG_LOW);
		dcl |= DCL_DisInRcvrs;
		pci_write_config32(ctrl[i].f2, DRAM_CONFIG_LOW, dcl);
	/* And if necessary toggle the reset on the dimms by hand */
	memreset(controllers, ctrl);
	for(i = 0; i < controllers; i++) {
		if (!controller_present(ctrl + i))
		/* Skip everything if I don't have any memory on this controller */
		dch = pci_read_config32(ctrl[i].f2, DRAM_CONFIG_HIGH);
		if (!(dch & DCH_MEMCLK_VALID)) {
		/* Toggle DisDqsHys to get it working */
		dcl = pci_read_config32(ctrl[i].f2, DRAM_CONFIG_LOW);
		if (dcl & DCL_DimmEccEn) {
			print_spew("ECC enabled\r\n");
			mnc = pci_read_config32(ctrl[i].f3, MCA_NB_CONFIG);
			if (dcl & DCL_128BitEn) {
				mnc |= MNC_CHIPKILL_EN;
			pci_write_config32(ctrl[i].f3, MCA_NB_CONFIG, mnc);
		dcl |= DCL_DisDqsHys;
		pci_write_config32(ctrl[i].f2, DRAM_CONFIG_LOW, dcl);
		dcl &= ~DCL_DisDqsHys;
		dcl &= ~DCL_DLL_Disable;
		dcl |= DCL_DramInit;
		pci_write_config32(ctrl[i].f2, DRAM_CONFIG_LOW, dcl);
	for(i = 0; i < controllers; i++) {
		if (!controller_present(ctrl + i))
		/* Skip everything if I don't have any memory on this controller */
		dch = pci_read_config32(ctrl[i].f2, DRAM_CONFIG_HIGH);
		if (!(dch & DCH_MEMCLK_VALID)) {
		print_debug("Initializing memory: ");
		dcl = pci_read_config32(ctrl[i].f2, DRAM_CONFIG_LOW);
		if ((loops & 1023) == 0) {
		} while(((dcl & DCL_DramInit) != 0) && (loops < TIMEOUT_LOOPS));
		if (loops >= TIMEOUT_LOOPS) {
			print_debug(" failed\r\n");
		if (!is_cpu_pre_c0()) {
			/* Wait until it is safe to touch memory */
			dcl &= ~(DCL_MemClrStatus | DCL_DramEnable);
			pci_write_config32(ctrl[i].f2, DRAM_CONFIG_LOW, dcl);
			dcl = pci_read_config32(ctrl[i].f2, DRAM_CONFIG_LOW);
			} while(((dcl & DCL_MemClrStatus) == 0) || ((dcl & DCL_DramEnable) == 0));
		// Initialize the E0 memory hole here
#if K8_E0_MEM_HOLE_SIZEK != 0
		if (!is_cpu_pre_e0()) {
			uint32_t base, limit;
			unsigned base_k, limit_k;
			base = pci_read_config32(ctrl->f1, 0x40 + (i << 3));
			limit = pci_read_config32(ctrl->f1, 0x44 + (i << 3));
			base_k = (base & 0xffff0000) >> 2;
			limit_k = ((limit + 0x00010000) & 0xffff0000) >> 2;
			if ((base_k <= K8_E0_MEM_HOLE_BASEK) && (limit_k > K8_E0_MEM_HOLE_BASEK)) {
				set_e0_mem_hole(ctrl + i, base_k);
		print_debug(" done\r\n");
	// FIXME: add node interleaving enable here --yhlu
	/* 1. Check how many nodes we have; if not all have RAM installed, get out.
	 * 2. Check that cs_base low is 0 on node 0 (f2 0x40...); if any chip select
	 *    is not using the low bits as CS_BASE, get out.
	 * 3. Check that the other nodes match node 0's f2 0x40... settings; otherwise get out.
	 * 4. If all are ready, enable node interleaving in f1 0x40... of every node.
	 * 5. For node interleaving we need to set the mem hole on every node
	 *    (need to recalculate the hole offset, reg 0xf0, for every node).
	 * A hedged sketch of steps 2-3 follows below.
	 */
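
/* A hedged sketch (illustration only, not wired into the flow) of the
 * chip-select comparison in steps 2-3 above: node n's DRAM controller
 * is device 0x18 + n, function 2, and the eight CS base registers live
 * at 0x40..0x5c.
 */
#if 0
static int nodes_have_matching_cs(int nodes)
{
	unsigned reg;
	int node;
	for(node = 1; node < nodes; node++) {
		for(reg = 0x40; reg <= 0x5c; reg += 4) {
			if (pci_read_config32(PCI_DEV(0, 0x18 + node, 2), reg) !=
			    pci_read_config32(PCI_DEV(0, 0x18, 2), reg)) {
				return 0;	/* mismatch: do not interleave */
			}
		}
	}
	return 1;
}
#endif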
	/* Make certain the first 1M of memory is initialized */
	print_debug("Clearing initial memory region: ");
	/* Use write combine caching while we set up the first 1M */
	cache_lbmem(MTRR_TYPE_WRCOMB);
	/* Clear the first CONFIG_LB_MEM_TOPK KB of memory */
	clear_memory((void *)0, CONFIG_LB_MEM_TOPK << 10);
	/* The first 1M is now set up, use it */
	cache_lbmem(MTRR_TYPE_WRBACK);
	print_debug(" done\r\n");