2 * This file is part of the coreboot project.
4 * Copyright (C) 2005 YingHai Lu
5 * Copyright (C) 2008 Advanced Micro Devices, Inc.
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; version 2 of the License.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
/* Verbosity threshold for the print_debug_dqs* helpers: a message is printed only when DQS_TRAIN_DEBUG is greater than the level passed by the caller. */
21 //0: means no debug info
22 #define DQS_TRAIN_DEBUG 0
/*
 * Print the label `str` followed by `val` in hex at BIOS_DEBUG level.
 * The printk is compiled in only when DQS_TRAIN_DEBUG > 0 and runs only
 * when DQS_TRAIN_DEBUG > level, so higher `level` values are more verbose.
 */
24 static inline void print_debug_dqs(const char *str, unsigned val, unsigned level)
26 #if DQS_TRAIN_DEBUG > 0
27 if(DQS_TRAIN_DEBUG > level) {
28 printk(BIOS_DEBUG, "%s%x\n", str, val);
/*
 * Print two label/value pairs on one line, each value as 8 hex digits.
 * Same gating as print_debug_dqs: compiled in when DQS_TRAIN_DEBUG > 0,
 * printed when DQS_TRAIN_DEBUG > level.
 */
33 static inline void print_debug_dqs_pair(const char *str, unsigned val, const char *str2, unsigned val2, unsigned level)
35 #if DQS_TRAIN_DEBUG > 0
36 if(DQS_TRAIN_DEBUG > level) {
37 printk(BIOS_DEBUG, "%s%08x%s%08x\n", str, val, str2, val2);
/*
 * Print "str[i]=" followed by val and val2 as two back-to-back 8-digit hex
 * fields (i is printed as 2 hex digits). Gated by DQS_TRAIN_DEBUG > level.
 * NOTE(review): the name suggests val/val2 are the high/low halves of a TSC
 * reading - confirm against callers.
 */
42 static inline void print_debug_dqs_tsc(const char *str, unsigned i, unsigned val, unsigned val2, unsigned level)
44 #if DQS_TRAIN_DEBUG > 0
45 if(DQS_TRAIN_DEBUG > level) {
46 printk(BIOS_DEBUG, "%s[%02x]=%08x%08x\n", str, i, val, val2);
/*
 * Same output format as print_debug_dqs_tsc, but takes no level argument;
 * the visible printk is not guarded by a DQS_TRAIN_DEBUG level check.
 */
51 static inline void print_debug_dqs_tsc_x(const char *str, unsigned i, unsigned val, unsigned val2)
53 printk(BIOS_DEBUG, "%s[%02x]=%08x%08x\n", str, i, val, val2);
/*
 * Cache per-node memory controller registers in sysinfo so the training
 * code can work from the cached copies:
 *   mem_base[nodeid]    <- function 1, offset 0x40 + nodeid * 8
 *   cs_base[nodeid*8+i] <- function 2, offset 0x40 + i * 4 (one slot per chip select)
 *   hole_reg[nodeid]    <- function 1, offset 0xf0
 */
57 static void fill_mem_cs_sysinfo(unsigned nodeid, const struct mem_controller *ctrl, struct sys_info *sysinfo)
61 sysinfo->mem_base[nodeid] = pci_read_config32(ctrl->f1, 0x40 + (nodeid<<3));
64 sysinfo->cs_base[nodeid*8+i] = pci_read_config32(ctrl->f2, 0x40 + (i<<2));
67 sysinfo->hole_reg[nodeid] = pci_read_config32(ctrl->f1, 0xf0);
/*
 * Compute the test address for chip select `cs_idx` on ctrl's node, using
 * the register copies cached in sysinfo (cs_base, mem_base, hole_reg).
 *
 * The node base is masked with 0xffff0000, and an offset of 1 << (20-8) is
 * added to stay clear of the first 1MB. When CONFIG_HW_MEM_HOLE_SIZEK != 0,
 * an address at or above the hole start and below (4*1024*1024)<<2 is moved
 * up by (4*1024*1024 - hole_startk)<<2.
 *
 * NOTE(review): the (20-8) shift, together with callers passing addr>>24 to
 * set_FSBASE and addr<<8 as the offset, indicates the value is in 256-byte
 * units (address bits 39:8) - confirm.
 */
70 static unsigned Get_MCTSysAddr(const struct mem_controller *ctrl, unsigned cs_idx, struct sys_info *sysinfo)
74 unsigned nodeid = ctrl->node_id;
76 #if CONFIG_HW_MEM_HOLE_SIZEK != 0
80 //get the local base addr of the chipselect
81 dword = sysinfo->cs_base[nodeid * 8 + cs_idx];
84 //sys addr= node base + local cs base
85 mem_base = sysinfo->mem_base[nodeid];
86 mem_base &= 0xffff0000;
89 #if CONFIG_HW_MEM_HOLE_SIZEK != 0
90 hole_reg = sysinfo->hole_reg[nodeid];
// hole start comes from bits 31:24 of the hole register; >> 10 leaves it in KB
93 hole_startk = (hole_reg & (0xff<<24)) >> 10;
94 if( (dword >= (hole_startk<<2)) && (dword < ((4*1024*1024)<<2))) {
95 dword += ((4*1024*1024 - hole_startk)<<2);
100 //add 1MB offset to avoid compat area
101 dword += (1<<(20-8));
103 //So final result is upper 32 bit addr
/*
 * Test address for receiver-enable training of chip select `cs_idx`.
 * Simply forwards to Get_MCTSysAddr; `channel` is not used here.
 */
109 static unsigned Get_RcvrSysAddr(const struct mem_controller * ctrl, unsigned channel, unsigned cs_idx, struct sys_info *sysinfo)
111 return Get_MCTSysAddr(ctrl, cs_idx, sysinfo);
/* Read control register CR4 into a general-purpose register with a 32-bit movl. */
115 static inline unsigned long read_cr4(void)
118 asm volatile ("movl %%cr4, %0" : "=r" (cr4));
/* Load control register CR4 with `cr4` using a 32-bit movl. */
122 static inline void write_cr4(unsigned long cr4)
124 asm volatile ("movl %0, %%cr4" : : "r" (cr4));
/* NOTE(review): presumably turns on SSE/SSE2 instruction support via CR4 (see read_cr4/write_cr4) so the test-pattern writers can use XMM stores - confirm which CR4 bits are set. */
128 static inline void enable_sse2(void)
/* NOTE(review): presumably the inverse of enable_sse2, clearing the CR4 bits it set - confirm. */
136 static inline void disable_sse2(void)
/*
 * Read-modify-write of MSR 0xc0010015.
 * NOTE(review): by its name this sets the Wrap32Dis bit so that FS-relative
 * accesses above 4GB do not wrap; confirm which bit is modified.
 */
145 static void set_wrap32dis(void) {
148 msr = rdmsr(0xc0010015);
151 wrmsr(0xc0010015, msr);
/*
 * Read-modify-write of MSR 0xc0010015; counterpart of set_wrap32dis.
 * NOTE(review): by its name this clears the Wrap32Dis bit - confirm.
 */
155 static void clear_wrap32dis(void) {
158 msr = rdmsr(0xc0010015);
161 wrmsr(0xc0010015, msr);
/*
 * Program the FS segment base MSR (0xc0000100) so that later %fs-prefixed
 * accesses reach the memory under test.
 * NOTE(review): addr_hi is presumably placed in the upper half of the MSR
 * (callers pass address bits 39:32) - confirm how msr.lo/msr.hi are filled.
 */
165 static void set_FSBASE(uint32_t addr_hi)
169 //set fs and use fs prefix to access the mem
172 wrmsr(0xc0000100, msr); //FS_BASE
/*
 * Report whether chip select `cs_idx` on ctrl's node is populated, based on
 * the cached cs_base entry sysinfo->cs_base[nodeid * 8 + cs_idx].
 * NOTE(review): presumably tests the enable bit of that register and returns
 * non-zero when set - confirm the exact mask.
 */
176 static unsigned ChipSelPresent(const struct mem_controller *ctrl, unsigned cs_idx, struct sys_info *sysinfo)
179 unsigned nodeid = ctrl->node_id;
182 enabled = sysinfo->cs_base[nodeid * 8 + cs_idx];
/*
 * Whether the rank behind chip select `cs_idx` should be trained.
 * Forwards to ChipSelPresent; `channel` and `is_Width128` are not used here.
 */
189 static unsigned RcvrRankEnabled(const struct mem_controller *ctrl, int channel, int cs_idx, unsigned is_Width128, struct sys_info *sysinfo)
191 return ChipSelPresent(ctrl, cs_idx, sysinfo);
/*
 * Copy test-pattern data from buf_a to %fs:addr_lo using 16-byte SSE2
 * transfers: an aligned load (movdqa) into xmm0 followed by a non-temporal
 * store (movntdq) to the FS-relative destination.
 * The asm receives addr_lo in eax, the constant 16 in edx, line_num * 4 in
 * ecx and buf_a in ebx.
 * NOTE(review): ecx = line_num * 4 with a 16-byte step implies line_num
 * 64-byte cache lines are written; buf_a must be 16-byte aligned for movdqa
 * and FS base must already be set by the caller - confirm loop structure.
 */
194 static void WriteLNTestPattern(unsigned addr_lo, uint8_t *buf_a, unsigned line_num)
198 "movdqa (%3), %%xmm0\n\t"
199 "movntdq %%xmm0, %%fs:(%0)\n\t" /* xmm0 is 128 bit */
204 :: "a" (addr_lo), "d" (16), "c" (line_num * 4), "b"(buf_a)
/*
 * Write one cache line of test pattern to `addr`.
 * p selects the source buffer: p == 1 uses buf_b, anything else uses buf_a.
 * addr is split into addr>>24 (passed to set_FSBASE) and addr<<8 (the
 * FS-relative offset handed to WriteLNTestPattern).
 */
210 static void Write1LTestPattern(unsigned addr, unsigned p, uint8_t *buf_a, uint8_t *buf_b)
213 if(p==1) { buf = buf_b; }
214 else { buf = buf_a; }
216 set_FSBASE (addr>>24);
218 WriteLNTestPattern(addr<<8, buf, 1);
/*
 * Touch the cache line at `addr` with a single 32-bit FS-relative load so it
 * is brought into the cache. FS base is set from addr>>24 and the load
 * offset is addr<<8; the loaded value lands in ebx.
 */
221 static void Read1LTestPattern(unsigned addr)
225 set_FSBASE(addr>>24);
227 /* 1st move causes read fill (to exclusive or shared)*/
229 "movl %%fs:(%1), %0\n\t"
230 :"=b"(value): "a" (addr<<8)
/* Training pass selectors passed as `Pass` to TrainRcvrEn / CompareTestPatternQW0. */
238 #define DQS_FIRST_PASS 1
239 #define DQS_SECOND_PASS 2
/*
 * Status codes ORed into the local `Errors` accumulators, plus two tuning
 * constants (RCVREN_MARGIN, MIN_DQS_WNDW = minimum acceptable DQS window).
 * NOTE(review): the SB_* values are ORed in directly (Errors |= SB_xxx), not
 * used as bit positions, so combinations are ambiguous - e.g.
 * SB_NORCVREN | SB_CHA2BRCVREN == 15 == SB_SMALLDQS. Harmless while Errors is
 * only tested for non-zero; confirm before decoding individual codes.
 */
241 #define SB_NORCVREN 11
242 #define RCVREN_MARGIN 6
243 #define SB_SmallRCVR 13
244 #define SB_CHA2BRCVREN 12
245 #define SB_NODQSPOS 14
246 #define MIN_DQS_WNDW 3
247 #define SB_SMALLDQS 15
/*
 * Compare the first quadword read back from `addr` with the expected test
 * pattern, one 32-bit half at a time (low half first; the high half is only
 * checked when the low half matched).
 *
 * The reference data is one of TestPattern0/1/2: on DQS_FIRST_PASS the
 * visible code chooses between TestPattern1 and TestPattern0, otherwise
 * TestPattern2 is used (NOTE(review): the first-pass choice presumably
 * depends on `pattern` - confirm). With a 128-bit wide interface, channel 1
 * data starts 8 bytes into the line, so addr_lo is advanced by 8.
 *
 * Result starts as DQS_FAIL; on DQS_SECOND_PASS the outcome is inverted
 * (see the existing comment at the end of the function).
 */
250 static unsigned CompareTestPatternQW0(unsigned channel, unsigned addr, unsigned pattern, const uint32_t *TestPattern0, const uint32_t *TestPattern1, const uint32_t *TestPattern2, unsigned Pass, unsigned is_Width128)
256 unsigned result = DQS_FAIL;
258 if(Pass == DQS_FIRST_PASS) {
260 test_buf = (uint32_t *)TestPattern1;
263 test_buf = (uint32_t *)TestPattern0;
267 test_buf = (uint32_t *)TestPattern2;
270 set_FSBASE(addr>>24);
274 if(is_Width128 && (channel == 1)) {
275 addr_lo += 8; //second channel
280 "movl %%fs:(%1), %0\n\t"
281 :"=b"(value): "a" (addr_lo)
284 value_test = *test_buf;
287 print_debug_dqs_pair("\t\t\t\t\t\tQW0.lo : test_buf= ", (unsigned)test_buf, " value = ", value_test, 4);
288 print_debug_dqs_pair("\t\t\t\t\t\tQW0.lo : addr_lo = ", addr_lo, " value = ", value, 4);
290 if(value == value_test) {
294 "movl %%fs:(%1), %0\n\t"
295 :"=b"(value): "a" (addr_lo)
297 value_test = *test_buf;
298 print_debug_dqs_pair("\t\t\t\t\t\tQW0.hi : test_buf= ", (unsigned)test_buf, " value = ", value_test, 4);
299 print_debug_dqs_pair("\t\t\t\t\t\tQW0.hi : addr_lo = ", addr_lo, " value = ", value, 4);
301 if(value == value_test){
306 if(Pass == DQS_SECOND_PASS) { // second pass need to be inverted
307 if(result==DQS_PASS) {
/*
 * Program the MaxAsyncLat field of DRAM_CONFIG_HIGH from a receiver-enable
 * delay. `dly` is divided by 20 rounding up (per the existing comments, a
 * conversion from 50ps units to ns), then written to the field as
 * (dly - DCH_MaxAsyncLat_BASE) via a read-modify-write of the register.
 * NOTE(review): the new value is not masked with DCH_MaxAsyncLat_MASK before
 * being ORed in, so an out-of-range dly would spill into neighbouring bits.
 */
319 static void SetMaxAL_RcvrDly(const struct mem_controller *ctrl, unsigned dly)
323 dly += (20-1); // round it
324 dly /= 20; // convert from unit 50ps to 1ns
329 reg = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH);
330 reg &= ~(DCH_MaxAsyncLat_MASK <<DCH_MaxAsyncLat_SHIFT);
331 reg |= ((dly - DCH_MaxAsyncLat_BASE) << DCH_MaxAsyncLat_SHIFT);
332 pci_write_config32(ctrl->f2, DRAM_CONFIG_HIGH, reg);
337 Set the Target range to WT IO (using an IORR overlapping the already existing
338 WB dram type). Use IORR0
/*
 * Point IORR0 at the region containing `addr`: writes the IORR0 base MSR
 * (0xc0010016) and then the IORR0 mask MSR (0xc0010017) with a low dword of
 * 0xfc000800, which the existing comment describes as a 64MB mask.
 * NOTE(review): how `addr` is split across msr.lo/msr.hi for the base MSR
 * should be confirmed.
 */
340 static void SetTargetWTIO(unsigned addr)
345 wrmsr(0xc0010016, msr); //IORR0 BASE
348 msr.lo = 0xfc000800; // 64MB Mask
349 wrmsr(0xc0010017, msr); // IORR0 Mask
/*
 * Undo SetTargetWTIO by rewriting the IORR0 mask MSR (0xc0010017).
 * NOTE(review): presumably writes a zero mask to disable the range - confirm.
 */
352 static void ResetTargetWTIO(void)
358 wrmsr(0xc0010017, msr); // IORR0 Mask
/*
 * Flush the cache line containing `addr` with an FS-relative clflush.
 * FS base is set from addr>>24 before the flush.
 */
361 static void proc_CLFLUSH(unsigned addr)
364 set_FSBASE(addr>>24);
366 /* 1st move causes read fill (to exclusive or shared)*/
368 /* clflush fs:[eax] */
369 "clflush %%fs:(%0)\n\t"
/* NOTE(review): presumably wraps proc_CLFLUSH(addr) between SetTargetWTIO(addr) and ResetTargetWTIO() so the flush is done with the target marked as WT IO - confirm. */
374 static void proc_IOCLFLUSH(unsigned addr)
/*
 * Reset the DCT write pointer by reading an indexed register through
 * function 2 offset 0x98 and writing the same value straight back. This is
 * done twice; the index starts at 0x10.
 * NOTE(review): the second read/write presumably targets the channel B copy
 * of the register (index + 0x20) - confirm.
 */
381 static void ResetDCTWrPtr(const struct mem_controller *ctrl)
384 unsigned index = 0x10;
386 dword = pci_read_config32_index_wait(ctrl->f2, 0x98, index);
387 pci_write_config32_index_wait(ctrl->f2, 0x98, index, dword);
390 dword = pci_read_config32_index_wait(ctrl->f2, 0x98, index);
391 pci_write_config32_index_wait(ctrl->f2, 0x98, index, dword);
/*
 * Return a memory-clock cycle time figure for memory clock selector `i`
 * (0..3, the column index into both tables).
 *
 * T1000_a holds one nominal value per memory clock. TT_a holds, for each row
 * labelled 4..15, the value for each of the four memory clocks. The row index
 * is derived from the CPU frequency ID: the current FID from MSR 0xc0010042
 * when CPUID 0x80000007 EDX bit 1 reports FID control, otherwise the startup
 * FID from bits 29:24 of MSR 0xc0010015 (shifted down by 25, i.e. FID / 2).
 *
 * NOTE(review): the row labels 4..15 presumably are CPU clock multipliers
 * (FID / 2 + 4) - confirm.
 */
396 static uint16_t get_exact_T1000(unsigned i)
399 static const uint16_t T1000_a[]= { 5000, 3759, 3003, 2500 };
401 static const uint16_t TT_a[] = {
403 /*4 */ 6250, 6250, 6250, 6250,
404 /*5 */ 5000, 5000, 5000, 2500,
405 /*6 */ 5000, 4166, 4166, 2500,
406 /*7 */ 5000, 4285, 3571, 2500,
408 /*8 */ 5000, 3750, 3125, 2500,
409 /*9 */ 5000, 3888, 3333, 2500,
410 /*10*/ 5000, 4000, 3000, 2500,
411 /*11*/ 5000, 4090, 3181, 2500,
413 /*12*/ 5000, 3750, 3333, 2500,
414 /*13*/ 5000, 3846, 3076, 2500,
415 /*14*/ 5000, 3928, 3214, 2500,
416 /*15*/ 5000, 4000, 3000, 2500,
422 /* Check for FID control support */
423 struct cpuid_result cpuid1;
424 cpuid1 = cpuid(0x80000007);
425 if( cpuid1.edx & 0x02 ) {
426 /* Use current FID */
428 msr = rdmsr(0xc0010042);
429 fid_cur = msr.lo & 0x3f;
433 /* Use startup FID */
435 msr = rdmsr(0xc0010015);
436 fid_start = (msr.lo & (0x3f << 24));
438 index = fid_start>>25;
/*
 * NOTE(review): TT_a has 12 rows (48 entries, valid row index 0..11), but
 * this guard lets index == 12 through, so TT_a[48 + i] would be read past
 * the end of the table. The check likely should be index > 11 - confirm.
 */
441 if(index>12) return T1000_a[i];
443 return TT_a[index * 4+i];
/*
 * Put the DQS timing registers into a known state before receiver-enable
 * training. Two groups of indexed registers are written through function 2
 * offset 0x98, each for both channels (index and index + 0x20):
 *   indexes 1..3 and 0x21..0x23
 *   indexes 5..7 and 0x25..0x27
 * The values written (0x00 and 0x2f per byte, according to the existing
 * comments) are set up in `dword` before each loop.
 */
447 static void InitDQSPos4RcvrEn(const struct mem_controller *ctrl)
453 for(i=1; i<=3; i++) {
454 /* Program the DQS Write Timing Control Registers (Function 2:Offset 0x9c, index 0x01-0x03, 0x21-0x23) to 0x00 for all bytes */
455 pci_write_config32_index_wait(ctrl->f2, 0x98, i, dword);
456 pci_write_config32_index_wait(ctrl->f2, 0x98, i+0x20, dword);
// NOTE(review): indexes 5..7 match the read direction in SetDQSDelayCSR (direction << 2 with DQS_READDIR == 1), so the comment below probably means the DQS *Read* Timing registers - confirm.
460 for(i=5; i<=7; i++) {
461 /* Program the DQS Write Timing Control Registers (Function 2:Offset 0x9c, index 0x05-0x07, 0x25-0x27) to 0x2f for all bytes */
462 pci_write_config32_index_wait(ctrl->f2, 0x98, i, dword);
463 pci_write_config32_index_wait(ctrl->f2, 0x98, i+0x20, dword);
/* Default the rev F0/F1 workaround to enabled unless the build already defined it; TrainRcvrEn compiles extra handling when this is 1. */
468 #ifndef K8_REV_F_SUPPORT_F0_F1_WORKAROUND
469 #define K8_REV_F_SUPPORT_F0_F1_WORKAROUND 1
/*
 * DQS receiver-enable training for one memory controller.
 *
 * Pass is DQS_FIRST_PASS or DQS_SECOND_PASS. Outline of the visible steps:
 *  - first pass only: InitDQSPos4RcvrEn(), and set DC_DqsRcvEnTrain in DRAM_CTRL
 *  - clear DCL_DimmEccEn in DRAM_CONFIG_LOW for the duration (the previous
 *    state is kept in ecc_bit)
 *  - look up T1000 from the MemClkFreq field of DRAM_CONFIG_HIGH
 *  - build two 16-byte aligned pattern buffers on the stack: TestPattern0 /
 *    TestPattern1 on the first pass, TestPattern2 in both on the second
 *  - for each channel and each receiver pair (receiver = 0, 2, 4, 6) whose
 *    rank is enabled: write the patterns to two test addresses per rank
 *    (the second one 1 << (20+2-8) units further on), then sweep RcvrEnDly
 *    up to 0xaf, programming the delay and MaxAsyncLat, reading the lines
 *    back and comparing, and record the first delay at which the test goes
 *    from fail to pass (RcvrEnDlyRmin)
 *  - store the result in sysinfo->dqs_rcvr_dly_a (indexed by node, channel
 *    and receiver), program it, and track the largest value in CTLRMaxDelay
 *  - afterwards: program MaxAsyncLat from CTLRMaxDelay, rewrite
 *    DRAM_CONFIG_LOW, and on the first pass clear DC_DqsRcvEnTrain
 *
 * Returns 1 when CTLRMaxDelay ends up equal to 0xae, otherwise 0.
 */
472 static unsigned TrainRcvrEn(const struct mem_controller *ctrl, unsigned Pass, struct sys_info *sysinfo)
475 static const uint32_t TestPattern0[] = {
476 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
477 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
478 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
479 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
481 static const uint32_t TestPattern1[] = {
482 0x55555555, 0x55555555, 0x55555555, 0x55555555,
483 0x55555555, 0x55555555, 0x55555555, 0x55555555,
484 0x55555555, 0x55555555, 0x55555555, 0x55555555,
485 0x55555555, 0x55555555, 0x55555555, 0x55555555,
487 static const uint32_t TestPattern2[] = {
488 0x12345678, 0x87654321, 0x23456789, 0x98765432,
489 0x59385824, 0x30496724, 0x24490795, 0x99938733,
490 0x40385642, 0x38465245, 0x29432163, 0x05067894,
491 0x12349045, 0x98723467, 0x12387634, 0x34587623,
494 uint8_t pattern_buf_x[64 * 4 + 16]; // We need to two cache line So have more 16 bytes to keep 16 byte alignment */
495 uint8_t *buf_a, *buf_b;
498 uint8_t *dqs_rcvr_dly_a = &sysinfo->dqs_rcvr_dly_a[ctrl->node_id * 2* 8] ; //8 node, channel 2, receiver 8
502 unsigned channel, receiver;
505 unsigned CTLRMaxDelay;
510 unsigned Test0, Test1;
512 unsigned RcvrEnDlyRmin;
520 unsigned TestAddr0, TestAddr0B, TestAddr1 = 0, TestAddr1B = 0;
522 unsigned CurrRcvrCHADelay = 0;
526 unsigned is_Width128 = sysinfo->meminfo[ctrl->node_id].is_Width128;
528 #if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
532 if(Pass == DQS_FIRST_PASS) {
533 InitDQSPos4RcvrEn(ctrl);
// ECC is switched off while training; the previous DimmEccEn state is kept in ecc_bit
543 dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
544 ecc_bit = dword & DCL_DimmEccEn;
545 dword &= ~(DCL_DimmEccEn);
546 pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dword);
549 if(Pass == DQS_FIRST_PASS) {
550 #if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
551 cpu_f0_f1 = is_cpu_pre_f2_in_bsp(ctrl->node_id);
556 /* Set the DqsRcvEnTrain bit */
557 dword = pci_read_config32(ctrl->f2, DRAM_CTRL);
558 dword |= DC_DqsRcvEnTrain;
559 pci_write_config32(ctrl->f2, DRAM_CTRL, dword);
564 //get T1000 figures (cycle time (ns)) * 1K
565 dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH);
566 dword &= DCH_MemClkFreq_MASK;
568 T1000 = get_exact_T1000(dword);
// buf_a is the next 16-byte boundary above the start of pattern_buf_x (the pointer cast assumes 32-bit addresses); buf_b follows 128 bytes later
571 buf_a = (uint8_t *)(((uint32_t)(&pattern_buf_x[0]) + 0x10) & (0xfffffff0));
572 buf_b = buf_a + 128; //??
573 if(Pass==DQS_FIRST_PASS) {
575 *((uint32_t *)(buf_a + i*4)) = TestPattern0[i];
576 *((uint32_t *)(buf_b + i*4)) = TestPattern1[i];
581 *((uint32_t *)(buf_a + i*4)) = TestPattern2[i];
582 *((uint32_t *)(buf_b + i*4)) = TestPattern2[i];
586 print_debug_dqs("\nTrainRcvEn: 0 ctrl", ctrl->node_id, 0);
588 print_debug_addr("TrainRcvEn: buf_a:", buf_a);
591 /* for each channel */
// dimm_mask low nibble empty and high nibble populated means only channel B has DIMMs
595 if (!(sysinfo->meminfo[ctrl->node_id].dimm_mask & 0x0F) &&
596 (sysinfo->meminfo[ctrl->node_id].dimm_mask & 0xF0)) { /* channelB only? */
600 for ( ; (channel < 2) && (!Errors); channel++)
602 print_debug_dqs("\tTrainRcvEn51: channel ",channel, 1);
605 /* there are four receiver pairs, loosely associated with CS */
606 for( receiver = 0; (receiver < 8) && (!Errors); receiver+=2)
// each receiver pair has its own indexed register, three indexes apart, starting at 0x10
609 unsigned index=(receiver>>1) * 3 + 0x10;
611 print_debug_dqs("\t\tTrainRcvEn52: index ", index, 2);
615 dword = pci_read_config32_index_wait(ctrl->f2, 0x98, index);
616 CurrRcvrCHADelay= dword & 0xff;
// 0xaf doubles as the "no passing delay found" marker checked after the sweep
626 RcvrEnDlyRmin = 0xaf;
628 if(!RcvrRankEnabled(ctrl, channel, receiver, is_Width128, sysinfo)) continue;
630 /* for each DQS receiver enable setting */
632 TestAddr0 = Get_RcvrSysAddr(ctrl, channel, receiver, sysinfo);
634 TestAddr0B = TestAddr0 + (1<<(20+2-8)); // 4MB
636 if(RcvrRankEnabled(ctrl, channel, receiver+1, is_Width128, sysinfo)) {
637 TestAddr1 = Get_RcvrSysAddr(ctrl, channel, receiver+1, sysinfo);
638 TestAddr1B = TestAddr1 + (1<<(20+2-8)); //4MB
645 print_debug_dqs("\t\tTrainRcvEn53: TestAddr0B ", TestAddr0B, 2);
647 Write1LTestPattern(TestAddr0, 0, buf_a, buf_b); // rank0 of dimm, test p0
648 Write1LTestPattern(TestAddr0B, 1, buf_a, buf_b); //rank0 of dimm, test p1
651 Write1LTestPattern(TestAddr1, 0, buf_a, buf_b); //rank 1 of dimm
652 Write1LTestPattern(TestAddr1B, 1, buf_a, buf_b);//rank 1 of dimm
655 if(Pass == DQS_FIRST_PASS) {
// second pass starts the sweep from the delay saved for this channel/receiver
658 RcvrEnDly = dqs_rcvr_dly_a[channel * 8 + receiver];
661 while ( RcvrEnDly < 0xaf) { // Sweep Delay value here
662 print_debug_dqs("\t\t\tTrainRcvEn541: RcvrEnDly ", RcvrEnDly, 3);
665 /* Odd steps get another pattern such that even
666 and odd steps alternate.
667 The pointers to the patterns will be swapped
668 at the end of the loop so they are correspond
679 /* Program current Receiver enable delay */
680 pci_write_config32_index_wait(ctrl->f2, 0x98, index, RcvrEnDly);
681 /* FIXME: 64bit MUX */
684 /* Program current Receiver enable delay channel b */
685 pci_write_config32_index_wait(ctrl->f2, 0x98, index+ 0x20, RcvrEnDly);
688 /* Program the MaxAsyncLat filed with the
689 current DQS receiver enable setting plus 6ns
691 /*Porgram MaxAsyncLat to correspond with current delay */
692 SetMaxAL_RcvrDly(ctrl, RcvrEnDly);
696 Read1LTestPattern(TestAddr0); //Cache Fill
697 /* ROM vs cache compare */
698 Test0 = CompareTestPatternQW0(channel, TestAddr0, PatternA, TestPattern0, TestPattern1, TestPattern2, Pass, is_Width128);
699 proc_IOCLFLUSH(TestAddr0);
703 print_debug_dqs("\t\t\tTrainRcvEn542: Test0 ", Test0, 3);
705 if(Test0 == DQS_PASS) {
707 Read1LTestPattern(TestAddr0B);
708 Test1 = CompareTestPatternQW0(channel, TestAddr0B, PatternB, TestPattern0, TestPattern1, TestPattern2, Pass, is_Width128);
709 proc_IOCLFLUSH(TestAddr0B);
713 print_debug_dqs("\t\t\tTrainRcvEn543: Test1 ", Test1, 3);
715 if(Test1 == DQS_PASS) {
717 Read1LTestPattern(TestAddr1);
718 Test0 = CompareTestPatternQW0(channel, TestAddr1, PatternA, TestPattern0, TestPattern1, TestPattern2, Pass, is_Width128);
719 proc_IOCLFLUSH(TestAddr1);
722 if(Test0 == DQS_PASS) {
723 Read1LTestPattern(TestAddr1B);
724 Test1 = CompareTestPatternQW0(channel, TestAddr1B, PatternB, TestPattern0, TestPattern1, TestPattern2, Pass, is_Width128);
725 proc_IOCLFLUSH(TestAddr1B);
728 if(Test1 == DQS_PASS) {
732 print_debug_dqs("\t\t\tTrainRcvEn544: Test0 ", Test0, 3);
740 print_debug_dqs("\t\t\tTrainRcvEn55: RcvrEnDly ", RcvrEnDly, 3);
// a fail -> pass transition marks the start of the passing window
742 if(CurrTest == DQS_PASS) {
743 if(LastTest == DQS_FAIL) {
744 RcvrEnDlyRmin = RcvrEnDly;
751 /* swap the rank 0 pointers */
753 TestAddr0 = TestAddr0B;
756 /* swap the rank 1 pointers */
758 TestAddr1 = TestAddr1B;
761 print_debug_dqs("\t\t\tTrainRcvEn56: RcvrEnDly ", RcvrEnDly, 3);
767 print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDly ", RcvrEnDly, 2);
// RcvrEnDlyRmin still at its initial 0xaf: no delay in the sweep produced a passing window
769 if(RcvrEnDlyRmin == 0xaf) {
771 Errors |= SB_NORCVREN;
774 if(Pass == DQS_FIRST_PASS) {
775 // We need a better value for DQSPos training
776 RcvrEnDly = RcvrEnDlyRmin /* + RCVREN_MARGIN * T1000/64/50 */;
778 RcvrEnDly = RcvrEnDlyRmin;
781 if(RcvrEnDly > 0xae) {
782 //passing window too narrow, too far delayed
783 Errors |= SB_SmallRCVR;
787 if(Pass == DQS_SECOND_PASS) { //second pass must average values
788 RcvrEnDly += dqs_rcvr_dly_a[channel * 8 + receiver] /* - (RCVREN_MARGIN * T1000/64/50)*/;
// NOTE(review): dqs_rcvr_dly_a is uint8_t, so the stored delay is truncated to 8 bits - confirm the summed second-pass value always fits
792 dqs_rcvr_dly_a[channel * 8 + receiver] = RcvrEnDly;
794 //Set final RcvrEnDly for this DIMM and Channel
795 pci_write_config32_index_wait(ctrl->f2, 0x98, index, RcvrEnDly);
798 pci_write_config32_index_wait(ctrl->f2, 0x98, index+0x20, RcvrEnDly); // channel B
800 pci_write_config32_index_wait(ctrl->f2, 0x98, index, CurrRcvrCHADelay);
// dword becomes the absolute difference between this delay and the channel A delay read earlier
801 if(RcvrEnDly > CurrRcvrCHADelay) {
802 dword = RcvrEnDly - CurrRcvrCHADelay;
805 dword = CurrRcvrCHADelay - RcvrEnDly;
809 Errors |= SB_CHA2BRCVREN;
814 print_debug_dqs("\t\tTrainRcvEn63: RcvrEnDly ", RcvrEnDly, 2);
816 if(RcvrEnDly > CTLRMaxDelay) {
817 CTLRMaxDelay = RcvrEnDly;
820 print_debug_dqs("\t\tTrainRcvEn64: CTLRMaxDelay ", CTLRMaxDelay, 2);
825 print_debug_dqs("\tTrainRcvEn65: CTLRMaxDelay ", CTLRMaxDelay, 1);
827 /* Program the MaxAsysncLat field with the largest DQS Receiver Enable setting */
828 SetMaxAL_RcvrDly(ctrl, CTLRMaxDelay);
// NOTE(review): DimmEccEn is cleared here before the write-back; the saved ecc_bit is presumably ORed back in to restore the original setting - confirm
832 dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
833 dword &= ~(DCL_DimmEccEn);
835 pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dword);
837 if(Pass == DQS_FIRST_PASS) {
838 #if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
842 dword = pci_read_config32(ctrl->f2, DRAM_CTRL);
843 dword &= ~DC_DqsRcvEnTrain;
844 pci_write_config32(ctrl->f2, DRAM_CTRL, dword);
852 //restore SSE2 setting
855 #if CONFIG_MEM_TRAIN_SEQ != 1
856 /* We need tidy output for type 1 */
857 printk(BIOS_DEBUG, " CTLRMaxDelay=%02x\n", CTLRMaxDelay);
// 1 only when the largest trained delay sits exactly at 0xae, the highest value the sweep can reach
860 return (CTLRMaxDelay==0xae)?1:0;
/* Direction selectors for DQS position training; the value is also used arithmetically to pick the register group (direction << 2 / direction * 4) and the slot in the saved-delay array. */
864 #define DQS_READDIR 1
865 #define DQS_WRITEDIR 0
/*
 * Program the DQS delay of a single byte lane.
 *
 * The indexed register is selected as
 *   (bytelane >> 2) + 1 + channel * 0x20 + (direction << 2)
 * i.e. four byte lanes per register, channel B at +0x20 and the read
 * direction at +4. Within the register each lane occupies one byte
 * (shift is multiplied by 8); the 6-bit field at that position is cleared
 * and replaced by dqs_delay via a read-modify-write through F2 offset 0x98.
 *
 * NOTE(review): shift is presumably bytelane modulo 4 before scaling -
 * confirm. dqs_delay is not masked to 6 bits before being ORed in.
 */
868 static void SetDQSDelayCSR(const struct mem_controller *ctrl, unsigned channel, unsigned bytelane, unsigned direction, unsigned dqs_delay)
869 { //ByteLane could be 0-8, last is for ECC
876 index = (bytelane>>2) + 1 + channel * 0x20 + (direction << 2);
881 shift <<= 3; // 8 bit
883 dword = pci_read_config32_index_wait(ctrl->f2, 0x98, index);
884 dword &= ~(0x3f<<shift);
885 dword |= (dqs_delay<<shift);
886 pci_write_config32_index_wait(ctrl->f2, 0x98, index, dword);
/*
 * Program the same DQS delay into every byte lane of a channel/direction.
 * A dword is built with dqs_delay replicated into each byte position
 * (dqs_delay << (i * 8)) and written to consecutive indexed registers
 * starting at 1 + channel * 0x20 + direction * 4.
 * NOTE(review): the number of registers written (presumably three, covering
 * the ECC lane as well) should be confirmed.
 */
890 static void SetDQSDelayAllCSR(const struct mem_controller *ctrl, unsigned channel, unsigned direction, unsigned dqs_delay)
899 dword |= dqs_delay<<(i*8);
902 index = 1 + channel * 0x20 + direction * 4;
905 pci_write_config32_index_wait(ctrl->f2, 0x98, index + i, dword);
/*
 * Return the midpoint of the passing window [min_d, max_d].
 * The result is min_d plus half the window size; an odd window size gets
 * special handling so the midpoint rounds up (see the existing comment).
 * Callers are expected to pass max_d >= min_d, since the subtraction is
 * unsigned.
 */
910 static unsigned MiddleDQS(unsigned min_d, unsigned max_d)
913 size_d = max_d-min_d;
914 if(size_d & 1) { //need round up
917 return ( min_d + (size_d>>1));
/*
 * Record a trained DQS delay in dqs_delay_a. The array is laid out as
 * [channel][direction][bytelane] with 9 byte lanes per direction and
 * 2 directions per channel (18 entries per channel).
 */
920 static inline void save_dqs_delay(unsigned channel, unsigned bytelane, unsigned direction, uint8_t *dqs_delay_a, uint8_t dqs_delay)
922 dqs_delay_a[channel * 2*9 + direction * 9 + bytelane] = dqs_delay;
/*
 * Write the DQS test pattern from buf_a to %fs:addr_lo.
 * The length is (pattern + 1) * 9 lines: 9 for pattern 0, 18 for pattern 1.
 */
925 static void WriteDQSTestPattern(unsigned addr_lo, unsigned pattern , uint8_t *buf_a)
927 WriteLNTestPattern(addr_lo, buf_a, (pattern+1) * 9);
/*
 * Touch 18 consecutive 64-byte cache lines starting at %fs:addr_lo with one
 * 32-bit load each. Five base registers are set 4 lines apart
 * (addr_lo + 128 + k * 256, k = 0..4) and each is used with displacements
 * -128, -64, 0 and +64; the last base only uses -128 and -64, giving lines
 * 16 and 17. The loaded values all go to eax and are discarded.
 */
930 static void ReadL18TestPattern(unsigned addr_lo)
932 //set fs and use fs prefix to access the mem
934 "movl %%fs:-128(%%esi), %%eax\n\t" //TestAddr cache line
935 "movl %%fs:-64(%%esi), %%eax\n\t" //+1
936 "movl %%fs:(%%esi), %%eax\n\t" //+2
937 "movl %%fs:64(%%esi), %%eax\n\t" //+3
939 "movl %%fs:-128(%%edi), %%eax\n\t" //+4
940 "movl %%fs:-64(%%edi), %%eax\n\t" //+5
941 "movl %%fs:(%%edi), %%eax\n\t" //+6
942 "movl %%fs:64(%%edi), %%eax\n\t" //+7
944 "movl %%fs:-128(%%ebx), %%eax\n\t" //+8
945 "movl %%fs:-64(%%ebx), %%eax\n\t" //+9
946 "movl %%fs:(%%ebx), %%eax\n\t" //+10
947 "movl %%fs:64(%%ebx), %%eax\n\t" //+11
949 "movl %%fs:-128(%%ecx), %%eax\n\t" //+12
950 "movl %%fs:-64(%%ecx), %%eax\n\t" //+13
951 "movl %%fs:(%%ecx), %%eax\n\t" //+14
952 "movl %%fs:64(%%ecx), %%eax\n\t" //+15
954 "movl %%fs:-128(%%edx), %%eax\n\t" //+16
955 "movl %%fs:-64(%%edx), %%eax\n\t" //+17
957 :: "a"(0), "b" (addr_lo+128+8*64), "c" (addr_lo+128+12*64), "d" (addr_lo +128+16*64), "S"(addr_lo+128), "D"(addr_lo+128+4*64)
/*
 * Touch 9 consecutive 64-byte cache lines starting at %fs:addr_lo with one
 * 32-bit load each: lines 0-3 via ecx (addr_lo + 128), lines 4-7 via edx
 * (addr_lo + 128 + 4*64) and line 8 via ebx (addr_lo + 128 + 8*64).
 * The loaded values go to eax and are discarded.
 */
962 static void ReadL9TestPattern(unsigned addr_lo)
965 //set fs and use fs prefix to access the mem
968 "movl %%fs:-128(%%ecx), %%eax\n\t" //TestAddr cache line
969 "movl %%fs:-64(%%ecx), %%eax\n\t" //+1
970 "movl %%fs:(%%ecx), %%eax\n\t" //+2
971 "movl %%fs:64(%%ecx), %%eax\n\t" //+3
973 "movl %%fs:-128(%%edx), %%eax\n\t" //+4
974 "movl %%fs:-64(%%edx), %%eax\n\t" //+5
975 "movl %%fs:(%%edx), %%eax\n\t" //+6
976 "movl %%fs:64(%%edx), %%eax\n\t" //+7
978 "movl %%fs:-128(%%ebx), %%eax\n\t" //+8
980 :: "a"(0), "b" (addr_lo+128+8*64), "c"(addr_lo+128), "d"(addr_lo+128+4*64)
/*
 * Read back the DQS test pattern at %fs:addr_lo, using either the 9-line or
 * the 18-line reader depending on `pattern`.
 * NOTE(review): WriteDQSTestPattern writes (pattern + 1) * 9 lines, so
 * pattern 0 presumably selects ReadL9TestPattern and pattern 1
 * ReadL18TestPattern - confirm the condition.
 */
986 static void ReadDQSTestPattern(unsigned addr_lo, unsigned pattern)
989 ReadL9TestPattern(addr_lo);
992 ReadL18TestPattern(addr_lo);
/*
 * clflush the 9 consecutive 64-byte cache lines starting at %fs:addr_lo:
 * lines 0-3 via ecx (addr_lo + 128), lines 4-7 via eax (addr_lo + 128 + 4*64)
 * and line 8 via ebx (addr_lo + 128 + 8*64).
 */
996 static void FlushDQSTestPattern_L9(unsigned addr_lo)
999 "clflush %%fs:-128(%%ecx)\n\t"
1000 "clflush %%fs:-64(%%ecx)\n\t"
1001 "clflush %%fs:(%%ecx)\n\t"
1002 "clflush %%fs:64(%%ecx)\n\t"
1004 "clflush %%fs:-128(%%eax)\n\t"
1005 "clflush %%fs:-64(%%eax)\n\t"
1006 "clflush %%fs:(%%eax)\n\t"
1007 "clflush %%fs:64(%%eax)\n\t"
1009 "clflush %%fs:-128(%%ebx)\n\t"
1011 :: "b" (addr_lo+128+8*64), "c"(addr_lo+128), "a"(addr_lo+128+4*64)
/*
 * clflush the 18 consecutive 64-byte cache lines starting at %fs:addr_lo.
 * Base registers eax, edi, ebx, ecx and edx are set 4 lines apart
 * (addr_lo + 128 + k * 256, k = 0..4); the last one only covers lines 16
 * and 17. The function is explicitly marked noinline.
 */
1015 static __attribute__((noinline)) void FlushDQSTestPattern_L18(unsigned addr_lo)
1018 "clflush %%fs:-128(%%eax)\n\t"
1019 "clflush %%fs:-64(%%eax)\n\t"
1020 "clflush %%fs:(%%eax)\n\t"
1021 "clflush %%fs:64(%%eax)\n\t"
1023 "clflush %%fs:-128(%%edi)\n\t"
1024 "clflush %%fs:-64(%%edi)\n\t"
1025 "clflush %%fs:(%%edi)\n\t"
1026 "clflush %%fs:64(%%edi)\n\t"
1028 "clflush %%fs:-128(%%ebx)\n\t"
1029 "clflush %%fs:-64(%%ebx)\n\t"
1030 "clflush %%fs:(%%ebx)\n\t"
1031 "clflush %%fs:64(%%ebx)\n\t"
1033 "clflush %%fs:-128(%%ecx)\n\t"
1034 "clflush %%fs:-64(%%ecx)\n\t"
1035 "clflush %%fs:(%%ecx)\n\t"
1036 "clflush %%fs:64(%%ecx)\n\t"
1038 "clflush %%fs:-128(%%edx)\n\t"
1039 "clflush %%fs:-64(%%edx)\n\t"
1041 :: "b" (addr_lo+128+8*64), "c" (addr_lo+128+12*64), "d" (addr_lo +128+16*64), "a"(addr_lo+128), "D"(addr_lo+128+4*64)
/*
 * Flush the DQS test pattern lines at %fs:addr_lo from the cache, using the
 * 9-line or 18-line flusher depending on `pattern`.
 * NOTE(review): presumably pattern 0 -> 9 lines, pattern 1 -> 18 lines, to
 * mirror WriteDQSTestPattern - confirm the condition.
 */
1045 static void FlushDQSTestPattern(unsigned addr_lo, unsigned pattern )
1049 FlushDQSTestPattern_L9(addr_lo);
1052 FlushDQSTestPattern_L18(addr_lo);
/*
 * Compare memory at %fs:addr_lo with the reference pattern in buf_a and
 * report which byte lanes matched.
 *
 * bitmap starts at 0xff (all lanes passing). 9*64/4 dwords are read; each is
 * compared one byte at a time against the reference dword, and a mismatch
 * clears the bit for the corresponding `bytelane`.
 *
 * When `pattern` is non-zero and `channel` is non-zero the comparison starts
 * 8 bytes in (second channel's data); with pattern == 1 (dual channel) an
 * extra 8 bytes are skipped to step over the other channel's data.
 *
 * NOTE(review): the return value is presumably `bitmap` (callers AND it into
 * their per-delay pass mask); how bytelane advances per byte should be
 * confirmed.
 */
1056 static unsigned CompareDQSTestPattern(unsigned channel, unsigned addr_lo, unsigned pattern, uint8_t *buf_a)
1059 unsigned bitmap = 0xff;
1064 uint32_t value_test;
1066 test_buf = (uint32_t *)buf_a;
1069 if(pattern && channel) {
1070 addr_lo += 8; //second channel
1075 for(i=0;i<9*64/4;i++) {
1077 "movl %%fs:(%1), %0\n\t"
1078 :"=b"(value): "a" (addr_lo)
1080 value_test = *test_buf;
1082 print_debug_dqs_pair("\t\t\t\t\t\ttest_buf= ", (unsigned)test_buf, " value = ", value_test, 7);
1083 print_debug_dqs_pair("\t\t\t\t\t\ttaddr_lo = ",addr_lo, " value = ", value, 7);
// compare the four bytes of this dword individually; each byte maps to one byte lane
1085 for(j=0;j<4*8;j+=8) {
1086 if(((value>>j)&0xff) != ((value_test>>j)& 0xff)) {
1087 bitmap &= ~(1<<bytelane);
1093 print_debug_dqs("\t\t\t\t\t\tbitmap = ", bitmap, 7);
1096 if(pattern == 1) { //dual channel
1097 addr_lo += 8; //skip over other channel's data
/*
 * Find and program the best DQS delay for each byte lane of one channel in
 * one direction (DQS_READDIR or DQS_WRITEDIR).
 *
 * Phase 1 - collect results. MutualCSPassW[d] is a per-byte-lane pass bitmap
 * for delay d (48 delays), initialised to 0xff. For every populated chip
 * select: compute its test address, point FS at it, and (for reads) write
 * the pattern once up front. Then for each delay that has not already failed
 * on all lanes: program the delay on all lanes, (for writes) rewrite the
 * pattern, read it back, AND the compare bitmap into MutualCSPassW[d], and
 * flush the lines. A lane therefore passes at d only if it passed on every
 * chip select.
 *
 * Phase 2 - pick the window. For each byte lane 0..7 the longest run of
 * consecutive passing delays is tracked in RnkDlyFilterMin/Max. If
 * RnkDlySeqPassMax stays 0 the lane gets SB_NODQSPOS; if the window is
 * narrower than MIN_DQS_WNDW it gets SB_SMALLDQS; otherwise the midpoint is
 * programmed with SetDQSDelayCSR and recorded with save_dqs_delay.
 *
 * NOTE(review): presumably returns the accumulated Errors value - confirm.
 */
1111 static unsigned TrainDQSPos(const struct mem_controller *ctrl, unsigned channel, unsigned Direction, unsigned Pattern, uint8_t *buf_a, uint8_t *dqs_delay_a, struct sys_info *sysinfo)
1115 unsigned BanksPresent;
1117 unsigned MutualCSPassW[48];
1125 unsigned RnkDlyFilterMax, RnkDlyFilterMin = 0;
1126 unsigned RnkDlySeqPassMax, RnkDlySeqPassMin = 0;
1131 print_debug_dqs("\t\t\tTrainDQSPos begin ", 0, 3);
1133 printk(BIOS_DEBUG, "TrainDQSPos: MutualCSPassW[48] :%p\n", MutualCSPassW);
1135 for(DQSDelay=0; DQSDelay<48; DQSDelay++) {
1136 MutualCSPassW[DQSDelay] = 0xff; // Bitmapped status per delay setting, 0xff=All positions passing (1= PASS)
1139 for(ChipSel = 0; ChipSel < 8; ChipSel++) { //logical register chipselects 0..7
1140 print_debug_dqs("\t\t\t\tTrainDQSPos: 11 ChipSel ", ChipSel, 4);
1141 //FIXME: process 64MUXedMode
1142 if(!ChipSelPresent(ctrl, ChipSel, sysinfo)) continue;
1145 TestAddr = Get_MCTSysAddr(ctrl, ChipSel, sysinfo);
1147 print_debug_dqs("\t\t\t\tTrainDQSPos: 12 TestAddr ", TestAddr, 4);
1149 //set fs and use fs prefix to access the mem
1150 set_FSBASE(TestAddr>>24);
// read training: the pattern only needs to be written once per chip select
1152 if(Direction == DQS_READDIR) {
1153 print_debug_dqs("\t\t\t\tTrainDQSPos: 13 for read so write at first", 0, 4);
1154 WriteDQSTestPattern(TestAddr<<8, Pattern, buf_a);
1157 for(DQSDelay = 0; DQSDelay < 48; DQSDelay++ ){
1158 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 141 DQSDelay ", DQSDelay, 5);
1159 if(MutualCSPassW[DQSDelay] == 0) continue; //skip current delay value if other chipselects have failed all 8 bytelanes
1160 SetDQSDelayAllCSR(ctrl, channel, Direction, DQSDelay);
1161 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 142 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);
// write training: the pattern must be rewritten under each candidate delay
1162 if(Direction == DQS_WRITEDIR) {
1163 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 143 for write", 0, 5);
1164 WriteDQSTestPattern(TestAddr<<8, Pattern, buf_a);
1166 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 144 Pattern ", Pattern, 5);
1167 ReadDQSTestPattern(TestAddr<<8, Pattern);
1168 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 145 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);
1169 MutualCSPassW[DQSDelay] &= CompareDQSTestPattern(channel, TestAddr<<8, Pattern, buf_a); //0: fail, 1=pass
1170 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 146 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);
1171 SetTargetWTIO(TestAddr);
1172 FlushDQSTestPattern(TestAddr<<8, Pattern);
1178 for(ByteLane = 0; ByteLane < 8; ByteLane++) {
1179 print_debug_dqs("\t\t\t\tTrainDQSPos: 31 ByteLane ",ByteLane, 4);
1181 LastTest = DQS_FAIL;
1182 RnkDlySeqPassMax = 0;
1183 RnkDlyFilterMax = 0;
1184 RnkDlyFilterMin = 0;
1185 for(DQSDelay=0; DQSDelay<48; DQSDelay++) {
1186 if(MutualCSPassW[DQSDelay] & (1<<ByteLane)) {
1188 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 321 DQSDelay ", DQSDelay, 5);
1189 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 322 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);
1191 RnkDlySeqPassMax = DQSDelay;
1192 if(LastTest == DQS_FAIL) {
1193 RnkDlySeqPassMin = DQSDelay; //start sequential run
// keep the widest run of consecutive passing delays seen so far
1195 if((RnkDlySeqPassMax - RnkDlySeqPassMin)>(RnkDlyFilterMax-RnkDlyFilterMin)){
1196 RnkDlyFilterMin = RnkDlySeqPassMin;
1197 RnkDlyFilterMax = RnkDlySeqPassMax;
1199 LastTest = DQS_PASS;
1202 LastTest = DQS_FAIL;
1205 print_debug_dqs("\t\t\t\tTrainDQSPos: 33 RnkDlySeqPassMax ", RnkDlySeqPassMax, 4);
1207 if(RnkDlySeqPassMax == 0) {
1208 Errors |= SB_NODQSPOS; // no passing window
1211 print_debug_dqs("\t\t\t\tTrainDQSPos: 34 RnkDlyFilterMax ", RnkDlyFilterMax, 4);
1212 print_debug_dqs("\t\t\t\tTrainDQSPos: 34 RnkDlyFilterMin ", RnkDlyFilterMin, 4);
1213 if((RnkDlyFilterMax - RnkDlyFilterMin)< MIN_DQS_WNDW){
1214 Errors |= SB_SMALLDQS;
1217 unsigned middle_dqs;
1218 middle_dqs = MiddleDQS(RnkDlyFilterMin, RnkDlyFilterMax);
1219 print_debug_dqs("\t\t\t\tTrainDQSPos: 35 middle_dqs ",middle_dqs, 4);
1220 SetDQSDelayCSR(ctrl, channel, ByteLane, Direction, middle_dqs);
1221 save_dqs_delay(channel, ByteLane, Direction, dqs_delay_a, middle_dqs);
1227 print_debug_dqs("\t\t\tTrainDQSPos: end", 0xff, 3);
/* Train the read DQS position for `channel`: logs a debug line and returns TrainDQSPos(..., DQS_READDIR, ...). */
1234 static unsigned TrainReadDQS(const struct mem_controller *ctrl, unsigned channel, unsigned pattern, uint8_t *buf_a, uint8_t *dqs_delay_a, struct sys_info *sysinfo)
1236 print_debug_dqs("\t\tTrainReadPos", 0, 2);
1237 return TrainDQSPos(ctrl, channel, DQS_READDIR, pattern, buf_a, dqs_delay_a, sysinfo);
/* Train the write DQS position for `channel`: logs a debug line and returns TrainDQSPos(..., DQS_WRITEDIR, ...). */
1240 static unsigned TrainWriteDQS(const struct mem_controller *ctrl, unsigned channel, unsigned pattern, uint8_t *buf_a, uint8_t *dqs_delay_a, struct sys_info *sysinfo)
1242 print_debug_dqs("\t\tTrainWritePos", 0, 2);
1243 return TrainDQSPos(ctrl, channel, DQS_WRITEDIR, pattern, buf_a, dqs_delay_a, sysinfo);
1248 static unsigned TrainDQSRdWrPos(const struct mem_controller *ctrl, struct sys_info *sysinfo)
1250 static const uint32_t TestPatternJD1a[] = {
1251 0x00000000,0x00000000,0xFFFFFFFF,0xFFFFFFFF, // QW0-1, ALL-EVEN
1252 0x00000000,0x00000000,0x00000000,0x00000000, // QW2-3, ALL-EVEN
1253 0x00000000,0x00000000,0xFFFFFFFF,0xFFFFFFFF, // QW4-5, ALL-EVEN
1254 0x00000000,0x00000000,0x00000000,0x00000000, // QW6-7, ALL-EVEN
1255 0xFeFeFeFe,0xFeFeFeFe,0x01010101,0x01010101, // QW0-1, DQ0-ODD
1256 0xFeFeFeFe,0xFeFeFeFe,0x01010101,0x01010101, // QW2-3, DQ0-ODD
1257 0x01010101,0x01010101,0xFeFeFeFe,0xFeFeFeFe, // QW4-5, DQ0-ODD
1258 0xFeFeFeFe,0xFeFeFeFe,0x01010101,0x01010101, // QW6-7, DQ0-ODD
1259 0x02020202,0x02020202,0x02020202,0x02020202, // QW0-1, DQ1-ODD
1260 0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, // QW2-3, DQ1-ODD
1261 0xFdFdFdFd,0xFdFdFdFd,0x02020202,0x02020202, // QW4-5, DQ1-ODD
1262 0x02020202,0x02020202,0x02020202,0x02020202, // QW6-7, DQ1-ODD
1263 0x04040404,0x04040404,0xfBfBfBfB,0xfBfBfBfB, // QW0-1, DQ2-ODD
1264 0x04040404,0x04040404,0x04040404,0x04040404, // QW2-3, DQ2-ODD
1265 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW4-5, DQ2-ODD
1266 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW6-7, DQ2-ODD
1267 0x08080808,0x08080808,0xF7F7F7F7,0xF7F7F7F7, // QW0-1, DQ3-ODD
1268 0x08080808,0x08080808,0x08080808,0x08080808, // QW2-3, DQ3-ODD
1269 0xF7F7F7F7,0xF7F7F7F7,0x08080808,0x08080808, // QW4-5, DQ3-ODD
1270 0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW6-7, DQ3-ODD
1271 0x10101010,0x10101010,0x10101010,0x10101010, // QW0-1, DQ4-ODD
1272 0xeFeFeFeF,0xeFeFeFeF,0x10101010,0x10101010, // QW2-3, DQ4-ODD
1273 0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW4-5, DQ4-ODD
1274 0xeFeFeFeF,0xeFeFeFeF,0x10101010,0x10101010, // QW6-7, DQ4-ODD
1275 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW0-1, DQ5-ODD
1276 0xdFdFdFdF,0xdFdFdFdF,0x20202020,0x20202020, // QW2-3, DQ5-ODD
1277 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW4-5, DQ5-ODD
1278 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW6-7, DQ5-ODD
1279 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW0-1, DQ6-ODD
1280 0x40404040,0x40404040,0xBfBfBfBf,0xBfBfBfBf, // QW2-3, DQ6-ODD
1281 0x40404040,0x40404040,0xBfBfBfBf,0xBfBfBfBf, // QW4-5, DQ6-ODD
1282 0x40404040,0x40404040,0xBfBfBfBf,0xBfBfBfBf, // QW6-7, DQ6-ODD
1283 0x80808080,0x80808080,0x7F7F7F7F,0x7F7F7F7F, // QW0-1, DQ7-ODD
1284 0x80808080,0x80808080,0x7F7F7F7F,0x7F7F7F7F, // QW2-3, DQ7-ODD
1285 0x80808080,0x80808080,0x7F7F7F7F,0x7F7F7F7F, // QW4-5, DQ7-ODD
1286 0x80808080,0x80808080,0x80808080,0x80808080 // QW6-7, DQ7-ODD
1288 static const uint32_t TestPatternJD1b[] = {
1289 0x00000000,0x00000000,0x00000000,0x00000000, // QW0,CHA-B, ALL-EVEN
1290 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, // QW1,CHA-B, ALL-EVEN
1291 0x00000000,0x00000000,0x00000000,0x00000000, // QW2,CHA-B, ALL-EVEN
1292 0x00000000,0x00000000,0x00000000,0x00000000, // QW3,CHA-B, ALL-EVEN
1293 0x00000000,0x00000000,0x00000000,0x00000000, // QW4,CHA-B, ALL-EVEN
1294 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, // QW5,CHA-B, ALL-EVEN
1295 0x00000000,0x00000000,0x00000000,0x00000000, // QW6,CHA-B, ALL-EVEN
1296 0x00000000,0x00000000,0x00000000,0x00000000, // QW7,CHA-B, ALL-EVEN
1297 0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, // QW0,CHA-B, DQ0-ODD
1298 0x01010101,0x01010101,0x01010101,0x01010101, // QW1,CHA-B, DQ0-ODD
1299 0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, // QW2,CHA-B, DQ0-ODD
1300 0x01010101,0x01010101,0x01010101,0x01010101, // QW3,CHA-B, DQ0-ODD
1301 0x01010101,0x01010101,0x01010101,0x01010101, // QW4,CHA-B, DQ0-ODD
1302 0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, // QW5,CHA-B, DQ0-ODD
1303 0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, // QW6,CHA-B, DQ0-ODD
1304 0x01010101,0x01010101,0x01010101,0x01010101, // QW7,CHA-B, DQ0-ODD
1305 0x02020202,0x02020202,0x02020202,0x02020202, // QW0,CHA-B, DQ1-ODD
1306 0x02020202,0x02020202,0x02020202,0x02020202, // QW1,CHA-B, DQ1-ODD
1307 0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, // QW2,CHA-B, DQ1-ODD
1308 0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, // QW3,CHA-B, DQ1-ODD
1309 0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, // QW4,CHA-B, DQ1-ODD
1310 0x02020202,0x02020202,0x02020202,0x02020202, // QW5,CHA-B, DQ1-ODD
1311 0x02020202,0x02020202,0x02020202,0x02020202, // QW6,CHA-B, DQ1-ODD
1312 0x02020202,0x02020202,0x02020202,0x02020202, // QW7,CHA-B, DQ1-ODD
1313 0x04040404,0x04040404,0x04040404,0x04040404, // QW0,CHA-B, DQ2-ODD
1314 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW1,CHA-B, DQ2-ODD
1315 0x04040404,0x04040404,0x04040404,0x04040404, // QW2,CHA-B, DQ2-ODD
1316 0x04040404,0x04040404,0x04040404,0x04040404, // QW3,CHA-B, DQ2-ODD
1317 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW4,CHA-B, DQ2-ODD
1318 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW5,CHA-B, DQ2-ODD
1319 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW6,CHA-B, DQ2-ODD
1320 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW7,CHA-B, DQ2-ODD
1321 0x08080808,0x08080808,0x08080808,0x08080808, // QW0,CHA-B, DQ3-ODD
1322 0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW1,CHA-B, DQ3-ODD
1323 0x08080808,0x08080808,0x08080808,0x08080808, // QW2,CHA-B, DQ3-ODD
1324 0x08080808,0x08080808,0x08080808,0x08080808, // QW3,CHA-B, DQ3-ODD
1325 0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW4,CHA-B, DQ3-ODD
1326 0x08080808,0x08080808,0x08080808,0x08080808, // QW5,CHA-B, DQ3-ODD
1327 0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW6,CHA-B, DQ3-ODD
1328 0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW7,CHA-B, DQ3-ODD
1329 0x10101010,0x10101010,0x10101010,0x10101010, // QW0,CHA-B, DQ4-ODD
1330 0x10101010,0x10101010,0x10101010,0x10101010, // QW1,CHA-B, DQ4-ODD
1331 0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW2,CHA-B, DQ4-ODD
1332 0x10101010,0x10101010,0x10101010,0x10101010, // QW3,CHA-B, DQ4-ODD
1333 0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW4,CHA-B, DQ4-ODD
1334 0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW5,CHA-B, DQ4-ODD
1335 0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW6,CHA-B, DQ4-ODD
1336 0x10101010,0x10101010,0x10101010,0x10101010, // QW7,CHA-B, DQ4-ODD
1337 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW0,CHA-B, DQ5-ODD
1338 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW1,CHA-B, DQ5-ODD
1339 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW2,CHA-B, DQ5-ODD
1340 0x20202020,0x20202020,0x20202020,0x20202020, // QW3,CHA-B, DQ5-ODD
1341 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW4,CHA-B, DQ5-ODD
1342 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW5,CHA-B, DQ5-ODD
1343 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW6,CHA-B, DQ5-ODD
1344 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW7,CHA-B, DQ5-ODD
1345 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW0,CHA-B, DQ6-ODD
1346 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW1,CHA-B, DQ6-ODD
1347 0x40404040,0x40404040,0x40404040,0x40404040, // QW2,CHA-B, DQ6-ODD
1348 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW3,CHA-B, DQ6-ODD
1349 0x40404040,0x40404040,0x40404040,0x40404040, // QW4,CHA-B, DQ6-ODD
1350 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW5,CHA-B, DQ6-ODD
1351 0x40404040,0x40404040,0x40404040,0x40404040, // QW6,CHA-B, DQ6-ODD
1352 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW7,CHA-B, DQ6-ODD
1353 0x80808080,0x80808080,0x80808080,0x80808080, // QW0,CHA-B, DQ7-ODD
1354 0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F, // QW1,CHA-B, DQ7-ODD
1355 0x80808080,0x80808080,0x80808080,0x80808080, // QW2,CHA-B, DQ7-ODD
1356 0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F, // QW3,CHA-B, DQ7-ODD
1357 0x80808080,0x80808080,0x80808080,0x80808080, // QW4,CHA-B, DQ7-ODD
1358 0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F, // QW5,CHA-B, DQ7-ODD
1359 0x80808080,0x80808080,0x80808080,0x80808080, // QW6,CHA-B, DQ7-ODD
1360 0x80808080,0x80808080,0x80808080,0x80808080 // QW7,CHA-B, DQ7-ODD
1362 uint8_t pattern_buf_x[64 * 18 + 16]; // We need to two cache line So have more 16 bytes to keep 16 byte alignment */
1371 unsigned DQSWrDelay;
1372 unsigned is_Width128 = sysinfo->meminfo[ctrl->node_id].is_Width128;
1373 uint8_t *dqs_delay_a = &sysinfo->dqs_delay_a[ctrl->node_id * 2*2*9]; //channel 2, direction 2 , bytelane *9
1382 dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
1383 ecc_bit = dword & DCL_DimmEccEn;
1384 dword &= ~(DCL_DimmEccEn);
1385 pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dword);
1388 buf_a = (uint8_t *)(((uint32_t)(&pattern_buf_x[0]) + 0x10) & (~0xf));
1392 for(i=0;i<16*18;i++) {
1393 *((uint32_t *)(buf_a + i*4)) = TestPatternJD1b[i];
1398 for(i=0; i<16*9;i++) {
1399 *((uint32_t *)(buf_a + i*4)) = TestPatternJD1a[i];
1404 print_debug_dqs("\nTrainDQSRdWrPos: 0 ctrl ", ctrl->node_id, 0);
1406 printk(BIOS_DEBUG, "TrainDQSRdWrPos: buf_a:%p\n", buf_a);
1411 if (!(sysinfo->meminfo[ctrl->node_id].dimm_mask & 0x0F) &&
1412 (sysinfo->meminfo[ctrl->node_id].dimm_mask & 0xF0)) { /* channelB only? */
1416 while( (channel<2) && (!Errors)) {
1417 print_debug_dqs("\tTrainDQSRdWrPos: 1 channel ",channel, 1);
1418 for(DQSWrDelay = 0; DQSWrDelay < 48; DQSWrDelay++) {
1420 SetDQSDelayAllCSR(ctrl, channel, DQS_WRITEDIR, DQSWrDelay);
1421 print_debug_dqs("\t\tTrainDQSRdWrPos: 21 DQSWrDelay ", DQSWrDelay, 2);
1422 err= TrainReadDQS(ctrl, channel, pattern, buf_a, dqs_delay_a, sysinfo);
1423 print_debug_dqs("\t\tTrainDQSRdWrPos: 22 err ",err, 2);
1428 print_debug_dqs("\tTrainDQSRdWrPos: 3 DQSWrDelay ", DQSWrDelay, 1);
1430 if(DQSWrDelay < 48) {
1431 Errors = TrainWriteDQS(ctrl, channel, pattern, buf_a, dqs_delay_a, sysinfo);
1432 print_debug_dqs("\tTrainDQSRdWrPos: 4 Errors ", Errors, 1);
1437 //FIXME: 64MuxMode??
1438 channel++; // skip channel if 64-bit mode
1443 dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
1444 dword &= ~(DCL_DimmEccEn);
1446 pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dword);
1452 //restore SSE2 setting
1455 print_debug_dqs("TrainDQSRdWrPos: ", 5, 0);
/*
 * Fetch one saved DQS delay value from a delay table.
 * The table is indexed as channel * (2*9) + direction * 9 + bytelane, i.e.
 * two directions of nine byte lanes each per channel.
 */
1460 static inline uint8_t get_dqs_delay(unsigned channel, unsigned bytelane, unsigned direction, uint8_t *dqs_delay_a)
1462 return dqs_delay_a[channel * 2*9 + direction * 9 + bytelane];
/*
 * Interpolate a DQS delay between the saved delays of two byte lanes.
 *
 * Both lane delays are read with get_dqs_delay() for the given channel and
 * Direction. The absolute difference between them is scaled by
 * InterFactor / 256 and added to the smaller of the two delays. When
 * ByteLane0's delay is the larger one, InterFactor is first replaced by
 * 0xff - InterFactor so that the weighting stays relative to ByteLane0.
 *
 * NOTE(review): the declaration of DQSDelay, the else keywords and the final
 * return are not visible in this excerpt.
 */
1465 static unsigned CalcEccDQSPos(unsigned channel,unsigned ByteLane0, unsigned ByteLane1, unsigned InterFactor, unsigned Direction, uint8_t *dqs_delay_a)
1466 /* InterFactor: 0: 100% ByteLane 0
1467 0x80: 50% between ByteLane 0 and 1
1468 0xff: 99.6% ByteLane 1 and 0.4% like 0
1471 unsigned DQSDelay0, DQSDelay1;
1474 DQSDelay0 = get_dqs_delay(channel, ByteLane0, Direction, dqs_delay_a);
1475 DQSDelay1 = get_dqs_delay(channel, ByteLane1, Direction, dqs_delay_a);
// Take the absolute difference; mirror the factor when lane 0 is the larger.
1477 if(DQSDelay0>DQSDelay1) {
1478 DQSDelay = DQSDelay0 - DQSDelay1;
1479 InterFactor = 0xff - InterFactor;
1482 DQSDelay = DQSDelay1 - DQSDelay0;
// Scale the difference by the 8-bit interpolation factor.
1485 DQSDelay *= InterFactor;
1487 DQSDelay >>= 8; // shift by 8 divides by 256 (close to, but not exactly, /255)
// Offset the scaled difference from the smaller of the two lane delays.
1489 if(DQSDelay0>DQSDelay1) {
1490 DQSDelay += DQSDelay1;
1493 DQSDelay += DQSDelay0;
/*
 * Compute a DQS delay from byte lanes 4 and 5 with CalcEccDQSPos(), for each
 * channel (0 and 1) and for the directions listed in direction[], then
 * program it for lane ByteLane with SetDQSDelayCSR() and record it in this
 * node's delay table with save_dqs_delay().
 *
 * NOTE(review): the assignment of ByteLane and the loop over i are not visible
 * in this excerpt; the function name suggests ByteLane is the ECC lane -
 * confirm against the full source.
 */
1500 static void SetEccDQSRdWrPos(const struct mem_controller *ctrl, struct sys_info *sysinfo)
1505 unsigned lane0, lane1, ratio;
1508 unsigned direction[] = { DQS_READDIR, DQS_WRITEDIR };
1510 uint8_t *dqs_delay_a = &sysinfo->dqs_delay_a[ctrl->node_id * 2*2*9]; //this node's slice: 2 channels * 2 directions * 9 byte lanes
1514 for(channel = 0; channel < 2; channel++) {
1516 Direction = direction[i];
/* ratio 0 is the "100% ByteLane 0" end of CalcEccDQSPos's InterFactor scale,
   so the result follows lane 4's delay. */
1517 lane0 = 4; lane1 = 5; ratio = 0;
1518 dqs_delay = CalcEccDQSPos(channel, lane0, lane1, ratio, Direction, dqs_delay_a);
1519 print_debug_dqs_pair("\t\tSetEccDQSRdWrPos: channel ", channel, Direction==DQS_READDIR? " R dqs_delay":" W dqs_delay", dqs_delay, 2);
/* Write the value to the controller and keep the table in sync with it. */
1520 SetDQSDelayCSR(ctrl, channel, ByteLane, Direction, dqs_delay);
1521 save_dqs_delay(channel, ByteLane, Direction, dqs_delay_a, dqs_delay);
/*
 * Run TrainRcvrEn() for one memory controller and the given Pass, bracketed by
 * level-0 DQS debug messages naming the controller's node id.
 *
 * Callers in this file treat a non-zero return value as failure.
 * NOTE(review): the return statements themselves are not visible in this
 * excerpt.
 */
1526 static unsigned train_DqsRcvrEn(const struct mem_controller *ctrl, unsigned Pass, struct sys_info *sysinfo)
1528 print_debug_dqs("\ntrain_DqsRcvrEn: begin ctrl ", ctrl->node_id, 0);
1529 if(TrainRcvrEn(ctrl, Pass, sysinfo)) {
1532 print_debug_dqs("\ntrain_DqsRcvrEn: end ctrl ", ctrl->node_id, 0);
/*
 * Run DQS read/write position training for one memory controller.
 *
 * Calls TrainDQSRdWrPos(); a non-zero result is reported with a BIOS_ERR
 * message naming the controller. SetEccDQSRdWrPos() is then called to set the
 * remaining lane's delays.
 *
 * Callers in this file treat a non-zero return value as failure.
 * NOTE(review): the return statements are not visible in this excerpt, so
 * whether SetEccDQSRdWrPos() is skipped after a failure cannot be confirmed
 * from here.
 */
1536 static unsigned train_DqsPos(const struct mem_controller *ctrl, struct sys_info *sysinfo)
1538 print_debug_dqs("\ntrain_DqsPos: begin ctrl ", ctrl->node_id, 0);
1539 if(TrainDQSRdWrPos(ctrl, sysinfo) != 0) {
1540 printk(BIOS_ERR, "\nDQS Training Rd Wr failed ctrl%02x\n", ctrl->node_id);
1544 SetEccDQSRdWrPos(ctrl, sysinfo);
1546 print_debug_dqs("\ntrain_DqsPos: end ctrl ", ctrl->node_id, 0);
1551 #if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
/*
 * Workaround applied to controllers for which is_cpu_pre_f2_in_bsp() reports
 * true (compiled only when K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1).
 *
 * First loop, per present controller with memory: clear DC_DqsRcvEnTrain in
 * DRAM_CTRL, pulse DI_EnDramInit in DRAM_INIT (set, write, clear, write), and
 * add the caller-supplied tsc0[i] to tsc1[i] to form a 64-bit deadline.
 * Second loop: for the same controllers, spin until the value in tsc is no
 * longer below that deadline.
 *
 * NOTE(review): the declarations of tsc1/tsc/dword/i, the statements that
 * sample the time stamp counter, and the carry handling inside the overflow
 * check are not visible in this excerpt.
 */
1552 static void f0_svm_workaround(int controllers, const struct mem_controller *ctrl, tsc_t *tsc0, struct sys_info *sysinfo)
/* Per-controller flag: non-zero when the workaround applies to that node. */
1555 unsigned cpu_f0_f1[8];
1558 print_debug_addr("dqs_timing: tsc1[8] :", tsc1);
1560 for(i = 0; i < controllers; i++) {
1561 if (!sysinfo->ctrl_present[i])
1564 /* Skip everything if I don't have any memory on this controller */
1565 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1569 cpu_f0_f1[i] = is_cpu_pre_f2_in_bsp(i);
1571 if(!cpu_f0_f1[i]) continue;
/* Turn receiver-enable training mode off on this controller. */
1573 dword = pci_read_config32(ctrl[i].f2, DRAM_CTRL);
1574 dword &= ~DC_DqsRcvEnTrain;
1575 pci_write_config32(ctrl[i].f2, DRAM_CTRL, dword);
/* Pulse EnDramInit: write it set, then write it cleared again. */
1577 dword = pci_read_config32(ctrl[i].f2, DRAM_INIT);
1578 dword |= DI_EnDramInit;
1579 pci_write_config32(ctrl[i].f2, DRAM_INIT, dword);
1580 dword &= ~DI_EnDramInit;
1581 pci_write_config32(ctrl[i].f2, DRAM_INIT, dword);
1584 print_debug_dqs_tsc("begin: tsc1", i, tsc1[i].hi, tsc1[i].lo, 2);
/* 64-bit add of tsc0[i] into tsc1[i]; the comparison detects wrap-around of
   the low 32-bit sum. */
1586 dword = tsc1[i].lo + tsc0[i].lo;
1587 if((dword<tsc1[i].lo) || (dword<tsc0[i].lo)) {
1591 tsc1[i].hi+= tsc0[i].hi;
1593 print_debug_dqs_tsc("end : tsc1", i, tsc1[i].hi, tsc1[i].lo, 2);
1597 for(i = 0; i < controllers; i++) {
1598 if (!sysinfo->ctrl_present[i])
1601 /* Skip everything if I don't have any memory on this controller */
1602 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1604 if(!cpu_f0_f1[i]) continue;
/* Keep looping while the deadline tsc1[i] is still ahead of tsc. */
1610 } while ((tsc1[i].hi>tsc.hi) || ((tsc1[i].hi==tsc.hi) && (tsc1[i].lo>tsc.lo)));
1612 print_debug_dqs_tsc("end : tsc ", i, tsc.hi, tsc.lo, 2);
1620 /* setting variable mtrr, comes from linux kernel source */
/*
 * Program variable MTRR pair `reg` (MTRRphysBase/MTRRphysMask).
 *
 * basek and sizek are in KiB: they are shifted left by 10 to form the low
 * 32 bits of the byte address/size and right by 22 to form the high 32 bits.
 * address_bits is the physical address width; bits above bit 31 of the mask
 * are limited to (address_bits - 32) bits.
 *
 * NOTE(review): the declarations of base/mask/zero, the place where `type` is
 * merged into the base register, the low mask word for ranges of 4 GiB and
 * above, and any cache disable/enable bracketing are not visible in this
 * excerpt.
 */
1621 static void set_var_mtrr_dqs(
1622 unsigned int reg, unsigned long basek, unsigned long sizek,
1623 unsigned char type, unsigned address_bits)
1626 unsigned address_mask_high;
1628 address_mask_high = ((1u << (address_bits - 32u)) - 1u);
1630 base.hi = basek >> 22;
1631 base.lo = basek << 10;
/* 4*1024*1024 KiB == 4 GiB: below that the size fits in the low mask word. */
1633 if (sizek < 4*1024*1024) {
1634 mask.hi = address_mask_high;
1635 mask.lo = ~((sizek << 10) -1);
1638 mask.hi = address_mask_high & (~((sizek >> 22) -1));
1647 zero.lo = zero.hi = 0;
1648 /* The invalid bit is kept in the mask, so we simply clear the
1649 relevant mask register to disable a range. */
1650 wrmsr (MTRRphysMask_MSR(reg), zero);
1652 /* Bit 32-35 of MTRRphysMask should be set to 1 */
/* The range was disabled above; write the base first, then the new mask. */
1655 wrmsr (MTRRphysBase_MSR(reg), base);
1656 wrmsr (MTRRphysMask_MSR(reg), mask);
1661 /* fms: find most significant bit set, stolen from Linux Kernel Source. */
/*
 * Implemented with the x86 bsrl (bit scan reverse) instruction on x.
 * NOTE(review): the asm lines that handle x == 0 and the return of r are not
 * visible in this excerpt.
 */
1662 static inline unsigned int fms(unsigned int x)
1666 __asm__("bsrl %1,%0\n\t"
1669 "1:" : "=r" (r) : "g" (x));
1673 /* fls: find least significant bit set */
/*
 * Implemented with the x86 bsfl (bit scan forward) instruction on x, so in
 * this file "fls" means the LOWEST set bit.
 * NOTE(review): the asm lines that handle x == 0 and the return of r are not
 * visible in this excerpt.
 */
1674 static inline unsigned int fls(unsigned int x)
1678 __asm__("bsfl %1,%0\n\t"
1681 "1:" : "=r" (r) : "g" (x));
/*
 * Cover the range [range_startk, range_startk + range_sizek) (KiB units) with
 * variable MTRRs of the given type, starting at MTRR index `reg`.
 *
 * Each iteration picks an alignment from the lowest set bit of the current
 * start (fls) and the highest set bit of the remaining size (fms), programs
 * one MTRR with set_var_mtrr_dqs(), then advances the start and shrinks the
 * remaining size by the chunk just mapped.
 *
 * Nothing is programmed when range_sizek is 0 or reg is already >= 8.
 * The caller in this file uses the return value as the next MTRR index.
 *
 * NOTE(review): the computation of sizek from align, the clamp when
 * align > max_align, the loop's exit on running out of MTRRs and the return
 * statements are not visible in this excerpt. next_range_startk is not used
 * by any visible line.
 */
1685 static unsigned int range_to_mtrr(unsigned int reg,
1686 unsigned long range_startk, unsigned long range_sizek,
1687 unsigned long next_range_startk, unsigned char type, unsigned address_bits)
1689 if (!range_sizek || (reg >= 8)) {
1692 while(range_sizek) {
1693 unsigned long max_align, align;
1694 unsigned long sizek;
1695 /* Compute the maximum size I can make a range */
1696 max_align = fls(range_startk);
1697 align = fms(range_sizek);
1698 if (align > max_align) {
/* The progress message is compiled out when CONFIG_MEM_TRAIN_SEQ == 1. */
1702 #if CONFIG_MEM_TRAIN_SEQ != 1
1703 printk(BIOS_DEBUG, "Setting variable MTRR %d, base: %4ldMB, range: %4ldMB, type %s\n",
1704 reg, range_startk >>10, sizek >> 10,
1705 (type==MTRR_TYPE_UNCACHEABLE)?"UC":
1706 ((type==MTRR_TYPE_WRBACK)?"WB":"Other")
/* Program this chunk, then move on to the remainder of the range. */
1709 set_var_mtrr_dqs(reg++, range_startk, sizek, type, address_bits);
1710 range_startk += sizek;
1711 range_sizek -= sizek;
/*
 * Write the TOP_MEM2 and TOP_MEM MSRs from tom2_k and tom_k.
 *
 * Both inputs are in KiB. Each is converted to a 64-bit byte address split
 * across msr.lo/msr.hi: the low 22 bits shifted left by 10 form the low word,
 * the upper 10 bits shifted right by 22 form the high word.
 *
 * NOTE(review): the declaration of msr is not visible in this excerpt.
 */
1718 static void set_top_mem_ap(unsigned tom_k, unsigned tom2_k)
1722 /* Now set top of memory */
1723 msr.lo = (tom2_k & 0x003fffff) << 10;
1724 msr.hi = (tom2_k & 0xffc00000) >> 22;
1725 wrmsr(TOP_MEM2, msr);
1727 msr.lo = (tom_k & 0x003fffff) << 10;
1728 msr.hi = (tom_k & 0xffc00000) >> 22;
1729 wrmsr(TOP_MEM, msr);
/*
 * Set up MTRRs so that DQS training can access its test addresses.
 *
 * Visible steps: set SYSCFG_MSR_MtrrFixDramModEn in SYSCFG, prepare the fixed
 * range value for the low 640 KiB, map [0, tom_k) as write-back with variable
 * MTRRs starting at index 2 (40 address bits), and set SYSCFG bits 21 and 22
 * (MtrrTom2En and Tom2ForceMemTypeWB).
 *
 * NOTE(review): the declarations, the fixed-MTRR wrmsr calls and any
 * condition on tom2_k guarding the final SYSCFG update are not visible in
 * this excerpt.
 */
1732 static void setup_mtrr_dqs(unsigned tom_k, unsigned tom2_k)
1738 //still enable from cache_as_ram.inc
1739 msr = rdmsr(SYSCFG_MSR);
1740 msr.lo |= SYSCFG_MSR_MtrrFixDramModEn;
1741 wrmsr(SYSCFG_MSR,msr);
1744 //[0,512k), [512k, 640k)
1745 msr.hi = 0x1e1e1e1e;
/* [0, tom_k) as write-back, using variable MTRRs from index 2 upwards. */
1751 reg = range_to_mtrr(2, 0, tom_k,4*1024*1024, MTRR_TYPE_WRBACK, 40);
1755 //enable tom2 and type
1756 msr = rdmsr(SYSCFG_MSR);
1757 msr.lo |= (1<<21) | (1<<22); //MtrrTom2En and Tom2ForceMemTypeWB
1758 wrmsr(SYSCFG_MSR, msr);
/*
 * Undo the MTRR setup made for DQS training.
 *
 * Visible steps: set SYSCFG_MSR_MtrrFixDramModEn in SYSCFG, iterate over MSRs
 * 0x204 through 0x20f, and clear SYSCFG bits 21 and 22 (MtrrTom2En and
 * Tom2ForceMemTypeWB).
 *
 * NOTE(review): the value written to the fixed-range MSRs and to MSRs
 * 0x204-0x20f is not visible in this excerpt; presumably they are zeroed, and
 * 0x204-0x20f are presumably the variable MTRR pairs from index 2 upwards
 * that setup_mtrr_dqs() programs - confirm. Any use of tom2_k is also not
 * visible here.
 */
1763 static void clear_mtrr_dqs(unsigned tom2_k)
1768 //still enable from cache_as_ram.inc
1769 msr = rdmsr(SYSCFG_MSR);
1770 msr.lo |= SYSCFG_MSR_MtrrFixDramModEn;
1771 wrmsr(SYSCFG_MSR,msr);
1773 //[0,512k), [512k, 640k)
1780 for(i=0x204;i<0x210;i++) {
1786 //disable tom2 and type
1787 msr = rdmsr(SYSCFG_MSR);
1788 msr.lo &= ~((1<<21) | (1<<22)); //MtrrTom2En and Tom2ForceMemTypeWB
1789 wrmsr(SYSCFG_MSR, msr);
/*
 * Read-modify-write one bit of HT_INIT_CONTROL on PCI device 0:(0x18+i).0.
 * Only the least significant bit of val is used; it is OR-ed in at position
 * `bit` and the register is written back.
 *
 * NOTE(review): the declaration of dword and the statement that clears the
 * target bit before the OR are not visible in this excerpt.
 */
1793 static void set_htic_bit(unsigned i, unsigned val, unsigned bit)
1796 dword = pci_read_config32(PCI_DEV(0, 0x18+i, 0), HT_INIT_CONTROL);
1798 dword |= ((val & 1) <<bit);
1799 pci_write_config32(PCI_DEV(0, 0x18+i, 0), HT_INIT_CONTROL, dword);
/*
 * Read HT_INIT_CONTROL on PCI device 0:(0x18+i).0 to test bit `bit`.
 * The caller in this file treats a non-zero return as "bit set".
 *
 * NOTE(review): the masking with `bit` and the return statement are not
 * visible in this excerpt.
 */
1803 static unsigned get_htic_bit(unsigned i, unsigned bit)
1806 dword = pci_read_config32(PCI_DEV(0, 0x18+i, 0), HT_INIT_CONTROL);
/*
 * Return once get_htic_bit(0, 9) reports the flag bit as set. This is the
 * same bit (index 0, bit 9) that set_sysinfo_in_ram() writes.
 *
 * NOTE(review): the enclosing polling loop is not visible in this excerpt.
 */
1811 static void wait_till_sysinfo_in_ram(void)
1814 if(get_htic_bit(0, 9)) return;
/*
 * Publish the "sysinfo is in RAM" flag: store the low bit of val into bit 9
 * of HT_INIT_CONTROL via set_htic_bit(0, val, 9). This is the bit that
 * wait_till_sysinfo_in_ram() tests.
 */
1818 static void set_sysinfo_in_ram(unsigned val)
1820 set_htic_bit(0, val, 9);
1823 #ifdef S3_NVRAM_EARLY
/*
 * Declarations of the early NVRAM save/load helpers used by the DQS
 * save/restore code below. These lines follow the S3_NVRAM_EARLY #ifdef;
 * presumably the definitions then come from elsewhere - confirm.
 * Both take the current NVRAM position and return an int that callers store
 * back as the new position.
 */
1824 int s3_save_nvram_early(u32 dword, int size, int nvram_pos);
1825 int s3_load_nvram_early(int size, u32 *old_dword, int nvram_pos);
/*
 * In-file definition of the early NVRAM save helper.
 * Callers pass a value, its size in bytes and the current NVRAM position, and
 * store the return value as the next position.
 *
 * NOTE(review): the body is not visible in this excerpt; this looks like the
 * fallback used when S3_NVRAM_EARLY is not defined - confirm.
 */
1827 int s3_save_nvram_early(u32 dword, int size, int nvram_pos)
/*
 * In-file definition of the early NVRAM load helper: it calls die() with a
 * message saying no NVRAM loader for DQS data is available. The return of
 * nvram_pos after die() exists only to satisfy the compiler.
 *
 * NOTE(review): this looks like the fallback used when S3_NVRAM_EARLY is not
 * defined - confirm against the hidden preprocessor lines.
 */
1832 int s3_load_nvram_early(int size, u32 *old_dword, int nvram_pos)
1834 die("No memory NVRAM loader for DQS data! Unable to restore memory state\n");
1836 return nvram_pos; /* Make GCC happy */
/*
 * Read one indexed register (index `index` through config offset 0x98 of
 * `dev`) and hand its value to s3_save_nvram_early() with the given size and
 * NVRAM position. Returns whatever s3_save_nvram_early() returns, which
 * callers use as the next NVRAM position.
 */
1840 static int save_index_to_pos(unsigned int dev, int size, int index, int nvram_pos)
1842 u32 dword = pci_read_config32_index_wait(dev, 0x98, index);
1844 return s3_save_nvram_early(dword, size, nvram_pos);
/*
 * Restore one indexed register (index `index` through config offset 0x98 of
 * `dev`) from NVRAM.
 *
 * The current register value is read first and passed by pointer to
 * s3_load_nvram_early(), which may update it; the result is then written
 * back to the same index. Callers store the return value as the next NVRAM
 * position.
 *
 * NOTE(review): the return statement is not visible in this excerpt.
 */
1847 static int load_index_to_pos(unsigned int dev, int size, int index, int nvram_pos)
1850 u32 old_dword = pci_read_config32_index_wait(dev, 0x98, index);
1851 nvram_pos = s3_load_nvram_early(size, &old_dword, nvram_pos);
1852 pci_write_config32_index_wait(dev, 0x98, index, old_dword);
/*
 * Restore one channel's set of indexed DQS registers from NVRAM, in the same
 * order and with the same sizes that dqs_save_MC_NVRAM_ch() uses.
 * The sizes add up to 6*4 + 6*1 = 30 bytes.
 * Callers store the return value as the next NVRAM position.
 *
 * NOTE(review): any scaling of `ch` into a register-index offset and the
 * return statement are not visible in this excerpt.
 */
1856 static int dqs_load_MC_NVRAM_ch(unsigned int dev, int ch, int pos)
1858 /* 30 bytes per channel */
1860 pos = load_index_to_pos(dev, 4, 0x00 + ch, pos);
1861 pos = load_index_to_pos(dev, 4, 0x01 + ch, pos);
1862 pos = load_index_to_pos(dev, 4, 0x02 + ch, pos);
1863 pos = load_index_to_pos(dev, 1, 0x03 + ch, pos);
1864 pos = load_index_to_pos(dev, 4, 0x04 + ch, pos);
1865 pos = load_index_to_pos(dev, 4, 0x05 + ch, pos);
1866 pos = load_index_to_pos(dev, 4, 0x06 + ch, pos);
1867 pos = load_index_to_pos(dev, 1, 0x07 + ch, pos);
1868 pos = load_index_to_pos(dev, 1, 0x10 + ch, pos);
1869 pos = load_index_to_pos(dev, 1, 0x13 + ch, pos);
1870 pos = load_index_to_pos(dev, 1, 0x16 + ch, pos);
1871 pos = load_index_to_pos(dev, 1, 0x19 + ch, pos);
/*
 * Save one channel's set of indexed DQS registers to NVRAM. The index list
 * and sizes must stay in step with dqs_load_MC_NVRAM_ch(), which reads them
 * back in the same order. The sizes add up to 6*4 + 6*1 = 30 bytes.
 * Callers store the return value as the next NVRAM position.
 *
 * NOTE(review): any scaling of `ch` into a register-index offset and the
 * return statement are not visible in this excerpt.
 */
1875 static int dqs_save_MC_NVRAM_ch(unsigned int dev, int ch, int pos)
1877 /* 30 bytes per channel */
1879 pos = save_index_to_pos(dev, 4, 0x00 + ch, pos);
1880 pos = save_index_to_pos(dev, 4, 0x01 + ch, pos);
1881 pos = save_index_to_pos(dev, 4, 0x02 + ch, pos);
1882 pos = save_index_to_pos(dev, 1, 0x03 + ch, pos);
1883 pos = save_index_to_pos(dev, 4, 0x04 + ch, pos);
1884 pos = save_index_to_pos(dev, 4, 0x05 + ch, pos);
1885 pos = save_index_to_pos(dev, 4, 0x06 + ch, pos);
1886 pos = save_index_to_pos(dev, 1, 0x07 + ch, pos);
1887 pos = save_index_to_pos(dev, 1, 0x10 + ch, pos);
1888 pos = save_index_to_pos(dev, 1, 0x13 + ch, pos);
1889 pos = save_index_to_pos(dev, 1, 0x16 + ch, pos);
1890 pos = save_index_to_pos(dev, 1, 0x19 + ch, pos);
/*
 * Save the trained DQS state of one memory controller to NVRAM: the indexed
 * registers of channel 0, then channel 1, then the full 32-bit
 * DRAM_CONFIG_HIGH register (4 bytes), which carries the max async latency.
 * dqs_restore_MC_NVRAM() reads the data back in the same order.
 *
 * NOTE(review): the declarations of pos and reg (and pos's starting value)
 * are not visible in this excerpt.
 */
1894 static void dqs_save_MC_NVRAM(unsigned int dev)
1898 printk(BIOS_DEBUG, "DQS SAVE NVRAM: %x\n", dev);
1899 pos = dqs_save_MC_NVRAM_ch(dev, 0, pos);
1900 pos = dqs_save_MC_NVRAM_ch(dev, 1, pos);
1901 /* save the maxasync lat here */
1902 reg = pci_read_config32(dev, DRAM_CONFIG_HIGH);
1903 pos = s3_save_nvram_early(reg, 4, pos);
/*
 * Restore the trained DQS state of one memory controller from NVRAM, reading
 * in the order dqs_save_MC_NVRAM() wrote: the indexed registers of channel 0,
 * then channel 1, then the 4-byte DRAM_CONFIG_HIGH image.
 *
 * Only the MaxAsyncLat field of the saved image is kept; it is OR-ed into the
 * register's current value and written back.
 *
 * NOTE(review): the declarations of pos and reg (and pos's starting value)
 * are not visible in this excerpt.
 * NOTE(review): the current MaxAsyncLat field is not cleared before the OR,
 * so the result is only exact if that field currently reads as zero - confirm.
 */
1906 static void dqs_restore_MC_NVRAM(unsigned int dev)
1911 printk(BIOS_DEBUG, "DQS RESTORE FROM NVRAM: %x\n", dev);
1912 pos = dqs_load_MC_NVRAM_ch(dev, 0, pos);
1913 pos = dqs_load_MC_NVRAM_ch(dev, 1, pos);
1914 /* load the maxasync lat here */
/* s3_load_nvram_early() takes a u32 pointer: pass the address of reg. */
1915 pos = s3_load_nvram_early(4, &reg, pos);
1916 reg &= (DCH_MaxAsyncLat_MASK <<DCH_MaxAsyncLat_SHIFT);
1917 reg |= pci_read_config32(dev, DRAM_CONFIG_HIGH);
1918 pci_write_config32(dev, DRAM_CONFIG_HIGH, reg);
1921 #if CONFIG_MEM_TRAIN_SEQ == 0
1922 #if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
/*
 * DQS training driver for CONFIG_MEM_TRAIN_SEQ == 0: trains all controllers
 * from one caller. The signature takes an extra tsc0 argument when
 * K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1.
 *
 * Sequence, each step looping over every present controller that has DIMMs:
 *   1. enable MTRRs for the test addresses and capture the memory map with
 *      fill_mem_cs_sysinfo();
 *   2. receiver-enable training, pass 1;
 *   3. (workaround builds only) f0_svm_workaround();
 *   4. DQS position training;
 *   5. receiver-enable training, pass 2, then mark the controller trained
 *      (mem_trained[i] = 1) and save its DQS state to NVRAM.
 * Any training failure jumps to the `out` label. clear_mtrr_dqs() is called
 * near the end of the function.
 *
 * NOTE(review): local declarations, the `continue` lines after the
 * ctrl_present checks, the `out` label itself, the #else/#endif lines and the
 * tsc bookkeeping around each stage are not visible in this excerpt.
 */
1923 static void dqs_timing(int controllers, const struct mem_controller *ctrl, tsc_t *tsc0, struct sys_info *sysinfo)
1925 static void dqs_timing(int controllers, const struct mem_controller *ctrl, struct sys_info *sysinfo)
1932 //need to enable mtrr, so dqs training could access the test address
1933 setup_mtrr_dqs(sysinfo->tom_k, sysinfo->tom2_k);
1935 for(i = 0; i < controllers; i++) {
1936 if (!sysinfo->ctrl_present[ i ])
1939 /* Skip everything if I don't have any memory on this controller */
1940 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1942 fill_mem_cs_sysinfo(i, ctrl+i, sysinfo);
/* Stage: receiver-enable training, pass 1. */
1946 for(i = 0; i < controllers; i++) {
1947 if (!sysinfo->ctrl_present[ i ])
1950 /* Skip everything if I don't have any memory on this controller */
1951 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1953 printk(BIOS_DEBUG, "DQS Training:RcvrEn:Pass1: %02x\n", i);
1954 if(train_DqsRcvrEn(ctrl+i, 1, sysinfo)) goto out;
1955 printk(BIOS_DEBUG, " done\n");
1959 #if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
1960 f0_svm_workaround(controllers, ctrl, tsc0, sysinfo);
/* Stage: DQS read/write position training. */
1964 for(i = 0; i < controllers; i++) {
1965 if (!sysinfo->ctrl_present[i])
1968 /* Skip everything if I don't have any memory on this controller */
1969 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1971 printk(BIOS_DEBUG, "DQS Training:DQSPos: %02x\n", i);
1972 if(train_DqsPos(ctrl+i, sysinfo)) goto out;
1973 printk(BIOS_DEBUG, " done\n");
/* Stage: receiver-enable training, pass 2; success marks the node trained. */
1977 for(i = 0; i < controllers; i++) {
1978 if (!sysinfo->ctrl_present[i])
1981 /* Skip everything if I don't have any memory on this controller */
1982 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1984 printk(BIOS_DEBUG, "DQS Training:RcvrEn:Pass2: %02x\n", i);
1985 if(train_DqsRcvrEn(ctrl+i, 2, sysinfo)) goto out;
1986 printk(BIOS_DEBUG, " done\n");
1987 sysinfo->mem_trained[i]=1;
1988 dqs_save_MC_NVRAM((ctrl+i)->f2);
1993 clear_mtrr_dqs(sysinfo->tom2_k);
1997 print_debug_dqs_tsc_x("DQS Training:tsc", i, tsc[i].hi, tsc[i].lo);
2007 #if CONFIG_MEM_TRAIN_SEQ > 0
/*
 * DQS training driver for CONFIG_MEM_TRAIN_SEQ > 0: trains the single
 * controller `ctrl` belonging to node i.
 *
 * Does nothing unless sysinfo->mem_trained[i] is 0x80 on entry. The three
 * stages run in order; a failing stage records its own code in mem_trained[i]:
 *   0x81  receiver-enable training pass 1 failed
 *   0x82  DQS position training failed
 *   0x83  receiver-enable training pass 2 failed
 * If mem_trained[i] is still 0x80 at the end it is set to 1 (trained).
 * The MTRR setup/teardown calls are compiled only when
 * CONFIG_MEM_TRAIN_SEQ == 1.
 *
 * NOTE(review): local declarations, the control flow that leaves a failed
 * stage, any condition on `v` around the printk calls, and the tsc
 * bookkeeping are not visible in this excerpt.
 */
2009 static void dqs_timing(int i, const struct mem_controller *ctrl, struct sys_info *sysinfo, unsigned v)
2016 if(sysinfo->mem_trained[i] != 0x80) return;
2018 #if CONFIG_MEM_TRAIN_SEQ == 1
2019 //need to enable mtrr, so dqs training could access the test address
2020 setup_mtrr_dqs(sysinfo->tom_k, sysinfo->tom2_k);
2023 fill_mem_cs_sysinfo(i, ctrl, sysinfo);
2028 printk(BIOS_DEBUG, "set DQS timing:RcvrEn:Pass1: %02x\n", i);
2030 if(train_DqsRcvrEn(ctrl, 1, sysinfo)) {
2031 sysinfo->mem_trained[i]=0x81; // receiver-enable pass 1 failed
2036 printk(BIOS_DEBUG, " done\n");
2038 printk(BIOS_DEBUG, "set DQS timing:DQSPos: %02x\n", i);
2041 if(train_DqsPos(ctrl, sysinfo)) {
2042 sysinfo->mem_trained[i]=0x82; // DQS position training failed
2047 printk(BIOS_DEBUG, " done\n");
2050 printk(BIOS_DEBUG, "set DQS timing:RcvrEn:Pass2: %02x\n", i);
2052 if(train_DqsRcvrEn(ctrl, 2, sysinfo)){
2053 sysinfo->mem_trained[i]=0x83; // receiver-enable pass 2 failed
2058 printk(BIOS_DEBUG, " done\n");
2064 #if CONFIG_MEM_TRAIN_SEQ == 1
2065 clear_mtrr_dqs(sysinfo->tom2_k);
2069 for(ii=0;ii<4;ii++) {
2070 print_debug_dqs_tsc_x("Total DQS Training : tsc ", ii, tsc[ii].hi, tsc[ii].lo);
/* Still 0x80 here means no stage recorded a failure code. */
2074 if(sysinfo->mem_trained[i] == 0x80) {
2075 sysinfo->mem_trained[i]=1;
2081 #if CONFIG_MEM_TRAIN_SEQ == 1
/*
 * Train the memory of one node using the local sysinfo copy, then publish the
 * resulting mem_trained status for that node into sysinfox.
 * dqs_timing() is called with v == 0. The two memcpy lines that would also
 * copy the trained delay tables to sysinfox are commented out.
 */
2082 static void train_ram(unsigned nodeid, struct sys_info *sysinfo, struct sys_info *sysinfox)
2084 dqs_timing(nodeid, &sysinfo->ctrl[nodeid], sysinfo, 0); // keep the output tidy
2085 // memcpy(&sysinfox->dqs_rcvr_dly_a[nodeid * 2 * 8],&sysinfo->dqs_rcvr_dly_a[nodeid * 2 * 8], 2*8);
2086 // memcpy(&sysinfox->dqs_delay_a[nodeid * 2 * 2 * 9], &sysinfo->dqs_delay_a[nodeid * 2 * 2 * 9], 2 * 2 * 9);
2087 sysinfox->mem_trained[nodeid] = sysinfo->mem_trained[nodeid];
/*
 * Forward declaration; the definition is not in this excerpt.
 * train_ram_on_node() calls it with its retcall argument when
 * CONFIG_AP_CODE_IN_CAR is non-zero.
 */
2090 static void copy_and_run_ap_code_in_car(unsigned ret_addr);
2091 static inline void train_ram_on_node(unsigned nodeid, unsigned coreid, struct sys_info *sysinfo, unsigned retcall)
2093 if(coreid) return; // only do it on core0
2094 struct sys_info *sysinfox = (void*)((CONFIG_RAMTOP) - CONFIG_DCACHE_RAM_GLOBAL_VAR_SIZE);
2095 wait_till_sysinfo_in_ram(); // use pci to get it
2097 if(sysinfox->mem_trained[nodeid] == 0x80) {
2099 sysinfo->tom_k = sysinfox->tom_k;
2100 sysinfo->tom2_k = sysinfox->tom2_k;
2101 sysinfo->meminfo[nodeid].is_Width128 = sysinfox->meminfo[nodeid].is_Width128;
2102 sysinfo->mem_trained[nodeid] = sysinfox->mem_trained[nodeid];
2103 memcpy(&sysinfo->ctrl[nodeid], &sysinfox->ctrl[nodeid], sizeof(struct mem_controller));
2105 memcpy(sysinfo, sysinfox, CONFIG_DCACHE_RAM_GLOBAL_VAR_SIZE);
2107 set_top_mem_ap(sysinfo->tom_k, sysinfo->tom2_k); // keep the ap's tom consistent with bsp's
2108 #if CONFIG_AP_CODE_IN_CAR == 0
2109 printk(BIOS_DEBUG, "CODE IN ROM AND RUN ON NODE: %02x\n", nodeid);
2110 train_ram(nodeid, sysinfo, sysinfox);
2112 /* Can copy dqs_timing to ap cache and run from cache?
2113 * we need coreboot_ap_car.rom? and treat it as coreboot_ram.rom for ap ?
2115 copy_and_run_ap_code_in_car(retcall);
2116 // will go back by jump