2 * This file is part of the coreboot project.
4 * Copyright (C) 2005 YingHai Lu
5 * Copyright (C) 2008 Advanced Micro Devices, Inc.
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; version 2 of the License.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
/* Debug verbosity for the DQS training code: the print_debug_dqs* helpers that
 * take a level argument print only when DQS_TRAIN_DEBUG is greater than it. */
21 //0: means no debug info
22 #define DQS_TRAIN_DEBUG 0
/* Build-time guard on CONFIG_USE_PRINTK_IN_CAR (this file calls printk_debug).
 * NOTE(review): the #else/#endif lines belonging to this guard are not visible
 * in this copy -- confirm the #error fires only when the option is off. */
24 #if CONFIG_USE_PRINTK_IN_CAR
26 #error This file needs CONFIG_USE_PRINTK_IN_CAR
/*
 * Print a label followed by one value in hex ("%s%x\r\n").
 *
 * str:   text printed in front of the value
 * val:   value to print
 * level: verbosity threshold; the message is emitted only when
 *        DQS_TRAIN_DEBUG is greater than level
 *
 * The body is compiled out entirely when DQS_TRAIN_DEBUG is 0.
 */
static inline void print_debug_dqs(const char *str, unsigned val, unsigned level)
{
#if DQS_TRAIN_DEBUG > 0
	if (DQS_TRAIN_DEBUG > level) {
		printk_debug("%s%x\r\n", str, val);
	}
#endif
}
/*
 * Print two label/value pairs on one line ("%s%08x%s%08x\r\n").
 *
 * str, val:   first label and its value
 * str2, val2: second label and its value
 * level:      verbosity threshold; the message is emitted only when
 *             DQS_TRAIN_DEBUG is greater than level
 *
 * The body is compiled out entirely when DQS_TRAIN_DEBUG is 0.
 */
static inline void print_debug_dqs_pair(const char *str, unsigned val, const char *str2, unsigned val2, unsigned level)
{
#if DQS_TRAIN_DEBUG > 0
	if (DQS_TRAIN_DEBUG > level) {
		printk_debug("%s%08x%s%08x\r\n", str, val, str2, val2);
	}
#endif
}
/*
 * Print an indexed pair of 32-bit values ("%s[%02x]=%08x%08x\r\n").
 *
 * str:       label
 * i:         index shown between the brackets
 * val, val2: the two values, printed back to back as 8 hex digits each
 * level:     verbosity threshold; the message is emitted only when
 *            DQS_TRAIN_DEBUG is greater than level
 *
 * The body is compiled out entirely when DQS_TRAIN_DEBUG is 0.
 */
static inline void print_debug_dqs_tsc(const char *str, unsigned i, unsigned val, unsigned val2, unsigned level)
{
#if DQS_TRAIN_DEBUG > 0
	if (DQS_TRAIN_DEBUG > level) {
		printk_debug("%s[%02x]=%08x%08x\r\n", str, i, val, val2);
	}
#endif
}
/*
 * Unconditional variant of print_debug_dqs_tsc(): always prints
 * "%s[%02x]=%08x%08x\r\n", regardless of DQS_TRAIN_DEBUG.
 *
 * str:       label
 * i:         index shown between the brackets
 * val, val2: the two values, printed back to back as 8 hex digits each
 */
static inline void print_debug_dqs_tsc_x(const char *str, unsigned i, unsigned val, unsigned val2)
{
	printk_debug("%s[%02x]=%08x%08x\r\n", str, i, val, val2);
}
/*
 * Cache one node's memory-map registers in sysinfo so the training code can
 * work from the copies instead of re-reading PCI config space:
 *   mem_base[nodeid]        <- function 1, offset 0x40 + nodeid*8
 *   cs_base[nodeid*8 + i]   <- function 2, offset 0x40 + i*4
 *   hole_reg[nodeid]        <- function 1, offset 0xf0
 * NOTE(review): the loop that drives i is not visible in this copy; the
 * nodeid*8 stride suggests eight chip selects per node -- confirm.
 */
62 static void fill_mem_cs_sysinfo(unsigned nodeid, const struct mem_controller *ctrl, struct sys_info *sysinfo)
66 sysinfo->mem_base[nodeid] = pci_read_config32(ctrl->f1, 0x40 + (nodeid<<3));
69 sysinfo->cs_base[nodeid*8+i] = pci_read_config32(ctrl->f2, 0x40 + (i<<2));
72 sysinfo->hole_reg[nodeid] = pci_read_config32(ctrl->f1, 0xf0);
/*
 * Build a test address for chip select cs_idx of ctrl's node from the values
 * cached in sysinfo (cs_base, mem_base and, with a hardware memory hole,
 * hole_reg).
 * Callers in this file pass (result >> 24) to set_FSBASE() and use
 * (result << 8) as the low part, so the value appears to be a system address
 * in 256-byte units -- NOTE(review): confirm, several statements (masking of
 * dword, adding mem_base, the return) are not visible in this copy.
 */
75 static unsigned Get_MCTSysAddr(const struct mem_controller *ctrl, unsigned cs_idx, struct sys_info *sysinfo)
79 unsigned nodeid = ctrl->node_id;
81 #if CONFIG_HW_MEM_HOLE_SIZEK != 0
85 //get the local base addr of the chipselect
86 dword = sysinfo->cs_base[nodeid * 8 + cs_idx];
89 //sys addr= node base + local cs base
90 mem_base = sysinfo->mem_base[nodeid];
91 mem_base &= 0xffff0000;
94 #if CONFIG_HW_MEM_HOLE_SIZEK != 0
95 hole_reg = sysinfo->hole_reg[nodeid];
/* Addresses from the hole start up to the 4G mark are pushed up by the hole size. */
/* NOTE(review): 0xff<<24 shifts into the sign bit of an int; 0xffu<<24 would avoid that. */
98 hole_startk = (hole_reg & (0xff<<24)) >> 10;
99 if( (dword >= (hole_startk<<2)) && (dword < ((4*1024*1024)<<2))) {
100 dword += ((4*1024*1024 - hole_startk)<<2);
105 //add 1MB offset to avoid compat area
106 dword += (1<<(20-8));
108 //So final result is upper 32 bit addr
/*
 * Return the test address used for receiver-enable training of one chip
 * select. This is a thin wrapper around Get_MCTSysAddr().
 *
 * ctrl:    memory controller of the node being trained
 * channel: accepted for symmetry with the other training helpers; not used
 * cs_idx:  chip select index within the node
 * sysinfo: cached register values consumed by Get_MCTSysAddr()
 *
 * Returns whatever Get_MCTSysAddr() returns for (ctrl, cs_idx, sysinfo).
 */
static unsigned Get_RcvrSysAddr(const struct mem_controller * ctrl, unsigned channel, unsigned cs_idx, struct sys_info *sysinfo)
{
	return Get_MCTSysAddr(ctrl, cs_idx, sysinfo);
}
/*
 * Return the current value of control register CR4.
 *
 * The suffix-less "mov" takes its operand size from the register picked for
 * %0, so it assembles to the same instruction as "movl" on a 32-bit build.
 * __asm__ is used so the function also compiles in strict ISO C mode.
 */
static inline unsigned long read_cr4(void)
{
	unsigned long cr4;

	__asm__ volatile ("mov %%cr4, %0" : "=r" (cr4));
	return cr4;
}
/*
 * Load control register CR4 with the given value.
 *
 * cr4: new register contents
 *
 * The suffix-less "mov" takes its operand size from the register picked for
 * %0, so it assembles to the same instruction as "movl" on a 32-bit build.
 * __asm__ is used so the function also compiles in strict ISO C mode.
 */
static inline void write_cr4(unsigned long cr4)
{
	__asm__ volatile ("mov %0, %%cr4" : : "r" (cr4));
}
/* NOTE(review): only the signature is visible in this copy. Going by the name
 * this presumably turns SSE2 on for the test-pattern code (WriteLNTestPattern
 * uses movdqa/movntdq) -- confirm against the full source. */
133 static inline void enable_sse2()
/* NOTE(review): only the signature is visible in this copy. Presumably the
 * counterpart of enable_sse2() that turns SSE2 back off -- confirm against
 * the full source. */
141 static inline void disable_sse2()
/*
 * Read-modify-write of MSR 0xc0010015.
 * NOTE(review): the statement that changes the value between rdmsr and wrmsr
 * is not visible in this copy; by the name it presumably sets the Wrap32Dis
 * bit -- confirm which bit against the full source.
 */
150 static void set_wrap32dis(void) {
153 msr = rdmsr(0xc0010015);
156 wrmsr(0xc0010015, msr);
/*
 * Read-modify-write of MSR 0xc0010015, the counterpart of set_wrap32dis().
 * NOTE(review): the statement that changes the value between rdmsr and wrmsr
 * is not visible in this copy; by the name it presumably clears the Wrap32Dis
 * bit -- confirm which bit against the full source.
 */
160 static void clear_wrap32dis(void) {
163 msr = rdmsr(0xc0010015);
166 wrmsr(0xc0010015, msr);
/*
 * Program the FS base MSR (0xc0000100) so that the %fs-prefixed loads, stores
 * and clflushes in this file reach the memory under test.
 * Callers pass the upper part of a test address (addr >> 24).
 * NOTE(review): how addr_hi is placed into the msr value is not visible in
 * this copy -- confirm.
 */
170 static void set_FSBASE(uint32_t addr_hi)
174 //set fs and use fs prefix to access the mem
177 wrmsr(0xc0000100, msr); //FS_BASE
/*
 * Report whether chip select cs_idx of ctrl's node is populated, based on the
 * cached chip-select base register sysinfo->cs_base[nodeid*8 + cs_idx].
 * Callers treat a zero return as "not present" and skip the chip select.
 * NOTE(review): the masking of the register value and the return statement
 * are not visible in this copy -- confirm which bit is tested.
 */
181 static unsigned ChipSelPresent(const struct mem_controller *ctrl, unsigned cs_idx, struct sys_info *sysinfo)
184 unsigned nodeid = ctrl->node_id;
187 enabled = sysinfo->cs_base[nodeid * 8 + cs_idx];
/*
 * Report whether the rank behind chip select cs_idx can be used for
 * receiver-enable training. This is a thin wrapper around ChipSelPresent().
 *
 * ctrl:        memory controller of the node being trained
 * channel:     not used by this implementation
 * cs_idx:      chip select index within the node
 * is_Width128: not used by this implementation
 * sysinfo:     cached register values consumed by ChipSelPresent()
 *
 * Returns whatever ChipSelPresent() returns for (ctrl, cs_idx, sysinfo).
 */
static unsigned RcvrRankEnabled(const struct mem_controller *ctrl, int channel, int cs_idx, unsigned is_Width128, struct sys_info *sysinfo)
{
	return ChipSelPresent(ctrl, cs_idx, sysinfo);
}
/*
 * Copy test-pattern data from buf_a to the memory under test through the
 * %fs segment (set_FSBASE() must have been called first).
 * The visible asm loads 16 bytes from the buffer into xmm0 (movdqa, so buf_a
 * has to be 16-byte aligned) and stores them with a non-temporal movntdq.
 * Operands: eax = addr_lo, edx = 16, ecx = line_num * 4, ebx = buf_a.
 * NOTE(review): the loop/advance instructions are not visible in this copy;
 * ecx = line_num * 4 with a step of 16 suggests four 16-byte stores per
 * 64-byte line -- confirm.
 */
199 static void WriteLNTestPattern(unsigned addr_lo, uint8_t *buf_a, unsigned line_num)
203 "movdqa (%3), %%xmm0\n\t"
204 "movntdq %%xmm0, %%fs:(%0)\n\t" /* xmm0 is 128 bit */
209 :: "a" (addr_lo), "d" (16), "c" (line_num * 4), "b"(buf_a)
/*
 * Write one line of test pattern to the memory under test.
 *
 * addr:  test address as returned by Get_MCTSysAddr(); (addr >> 24) is
 *        loaded into the FS base and (addr << 8) is used as the offset
 * p:     pattern selector, 1 picks buf_b, any other value picks buf_a
 * buf_a: source buffer for pattern 0
 * buf_b: source buffer for pattern 1
 */
static void Write1LTestPattern(unsigned addr, unsigned p, uint8_t *buf_a, uint8_t *buf_b)
{
	uint8_t *buf = (p == 1) ? buf_b : buf_a;

	set_FSBASE(addr >> 24);

	WriteLNTestPattern(addr << 8, buf, 1);
}
/*
 * Touch the test location so that its cache line is filled from DRAM.
 * (addr >> 24) goes into the FS base, (addr << 8) is the offset used for a
 * single 32-bit load through %fs; the loaded value is only a dummy output.
 */
226 static void Read1LTestPattern(unsigned addr)
230 set_FSBASE(addr>>24);
232 /* 1st move causes read fill (to exclusive or shared)*/
234 "movl %%fs:(%1), %0\n\t"
235 :"=b"(value): "a" (addr<<8)
/* Values of the Pass argument of TrainRcvrEn() / CompareTestPatternQW0(). */
243 #define DQS_FIRST_PASS 1
244 #define DQS_SECOND_PASS 2
/*
 * Training status codes and limits.
 * NOTE(review): the SB_* values are ORed into Errors as they are
 * (e.g. "Errors |= SB_NORCVREN"), not as (1 << SB_x), so different codes
 * overlap in the resulting word -- confirm whether bit numbers were intended.
 * RCVREN_MARGIN is only referenced from commented-out expressions here.
 * MIN_DQS_WNDW is the smallest acceptable passing window in TrainDQSPos().
 */
246 #define SB_NORCVREN 11
247 #define RCVREN_MARGIN 6
248 #define SB_SmallRCVR 13
249 #define SB_CHA2BRCVREN 12
250 #define SB_NODQSPOS 14
251 #define MIN_DQS_WNDW 3
252 #define SB_SMALLDQS 15
/*
 * Compare the first quadword read back from the test address with the
 * expected test pattern and return DQS_PASS or DQS_FAIL.
 *
 * channel/is_Width128: for channel 1 in 128-bit mode the data is read 8 bytes
 *                      further on
 * addr:                test address; (addr >> 24) is loaded into the FS base
 * pattern, Pass:       select the reference among TestPattern0/1/2 (the first
 *                      pass uses TestPattern0/1, TestPattern2 is the other
 *                      choice); on the second pass the result is inverted
 * The low dword is compared first and the high dword only if the low one
 * matched.
 * NOTE(review): the else branches, pointer/address increments and the return
 * are not visible in this copy -- confirm the exact selection rules.
 */
255 static unsigned CompareTestPatternQW0(unsigned channel, unsigned addr, unsigned pattern, const uint32_t *TestPattern0, const uint32_t *TestPattern1, const uint32_t *TestPattern2, unsigned Pass, unsigned is_Width128)
261 unsigned result = DQS_FAIL;
263 if(Pass == DQS_FIRST_PASS) {
265 test_buf = (uint32_t *)TestPattern1;
268 test_buf = (uint32_t *)TestPattern0;
272 test_buf = (uint32_t *)TestPattern2;
275 set_FSBASE(addr>>24);
279 if(is_Width128 && (channel == 1)) {
280 addr_lo += 8; //second channel
/* read the low dword of QW0 through %fs */
285 "movl %%fs:(%1), %0\n\t"
286 :"=b"(value): "a" (addr_lo)
289 value_test = *test_buf;
292 print_debug_dqs_pair("\t\t\t\t\t\tQW0.lo : test_buf= ", (unsigned)test_buf, " value = ", value_test, 4);
293 print_debug_dqs_pair("\t\t\t\t\t\tQW0.lo : addr_lo = ", addr_lo, " value = ", value, 4);
295 if(value == value_test) {
/* low dword matched: read and check the high dword as well */
299 "movl %%fs:(%1), %0\n\t"
300 :"=b"(value): "a" (addr_lo)
302 value_test = *test_buf;
303 print_debug_dqs_pair("\t\t\t\t\t\tQW0.hi : test_buf= ", (unsigned)test_buf, " value = ", value_test, 4);
304 print_debug_dqs_pair("\t\t\t\t\t\tQW0.hi : addr_lo = ", addr_lo, " value = ", value, 4);
306 if(value == value_test){
311 if(Pass == DQS_SECOND_PASS) { // second pass needs to be inverted
312 if(result==DQS_PASS) {
/*
 * Program the MaxAsyncLat field of DRAM_CONFIG_HIGH from a receiver-enable
 * delay.
 *
 * dly: delay in the units used by the receiver enable registers (50 ps
 *      according to the inline comment); it is rounded up and divided by 20
 *      to get nanoseconds, then stored relative to DCH_MaxAsyncLat_BASE.
 * NOTE(review): lines between the conversion and the register access are not
 * visible in this copy (callers' comments mention adding a margin) -- confirm.
 */
324 static void SetMaxAL_RcvrDly(const struct mem_controller *ctrl, unsigned dly)
328 dly += (20-1); // round it
329 dly /= 20; // convert from unit 50ps to 1ns
334 reg = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH);
335 reg &= ~(DCH_MaxAsyncLat_MASK <<DCH_MaxAsyncLat_SHIFT);
336 reg |= ((dly - DCH_MaxAsyncLat_BASE) << DCH_MaxAsyncLat_SHIFT);
337 pci_write_config32(ctrl->f2, DRAM_CONFIG_HIGH, reg);
342 Set the Target range to WT IO (using an IORR overlapping the already existing
343 WB dram type). Use IORR0
/*
 * Program IORR0 (base MSR 0xc0010016, mask MSR 0xc0010017) to cover the
 * region around addr; the mask low word 0xfc000800 is a 64MB mask per the
 * inline comment.
 * NOTE(review): how addr is turned into the base value, and the msr.hi
 * parts, are not visible in this copy -- confirm.
 */
345 static void SetTargetWTIO(unsigned addr)
350 wrmsr(0xc0010016, msr); //IORR0 BASE
353 msr.lo = 0xfc000800; // 64MB Mask
354 wrmsr(0xc0010017, msr); // IORR0 Mask
/*
 * Counterpart of SetTargetWTIO(): rewrites the IORR0 mask MSR (0xc0010017).
 * NOTE(review): the value written is not visible in this copy; presumably
 * zero, which would disable the range -- confirm.
 */
357 static void ResetTargetWTIO(void)
363 wrmsr(0xc0010017, msr); // IORR0 Mask
/*
 * Flush the cache line of a test address: (addr >> 24) is loaded into the
 * FS base and a clflush is issued through %fs.
 * NOTE(review): the asm operand list is not visible in this copy; the other
 * helpers use (addr << 8) as the offset -- confirm.
 */
366 static void proc_CLFLUSH(unsigned addr)
369 set_FSBASE(addr>>24);
371 /* 1st move causes read fill (to exclusive or shared)*/
373 /* clflush fs:[eax] */
374 "clflush %%fs:(%0)\n\t"
/* Called by TrainRcvrEn() after each pattern compare, with the test address.
 * NOTE(review): only the signature is visible in this copy; presumably it
 * wraps proc_CLFLUSH() between SetTargetWTIO()/ResetTargetWTIO() -- confirm. */
379 static void proc_IOCLFLUSH(unsigned addr)
/*
 * Read an indexed register behind function 2 offset 0x98 and write the same
 * value straight back, twice. The first access uses index 0x10; the write
 * itself is the point, no bits are changed in the visible code.
 * NOTE(review): whether index is changed between the two read/write pairs
 * (e.g. to the channel B copy) is not visible in this copy -- confirm.
 */
386 static void ResetDCTWrPtr(const struct mem_controller *ctrl)
389 unsigned index = 0x10;
391 dword = pci_read_config32_index_wait(ctrl->f2, 0x98, index);
392 pci_write_config32_index_wait(ctrl->f2, 0x98, index, dword);
395 dword = pci_read_config32_index_wait(ctrl->f2, 0x98, index);
396 pci_write_config32_index_wait(ctrl->f2, 0x98, index, dword);
/*
 * Return the memory clock cycle time scaled by 1000 (the caller's comment
 * calls it "cycle time (ns) * 1K", e.g. 5000 for 5 ns).
 *
 * i: column selector, the caller passes the MemClkFreq field of
 *    DRAM_CONFIG_HIGH; both tables have four columns.
 *
 * T1000_a holds one value per column. TT_a holds one row of four columns per
 * index derived from the CPU FID (rows are labelled 4..15 in the table).
 * The FID is taken from MSR 0xc0010042 (current FID) when CPUID 0x80000007
 * reports EDX bit 1, otherwise from bits 24..29 of MSR 0xc0010015.
 *
 * NOTE(review): TT_a as visible has 12 rows (48 entries) while the guard only
 * rejects index > 12, so index == 12 would read past the end of the table --
 * confirm and tighten the bound if so. The derivation of index from fid_cur
 * is not visible in this copy.
 */
401 static uint16_t get_exact_T1000(unsigned i)
404 static const uint16_t T1000_a[]= { 5000, 3759, 3003, 2500 };
406 static const uint16_t TT_a[] = {
408 /*4 */ 6250, 6250, 6250, 6250,
409 /*5 */ 5000, 5000, 5000, 2500,
410 /*6 */ 5000, 4166, 4166, 2500,
411 /*7 */ 5000, 4285, 3571, 2500,
413 /*8 */ 5000, 3750, 3125, 2500,
414 /*9 */ 5000, 3888, 3333, 2500,
415 /*10*/ 5000, 4000, 3000, 2500,
416 /*11*/ 5000, 4090, 3181, 2500,
418 /*12*/ 5000, 3750, 3333, 2500,
419 /*13*/ 5000, 3846, 3076, 2500,
420 /*14*/ 5000, 3928, 3214, 2500,
421 /*15*/ 5000, 4000, 3000, 2500,
427 /* Check for FID control support */
428 struct cpuid_result cpuid1;
429 cpuid1 = cpuid(0x80000007);
430 if( cpuid1.edx & 0x02 ) {
431 /* Use current FID */
433 msr = rdmsr(0xc0010042);
434 fid_cur = msr.lo & 0x3f;
438 /* Use startup FID */
440 msr = rdmsr(0xc0010015);
441 fid_start = (msr.lo & (0x3f << 24));
443 index = fid_start>>25;
/* indexes beyond the table fall back to the FID-independent values */
446 if(index>12) return T1000_a[i];
448 return TT_a[index * 4+i];
/*
 * Put the DQS timing registers into a known state before receiver-enable
 * training (called at the start of the first pass of TrainRcvrEn()).
 * Indexed registers 0x01-0x03 and 0x05-0x07 behind function 2 offset 0x98 are
 * written, together with their copies at index + 0x20.
 * NOTE(review): the assignments to dword are not visible in this copy; the
 * inline comments give 0x00 and 0x2f per byte. Both comments say "Write
 * Timing", although SetDQSDelayCSR() places the read-direction registers at
 * the +4 indexes, so the second loop looks like the read timing registers --
 * confirm.
 */
452 static void InitDQSPos4RcvrEn(const struct mem_controller *ctrl)
458 for(i=1; i<=3; i++) {
459 /* Program the DQS Write Timing Control Registers (Function 2:Offset 0x9c, index 0x01-0x03, 0x21-0x23) to 0x00 for all bytes */
460 pci_write_config32_index_wait(ctrl->f2, 0x98, i, dword);
461 pci_write_config32_index_wait(ctrl->f2, 0x98, i+0x20, dword);
465 for(i=5; i<=7; i++) {
466 /* Program the DQS Write Timing Control Registers (Function 2:Offset 0x9c, index 0x05-0x07, 0x25-0x27) to 0x2f for all bytes */
467 pci_write_config32_index_wait(ctrl->f2, 0x98, i, dword);
468 pci_write_config32_index_wait(ctrl->f2, 0x98, i+0x20, dword);
/* Default the rev F0/F1 workaround to enabled unless the build already
 * defined it; TrainRcvrEn() tests it with "== 1". */
473 #ifndef K8_REV_F_SUPPORT_F0_F1_WORKAROUND
474 #define K8_REV_F_SUPPORT_F0_F1_WORKAROUND 1
/*
 * DQS receiver enable training for one memory controller.
 *
 * ctrl:    controller (node) to train
 * Pass:    DQS_FIRST_PASS or DQS_SECOND_PASS; the first pass uses the
 *          0xaa/0x55 patterns and starts from InitDQSPos4RcvrEn(), the other
 *          branch fills both buffers with TestPattern2
 * sysinfo: cached register values; the per-node slice of dqs_rcvr_dly_a is
 *          read and updated with the delay found for each channel/receiver
 *
 * For every channel and every receiver pair (chip selects 0/1, 2/3, ...), the
 * receiver enable delay is swept up to 0xaf. At each step the delay and
 * MaxAsyncLat are programmed, the test lines are read back and compared, and
 * the first delay at which a pass follows a fail is kept as RcvrEnDlyRmin.
 * The result is written to the delay register(s), and the largest value seen
 * is used for the final MaxAsyncLat setting.
 *
 * Returns 1 when the largest delay found is exactly 0xae, otherwise 0.
 * NOTE(review): many short lines (else branches, increments, closing braces,
 * some initialisations) are not visible in this copy -- check details against
 * the full source before relying on this summary.
 */
477 static unsigned TrainRcvrEn(const struct mem_controller *ctrl, unsigned Pass, struct sys_info *sysinfo)
480 static const uint32_t TestPattern0[] = {
481 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
482 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
483 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
484 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
486 static const uint32_t TestPattern1[] = {
487 0x55555555, 0x55555555, 0x55555555, 0x55555555,
488 0x55555555, 0x55555555, 0x55555555, 0x55555555,
489 0x55555555, 0x55555555, 0x55555555, 0x55555555,
490 0x55555555, 0x55555555, 0x55555555, 0x55555555,
492 static const uint32_t TestPattern2[] = {
493 0x12345678, 0x87654321, 0x23456789, 0x98765432,
494 0x59385824, 0x30496724, 0x24490795, 0x99938733,
495 0x40385642, 0x38465245, 0x29432163, 0x05067894,
496 0x12349045, 0x98723467, 0x12387634, 0x34587623,
499 uint8_t pattern_buf_x[64 * 4 + 16]; // We need to two cache line So have more 16 bytes to keep 16 byte alignment */
500 uint8_t *buf_a, *buf_b;
503 uint8_t *dqs_rcvr_dly_a = &sysinfo->dqs_rcvr_dly_a[ctrl->node_id * 2* 8] ; //8 node, channel 2, receiver 8
507 unsigned channel, receiver;
510 unsigned CTLRMaxDelay;
515 unsigned Test0, Test1;
517 unsigned RcvrEnDlyRmin;
525 unsigned TestAddr0, TestAddr0B, TestAddr1 = 0, TestAddr1B = 0;
527 unsigned CurrRcvrCHADelay = 0;
531 unsigned is_Width128 = sysinfo->meminfo[ctrl->node_id].is_Width128;
533 #if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
537 if(Pass == DQS_FIRST_PASS) {
538 InitDQSPos4RcvrEn(ctrl);
/* ECC is switched off for the duration of the training; ecc_bit keeps the previous setting */
548 dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
549 ecc_bit = dword & DCL_DimmEccEn;
550 dword &= ~(DCL_DimmEccEn);
551 pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dword);
554 if(Pass == DQS_FIRST_PASS) {
555 #if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
556 cpu_f0_f1 = is_cpu_pre_f2_in_bsp(ctrl->node_id);
561 /* Set the DqsRcvEnTrain bit */
562 dword = pci_read_config32(ctrl->f2, DRAM_CTRL);
563 dword |= DC_DqsRcvEnTrain;
564 pci_write_config32(ctrl->f2, DRAM_CTRL, dword);
569 //get T1000 figures (cycle time (ns)) * 1K
570 dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH);
571 dword &= DCH_MemClkFreq_MASK;
573 T1000 = get_exact_T1000(dword);
/* buf_a is a 16-byte aligned pointer inside pattern_buf_x, buf_b follows 128 bytes later */
576 buf_a = (uint8_t *)(((uint32_t)(&pattern_buf_x[0]) + 0x10) & (0xfffffff0));
577 buf_b = buf_a + 128; //??
578 if(Pass==DQS_FIRST_PASS) {
580 *((uint32_t *)(buf_a + i*4)) = TestPattern0[i];
581 *((uint32_t *)(buf_b + i*4)) = TestPattern1[i];
586 *((uint32_t *)(buf_a + i*4)) = TestPattern2[i];
587 *((uint32_t *)(buf_b + i*4)) = TestPattern2[i];
591 print_debug_dqs("\r\nTrainRcvEn: 0 ctrl", ctrl->node_id, 0);
593 print_debug_addr("TrainRcvEn: buf_a:", buf_a);
596 /* for each channel */
600 if (!(sysinfo->meminfo[ctrl->node_id].dimm_mask & 0x0F) &&
601 (sysinfo->meminfo[ctrl->node_id].dimm_mask & 0xF0)) { /* channelB only? */
605 for ( ; (channel < 2) && (!Errors); channel++)
607 print_debug_dqs("\tTrainRcvEn51: channel ",channel, 1);
610 /* there are four receiver pairs, loosely associated with CS */
611 for( receiver = 0; (receiver < 8) && (!Errors); receiver+=2)
/* each receiver pair has its own indexed delay register: 0x10, 0x13, 0x16, 0x19 */
614 unsigned index=(receiver>>1) * 3 + 0x10;
616 print_debug_dqs("\t\tTrainRcvEn52: index ", index, 2);
620 dword = pci_read_config32_index_wait(ctrl->f2, 0x98, index);
621 CurrRcvrCHADelay= dword & 0xff;
/* 0xaf doubles as the "nothing found yet" marker checked after the sweep */
631 RcvrEnDlyRmin = 0xaf;
633 if(!RcvrRankEnabled(ctrl, channel, receiver, is_Width128, sysinfo)) continue;
635 /* for each DQS receiver enable setting */
637 TestAddr0 = Get_RcvrSysAddr(ctrl, channel, receiver, sysinfo);
639 TestAddr0B = TestAddr0 + (1<<(20+2-8)); // 4MB
641 if(RcvrRankEnabled(ctrl, channel, receiver+1, is_Width128, sysinfo)) {
642 TestAddr1 = Get_RcvrSysAddr(ctrl, channel, receiver+1, sysinfo);
643 TestAddr1B = TestAddr1 + (1<<(20+2-8)); //4MB
650 print_debug_dqs("\t\tTrainRcvEn53: TestAddr0B ", TestAddr0B, 2);
652 Write1LTestPattern(TestAddr0, 0, buf_a, buf_b); // rank0 of dimm, test p0
653 Write1LTestPattern(TestAddr0B, 1, buf_a, buf_b); //rank0 of dimm, test p1
656 Write1LTestPattern(TestAddr1, 0, buf_a, buf_b); //rank 1 of dimm
657 Write1LTestPattern(TestAddr1B, 1, buf_a, buf_b);//rank 1 of dimm
660 if(Pass == DQS_FIRST_PASS) {
663 RcvrEnDly = dqs_rcvr_dly_a[channel * 8 + receiver];
666 while ( RcvrEnDly < 0xaf) { // Sweep Delay value here
667 print_debug_dqs("\t\t\tTrainRcvEn541: RcvrEnDly ", RcvrEnDly, 3);
670 /* Odd steps get another pattern such that even
671 and odd steps alternate.
672 The pointers to the patterns will be swapped
673 at the end of the loop so they are correspond
684 /* Program current Receiver enable delay */
685 pci_write_config32_index_wait(ctrl->f2, 0x98, index, RcvrEnDly);
686 /* FIXME: 64bit MUX */
689 /* Program current Receiver enable delay channel b */
690 pci_write_config32_index_wait(ctrl->f2, 0x98, index+ 0x20, RcvrEnDly);
693 /* Program the MaxAsyncLat field with the
694 current DQS receiver enable setting plus 6ns
696 /*Program MaxAsyncLat to correspond with current delay */
697 SetMaxAL_RcvrDly(ctrl, RcvrEnDly);
701 Read1LTestPattern(TestAddr0); //Cache Fill
702 /* ROM vs cache compare */
703 Test0 = CompareTestPatternQW0(channel, TestAddr0, PatternA, TestPattern0, TestPattern1, TestPattern2, Pass, is_Width128);
704 proc_IOCLFLUSH(TestAddr0);
708 print_debug_dqs("\t\t\tTrainRcvEn542: Test0 ", Test0, 3);
710 if(Test0 == DQS_PASS) {
712 Read1LTestPattern(TestAddr0B);
713 Test1 = CompareTestPatternQW0(channel, TestAddr0B, PatternB, TestPattern0, TestPattern1, TestPattern2, Pass, is_Width128);
714 proc_IOCLFLUSH(TestAddr0B);
718 print_debug_dqs("\t\t\tTrainRcvEn543: Test1 ", Test1, 3);
720 if(Test1 == DQS_PASS) {
722 Read1LTestPattern(TestAddr1);
723 Test0 = CompareTestPatternQW0(channel, TestAddr1, PatternA, TestPattern0, TestPattern1, TestPattern2, Pass, is_Width128);
724 proc_IOCLFLUSH(TestAddr1);
727 if(Test0 == DQS_PASS) {
728 Read1LTestPattern(TestAddr1B);
729 Test1 = CompareTestPatternQW0(channel, TestAddr1B, PatternB, TestPattern0, TestPattern1, TestPattern2, Pass, is_Width128);
730 proc_IOCLFLUSH(TestAddr1B);
733 if(Test1 == DQS_PASS) {
737 print_debug_dqs("\t\t\tTrainRcvEn544: Test0 ", Test0, 3);
745 print_debug_dqs("\t\t\tTrainRcvEn55: RcvrEnDly ", RcvrEnDly, 3);
/* a pass right after a fail marks the start of the passing window */
747 if(CurrTest == DQS_PASS) {
748 if(LastTest == DQS_FAIL) {
749 RcvrEnDlyRmin = RcvrEnDly;
756 /* swap the rank 0 pointers */
758 TestAddr0 = TestAddr0B;
761 /* swap the rank 1 pointers */
763 TestAddr1 = TestAddr1B;
766 print_debug_dqs("\t\t\tTrainRcvEn56: RcvrEnDly ", RcvrEnDly, 3);
772 print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDly ", RcvrEnDly, 2);
/* RcvrEnDlyRmin still at its initial value: no passing delay was found */
774 if(RcvrEnDlyRmin == 0xaf) {
776 Errors |= SB_NORCVREN;
779 if(Pass == DQS_FIRST_PASS) {
780 // We need a better value for DQSPos training
781 RcvrEnDly = RcvrEnDlyRmin /* + RCVREN_MARGIN * T1000/64/50 */;
783 RcvrEnDly = RcvrEnDlyRmin;
786 if(RcvrEnDly > 0xae) {
787 //passing window too narrow, too far delayed
788 Errors |= SB_SmallRCVR;
792 if(Pass == DQS_SECOND_PASS) { //second pass must average values
793 RcvrEnDly += dqs_rcvr_dly_a[channel * 8 + receiver] /* - (RCVREN_MARGIN * T1000/64/50)*/;
797 dqs_rcvr_dly_a[channel * 8 + receiver] = RcvrEnDly;
799 //Set final RcvrEnDly for this DIMM and Channel
800 pci_write_config32_index_wait(ctrl->f2, 0x98, index, RcvrEnDly);
803 pci_write_config32_index_wait(ctrl->f2, 0x98, index+0x20, RcvrEnDly); // channel B
805 pci_write_config32_index_wait(ctrl->f2, 0x98, index, CurrRcvrCHADelay);
806 if(RcvrEnDly > CurrRcvrCHADelay) {
807 dword = RcvrEnDly - CurrRcvrCHADelay;
810 dword = CurrRcvrCHADelay - RcvrEnDly;
814 Errors |= SB_CHA2BRCVREN;
819 print_debug_dqs("\t\tTrainRcvEn63: RcvrEnDly ", RcvrEnDly, 2);
821 if(RcvrEnDly > CTLRMaxDelay) {
822 CTLRMaxDelay = RcvrEnDly;
825 print_debug_dqs("\t\tTrainRcvEn64: CTLRMaxDelay ", CTLRMaxDelay, 2);
830 print_debug_dqs("\tTrainRcvEn65: CTLRMaxDelay ", CTLRMaxDelay, 1);
832 /* Program the MaxAsyncLat field with the largest DQS Receiver Enable setting */
833 SetMaxAL_RcvrDly(ctrl, CTLRMaxDelay);
/* NOTE(review): the step that puts ecc_bit back is not visible in this copy -- confirm */
837 dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
838 dword &= ~(DCL_DimmEccEn);
840 pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dword);
842 if(Pass == DQS_FIRST_PASS) {
843 #if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
/* leave receiver enable training mode again */
847 dword = pci_read_config32(ctrl->f2, DRAM_CTRL);
848 dword &= ~DC_DqsRcvEnTrain;
849 pci_write_config32(ctrl->f2, DRAM_CTRL, dword);
857 //restore SSE2 setting
860 #if CONFIG_MEM_TRAIN_SEQ != 1
861 /* We need tidy output for type 1 */
862 printk_debug(" CTLRMaxDelay=%02x\n", CTLRMaxDelay);
/* 1 only when the largest delay is exactly 0xae, otherwise 0 */
865 return (CTLRMaxDelay==0xae)?1:0;
/* Direction selectors for DQS position training. The value is used directly
 * in the register index computation (direction << 2, direction * 4), so the
 * read direction addresses the registers four indexes above the write ones. */
869 #define DQS_READDIR 1
870 #define DQS_WRITEDIR 0
/*
 * Set the DQS delay of a single byte lane.
 *
 * channel:   0 or 1; channel 1 registers sit 0x20 indexes higher
 * bytelane:  0-8 (8 is the ECC lane); four lanes share one 32-bit register,
 *            one byte each
 * direction: DQS_READDIR or DQS_WRITEDIR
 * dqs_delay: new delay; only the low 6 bits of the lane's byte are cleared
 *            before the value is ORed in, so it is expected to fit in 6 bits
 * NOTE(review): the derivation of shift from bytelane is only partly visible
 * in this copy -- confirm.
 */
873 static void SetDQSDelayCSR(const struct mem_controller *ctrl, unsigned channel, unsigned bytelane, unsigned direction, unsigned dqs_delay)
874 { //ByteLane could be 0-8, last is for ECC
881 index = (bytelane>>2) + 1 + channel * 0x20 + (direction << 2);
886 shift <<= 3; // 8 bit
/* read-modify-write so the other lanes in the register keep their delays */
888 dword = pci_read_config32_index_wait(ctrl->f2, 0x98, index);
889 dword &= ~(0x3f<<shift);
890 dword |= (dqs_delay<<shift);
891 pci_write_config32_index_wait(ctrl->f2, 0x98, index, dword);
/*
 * Set the same DQS delay on all byte lanes of one channel and direction.
 * dqs_delay is replicated into every byte of a dword, which is then written
 * to consecutive indexed registers starting at
 * 1 + channel * 0x20 + direction * 4.
 * NOTE(review): the loop headers (number of bytes replicated and number of
 * registers written) are not visible in this copy -- confirm.
 */
895 static void SetDQSDelayAllCSR(const struct mem_controller *ctrl, unsigned channel, unsigned direction, unsigned dqs_delay)
904 dword |= dqs_delay<<(i*8);
907 index = 1 + channel * 0x20 + direction * 4;
910 pci_write_config32_index_wait(ctrl->f2, 0x98, index + i, dword);
/*
 * Return the middle of the passing window [min_d, max_d]:
 * min_d plus half of the window size.
 * NOTE(review): the adjustment made for an odd window size ("need round up")
 * is not visible in this copy -- confirm what is incremented.
 */
915 static unsigned MiddleDQS(unsigned min_d, unsigned max_d)
918 size_d = max_d-min_d;
919 if(size_d & 1) { //need round up
922 return ( min_d + (size_d>>1));
/*
 * Record a trained DQS delay in the per-node delay table.
 *
 * channel:     channel number; each channel owns 2 * 9 consecutive entries
 * bytelane:    byte lane within the direction block
 * direction:   DQS_READDIR / DQS_WRITEDIR; each direction owns 9 entries
 * dqs_delay_a: table to update; the caller must provide room for
 *              channel * 18 + direction * 9 + bytelane + 1 entries
 * dqs_delay:   value to store
 */
static inline void save_dqs_delay(unsigned channel, unsigned bytelane, unsigned direction, uint8_t *dqs_delay_a, uint8_t dqs_delay)
{
	dqs_delay_a[channel * 2*9 + direction * 9 + bytelane] = dqs_delay;
}
/*
 * Write the DQS position test pattern to the memory under test.
 *
 * addr_lo: low part of the test address (offset through %fs; the caller has
 *          already programmed the FS base)
 * pattern: 0 writes 9 lines, 1 writes 18 lines ((pattern + 1) * 9)
 * buf_a:   source buffer holding the pattern data
 */
static void WriteDQSTestPattern(unsigned addr_lo, unsigned pattern , uint8_t *buf_a)
{
	WriteLNTestPattern(addr_lo, buf_a, (pattern + 1) * 9);
}
/*
 * Read one dword from each of 18 consecutive 64-byte lines starting at
 * addr_lo, through the %fs segment. The loaded values are discarded (all
 * loads target eax); the accesses themselves are the purpose.
 * Five base registers are used, each pointing 128 bytes into a group of four
 * lines so that the displacements -128, -64, 0 and 64 reach the whole group.
 */
935 static void ReadL18TestPattern(unsigned addr_lo)
937 //set fs and use fs prefix to access the mem
939 "movl %%fs:-128(%%esi), %%eax\n\t" //TestAddr cache line
940 "movl %%fs:-64(%%esi), %%eax\n\t" //+1
941 "movl %%fs:(%%esi), %%eax\n\t" //+2
942 "movl %%fs:64(%%esi), %%eax\n\t" //+3
944 "movl %%fs:-128(%%edi), %%eax\n\t" //+4
945 "movl %%fs:-64(%%edi), %%eax\n\t" //+5
946 "movl %%fs:(%%edi), %%eax\n\t" //+6
947 "movl %%fs:64(%%edi), %%eax\n\t" //+7
949 "movl %%fs:-128(%%ebx), %%eax\n\t" //+8
950 "movl %%fs:-64(%%ebx), %%eax\n\t" //+9
951 "movl %%fs:(%%ebx), %%eax\n\t" //+10
952 "movl %%fs:64(%%ebx), %%eax\n\t" //+11
954 "movl %%fs:-128(%%ecx), %%eax\n\t" //+12
955 "movl %%fs:-64(%%ecx), %%eax\n\t" //+13
956 "movl %%fs:(%%ecx), %%eax\n\t" //+14
957 "movl %%fs:64(%%ecx), %%eax\n\t" //+15
959 "movl %%fs:-128(%%edx), %%eax\n\t" //+16
960 "movl %%fs:-64(%%edx), %%eax\n\t" //+17
/* NOTE(review): eax is passed as an input but overwritten by every load -- confirm the full constraint/clobber list */
962 :: "a"(0), "b" (addr_lo+128+8*64), "c" (addr_lo+128+12*64), "d" (addr_lo +128+16*64), "S"(addr_lo+128), "D"(addr_lo+128+4*64)
/*
 * Read one dword from each of 9 consecutive 64-byte lines starting at
 * addr_lo, through the %fs segment. The loaded values are discarded (all
 * loads target eax); the accesses themselves are the purpose.
 * Each base register points 128 bytes into a group of four lines so that the
 * displacements -128, -64, 0 and 64 reach the whole group.
 */
967 static void ReadL9TestPattern(unsigned addr_lo)
970 //set fs and use fs prefix to access the mem
973 "movl %%fs:-128(%%ecx), %%eax\n\t" //TestAddr cache line
974 "movl %%fs:-64(%%ecx), %%eax\n\t" //+1
975 "movl %%fs:(%%ecx), %%eax\n\t" //+2
976 "movl %%fs:64(%%ecx), %%eax\n\t" //+3
978 "movl %%fs:-128(%%edx), %%eax\n\t" //+4
979 "movl %%fs:-64(%%edx), %%eax\n\t" //+5
980 "movl %%fs:(%%edx), %%eax\n\t" //+6
981 "movl %%fs:64(%%edx), %%eax\n\t" //+7
983 "movl %%fs:-128(%%ebx), %%eax\n\t" //+8
/* NOTE(review): eax is passed as an input but overwritten by every load -- confirm the full constraint/clobber list */
985 :: "a"(0), "b" (addr_lo+128+8*64), "c"(addr_lo+128), "d"(addr_lo+128+4*64)
/*
 * Read back the DQS test pattern with either the 9-line or the 18-line
 * reader, depending on pattern.
 * NOTE(review): the selecting condition is not visible in this copy;
 * WriteDQSTestPattern() writes (pattern + 1) * 9 lines, so pattern 0
 * presumably maps to the 9-line reader -- confirm.
 */
991 static void ReadDQSTestPattern(unsigned addr_lo, unsigned pattern)
994 ReadL9TestPattern(addr_lo);
997 ReadL18TestPattern(addr_lo);
/*
 * clflush 9 consecutive 64-byte lines starting at addr_lo, through the %fs
 * segment. The base registers are set up like in ReadL9TestPattern(): each
 * points 128 bytes into a group of four lines.
 */
1001 static void FlushDQSTestPattern_L9(unsigned addr_lo)
1004 "clflush %%fs:-128(%%ecx)\n\t"
1005 "clflush %%fs:-64(%%ecx)\n\t"
1006 "clflush %%fs:(%%ecx)\n\t"
1007 "clflush %%fs:64(%%ecx)\n\t"
1009 "clflush %%fs:-128(%%eax)\n\t"
1010 "clflush %%fs:-64(%%eax)\n\t"
1011 "clflush %%fs:(%%eax)\n\t"
1012 "clflush %%fs:64(%%eax)\n\t"
1014 "clflush %%fs:-128(%%ebx)\n\t"
1016 :: "b" (addr_lo+128+8*64), "c"(addr_lo+128), "a"(addr_lo+128+4*64)
/*
 * clflush 18 consecutive 64-byte lines starting at addr_lo, through the %fs
 * segment. Five base registers are used, each pointing 128 bytes into a
 * group of four lines. The function is explicitly marked noinline.
 */
1020 static __attribute__((noinline)) void FlushDQSTestPattern_L18(unsigned addr_lo)
1023 "clflush %%fs:-128(%%eax)\n\t"
1024 "clflush %%fs:-64(%%eax)\n\t"
1025 "clflush %%fs:(%%eax)\n\t"
1026 "clflush %%fs:64(%%eax)\n\t"
1028 "clflush %%fs:-128(%%edi)\n\t"
1029 "clflush %%fs:-64(%%edi)\n\t"
1030 "clflush %%fs:(%%edi)\n\t"
1031 "clflush %%fs:64(%%edi)\n\t"
1033 "clflush %%fs:-128(%%ebx)\n\t"
1034 "clflush %%fs:-64(%%ebx)\n\t"
1035 "clflush %%fs:(%%ebx)\n\t"
1036 "clflush %%fs:64(%%ebx)\n\t"
1038 "clflush %%fs:-128(%%ecx)\n\t"
1039 "clflush %%fs:-64(%%ecx)\n\t"
1040 "clflush %%fs:(%%ecx)\n\t"
1041 "clflush %%fs:64(%%ecx)\n\t"
1043 "clflush %%fs:-128(%%edx)\n\t"
1044 "clflush %%fs:-64(%%edx)\n\t"
1046 :: "b" (addr_lo+128+8*64), "c" (addr_lo+128+12*64), "d" (addr_lo +128+16*64), "a"(addr_lo+128), "D"(addr_lo+128+4*64)
/*
 * Flush the lines used by the DQS test pattern with either the 9-line or the
 * 18-line flusher, depending on pattern.
 * NOTE(review): the selecting condition is not visible in this copy;
 * presumably pattern 0 maps to the 9-line variant, matching
 * WriteDQSTestPattern() -- confirm.
 */
1050 static void FlushDQSTestPattern(unsigned addr_lo, unsigned pattern )
1054 FlushDQSTestPattern_L9(addr_lo);
1057 FlushDQSTestPattern_L18(addr_lo);
/*
 * Compare the memory under test with the reference pattern in buf_a and
 * report the result per byte lane.
 *
 * channel/pattern: with pattern != 0 (dual channel) the data of channel 1
 *                  starts 8 bytes in, and after each quadword the other
 *                  channel's 8 bytes are skipped
 * addr_lo:         offset of the test data, read through %fs
 * buf_a:           expected data
 *
 * The bitmap starts as 0xff; every mismatching byte clears the bit of its
 * byte lane. The comparison walks dword by dword, 9*64/4 steps in the visible
 * loop.
 * NOTE(review): the bytelane bookkeeping, pointer increments and the return
 * are not visible in this copy; callers AND the result into their pass map,
 * so the bitmap is presumably what is returned -- confirm.
 */
1061 static unsigned CompareDQSTestPattern(unsigned channel, unsigned addr_lo, unsigned pattern, uint8_t *buf_a)
1064 unsigned bitmap = 0xff;
1069 uint32_t value_test;
1071 test_buf = (uint32_t *)buf_a;
1074 if(pattern && channel) {
1075 addr_lo += 8; //second channel
1080 for(i=0;i<9*64/4;i++) {
1082 "movl %%fs:(%1), %0\n\t"
1083 :"=b"(value): "a" (addr_lo)
1085 value_test = *test_buf;
1087 print_debug_dqs_pair("\t\t\t\t\t\ttest_buf= ", (unsigned)test_buf, " value = ", value_test, 7);
1088 print_debug_dqs_pair("\t\t\t\t\t\ttaddr_lo = ",addr_lo, " value = ", value, 7);
/* compare the four bytes of the dword one by one */
1090 for(j=0;j<4*8;j+=8) {
1091 if(((value>>j)&0xff) != ((value_test>>j)& 0xff)) {
1092 bitmap &= ~(1<<bytelane);
1098 print_debug_dqs("\t\t\t\t\t\tbitmap = ", bitmap, 7);
1101 if(pattern == 1) { //dual channel
1102 addr_lo += 8; //skip over other channel's data
/*
 * DQS position training for one channel and one direction.
 *
 * ctrl:        controller (node) to train
 * channel:     channel to train
 * Direction:   DQS_READDIR or DQS_WRITEDIR
 * Pattern:     test pattern selector passed on to the pattern helpers
 * buf_a:       buffer holding the test pattern
 * dqs_delay_a: table that receives the chosen delay per byte lane
 * sysinfo:     cached register values used to find populated chip selects
 *
 * Step 1: for every populated chip select and each of the 48 delay settings,
 * program the delay on all lanes, write (write direction) and read the
 * pattern, and AND the per-lane pass bitmap into MutualCSPassW[delay]. For
 * the read direction the pattern is written once per chip select up front.
 * Step 2: for each of the 8 byte lanes, find the longest run of consecutive
 * passing delays, program the middle of that run and save it in dqs_delay_a.
 * SB_NODQSPOS / SB_SMALLDQS are ORed into Errors when a lane has no passing
 * window or one smaller than MIN_DQS_WNDW.
 * NOTE(review): the return statement and several short lines are not visible
 * in this copy; the callers return this function's result as their own --
 * confirm what is returned.
 */
1116 static unsigned TrainDQSPos(const struct mem_controller *ctrl, unsigned channel, unsigned Direction, unsigned Pattern, uint8_t *buf_a, uint8_t *dqs_delay_a, struct sys_info *sysinfo)
1120 unsigned BanksPresent;
1122 unsigned MutualCSPassW[48];
1130 unsigned RnkDlyFilterMax, RnkDlyFilterMin = 0;
1131 unsigned RnkDlySeqPassMax, RnkDlySeqPassMin = 0;
1136 print_debug_dqs("\t\t\tTrainDQSPos begin ", 0, 3);
1138 printk_debug("TrainDQSPos: MutualCSPassW[48] :%p\n", MutualCSPassW);
1140 for(DQSDelay=0; DQSDelay<48; DQSDelay++) {
1141 MutualCSPassW[DQSDelay] = 0xff; // Bitmapped status per delay setting, 0xff=All positions passing (1= PASS)
1144 for(ChipSel = 0; ChipSel < 8; ChipSel++) { //logical register chipselects 0..7
1145 print_debug_dqs("\t\t\t\tTrainDQSPos: 11 ChipSel ", ChipSel, 4);
1146 //FIXME: process 64MUXedMode
1147 if(!ChipSelPresent(ctrl, ChipSel, sysinfo)) continue;
1150 TestAddr = Get_MCTSysAddr(ctrl, ChipSel, sysinfo);
1152 print_debug_dqs("\t\t\t\tTrainDQSPos: 12 TestAddr ", TestAddr, 4);
1154 //set fs and use fs prefix to access the mem
1155 set_FSBASE(TestAddr>>24);
1157 if(Direction == DQS_READDIR) {
1158 print_debug_dqs("\t\t\t\tTrainDQSPos: 13 for read so write at first", 0, 4);
1159 WriteDQSTestPattern(TestAddr<<8, Pattern, buf_a);
1162 for(DQSDelay = 0; DQSDelay < 48; DQSDelay++ ){
1163 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 141 DQSDelay ", DQSDelay, 5);
1164 if(MutualCSPassW[DQSDelay] == 0) continue; //skip current delay value if other chipselects have failed all 8 bytelanes
1165 SetDQSDelayAllCSR(ctrl, channel, Direction, DQSDelay);
1166 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 142 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);
1167 if(Direction == DQS_WRITEDIR) {
1168 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 143 for write", 0, 5);
1169 WriteDQSTestPattern(TestAddr<<8, Pattern, buf_a);
1171 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 144 Pattern ", Pattern, 5);
1172 ReadDQSTestPattern(TestAddr<<8, Pattern);
1173 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 145 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);
1174 MutualCSPassW[DQSDelay] &= CompareDQSTestPattern(channel, TestAddr<<8, Pattern, buf_a); //0: fail, 1=pass
1175 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 146 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);
/* the test lines are flushed while the IORR0 range set by SetTargetWTIO() is active */
1176 SetTargetWTIO(TestAddr);
1177 FlushDQSTestPattern(TestAddr<<8, Pattern);
/* second step: pick the widest run of passing delays for every byte lane */
1183 for(ByteLane = 0; ByteLane < 8; ByteLane++) {
1184 print_debug_dqs("\t\t\t\tTrainDQSPos: 31 ByteLane ",ByteLane, 4);
1186 LastTest = DQS_FAIL;
1187 RnkDlySeqPassMax = 0;
1188 RnkDlyFilterMax = 0;
1189 RnkDlyFilterMin = 0;
1190 for(DQSDelay=0; DQSDelay<48; DQSDelay++) {
1191 if(MutualCSPassW[DQSDelay] & (1<<ByteLane)) {
1193 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 321 DQSDelay ", DQSDelay, 5);
1194 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 322 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);
1196 RnkDlySeqPassMax = DQSDelay;
1197 if(LastTest == DQS_FAIL) {
1198 RnkDlySeqPassMin = DQSDelay; //start sequential run
/* remember the current run if it is longer than the best one so far */
1200 if((RnkDlySeqPassMax - RnkDlySeqPassMin)>(RnkDlyFilterMax-RnkDlyFilterMin)){
1201 RnkDlyFilterMin = RnkDlySeqPassMin;
1202 RnkDlyFilterMax = RnkDlySeqPassMax;
1204 LastTest = DQS_PASS;
1207 LastTest = DQS_FAIL;
1210 print_debug_dqs("\t\t\t\tTrainDQSPos: 33 RnkDlySeqPassMax ", RnkDlySeqPassMax, 4);
1212 if(RnkDlySeqPassMax == 0) {
1213 Errors |= SB_NODQSPOS; // no passing window
1216 print_debug_dqs("\t\t\t\tTrainDQSPos: 34 RnkDlyFilterMax ", RnkDlyFilterMax, 4);
1217 print_debug_dqs("\t\t\t\tTrainDQSPos: 34 RnkDlyFilterMin ", RnkDlyFilterMin, 4);
1218 if((RnkDlyFilterMax - RnkDlyFilterMin)< MIN_DQS_WNDW){
1219 Errors |= SB_SMALLDQS;
1222 unsigned middle_dqs;
1223 middle_dqs = MiddleDQS(RnkDlyFilterMin, RnkDlyFilterMax);
1224 print_debug_dqs("\t\t\t\tTrainDQSPos: 35 middle_dqs ",middle_dqs, 4);
1225 SetDQSDelayCSR(ctrl, channel, ByteLane, Direction, middle_dqs);
1226 save_dqs_delay(channel, ByteLane, Direction, dqs_delay_a, middle_dqs);
1232 print_debug_dqs("\t\t\tTrainDQSPos: end", 0xff, 3);
1239 static unsigned TrainReadDQS(const struct mem_controller *ctrl, unsigned channel, unsigned pattern, uint8_t *buf_a, uint8_t *dqs_delay_a, struct sys_info *sysinfo)
1241 print_debug_dqs("\t\tTrainReadPos", 0, 2);
1242 return TrainDQSPos(ctrl, channel, DQS_READDIR, pattern, buf_a, dqs_delay_a, sysinfo);
1245 static unsigned TrainWriteDQS(const struct mem_controller *ctrl, unsigned channel, unsigned pattern, uint8_t *buf_a, uint8_t *dqs_delay_a, struct sys_info *sysinfo)
1247 print_debug_dqs("\t\tTrainWritePos", 0, 2);
1248 return TrainDQSPos(ctrl, channel, DQS_WRITEDIR, pattern, buf_a, dqs_delay_a, sysinfo);
1253 static unsigned TrainDQSRdWrPos(const struct mem_controller *ctrl, struct sys_info *sysinfo)
1255 static const uint32_t TestPatternJD1a[] = {
1256 0x00000000,0x00000000,0xFFFFFFFF,0xFFFFFFFF, // QW0-1, ALL-EVEN
1257 0x00000000,0x00000000,0x00000000,0x00000000, // QW2-3, ALL-EVEN
1258 0x00000000,0x00000000,0xFFFFFFFF,0xFFFFFFFF, // QW4-5, ALL-EVEN
1259 0x00000000,0x00000000,0x00000000,0x00000000, // QW6-7, ALL-EVEN
1260 0xFeFeFeFe,0xFeFeFeFe,0x01010101,0x01010101, // QW0-1, DQ0-ODD
1261 0xFeFeFeFe,0xFeFeFeFe,0x01010101,0x01010101, // QW2-3, DQ0-ODD
1262 0x01010101,0x01010101,0xFeFeFeFe,0xFeFeFeFe, // QW4-5, DQ0-ODD
1263 0xFeFeFeFe,0xFeFeFeFe,0x01010101,0x01010101, // QW6-7, DQ0-ODD
1264 0x02020202,0x02020202,0x02020202,0x02020202, // QW0-1, DQ1-ODD
1265 0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, // QW2-3, DQ1-ODD
1266 0xFdFdFdFd,0xFdFdFdFd,0x02020202,0x02020202, // QW4-5, DQ1-ODD
1267 0x02020202,0x02020202,0x02020202,0x02020202, // QW6-7, DQ1-ODD
1268 0x04040404,0x04040404,0xfBfBfBfB,0xfBfBfBfB, // QW0-1, DQ2-ODD
1269 0x04040404,0x04040404,0x04040404,0x04040404, // QW2-3, DQ2-ODD
1270 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW4-5, DQ2-ODD
1271 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW6-7, DQ2-ODD
1272 0x08080808,0x08080808,0xF7F7F7F7,0xF7F7F7F7, // QW0-1, DQ3-ODD
1273 0x08080808,0x08080808,0x08080808,0x08080808, // QW2-3, DQ3-ODD
1274 0xF7F7F7F7,0xF7F7F7F7,0x08080808,0x08080808, // QW4-5, DQ3-ODD
1275 0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW6-7, DQ3-ODD
1276 0x10101010,0x10101010,0x10101010,0x10101010, // QW0-1, DQ4-ODD
1277 0xeFeFeFeF,0xeFeFeFeF,0x10101010,0x10101010, // QW2-3, DQ4-ODD
1278 0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW4-5, DQ4-ODD
1279 0xeFeFeFeF,0xeFeFeFeF,0x10101010,0x10101010, // QW6-7, DQ4-ODD
1280 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW0-1, DQ5-ODD
1281 0xdFdFdFdF,0xdFdFdFdF,0x20202020,0x20202020, // QW2-3, DQ5-ODD
1282 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW4-5, DQ5-ODD
1283 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW6-7, DQ5-ODD
1284 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW0-1, DQ6-ODD
1285 0x40404040,0x40404040,0xBfBfBfBf,0xBfBfBfBf, // QW2-3, DQ6-ODD
1286 0x40404040,0x40404040,0xBfBfBfBf,0xBfBfBfBf, // QW4-5, DQ6-ODD
1287 0x40404040,0x40404040,0xBfBfBfBf,0xBfBfBfBf, // QW6-7, DQ6-ODD
1288 0x80808080,0x80808080,0x7F7F7F7F,0x7F7F7F7F, // QW0-1, DQ7-ODD
1289 0x80808080,0x80808080,0x7F7F7F7F,0x7F7F7F7F, // QW2-3, DQ7-ODD
1290 0x80808080,0x80808080,0x7F7F7F7F,0x7F7F7F7F, // QW4-5, DQ7-ODD
1291 0x80808080,0x80808080,0x80808080,0x80808080 // QW6-7, DQ7-ODD
1293 static const uint32_t TestPatternJD1b[] = {
1294 0x00000000,0x00000000,0x00000000,0x00000000, // QW0,CHA-B, ALL-EVEN
1295 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, // QW1,CHA-B, ALL-EVEN
1296 0x00000000,0x00000000,0x00000000,0x00000000, // QW2,CHA-B, ALL-EVEN
1297 0x00000000,0x00000000,0x00000000,0x00000000, // QW3,CHA-B, ALL-EVEN
1298 0x00000000,0x00000000,0x00000000,0x00000000, // QW4,CHA-B, ALL-EVEN
1299 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, // QW5,CHA-B, ALL-EVEN
1300 0x00000000,0x00000000,0x00000000,0x00000000, // QW6,CHA-B, ALL-EVEN
1301 0x00000000,0x00000000,0x00000000,0x00000000, // QW7,CHA-B, ALL-EVEN
1302 0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, // QW0,CHA-B, DQ0-ODD
1303 0x01010101,0x01010101,0x01010101,0x01010101, // QW1,CHA-B, DQ0-ODD
1304 0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, // QW2,CHA-B, DQ0-ODD
1305 0x01010101,0x01010101,0x01010101,0x01010101, // QW3,CHA-B, DQ0-ODD
1306 0x01010101,0x01010101,0x01010101,0x01010101, // QW4,CHA-B, DQ0-ODD
1307 0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, // QW5,CHA-B, DQ0-ODD
1308 0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, // QW6,CHA-B, DQ0-ODD
1309 0x01010101,0x01010101,0x01010101,0x01010101, // QW7,CHA-B, DQ0-ODD
1310 0x02020202,0x02020202,0x02020202,0x02020202, // QW0,CHA-B, DQ1-ODD
1311 0x02020202,0x02020202,0x02020202,0x02020202, // QW1,CHA-B, DQ1-ODD
1312 0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, // QW2,CHA-B, DQ1-ODD
1313 0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, // QW3,CHA-B, DQ1-ODD
1314 0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, // QW4,CHA-B, DQ1-ODD
1315 0x02020202,0x02020202,0x02020202,0x02020202, // QW5,CHA-B, DQ1-ODD
1316 0x02020202,0x02020202,0x02020202,0x02020202, // QW6,CHA-B, DQ1-ODD
1317 0x02020202,0x02020202,0x02020202,0x02020202, // QW7,CHA-B, DQ1-ODD
1318 0x04040404,0x04040404,0x04040404,0x04040404, // QW0,CHA-B, DQ2-ODD
1319 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW1,CHA-B, DQ2-ODD
1320 0x04040404,0x04040404,0x04040404,0x04040404, // QW2,CHA-B, DQ2-ODD
1321 0x04040404,0x04040404,0x04040404,0x04040404, // QW3,CHA-B, DQ2-ODD
1322 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW4,CHA-B, DQ2-ODD
1323 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW5,CHA-B, DQ2-ODD
1324 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW6,CHA-B, DQ2-ODD
1325 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW7,CHA-B, DQ2-ODD
1326 0x08080808,0x08080808,0x08080808,0x08080808, // QW0,CHA-B, DQ3-ODD
1327 0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW1,CHA-B, DQ3-ODD
1328 0x08080808,0x08080808,0x08080808,0x08080808, // QW2,CHA-B, DQ3-ODD
1329 0x08080808,0x08080808,0x08080808,0x08080808, // QW3,CHA-B, DQ3-ODD
1330 0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW4,CHA-B, DQ3-ODD
1331 0x08080808,0x08080808,0x08080808,0x08080808, // QW5,CHA-B, DQ3-ODD
1332 0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW6,CHA-B, DQ3-ODD
1333 0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW7,CHA-B, DQ3-ODD
1334 0x10101010,0x10101010,0x10101010,0x10101010, // QW0,CHA-B, DQ4-ODD
1335 0x10101010,0x10101010,0x10101010,0x10101010, // QW1,CHA-B, DQ4-ODD
1336 0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW2,CHA-B, DQ4-ODD
1337 0x10101010,0x10101010,0x10101010,0x10101010, // QW3,CHA-B, DQ4-ODD
1338 0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW4,CHA-B, DQ4-ODD
1339 0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW5,CHA-B, DQ4-ODD
1340 0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW6,CHA-B, DQ4-ODD
1341 0x10101010,0x10101010,0x10101010,0x10101010, // QW7,CHA-B, DQ4-ODD
1342 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW0,CHA-B, DQ5-ODD
1343 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW1,CHA-B, DQ5-ODD
1344 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW2,CHA-B, DQ5-ODD
1345 0x20202020,0x20202020,0x20202020,0x20202020, // QW3,CHA-B, DQ5-ODD
1346 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW4,CHA-B, DQ5-ODD
1347 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW5,CHA-B, DQ5-ODD
1348 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW6,CHA-B, DQ5-ODD
1349 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW7,CHA-B, DQ5-ODD
1350 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW0,CHA-B, DQ6-ODD
1351 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW1,CHA-B, DQ6-ODD
1352 0x40404040,0x40404040,0x40404040,0x40404040, // QW2,CHA-B, DQ6-ODD
1353 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW3,CHA-B, DQ6-ODD
1354 0x40404040,0x40404040,0x40404040,0x40404040, // QW4,CHA-B, DQ6-ODD
1355 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW5,CHA-B, DQ6-ODD
1356 0x40404040,0x40404040,0x40404040,0x40404040, // QW6,CHA-B, DQ6-ODD
1357 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW7,CHA-B, DQ6-ODD
1358 0x80808080,0x80808080,0x80808080,0x80808080, // QW0,CHA-B, DQ7-ODD
1359 0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F, // QW1,CHA-B, DQ7-ODD
1360 0x80808080,0x80808080,0x80808080,0x80808080, // QW2,CHA-B, DQ7-ODD
1361 0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F, // QW3,CHA-B, DQ7-ODD
1362 0x80808080,0x80808080,0x80808080,0x80808080, // QW4,CHA-B, DQ7-ODD
1363 0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F, // QW5,CHA-B, DQ7-ODD
1364 0x80808080,0x80808080,0x80808080,0x80808080, // QW6,CHA-B, DQ7-ODD
1365 0x80808080,0x80808080,0x80808080,0x80808080 // QW7,CHA-B, DQ7-ODD
1367 uint8_t pattern_buf_x[64 * 18 + 16]; // We need to two cache line So have more 16 bytes to keep 16 byte alignment */
1376 unsigned DQSWrDelay;
1377 unsigned is_Width128 = sysinfo->meminfo[ctrl->node_id].is_Width128;
1378 uint8_t *dqs_delay_a = &sysinfo->dqs_delay_a[ctrl->node_id * 2*2*9]; //channel 2, direction 2 , bytelane *9
1387 dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
1388 ecc_bit = dword & DCL_DimmEccEn;
1389 dword &= ~(DCL_DimmEccEn);
1390 pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dword);
1393 buf_a = (uint8_t *)(((uint32_t)(&pattern_buf_x[0]) + 0x10) & (~0xf));
1397 for(i=0;i<16*18;i++) {
1398 *((uint32_t *)(buf_a + i*4)) = TestPatternJD1b[i];
1403 for(i=0; i<16*9;i++) {
1404 *((uint32_t *)(buf_a + i*4)) = TestPatternJD1a[i];
1409 print_debug_dqs("\r\nTrainDQSRdWrPos: 0 ctrl ", ctrl->node_id, 0);
1411 printk_debug("TrainDQSRdWrPos: buf_a:%p\n", buf_a);
1416 if (!(sysinfo->meminfo[ctrl->node_id].dimm_mask & 0x0F) &&
1417 (sysinfo->meminfo[ctrl->node_id].dimm_mask & 0xF0)) { /* channelB only? */
1421 while( (channel<2) && (!Errors)) {
1422 print_debug_dqs("\tTrainDQSRdWrPos: 1 channel ",channel, 1);
1423 for(DQSWrDelay = 0; DQSWrDelay < 48; DQSWrDelay++) {
1425 SetDQSDelayAllCSR(ctrl, channel, DQS_WRITEDIR, DQSWrDelay);
1426 print_debug_dqs("\t\tTrainDQSRdWrPos: 21 DQSWrDelay ", DQSWrDelay, 2);
1427 err= TrainReadDQS(ctrl, channel, pattern, buf_a, dqs_delay_a, sysinfo);
1428 print_debug_dqs("\t\tTrainDQSRdWrPos: 22 err ",err, 2);
1433 print_debug_dqs("\tTrainDQSRdWrPos: 3 DQSWrDelay ", DQSWrDelay, 1);
1435 if(DQSWrDelay < 48) {
1436 Errors = TrainWriteDQS(ctrl, channel, pattern, buf_a, dqs_delay_a, sysinfo);
1437 print_debug_dqs("\tTrainDQSRdWrPos: 4 Errors ", Errors, 1);
1442 //FIXME: 64MuxMode??
1443 channel++; // skip channel if 64-bit mode
1448 dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
1449 dword &= ~(DCL_DimmEccEn);
1451 pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dword);
1457 //restore SSE2 setting
1460 print_debug_dqs("TrainDQSRdWrPos: ", 5, 0);
// Return the DQS delay stored for one byte lane.
// dqs_delay_a is addressed as [channel][direction][bytelane]: each channel
// owns 2*9 entries and each direction owns 9 entries, so the element read is
// channel*18 + direction*9 + bytelane.
// No bounds checking is done on any of the three indices.
1465 static inline uint8_t get_dqs_delay(unsigned channel, unsigned bytelane, unsigned direction, uint8_t *dqs_delay_a)
1467 return dqs_delay_a[channel * 2*9 + direction * 9 + bytelane];
// Interpolate a DQS delay between two byte lanes of one channel/direction.
// The stored delays of ByteLane0 and ByteLane1 are fetched with
// get_dqs_delay() and blended according to InterFactor (0..0xff, described
// in the note that follows the signature). The arithmetic works on the
// absolute difference of the two delays: the difference is multiplied by
// InterFactor, shifted right by 8, and then added to one of the two delays.
// NOTE(review): this listing omits several original lines (braces, the
// declaration of DQSDelay, the else keywords and the return statement), so
// the exact branch structure should be confirmed against the full file.
1470 static unsigned CalcEccDQSPos(unsigned channel,unsigned ByteLane0, unsigned ByteLane1, unsigned InterFactor, unsigned Direction, uint8_t *dqs_delay_a)
1471 /* InterFactor: 0: 100% ByteLane 0
1472 0x80: 50% between ByteLane 0 and 1
1473 0xff: 99.6% ByteLane 1 and 0.4% like 0
1476 unsigned DQSDelay0, DQSDelay1;
1479 DQSDelay0 = get_dqs_delay(channel, ByteLane0, Direction, dqs_delay_a);
1480 DQSDelay1 = get_dqs_delay(channel, ByteLane1, Direction, dqs_delay_a);
// Keep the difference non-negative. When lane 0 holds the larger delay the
// factor is mirrored (0xff - InterFactor) so the weighting still refers to
// lane 0 versus lane 1.
1482 if(DQSDelay0>DQSDelay1) {
1483 DQSDelay = DQSDelay0 - DQSDelay1;
1484 InterFactor = 0xff - InterFactor;
1487 DQSDelay = DQSDelay1 - DQSDelay0;
1490 DQSDelay *= InterFactor;
1492 DQSDelay >>= 8; // divides by 256, used as an approximation of /255
// Add the scaled difference back onto a base delay: DQSDelay1 when lane 0
// was larger, DQSDelay0 in the other visible branch.
1494 if(DQSDelay0>DQSDelay1) {
1495 DQSDelay += DQSDelay1;
1498 DQSDelay += DQSDelay0;
// Derive a DQS delay from byte lanes 4 and 5 and program it for ByteLane.
// For both channels and for each entry of direction[] (read, then write)
// the delay is computed with CalcEccDQSPos(), written to the controller with
// SetDQSDelayCSR() and recorded in the sysinfo table with save_dqs_delay().
// NOTE(review): the declarations of i, channel, ByteLane, Direction and
// dqs_delay, the inner loop header over i, and the value assigned to ByteLane
// are not visible in this listing; ByteLane is presumably the ECC lane,
// to be confirmed against the full file.
1505 static void SetEccDQSRdWrPos(const struct mem_controller *ctrl, struct sys_info *sysinfo)
1510 unsigned lane0, lane1, ratio;
1513 unsigned direction[] = { DQS_READDIR, DQS_WRITEDIR };
1515 uint8_t *dqs_delay_a = &sysinfo->dqs_delay_a[ctrl->node_id * 2*2*9]; // this node's slice: 2 channels * 2 directions * 9 byte lanes
1519 for(channel = 0; channel < 2; channel++) {
1521 Direction = direction[i];
// ratio 0 is passed as InterFactor, i.e. the result is taken entirely from
// lane0 (byte lane 4) per the InterFactor note on CalcEccDQSPos().
1522 lane0 = 4; lane1 = 5; ratio = 0;
1523 dqs_delay = CalcEccDQSPos(channel, lane0, lane1, ratio, Direction, dqs_delay_a);
1524 print_debug_dqs_pair("\t\tSetEccDQSRdWrPos: channel ", channel, Direction==DQS_READDIR? " R dqs_delay":" W dqs_delay", dqs_delay, 2);
1525 SetDQSDelayCSR(ctrl, channel, ByteLane, Direction, dqs_delay);
1526 save_dqs_delay(channel, ByteLane, Direction, dqs_delay_a, dqs_delay);
// Run one pass of DQS receiver-enable training on a single controller.
// Thin wrapper around TrainRcvrEn(ctrl, Pass, sysinfo) that brackets the call
// with begin/end debug messages carrying the controller's node id.
// Callers in this file treat a non-zero return value as failure.
// NOTE(review): the body of the failure branch and the return statements
// are not visible in this listing.
1531 static unsigned train_DqsRcvrEn(const struct mem_controller *ctrl, unsigned Pass, struct sys_info *sysinfo)
1533 print_debug_dqs("\r\ntrain_DqsRcvrEn: begin ctrl ", ctrl->node_id, 0);
1534 if(TrainRcvrEn(ctrl, Pass, sysinfo)) {
1537 print_debug_dqs("\r\ntrain_DqsRcvrEn: end ctrl ", ctrl->node_id, 0);
// Train the DQS read/write positions of a single controller.
// Calls TrainDQSRdWrPos(); a non-zero result is reported with printk_err
// together with the node id. SetEccDQSRdWrPos() is then used to program the
// delay derived from the trained lanes.
// Callers in this file treat a non-zero return value as failure.
// NOTE(review): the statements inside the failure branch and the return
// statements are not visible in this listing, so whether SetEccDQSRdWrPos()
// also runs after a failure cannot be confirmed from here.
1541 static unsigned train_DqsPos(const struct mem_controller *ctrl, struct sys_info *sysinfo)
1543 print_debug_dqs("\r\ntrain_DqsPos: begin ctrl ", ctrl->node_id, 0);
1544 if(TrainDQSRdWrPos(ctrl, sysinfo) != 0) {
1545 printk_err("\r\nDQS Training Rd Wr failed ctrl%02x\r\n", ctrl->node_id);
1549 SetEccDQSRdWrPos(ctrl, sysinfo);
1551 print_debug_dqs("\r\ntrain_DqsPos: end ctrl ", ctrl->node_id, 0);
// Workaround applied between the training passes when
// K8_REV_F_SUPPORT_F0_F1_WORKAROUND is 1.
// First loop: for every present controller that has DIMMs and for which
// is_cpu_pre_f2_in_bsp(i) reports true, clear DC_DqsRcvEnTrain in DRAM_CTRL,
// pulse DI_EnDramInit in DRAM_INIT (set, write, clear, write) and compute a
// per-controller 64-bit TSC deadline tsc1[i] by adding tsc0[i] to it.
// Second loop: for the same controllers, spin until the current TSC value
// has reached tsc1[i].
// NOTE(review): the declarations of tsc, tsc1, i and dword, the reads that
// fill tsc1[i] and tsc (presumably rdtsc), the carry increment and the
// do-loop header are not visible in this listing.
1556 #if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
1557 static void f0_svm_workaround(int controllers, const struct mem_controller *ctrl, tsc_t *tsc0, struct sys_info *sysinfo)
1560 unsigned cpu_f0_f1[8];
1563 print_debug_addr("dqs_timing: tsc1[8] :", tsc1);
1565 for(i = 0; i < controllers; i++) {
1566 if (!sysinfo->ctrl_present[i])
1569 /* Skip everything if I don't have any memory on this controller */
1570 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
// Remember the per-node answer; the second loop reuses it.
1574 cpu_f0_f1[i] = is_cpu_pre_f2_in_bsp(i);
1576 if(!cpu_f0_f1[i]) continue;
1578 dword = pci_read_config32(ctrl[i].f2, DRAM_CTRL);
1579 dword &= ~DC_DqsRcvEnTrain;
1580 pci_write_config32(ctrl[i].f2, DRAM_CTRL, dword);
// Set DI_EnDramInit and immediately clear it again with a second write.
1582 dword = pci_read_config32(ctrl[i].f2, DRAM_INIT);
1583 dword |= DI_EnDramInit;
1584 pci_write_config32(ctrl[i].f2, DRAM_INIT, dword);
1585 dword &= ~DI_EnDramInit;
1586 pci_write_config32(ctrl[i].f2, DRAM_INIT, dword);
1589 print_debug_dqs_tsc("begin: tsc1", i, tsc1[i].hi, tsc1[i].lo, 2);
// 64-bit add done in two 32-bit halves: a sum smaller than either operand
// means the low half wrapped around.
1591 dword = tsc1[i].lo + tsc0[i].lo;
1592 if((dword<tsc1[i].lo) || (dword<tsc0[i].lo)) {
1596 tsc1[i].hi+= tsc0[i].hi;
1598 print_debug_dqs_tsc("end : tsc1", i, tsc1[i].hi, tsc1[i].lo, 2);
1602 for(i = 0; i < controllers; i++) {
1603 if (!sysinfo->ctrl_present[i])
1606 /* Skip everything if I don't have any memory on this controller */
1607 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1609 if(!cpu_f0_f1[i]) continue;
// Loop while the deadline tsc1[i] is still later than tsc (hi word first,
// then lo word).
1615 } while ((tsc1[i].hi>tsc.hi) || ((tsc1[i].hi==tsc.hi) && (tsc1[i].lo>tsc.lo)));
1617 print_debug_dqs_tsc("end : tsc ", i, tsc.hi, tsc.lo, 2);
// Program one variable-range MTRR (base/mask MSR pair number reg).
//   reg          - index handed to MTRRphysBase_MSR() / MTRRphysMask_MSR()
//   basek, sizek - range start and size in KiB (shifted left by 10 to get
//                  bytes, shifted right by 22 to get the upper 32 bits)
//   type         - memory type; the line that merges it into the base value
//                  is not visible in this listing
//   address_bits - physical address width; the expression below subtracts 32
//                  from it as an unsigned value, so it is expected to be at
//                  least 32 (the caller in this file passes 40)
// NOTE(review): the declarations of base, mask and zero, the else keyword,
// the low mask word of the large-size branch and the lines that set the type
// and valid bits are not visible in this listing.
1625 /* setting variable mtrr, comes from linux kernel source */
1626 static void set_var_mtrr_dqs(
1627 unsigned int reg, unsigned long basek, unsigned long sizek,
1628 unsigned char type, unsigned address_bits)
1631 unsigned address_mask_high;
// Mask covering the implemented address bits above bit 31.
1633 address_mask_high = ((1u << (address_bits - 32u)) - 1u);
1635 base.hi = basek >> 22;
1636 base.lo = basek << 10;
// 4*1024*1024 KiB is 4 GiB: below that the size fits in the low mask word,
// otherwise it only affects the high mask word.
1638 if (sizek < 4*1024*1024) {
1639 mask.hi = address_mask_high;
1640 mask.lo = ~((sizek << 10) -1);
1643 mask.hi = address_mask_high & (~((sizek >> 22) -1));
1652 zero.lo = zero.hi = 0;
1653 /* The invalid bit is kept in the mask, so we simply clear the
1654 relevant mask register to disable a range. */
1655 wrmsr (MTRRphysMask_MSR(reg), zero);
1657 /* Bit 32-35 of MTRRphysMask should be set to 1 */
// The range was disabled above, so the base is written before the mask.
1660 wrmsr (MTRRphysBase_MSR(reg), base);
1661 wrmsr (MTRRphysMask_MSR(reg), mask);
// Uses the x86 bsrl (bit scan reverse) instruction on x.
// NOTE(review): the declaration of r, the middle of the asm template (the
// handling of x == 0 before label 1) and the return statement are not
// visible in this listing.
1666 /* fms: find most significant bit set, stolen from Linux Kernel Source. */
1667 static inline unsigned int fms(unsigned int x)
1671 __asm__("bsrl %1,%0\n\t"
1674 "1:" : "=r" (r) : "g" (x));
// Uses the x86 bsfl (bit scan forward) instruction on x.
// Note that the name means "find least significant" here, which differs from
// the Linux helper of the same name.
// NOTE(review): the declaration of r, the middle of the asm template (the
// handling of x == 0 before label 1) and the return statement are not
// visible in this listing.
1678 /* fls: find least significant bit set */
1679 static inline unsigned int fls(unsigned int x)
1683 __asm__("bsfl %1,%0\n\t"
1686 "1:" : "=r" (r) : "g" (x));
// Cover [range_startk, range_startk + range_sizek) (both in KiB) with
// variable MTRRs of the given type, starting at MTRR number reg.
// Each loop iteration programs one MTRR through set_var_mtrr_dqs(), then
// advances the start and shrinks the remaining size by the chunk size sizek.
// The chunk alignment is bounded by the lowest set bit of the start address
// (fls) and the highest set bit of the remaining size (fms).
// Nothing is programmed when the size is zero or reg is already 8 or more.
// NOTE(review): the assignment of sizek (presumably 1 << align), the body
// of the align clamp, the early-return body and the final return (presumably
// the next free MTRR number, since the caller stores the result in reg) are
// not visible in this listing. next_range_startk is not used in any visible
// line.
1690 static unsigned int range_to_mtrr(unsigned int reg,
1691 unsigned long range_startk, unsigned long range_sizek,
1692 unsigned long next_range_startk, unsigned char type, unsigned address_bits)
1694 if (!range_sizek || (reg >= 8)) {
1697 while(range_sizek) {
1698 unsigned long max_align, align;
1699 unsigned long sizek;
1700 /* Compute the maximum size I can make a range */
1701 max_align = fls(range_startk);
1702 align = fms(range_sizek);
1703 if (align > max_align) {
// The message is compiled out when CONFIG_MEM_TRAIN_SEQ is 1.
1707 #if CONFIG_MEM_TRAIN_SEQ != 1
1708 printk_debug("Setting variable MTRR %d, base: %4dMB, range: %4dMB, type %s\r\n",
1709 reg, range_startk >>10, sizek >> 10,
1710 (type==MTRR_TYPE_UNCACHEABLE)?"UC":
1711 ((type==MTRR_TYPE_WRBACK)?"WB":"Other")
1714 set_var_mtrr_dqs(reg++, range_startk, sizek, type, address_bits);
1715 range_startk += sizek;
1716 range_sizek -= sizek;
// Write the TOP_MEM2 and TOP_MEM MSRs from values given in KiB.
// Each KiB value is converted to a 64-bit byte address split over msr.lo and
// msr.hi: the low 22 bits shifted left by 10 form the low word, the upper
// 10 bits shifted right by 22 form the high word. TOP_MEM2 is written first.
// NOTE(review): the declaration of msr is not visible in this listing.
1723 static void set_top_mem_ap(unsigned tom_k, unsigned tom2_k)
1727 /* Now set top of memory */
1728 msr.lo = (tom2_k & 0x003fffff) << 10;
1729 msr.hi = (tom2_k & 0xffc00000) >> 22;
1730 wrmsr(TOP_MEM2, msr);
1732 msr.lo = (tom_k & 0x003fffff) << 10;
1733 msr.hi = (tom_k & 0xffc00000) >> 22;
1734 wrmsr(TOP_MEM, msr);
// Set up MTRRs so that DQS training can access its test addresses.
// Visible steps: set SYSCFG_MSR_MtrrFixDramModEn in SYSCFG, prepare a fixed
// MTRR value (0x1e1e1e1e in the high word), map [0, tom_k) as write-back
// with variable MTRRs starting at number 2, then set SYSCFG bits 21 and 22.
// NOTE(review): the declarations of msr and reg, the low word and the wrmsr
// calls for the fixed MTRRs, and any use of tom2_k before the final SYSCFG
// update are not visible in this listing.
1737 static void setup_mtrr_dqs(unsigned tom_k, unsigned tom2_k)
1743 //still enable from cache_as_ram.inc
1744 msr = rdmsr(SYSCFG_MSR);
1745 msr.lo |= SYSCFG_MSR_MtrrFixDramModEn;
1746 wrmsr(SYSCFG_MSR,msr);
1749 //[0,512k), [512k, 640k)
1750 msr.hi = 0x1e1e1e1e;
// 40 is the physical address width handed down to set_var_mtrr_dqs().
1756 reg = range_to_mtrr(2, 0, tom_k,4*1024*1024, MTRR_TYPE_WRBACK, 40);
1760 //enable tom2 and type
1761 msr = rdmsr(SYSCFG_MSR);
1762 msr.lo |= (1<<21) | (1<<22); //MtrrTom2En and Tom2ForceMemTypeWB
1763 wrmsr(SYSCFG_MSR, msr);
// Undo setup_mtrr_dqs() once training is finished.
// Visible steps: set SYSCFG_MSR_MtrrFixDramModEn in SYSCFG, iterate over
// MSR numbers 0x204 through 0x20f, then clear SYSCFG bits 21 and 22.
// NOTE(review): the declarations of msr and i, the fixed-MTRR writes and
// the body of the loop over 0x204..0x20f (presumably clearing those MSRs)
// are not visible in this listing. tom2_k is not used in any visible line.
1768 static void clear_mtrr_dqs(unsigned tom2_k)
1773 //still enable from cache_as_ram.inc
1774 msr = rdmsr(SYSCFG_MSR);
1775 msr.lo |= SYSCFG_MSR_MtrrFixDramModEn;
1776 wrmsr(SYSCFG_MSR,msr);
1778 //[0,512k), [512k, 640k)
1785 for(i=0x204;i<0x210;i++) {
1791 //disable tom2 and type
1792 msr = rdmsr(SYSCFG_MSR);
1793 msr.lo &= ~((1<<21) | (1<<22)); //MtrrTom2En and Tom2ForceMemTypeWB
1794 wrmsr(SYSCFG_MSR, msr);
// Update one bit of HT_INIT_CONTROL on PCI device 0:(0x18+i).0.
// The register is read, the low bit of val is OR-ed in at position bit, and
// the result is written back.
// NOTE(review): the declaration of dword and one statement between the
// read and the OR (presumably clearing the target bit first) are not visible
// in this listing; with only the visible OR, val == 0 would leave the bit
// unchanged.
1798 static void set_htic_bit(unsigned i, unsigned val, unsigned bit)
1801 dword = pci_read_config32(PCI_DEV(0, 0x18+i, 0), HT_INIT_CONTROL);
1803 dword |= ((val & 1) <<bit);
1804 pci_write_config32(PCI_DEV(0, 0x18+i, 0), HT_INIT_CONTROL, dword);
// Read HT_INIT_CONTROL on PCI device 0:(0x18+i).0 in order to test one bit.
// NOTE(review): the declaration of dword and the return statement that
// extracts the requested bit are not visible in this listing.
1808 static unsigned get_htic_bit(unsigned i, unsigned bit)
1811 dword = pci_read_config32(PCI_DEV(0, 0x18+i, 0), HT_INIT_CONTROL);
// Return once bit 9 of node 0's HT_INIT_CONTROL register reads as set.
// set_sysinfo_in_ram() is the function in this file that writes that bit.
// NOTE(review): the surrounding loop construct is not visible in this
// listing; presumably the check is repeated until it succeeds.
1816 static void wait_till_sysinfo_in_ram(void)
1819 if(get_htic_bit(0, 9)) return;
// Publish val (low bit only) in bit 9 of node 0's HT_INIT_CONTROL register,
// the flag polled by wait_till_sysinfo_in_ram().
1823 static void set_sysinfo_in_ram(unsigned val)
1825 set_htic_bit(0, val, 9);
// Early NVRAM hooks used to save and restore DQS training results.
// When S3_NVRAM_EARLY is defined only the prototypes are declared here; the
// implementations must then come from elsewhere.
// The two definitions that follow appear to be the fallback versions.
// NOTE(review): the preprocessor lines separating both variants and the body
// of the fallback s3_save_nvram_early() are not visible in this listing.
1828 #ifdef S3_NVRAM_EARLY
1829 int s3_save_nvram_early(u32 dword, int size, int nvram_pos);
1830 int s3_load_nvram_early(int size, u32 *old_dword, int nvram_pos);
1832 int s3_save_nvram_early(u32 dword, int size, int nvram_pos)
1837 int s3_load_nvram_early(int size, u32 *old_dword, int nvram_pos)
// Without a real loader the saved state cannot be restored, so stop here.
1839 die("No memory NVRAM loader for DQS data! Unable to restore memory state\n");
1841 return nvram_pos; /* Make GCC happy */
// Save one indexed controller register to NVRAM.
// Reads the 32-bit value selected by index through the index register at
// offset 0x98 of dev, hands it to s3_save_nvram_early() together with size
// and nvram_pos, and returns whatever that call returns (used by the callers
// as the next NVRAM position).
1845 static int save_index_to_pos(unsigned int dev, int size, int index, int nvram_pos)
1847 u32 dword = pci_read_config32_index_wait(dev, 0x98, index);
1849 return s3_save_nvram_early(dword, size, nvram_pos);
// Restore one indexed controller register from NVRAM.
// The current register value is read first and passed by pointer to
// s3_load_nvram_early(), so a loader that only replaces part of the dword
// (size may be smaller than 4) keeps the remaining bits; the result is then
// written back through the index register at offset 0x98.
// NOTE(review): the return statement is not visible in this listing;
// callers use the result as the next NVRAM position.
1852 static int load_index_to_pos(unsigned int dev, int size, int index, int nvram_pos)
1855 u32 old_dword = pci_read_config32_index_wait(dev, 0x98, index);
1856 nvram_pos = s3_load_nvram_early(size, &old_dword, nvram_pos);
1857 pci_write_config32_index_wait(dev, 0x98, index, old_dword);
// Restore the indexed DQS registers of one channel from NVRAM.
// Twelve registers are loaded in a fixed order with sizes 4,4,4,1,4,4,4,1,
// 1,1,1,1, which adds up to the 30 bytes mentioned below. The order and the
// sizes mirror dqs_save_MC_NVRAM_ch() and must stay in step with it.
// pos is threaded through every call as the running NVRAM position.
// NOTE(review): any scaling of ch before it is added to the index values
// and the return statement are not visible in this listing.
1861 static int dqs_load_MC_NVRAM_ch(unsigned int dev, int ch, int pos)
1863 /* 30 bytes per channel */
1865 pos = load_index_to_pos(dev, 4, 0x00 + ch, pos);
1866 pos = load_index_to_pos(dev, 4, 0x01 + ch, pos);
1867 pos = load_index_to_pos(dev, 4, 0x02 + ch, pos);
1868 pos = load_index_to_pos(dev, 1, 0x03 + ch, pos);
1869 pos = load_index_to_pos(dev, 4, 0x04 + ch, pos);
1870 pos = load_index_to_pos(dev, 4, 0x05 + ch, pos);
1871 pos = load_index_to_pos(dev, 4, 0x06 + ch, pos);
1872 pos = load_index_to_pos(dev, 1, 0x07 + ch, pos);
1873 pos = load_index_to_pos(dev, 1, 0x10 + ch, pos);
1874 pos = load_index_to_pos(dev, 1, 0x13 + ch, pos);
1875 pos = load_index_to_pos(dev, 1, 0x16 + ch, pos);
1876 pos = load_index_to_pos(dev, 1, 0x19 + ch, pos);
// Save the indexed DQS registers of one channel to NVRAM.
// Twelve registers are stored in a fixed order with sizes 4,4,4,1,4,4,4,1,
// 1,1,1,1, which adds up to the 30 bytes mentioned below. The order and the
// sizes mirror dqs_load_MC_NVRAM_ch() and must stay in step with it.
// pos is threaded through every call as the running NVRAM position.
// NOTE(review): any scaling of ch before it is added to the index values
// and the return statement are not visible in this listing.
1880 static int dqs_save_MC_NVRAM_ch(unsigned int dev, int ch, int pos)
1882 /* 30 bytes per channel */
1884 pos = save_index_to_pos(dev, 4, 0x00 + ch, pos);
1885 pos = save_index_to_pos(dev, 4, 0x01 + ch, pos);
1886 pos = save_index_to_pos(dev, 4, 0x02 + ch, pos);
1887 pos = save_index_to_pos(dev, 1, 0x03 + ch, pos);
1888 pos = save_index_to_pos(dev, 4, 0x04 + ch, pos);
1889 pos = save_index_to_pos(dev, 4, 0x05 + ch, pos);
1890 pos = save_index_to_pos(dev, 4, 0x06 + ch, pos);
1891 pos = save_index_to_pos(dev, 1, 0x07 + ch, pos);
1892 pos = save_index_to_pos(dev, 1, 0x10 + ch, pos);
1893 pos = save_index_to_pos(dev, 1, 0x13 + ch, pos);
1894 pos = save_index_to_pos(dev, 1, 0x16 + ch, pos);
1895 pos = save_index_to_pos(dev, 1, 0x19 + ch, pos);
// Save the trained DQS state of one memory controller (dev) to NVRAM.
// Channel 0 is stored first, then channel 1, followed by the full 32-bit
// DRAM_CONFIG_HIGH register; dqs_restore_MC_NVRAM() reads the data back in
// the same order.
// NOTE(review): the declarations of pos and reg, including the starting
// value of pos, are not visible in this listing.
1899 static void dqs_save_MC_NVRAM(unsigned int dev)
1903 printk_debug("DQS SAVE NVRAM: %x\n", dev);
1904 pos = dqs_save_MC_NVRAM_ch(dev, 0, pos);
1905 pos = dqs_save_MC_NVRAM_ch(dev, 1, pos);
1906 /* save the maxasync lat here */
1907 reg = pci_read_config32(dev, DRAM_CONFIG_HIGH);
1908 pos = s3_save_nvram_early(reg, 4, pos);
// Restore the trained DQS state of one memory controller (dev) from NVRAM.
// Channel 0 is loaded first, then channel 1, followed by the saved copy of
// DRAM_CONFIG_HIGH, matching the order used by dqs_save_MC_NVRAM().
// Only the MaxAsyncLat field of the saved register is kept; it is OR-ed into
// the current register value and written back.
// NOTE(review): the declarations of pos and reg, including the starting
// value of pos, are not visible in this listing.
// NOTE(review): the MaxAsyncLat field of the current register value is not
// cleared before the OR, so bits already set there remain set.
1911 static void dqs_restore_MC_NVRAM(unsigned int dev)
1916 printk_debug("DQS RESTORE FROM NVRAM: %x\n", dev);
1917 pos = dqs_load_MC_NVRAM_ch(dev, 0, pos);
1918 pos = dqs_load_MC_NVRAM_ch(dev, 1, pos);
1919 /* load the maxasync lat here */
// The loader fills reg through the pointer passed as second argument.
1920 pos = s3_load_nvram_early(4, &reg, pos);
1921 reg &= (DCH_MaxAsyncLat_MASK <<DCH_MaxAsyncLat_SHIFT);
1922 reg |= pci_read_config32(dev, DRAM_CONFIG_HIGH);
1923 pci_write_config32(dev, DRAM_CONFIG_HIGH, reg);
// DQS training for all controllers, built when CONFIG_MEM_TRAIN_SEQ is 0.
// Two signatures exist: the one with the extra tsc0 argument is selected
// when K8_REV_F_SUPPORT_F0_F1_WORKAROUND is 1.
// Sequence, each step looping over every present controller that has DIMMs:
//   1. enable MTRRs for the test addresses (setup_mtrr_dqs)
//   2. fill_mem_cs_sysinfo()
//   3. receiver-enable training, pass 1
//   4. f0_svm_workaround() when the workaround is compiled in
//   5. DQS position training
//   6. receiver-enable training, pass 2, after which the controller is
//      marked trained (mem_trained[i] = 1) and its state is saved to NVRAM
// Any failing step jumps to the out label, and the MTRR setup is undone with
// clear_mtrr_dqs().
// NOTE(review): the preprocessor lines between the two signatures, the
// local declarations, the continue statements after the ctrl_present checks,
// the TSC bookkeeping and the out label itself are not visible in this
// listing.
1926 #if CONFIG_MEM_TRAIN_SEQ == 0
1927 #if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
1928 static void dqs_timing(int controllers, const struct mem_controller *ctrl, tsc_t *tsc0, struct sys_info *sysinfo)
1930 static void dqs_timing(int controllers, const struct mem_controller *ctrl, struct sys_info *sysinfo)
1937 //need to enable mtrr, so dqs training could access the test address
1938 setup_mtrr_dqs(sysinfo->tom_k, sysinfo->tom2_k);
1940 for(i = 0; i < controllers; i++) {
1941 if (!sysinfo->ctrl_present[ i ])
1944 /* Skip everything if I don't have any memory on this controller */
1945 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1947 fill_mem_cs_sysinfo(i, ctrl+i, sysinfo);
1951 for(i = 0; i < controllers; i++) {
1952 if (!sysinfo->ctrl_present[ i ])
1955 /* Skip everything if I don't have any memory on this controller */
1956 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1958 printk_debug("DQS Training:RcvrEn:Pass1: %02x\n", i);
1959 if(train_DqsRcvrEn(ctrl+i, 1, sysinfo)) goto out;
1960 printk_debug(" done\r\n");
1964 #if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
1965 f0_svm_workaround(controllers, ctrl, tsc0, sysinfo);
1969 for(i = 0; i < controllers; i++) {
1970 if (!sysinfo->ctrl_present[i])
1973 /* Skip everything if I don't have any memory on this controller */
1974 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1976 printk_debug("DQS Training:DQSPos: %02x\n", i);
1977 if(train_DqsPos(ctrl+i, sysinfo)) goto out;
1978 printk_debug(" done\r\n");
1982 for(i = 0; i < controllers; i++) {
1983 if (!sysinfo->ctrl_present[i])
1986 /* Skip everything if I don't have any memory on this controller */
1987 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1989 printk_debug("DQS Training:RcvrEn:Pass2: %02x\n", i);
1990 if(train_DqsRcvrEn(ctrl+i, 2, sysinfo)) goto out;
1991 printk_debug(" done\r\n");
// Only controllers that completed every pass are marked trained and saved.
1992 sysinfo->mem_trained[i]=1;
1993 dqs_save_MC_NVRAM((ctrl+i)->f2);
1998 clear_mtrr_dqs(sysinfo->tom2_k);
2002 print_debug_dqs_tsc_x("DQS Training:tsc", i, tsc[i].hi, tsc[i].lo);
// DQS training for a single controller i, built when CONFIG_MEM_TRAIN_SEQ is
// greater than 0.
// Nothing is done unless mem_trained[i] is 0x80 on entry. The three training
// steps record a distinct code in mem_trained[i] on failure:
//   0x81 - receiver-enable pass 1 failed
//   0x82 - DQS position training failed
//   0x83 - receiver-enable pass 2 failed
// If the value is still 0x80 at the end it is replaced by 1 (trained).
// The MTRR setup and teardown are only compiled in when
// CONFIG_MEM_TRAIN_SEQ is 1.
// NOTE(review): the local declarations, the statements following each
// failure assignment (presumably a jump to a common exit), any use of the
// parameter v and the TSC bookkeeping are not visible in this listing.
2012 #if CONFIG_MEM_TRAIN_SEQ > 0
2014 static void dqs_timing(int i, const struct mem_controller *ctrl, struct sys_info *sysinfo, unsigned v)
2021 if(sysinfo->mem_trained[i] != 0x80) return;
2023 #if CONFIG_MEM_TRAIN_SEQ == 1
2024 //need to enable mtrr, so dqs training could access the test address
2025 setup_mtrr_dqs(sysinfo->tom_k, sysinfo->tom2_k);
2028 fill_mem_cs_sysinfo(i, ctrl, sysinfo);
2033 printk_debug("set DQS timing:RcvrEn:Pass1: %02x\n", i);
2035 if(train_DqsRcvrEn(ctrl, 1, sysinfo)) {
2036 sysinfo->mem_trained[i]=0x81; //
2041 printk_debug(" done\r\n");
2043 printk_debug("set DQS timing:DQSPos: %02x\n", i);
2046 if(train_DqsPos(ctrl, sysinfo)) {
2047 sysinfo->mem_trained[i]=0x82; //
2052 printk_debug(" done\r\n");
2055 printk_debug("set DQS timing:RcvrEn:Pass2: %02x\n", i);
2057 if(train_DqsRcvrEn(ctrl, 2, sysinfo)){
2058 sysinfo->mem_trained[i]=0x83; //
2063 printk_debug(" done\r\n");
2069 #if CONFIG_MEM_TRAIN_SEQ == 1
2070 clear_mtrr_dqs(sysinfo->tom2_k);
2074 for(ii=0;ii<4;ii++) {
2075 print_debug_dqs_tsc_x("Total DQS Training : tsc ", ii, tsc[ii].hi, tsc[ii].lo);
// Still 0x80 means no step recorded a failure code.
2079 if(sysinfo->mem_trained[i] == 0x80) {
2080 sysinfo->mem_trained[i]=1;
// Train the memory of one node and report the outcome
// (CONFIG_MEM_TRAIN_SEQ == 1 only).
// Runs dqs_timing() for the node's controller using the local sysinfo copy,
// then copies the resulting mem_trained[nodeid] status into sysinfox so the
// owner of that structure can see it. The two memcpy calls that would also
// copy the trained delay tables are commented out in the original.
2086 #if CONFIG_MEM_TRAIN_SEQ == 1
2087 static void train_ram(unsigned nodeid, struct sys_info *sysinfo, struct sys_info *sysinfox)
2089 dqs_timing(nodeid, &sysinfo->ctrl[nodeid], sysinfo, 0); // keep the output tidy
2090 // memcpy(&sysinfox->dqs_rcvr_dly_a[nodeid * 2 * 8],&sysinfo->dqs_rcvr_dly_a[nodeid * 2 * 8], 2*8);
2091 // memcpy(&sysinfox->dqs_delay_a[nodeid * 2 * 2 * 9], &sysinfo->dqs_delay_a[nodeid * 2 * 2 * 9], 2 * 2 * 9);
2092 sysinfox->mem_trained[nodeid] = sysinfo->mem_trained[nodeid];
2095 static void copy_and_run_ap_code_in_car(unsigned ret_addr);
2096 static inline void train_ram_on_node(unsigned nodeid, unsigned coreid, struct sys_info *sysinfo, unsigned retcall)
2098 if(coreid) return; // only do it on core0
2099 struct sys_info *sysinfox = (void*)((CONFIG_RAMTOP) - CONFIG_DCACHE_RAM_GLOBAL_VAR_SIZE);
2100 wait_till_sysinfo_in_ram(); // use pci to get it
2102 if(sysinfox->mem_trained[nodeid] == 0x80) {
2104 sysinfo->tom_k = sysinfox->tom_k;
2105 sysinfo->tom2_k = sysinfox->tom2_k;
2106 sysinfo->meminfo[nodeid].is_Width128 = sysinfox->meminfo[nodeid].is_Width128;
2107 sysinfo->mem_trained[nodeid] = sysinfox->mem_trained[nodeid];
2108 memcpy(&sysinfo->ctrl[nodeid], &sysinfox->ctrl[nodeid], sizeof(struct mem_controller));
2110 memcpy(sysinfo, sysinfox, CONFIG_DCACHE_RAM_GLOBAL_VAR_SIZE);
2112 set_top_mem_ap(sysinfo->tom_k, sysinfo->tom2_k); // keep the ap's tom consistent with bsp's
2113 #if CONFIG_AP_CODE_IN_CAR == 0
2114 printk_debug("CODE IN ROM AND RUN ON NODE: %02x\n", nodeid);
2115 train_ram(nodeid, sysinfo, sysinfox);
2117 /* Can copy dqs_timing to ap cache and run from cache?
2118 * we need coreboot_ap_car.rom? and treat it as coreboot_ram.rom for ap ?
2120 copy_and_run_ap_code_in_car(retcall);
2121 // will go back by jump