2 * This file is part of the coreboot project.
4 * Copyright (C) 2005 YingHai Lu
5 * Copyright (C) 2008 Advanced Micro Devices, Inc.
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; version 2 of the License.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 #include <arch/stages.h>
23 //0: means no debug info
/* Verbosity threshold for the print_debug_dqs* helpers: a message is printed
 * only when DQS_TRAIN_DEBUG is greater than the level passed with it. */
24 #define DQS_TRAIN_DEBUG 0
/*
 * Debug helper: prints str followed by val in hex ("%s%x\n") at BIOS_DEBUG.
 * The message is emitted only when DQS_TRAIN_DEBUG is greater than level;
 * with DQS_TRAIN_DEBUG == 0 the printing code is compiled out.
 */
26 static inline void print_debug_dqs(const char *str, unsigned val, unsigned level)
28 #if DQS_TRAIN_DEBUG > 0
29 if(DQS_TRAIN_DEBUG > level) {
30 printk(BIOS_DEBUG, "%s%x\n", str, val);
/*
 * Debug helper: prints two label/value pairs on one line, each value as
 * 8 hex digits ("%s%08x%s%08x\n"). Emitted only when DQS_TRAIN_DEBUG is
 * greater than level; compiled out when DQS_TRAIN_DEBUG == 0.
 */
35 static inline void print_debug_dqs_pair(const char *str, unsigned val, const char *str2, unsigned val2, unsigned level)
37 #if DQS_TRAIN_DEBUG > 0
38 if(DQS_TRAIN_DEBUG > level) {
39 printk(BIOS_DEBUG, "%s%08x%s%08x\n", str, val, str2, val2);
/*
 * Debug helper: prints "str[i]=" followed by val and val2 as two adjacent
 * 8-digit hex words (i is printed as 2 hex digits). Emitted only when
 * DQS_TRAIN_DEBUG is greater than level; compiled out when it is 0.
 */
44 static inline void print_debug_dqs_tsc(const char *str, unsigned i, unsigned val, unsigned val2, unsigned level)
46 #if DQS_TRAIN_DEBUG > 0
47 if(DQS_TRAIN_DEBUG > level) {
48 printk(BIOS_DEBUG, "%s[%02x]=%08x%08x\n", str, i, val, val2);
/*
 * Same output format as print_debug_dqs_tsc(), but without a level argument:
 * the printk shown here is not guarded by a DQS_TRAIN_DEBUG comparison.
 */
53 static inline void print_debug_dqs_tsc_x(const char *str, unsigned i, unsigned val, unsigned val2)
55 printk(BIOS_DEBUG, "%s[%02x]=%08x%08x\n", str, i, val, val2);
/*
 * Copies memory-controller registers of node nodeid into sysinfo so that the
 * training code can later work from the cached values:
 *   mem_base[nodeid]      <- ctrl->f1 config register 0x40 + nodeid*8
 *   cs_base[nodeid*8 + i] <- ctrl->f2 config register 0x40 + i*4
 *   hole_reg[nodeid]      <- ctrl->f1 config register 0xf0
 * NOTE(review): i presumably runs over the 8 chip selects of the node
 * (cs_base uses a stride of 8 per node) - confirm against the loop bounds.
 */
59 static void fill_mem_cs_sysinfo(unsigned nodeid, const struct mem_controller *ctrl, struct sys_info *sysinfo)
63 sysinfo->mem_base[nodeid] = pci_read_config32(ctrl->f1, 0x40 + (nodeid<<3));
66 sysinfo->cs_base[nodeid*8+i] = pci_read_config32(ctrl->f2, 0x40 + (i<<2));
69 sysinfo->hole_reg[nodeid] = pci_read_config32(ctrl->f1, 0xf0);
/*
 * Builds the test address for chip select cs_idx of ctrl's node from the
 * register copies held in sysinfo (cs_base, mem_base and, when
 * CONFIG_HW_MEM_HOLE_SIZEK != 0, hole_reg).
 *
 * The address is handled in units of 256 bytes: the 1MB offset is added as
 * 1<<(20-8), and callers in this file pass (addr>>24) to set_FSBASE() and use
 * (addr<<8) as the 32-bit offset for the %fs-relative accesses.
 */
72 static unsigned Get_MCTSysAddr(const struct mem_controller *ctrl, unsigned cs_idx, struct sys_info *sysinfo)
76 unsigned nodeid = ctrl->node_id;
78 #if CONFIG_HW_MEM_HOLE_SIZEK != 0
82 //get the local base addr of the chipselect
83 dword = sysinfo->cs_base[nodeid * 8 + cs_idx];
86 //sys addr= node base + local cs base
87 mem_base = sysinfo->mem_base[nodeid];
88 mem_base &= 0xffff0000;
91 #if CONFIG_HW_MEM_HOLE_SIZEK != 0
92 hole_reg = sysinfo->hole_reg[nodeid];
/* hole start in KB, taken from the top byte of the hole register */
95 hole_startk = (hole_reg & (0xff<<24)) >> 10;
/* an address at or above the hole start and below 4GB is moved up by the size of the hole */
96 if( (dword >= (hole_startk<<2)) && (dword < ((4*1024*1024)<<2))) {
97 dword += ((4*1024*1024 - hole_startk)<<2);
102 //add 1MB offset to avoid compat area
103 dword += (1<<(20-8));
105 //So final result is upper 32 bit addr
/*
 * Test address used for receiver-enable training of chip select cs_idx.
 * Simply forwards to Get_MCTSysAddr(); the channel argument is not used by
 * the code shown here.
 */
111 static unsigned Get_RcvrSysAddr(const struct mem_controller * ctrl, unsigned channel, unsigned cs_idx, struct sys_info *sysinfo)
113 return Get_MCTSysAddr(ctrl, cs_idx, sysinfo);
/* Reads control register CR4 into a local (cr4) with a "movl %%cr4" instruction. */
117 static inline unsigned long read_cr4(void)
120 asm volatile ("movl %%cr4, %0" : "=r" (cr4));
/* Loads control register CR4 with the value cr4. */
124 static inline void write_cr4(unsigned long cr4)
126 asm volatile ("movl %0, %%cr4" : : "r" (cr4));
/* NOTE(review): presumably turns SSE2 instruction support on (likely through
 * read_cr4()/write_cr4()) so the SSE-based pattern writes can run - confirm
 * against the function body. */
130 static inline void enable_sse2(void)
/* NOTE(review): presumably the counterpart of enable_sse2() that turns SSE2
 * support off again - confirm against the function body. */
138 static inline void disable_sse2(void)
/*
 * Read-modify-write of MSR 0xc0010015.
 * NOTE(review): going by the name this sets the "wrap32dis" bit of that MSR;
 * confirm the exact bit against the modification step.
 */
147 static void set_wrap32dis(void) {
150 msr = rdmsr(0xc0010015);
153 wrmsr(0xc0010015, msr);
/*
 * Read-modify-write of MSR 0xc0010015.
 * NOTE(review): going by the name this clears the bit that set_wrap32dis()
 * sets; confirm the exact bit against the modification step.
 */
157 static void clear_wrap32dis(void) {
160 msr = rdmsr(0xc0010015);
163 wrmsr(0xc0010015, msr);
/*
 * Programs the FS segment base (MSR 0xc0000100) so that subsequent
 * %fs-prefixed loads and stores reach the memory under test.
 * addr_hi: callers in this file pass (addr >> 24) of a 256-byte-granular
 * address. NOTE(review): presumably stored in the high dword of the MSR -
 * confirm.
 */
167 static void set_FSBASE(uint32_t addr_hi)
171 //set fs and use fs prefix to access the mem
174 wrmsr(0xc0000100, msr); //FS_BASE
/*
 * Looks up the cached chip-select base register of chip select cs_idx on
 * ctrl's node (sysinfo->cs_base[nodeid*8 + cs_idx]).
 * NOTE(review): presumably returns non-zero when the chip select's enable bit
 * is set - confirm how "enabled" is reduced to the return value.
 */
178 static unsigned ChipSelPresent(const struct mem_controller *ctrl, unsigned cs_idx, struct sys_info *sysinfo)
181 unsigned nodeid = ctrl->node_id;
184 enabled = sysinfo->cs_base[nodeid * 8 + cs_idx];
/*
 * Tells whether the rank behind chip select cs_idx takes part in receiver
 * enable training. Forwards to ChipSelPresent(); channel and is_Width128 are
 * not used by the code shown here.
 */
191 static unsigned RcvrRankEnabled(const struct mem_controller *ctrl, int channel, int cs_idx, unsigned is_Width128, struct sys_info *sysinfo)
193 return ChipSelPresent(ctrl, cs_idx, sysinfo);
/*
 * Copies a test pattern from buf_a to %fs:addr_lo using 16-byte SSE loads
 * (movdqa) and non-temporal 16-byte stores (movntdq).
 *
 * Register setup: eax = addr_lo (destination offset), ebx = buf_a (source),
 * ecx = line_num * 4 (four 16-byte chunks per 64-byte line), edx = 16.
 * NOTE(review): ecx/edx presumably act as chunk counter and step of a copy
 * loop inside the asm - confirm. buf_a has to be 16-byte aligned because
 * movdqa faults on unaligned memory operands; the FS base must have been set
 * by the caller.
 */
196 static void WriteLNTestPattern(unsigned addr_lo, uint8_t *buf_a, unsigned line_num)
200 "movdqa (%3), %%xmm0\n\t"
201 "movntdq %%xmm0, %%fs:(%0)\n\t" /* xmm0 is 128 bit */
206 :: "a" (addr_lo), "d" (16), "c" (line_num * 4), "b"(buf_a)
/*
 * Writes one cache line of test pattern to the 256-byte-granular address
 * addr. p selects the source buffer: buf_b when p == 1, buf_a otherwise.
 */
212 static void Write1LTestPattern(unsigned addr, unsigned p, uint8_t *buf_a, uint8_t *buf_b)
215 if(p==1) { buf = buf_b; }
216 else { buf = buf_a; }
/* upper address bits go into the FS base, the rest is used as the offset */
218 set_FSBASE (addr>>24);
220 WriteLNTestPattern(addr<<8, buf, 1);
/*
 * Performs a single 32-bit load from the 256-byte-granular address addr
 * (through %fs) so that the line is fetched from memory; the loaded value
 * itself only lands in a local (value).
 */
223 static void Read1LTestPattern(unsigned addr)
227 set_FSBASE(addr>>24);
229 /* 1st move causes read fill (to exclusive or shared)*/
231 "movl %%fs:(%1), %0\n\t"
232 :"=b"(value): "a" (addr<<8)
/* Values of the Pass argument of TrainRcvrEn() and CompareTestPatternQW0(). */
240 #define DQS_FIRST_PASS 1
241 #define DQS_SECOND_PASS 2
/*
 * SB_* codes are OR-ed into the Errors variable of the training routines.
 * NOTE(review): they are OR-ed in as plain values (e.g. Errors |= SB_NORCVREN),
 * not as (1 << SB_x) masks, so individual codes cannot be told apart
 * afterwards - confirm whether that is intended.
 */
243 #define SB_NORCVREN 11
/* In the code shown here only referenced from commented-out expressions in TrainRcvrEn(). */
244 #define RCVREN_MARGIN 6
245 #define SB_SmallRCVR 13
246 #define SB_CHA2BRCVREN 12
247 #define SB_NODQSPOS 14
/* Smallest passing DQS delay window (max - min) accepted by TrainDQSPos(). */
248 #define MIN_DQS_WNDW 3
249 #define SB_SMALLDQS 15
/*
 * Compares the first quadword read back from the 256-byte-granular address
 * addr with the expected test pattern, low dword first and then the high
 * dword.
 *
 * The expected data is picked from TestPattern0/1/2 depending on Pass (the
 * first pass uses TestPattern0/TestPattern1, and TestPattern2 is also
 * selectable). NOTE(review): the selection presumably also depends on
 * pattern - confirm. With is_Width128 set and channel == 1 the read offset
 * is advanced by 8 bytes to reach the second channel's data.
 *
 * result starts as DQS_FAIL; per the comment below the result is inverted on
 * the second pass. NOTE(review): presumably returns DQS_PASS/DQS_FAIL -
 * confirm.
 */
252 static unsigned CompareTestPatternQW0(unsigned channel, unsigned addr, unsigned pattern, const uint32_t *TestPattern0, const uint32_t *TestPattern1, const uint32_t *TestPattern2, unsigned Pass, unsigned is_Width128)
258 unsigned result = DQS_FAIL;
260 if(Pass == DQS_FIRST_PASS) {
262 test_buf = (uint32_t *)TestPattern1;
265 test_buf = (uint32_t *)TestPattern0;
269 test_buf = (uint32_t *)TestPattern2;
272 set_FSBASE(addr>>24);
276 if(is_Width128 && (channel == 1)) {
277 addr_lo += 8; //second channel
282 "movl %%fs:(%1), %0\n\t"
283 :"=b"(value): "a" (addr_lo)
286 value_test = *test_buf;
289 print_debug_dqs_pair("\t\t\t\t\t\tQW0.lo : test_buf= ", (unsigned)test_buf, " value = ", value_test, 4);
290 print_debug_dqs_pair("\t\t\t\t\t\tQW0.lo : addr_lo = ", addr_lo, " value = ", value, 4);
/* the high dword is only examined when the low dword matched */
292 if(value == value_test) {
296 "movl %%fs:(%1), %0\n\t"
297 :"=b"(value): "a" (addr_lo)
299 value_test = *test_buf;
300 print_debug_dqs_pair("\t\t\t\t\t\tQW0.hi : test_buf= ", (unsigned)test_buf, " value = ", value_test, 4);
301 print_debug_dqs_pair("\t\t\t\t\t\tQW0.hi : addr_lo = ", addr_lo, " value = ", value, 4);
303 if(value == value_test){
308 if(Pass == DQS_SECOND_PASS) { // second pass need to be inverted
309 if(result==DQS_PASS) {
/*
 * Programs the MaxAsyncLat field of DRAM_CONFIG_HIGH from a receiver enable
 * delay.
 * dly: delay in 50ps units (per the comment below); it is rounded up to whole
 * nanoseconds, and (dly - DCH_MaxAsyncLat_BASE) is written into the field.
 * NOTE(review): dly may be adjusted further between the conversion and the
 * register write - confirm.
 */
321 static void SetMaxAL_RcvrDly(const struct mem_controller *ctrl, unsigned dly)
325 dly += (20-1); // round it
326 dly /= 20; // convert from unit 50ps to 1ns
331 reg = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH);
332 reg &= ~(DCH_MaxAsyncLat_MASK <<DCH_MaxAsyncLat_SHIFT);
333 reg |= ((dly - DCH_MaxAsyncLat_BASE) << DCH_MaxAsyncLat_SHIFT);
334 pci_write_config32(ctrl->f2, DRAM_CONFIG_HIGH, reg);
339 Set the Target range to WT IO (using an IORR overlapping the already existing
340 WB dram type). Use IORR0
/*
 * Programs IORR0: writes the base MSR (0xc0010016) and the mask MSR
 * (0xc0010017, low dword 0xfc000800, commented as a 64MB mask).
 * addr: 256-byte-granular test address as produced by Get_MCTSysAddr().
 * NOTE(review): how addr is split into the base MSR value should be
 * confirmed against the function body.
 */
342 static void SetTargetWTIO(unsigned addr)
347 wrmsr(0xc0010016, msr); //IORR0 BASE
350 msr.lo = 0xfc000800; // 64MB Mask
351 wrmsr(0xc0010017, msr); // IORR0 Mask
/*
 * Rewrites the IORR0 mask MSR (0xc0010017).
 * NOTE(review): presumably with a value that disables the range set up by
 * SetTargetWTIO() - confirm the value written.
 */
354 static void ResetTargetWTIO(void)
360 wrmsr(0xc0010017, msr); // IORR0 Mask
/*
 * Flushes the cache line at the 256-byte-granular address addr: sets the FS
 * base from addr>>24 and issues clflush on an %fs-relative offset.
 */
363 static void proc_CLFLUSH(unsigned addr)
366 set_FSBASE(addr>>24);
368 /* 1st move causes read fill (to exclusive or shared)*/
370 /* clflush fs:[eax] */
371 "clflush %%fs:(%0)\n\t"
/* Called by TrainRcvrEn() after each pattern compare to flush the test line.
 * NOTE(review): presumably wraps proc_CLFLUSH() with SetTargetWTIO()/
 * ResetTargetWTIO() - confirm against the function body. */
376 static void proc_IOCLFLUSH(unsigned addr)
/*
 * Reads an indexed register through ctrl->f2 port 0x98 and writes the same
 * value straight back, twice. index starts at 0x10.
 * NOTE(review): index is presumably changed between the two read/write
 * pairs, and the write-back is presumably what resets the DCT write
 * pointer - confirm.
 */
383 static void ResetDCTWrPtr(const struct mem_controller *ctrl)
386 unsigned index = 0x10;
388 dword = pci_read_config32_index_wait(ctrl->f2, 0x98, index);
389 pci_write_config32_index_wait(ctrl->f2, 0x98, index, dword);
392 dword = pci_read_config32_index_wait(ctrl->f2, 0x98, index);
393 pci_write_config32_index_wait(ctrl->f2, 0x98, index, dword);
/*
 * Returns the memory clock cycle time scaled by 1000 for memory clock
 * selector i (TrainRcvrEn() passes DRAM_CONFIG_HIGH & DCH_MemClkFreq_MASK).
 *
 * T1000_a holds one value per i. TT_a holds rows of four values (one per i);
 * the row is chosen by an index derived from the CPU frequency ID: the
 * current FID from MSR 0xc0010042 when CPUID 0x80000007 EDX bit 1 is set,
 * otherwise the startup FID field of MSR 0xc0010015. For index > 12 the
 * T1000_a value is returned, otherwise TT_a[index*4 + i].
 * NOTE(review): the row labels (4..15) suggest the index is offset before the
 * lookup - confirm how index is derived in both branches.
 */
398 static uint16_t get_exact_T1000(unsigned i)
401 static const uint16_t T1000_a[]= { 5000, 3759, 3003, 2500 };
403 static const uint16_t TT_a[] = {
405 /*4 */ 6250, 6250, 6250, 6250,
406 /*5 */ 5000, 5000, 5000, 2500,
407 /*6 */ 5000, 4166, 4166, 2500,
408 /*7 */ 5000, 4285, 3571, 2500,
410 /*8 */ 5000, 3750, 3125, 2500,
411 /*9 */ 5000, 3888, 3333, 2500,
412 /*10*/ 5000, 4000, 3000, 2500,
413 /*11*/ 5000, 4090, 3181, 2500,
415 /*12*/ 5000, 3750, 3333, 2500,
416 /*13*/ 5000, 3846, 3076, 2500,
417 /*14*/ 5000, 3928, 3214, 2500,
418 /*15*/ 5000, 4000, 3000, 2500,
424 /* Check for FID control support */
425 struct cpuid_result cpuid1;
426 cpuid1 = cpuid(0x80000007);
427 if( cpuid1.edx & 0x02 ) {
428 /* Use current FID */
430 msr = rdmsr(0xc0010042);
431 fid_cur = msr.lo & 0x3f;
435 /* Use startup FID */
437 msr = rdmsr(0xc0010015);
438 fid_start = (msr.lo & (0x3f << 24));
440 index = fid_start>>25;
443 if(index>12) return T1000_a[i];
445 return TT_a[index * 4+i];
/*
 * Presets the DQS timing control registers before receiver enable training:
 * one value is written to indexed registers 0x01-0x03 and 0x21-0x23, another
 * to 0x05-0x07 and 0x25-0x27 (all through ctrl->f2 port 0x98). According to
 * the comments below the values are 0x00 and 0x2f per byte respectively.
 * NOTE(review): the second comment also says "Write Timing" although it
 * covers a different register group - confirm which registers 0x05-0x07 are.
 */
449 static void InitDQSPos4RcvrEn(const struct mem_controller *ctrl)
455 for(i=1; i<=3; i++) {
456 /* Program the DQS Write Timing Control Registers (Function 2:Offset 0x9c, index 0x01-0x03, 0x21-0x23) to 0x00 for all bytes */
457 pci_write_config32_index_wait(ctrl->f2, 0x98, i, dword);
458 pci_write_config32_index_wait(ctrl->f2, 0x98, i+0x20, dword);
462 for(i=5; i<=7; i++) {
463 /* Program the DQS Write Timing Control Registers (Function 2:Offset 0x9c, index 0x05-0x07, 0x25-0x27) to 0x2f for all bytes */
464 pci_write_config32_index_wait(ctrl->f2, 0x98, i, dword);
465 pci_write_config32_index_wait(ctrl->f2, 0x98, i+0x20, dword);
/* Defaults the F0/F1 workaround to enabled (1) unless the build already
 * defined K8_REV_F_SUPPORT_F0_F1_WORKAROUND. */
470 #ifndef K8_REV_F_SUPPORT_F0_F1_WORKAROUND
471 #define K8_REV_F_SUPPORT_F0_F1_WORKAROUND 1
/*
 * DQS receiver enable training for one memory controller (ctrl).
 *
 * Pass is DQS_FIRST_PASS or DQS_SECOND_PASS. For each channel and each
 * receiver pair (receiver = 0, 2, 4, 6) whose rank is enabled, test patterns
 * are written to the rank's test addresses, the receiver enable delay is
 * swept while RcvrEnDly < 0xaf, and the data read back is checked with
 * CompareTestPatternQW0(). The delay at which a passing test follows a
 * failing one is recorded as RcvrEnDlyRmin; the resulting delay is stored in
 * sysinfo->dqs_rcvr_dly_a and programmed into the indexed register
 * (receiver>>1)*3 + 0x10 (+0x20 for channel B).
 *
 * The DIMM ECC enable bit in DRAM_CONFIG_LOW is cleared while training runs
 * (its previous state is kept in ecc_bit), and on the first pass the
 * DqsRcvEnTrain bit in DRAM_CTRL is set beforehand and cleared afterwards.
 * The largest delay seen (CTLRMaxDelay) is finally passed to
 * SetMaxAL_RcvrDly().
 *
 * Returns 1 when CTLRMaxDelay == 0xae, otherwise 0.
 */
474 static unsigned TrainRcvrEn(const struct mem_controller *ctrl, unsigned Pass, struct sys_info *sysinfo)
477 static const uint32_t TestPattern0[] = {
478 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
479 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
480 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
481 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
483 static const uint32_t TestPattern1[] = {
484 0x55555555, 0x55555555, 0x55555555, 0x55555555,
485 0x55555555, 0x55555555, 0x55555555, 0x55555555,
486 0x55555555, 0x55555555, 0x55555555, 0x55555555,
487 0x55555555, 0x55555555, 0x55555555, 0x55555555,
489 static const uint32_t TestPattern2[] = {
490 0x12345678, 0x87654321, 0x23456789, 0x98765432,
491 0x59385824, 0x30496724, 0x24490795, 0x99938733,
492 0x40385642, 0x38465245, 0x29432163, 0x05067894,
493 0x12349045, 0x98723467, 0x12387634, 0x34587623,
496 uint8_t pattern_buf_x[64 * 4 + 16]; // We need to two cache line So have more 16 bytes to keep 16 byte alignment */
497 uint8_t *buf_a, *buf_b;
500 uint8_t *dqs_rcvr_dly_a = &sysinfo->dqs_rcvr_dly_a[ctrl->node_id * 2* 8] ; //8 node, channel 2, receiver 8
504 unsigned channel, receiver;
507 unsigned CTLRMaxDelay;
512 unsigned Test0, Test1;
514 unsigned RcvrEnDlyRmin;
522 unsigned TestAddr0, TestAddr0B, TestAddr1 = 0, TestAddr1B = 0;
524 unsigned CurrRcvrCHADelay = 0;
528 unsigned is_Width128 = sysinfo->meminfo[ctrl->node_id].is_Width128;
530 #if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
531 unsigned cpu_f0_f1 = 0;
534 if(Pass == DQS_FIRST_PASS) {
535 InitDQSPos4RcvrEn(ctrl);
/* remember the DIMM ECC enable bit, then clear it for the duration of training */
545 dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
546 ecc_bit = dword & DCL_DimmEccEn;
547 dword &= ~(DCL_DimmEccEn);
548 pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dword);
551 if(Pass == DQS_FIRST_PASS) {
552 #if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
553 cpu_f0_f1 = is_cpu_pre_f2_in_bsp(ctrl->node_id);
558 /* Set the DqsRcvEnTrain bit */
559 dword = pci_read_config32(ctrl->f2, DRAM_CTRL);
560 dword |= DC_DqsRcvEnTrain;
561 pci_write_config32(ctrl->f2, DRAM_CTRL, dword);
566 //get T1000 figures (cycle time (ns)) * 1K
567 dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH);
568 dword &= DCH_MemClkFreq_MASK;
570 T1000 = get_exact_T1000(dword);
/* buf_a is the next 16-byte boundary above the start of pattern_buf_x; buf_b follows 128 bytes later */
573 buf_a = (uint8_t *)(((uint32_t)(&pattern_buf_x[0]) + 0x10) & (0xfffffff0));
574 buf_b = buf_a + 128; //??
/* first pass: buf_a/buf_b carry TestPattern0/TestPattern1; otherwise both carry TestPattern2 */
575 if(Pass==DQS_FIRST_PASS) {
577 *((uint32_t *)(buf_a + i*4)) = TestPattern0[i];
578 *((uint32_t *)(buf_b + i*4)) = TestPattern1[i];
583 *((uint32_t *)(buf_a + i*4)) = TestPattern2[i];
584 *((uint32_t *)(buf_b + i*4)) = TestPattern2[i];
588 print_debug_dqs("\nTrainRcvEn: 0 ctrl", ctrl->node_id, 0);
590 print_debug_addr("TrainRcvEn: buf_a:", buf_a);
593 /* for each channel */
597 if (!(sysinfo->meminfo[ctrl->node_id].dimm_mask & 0x0F) &&
598 (sysinfo->meminfo[ctrl->node_id].dimm_mask & 0xF0)) { /* channelB only? */
602 for ( ; (channel < 2) && (!Errors); channel++)
604 print_debug_dqs("\tTrainRcvEn51: channel ",channel, 1);
607 /* there are four receiver pairs, loosely associated with CS */
608 for( receiver = 0; (receiver < 8) && (!Errors); receiver+=2)
611 unsigned index=(receiver>>1) * 3 + 0x10;
613 print_debug_dqs("\t\tTrainRcvEn52: index ", index, 2);
617 dword = pci_read_config32_index_wait(ctrl->f2, 0x98, index);
618 CurrRcvrCHADelay= dword & 0xff;
/* 0xaf doubles as the "no passing delay found" marker checked after the sweep */
628 RcvrEnDlyRmin = 0xaf;
630 if(!RcvrRankEnabled(ctrl, channel, receiver, is_Width128, sysinfo)) continue;
632 /* for each DQS receiver enable setting */
634 TestAddr0 = Get_RcvrSysAddr(ctrl, channel, receiver, sysinfo);
636 TestAddr0B = TestAddr0 + (1<<(20+2-8)); // 4MB
638 if(RcvrRankEnabled(ctrl, channel, receiver+1, is_Width128, sysinfo)) {
639 TestAddr1 = Get_RcvrSysAddr(ctrl, channel, receiver+1, sysinfo);
640 TestAddr1B = TestAddr1 + (1<<(20+2-8)); //4MB
647 print_debug_dqs("\t\tTrainRcvEn53: TestAddr0B ", TestAddr0B, 2);
649 Write1LTestPattern(TestAddr0, 0, buf_a, buf_b); // rank0 of dimm, test p0
650 Write1LTestPattern(TestAddr0B, 1, buf_a, buf_b); //rank0 of dimm, test p1
653 Write1LTestPattern(TestAddr1, 0, buf_a, buf_b); //rank 1 of dimm
654 Write1LTestPattern(TestAddr1B, 1, buf_a, buf_b);//rank 1 of dimm
657 if(Pass == DQS_FIRST_PASS) {
660 RcvrEnDly = dqs_rcvr_dly_a[channel * 8 + receiver];
663 while ( RcvrEnDly < 0xaf) { // Sweep Delay value here
664 print_debug_dqs("\t\t\tTrainRcvEn541: RcvrEnDly ", RcvrEnDly, 3);
667 /* Odd steps get another pattern such that even
668 and odd steps alternate.
669 The pointers to the patterns will be swapped
670 at the end of the loop so they are correspond
681 /* Program current Receiver enable delay */
682 pci_write_config32_index_wait(ctrl->f2, 0x98, index, RcvrEnDly);
683 /* FIXME: 64bit MUX */
686 /* Program current Receiver enable delay channel b */
687 pci_write_config32_index_wait(ctrl->f2, 0x98, index+ 0x20, RcvrEnDly);
690 /* Program the MaxAsyncLat filed with the
691 current DQS receiver enable setting plus 6ns
693 /*Porgram MaxAsyncLat to correspond with current delay */
694 SetMaxAL_RcvrDly(ctrl, RcvrEnDly);
698 Read1LTestPattern(TestAddr0); //Cache Fill
699 /* ROM vs cache compare */
700 Test0 = CompareTestPatternQW0(channel, TestAddr0, PatternA, TestPattern0, TestPattern1, TestPattern2, Pass, is_Width128);
701 proc_IOCLFLUSH(TestAddr0);
705 print_debug_dqs("\t\t\tTrainRcvEn542: Test0 ", Test0, 3);
707 if(Test0 == DQS_PASS) {
709 Read1LTestPattern(TestAddr0B);
710 Test1 = CompareTestPatternQW0(channel, TestAddr0B, PatternB, TestPattern0, TestPattern1, TestPattern2, Pass, is_Width128);
711 proc_IOCLFLUSH(TestAddr0B);
715 print_debug_dqs("\t\t\tTrainRcvEn543: Test1 ", Test1, 3);
717 if(Test1 == DQS_PASS) {
719 Read1LTestPattern(TestAddr1);
720 Test0 = CompareTestPatternQW0(channel, TestAddr1, PatternA, TestPattern0, TestPattern1, TestPattern2, Pass, is_Width128);
721 proc_IOCLFLUSH(TestAddr1);
724 if(Test0 == DQS_PASS) {
725 Read1LTestPattern(TestAddr1B);
726 Test1 = CompareTestPatternQW0(channel, TestAddr1B, PatternB, TestPattern0, TestPattern1, TestPattern2, Pass, is_Width128);
727 proc_IOCLFLUSH(TestAddr1B);
730 if(Test1 == DQS_PASS) {
734 print_debug_dqs("\t\t\tTrainRcvEn544: Test0 ", Test0, 3);
742 print_debug_dqs("\t\t\tTrainRcvEn55: RcvrEnDly ", RcvrEnDly, 3);
/* a pass directly after a fail marks the start of the passing window */
744 if(CurrTest == DQS_PASS) {
745 if(LastTest == DQS_FAIL) {
746 RcvrEnDlyRmin = RcvrEnDly;
753 /* swap the rank 0 pointers */
755 TestAddr0 = TestAddr0B;
758 /* swap the rank 1 pointers */
760 TestAddr1 = TestAddr1B;
763 print_debug_dqs("\t\t\tTrainRcvEn56: RcvrEnDly ", RcvrEnDly, 3);
769 print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDly ", RcvrEnDly, 2);
771 if(RcvrEnDlyRmin == 0xaf) {
/* NOTE(review): SB_NORCVREN is OR-ed in as the plain value 11, not as a bit mask - confirm this is intended */
773 Errors |= SB_NORCVREN;
776 if(Pass == DQS_FIRST_PASS) {
777 // We need a better value for DQSPos training
778 RcvrEnDly = RcvrEnDlyRmin /* + RCVREN_MARGIN * T1000/64/50 */;
780 RcvrEnDly = RcvrEnDlyRmin;
783 if(RcvrEnDly > 0xae) {
784 //passing window too narrow, too far delayed
785 Errors |= SB_SmallRCVR;
789 if(Pass == DQS_SECOND_PASS) { //second pass must average values
790 RcvrEnDly += dqs_rcvr_dly_a[channel * 8 + receiver] /* - (RCVREN_MARGIN * T1000/64/50)*/;
794 dqs_rcvr_dly_a[channel * 8 + receiver] = RcvrEnDly;
796 //Set final RcvrEnDly for this DIMM and Channel
797 pci_write_config32_index_wait(ctrl->f2, 0x98, index, RcvrEnDly);
800 pci_write_config32_index_wait(ctrl->f2, 0x98, index+0x20, RcvrEnDly); // channel B
802 pci_write_config32_index_wait(ctrl->f2, 0x98, index, CurrRcvrCHADelay);
/* dword becomes the absolute difference between this delay and the saved channel A delay */
803 if(RcvrEnDly > CurrRcvrCHADelay) {
804 dword = RcvrEnDly - CurrRcvrCHADelay;
807 dword = CurrRcvrCHADelay - RcvrEnDly;
811 Errors |= SB_CHA2BRCVREN;
816 print_debug_dqs("\t\tTrainRcvEn63: RcvrEnDly ", RcvrEnDly, 2);
818 if(RcvrEnDly > CTLRMaxDelay) {
819 CTLRMaxDelay = RcvrEnDly;
822 print_debug_dqs("\t\tTrainRcvEn64: CTLRMaxDelay ", CTLRMaxDelay, 2);
827 print_debug_dqs("\tTrainRcvEn65: CTLRMaxDelay ", CTLRMaxDelay, 1);
829 /* Program the MaxAsyncLat field with the largest DQS Receiver Enable setting */
830 SetMaxAL_RcvrDly(ctrl, CTLRMaxDelay);
/* NOTE(review): presumably the saved ecc_bit is OR-ed back in before this write - confirm */
834 dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
835 dword &= ~(DCL_DimmEccEn);
837 pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dword);
839 if(Pass == DQS_FIRST_PASS) {
840 #if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
844 dword = pci_read_config32(ctrl->f2, DRAM_CTRL);
845 dword &= ~DC_DqsRcvEnTrain;
846 pci_write_config32(ctrl->f2, DRAM_CTRL, dword);
854 //restore SSE2 setting
857 #if CONFIG_MEM_TRAIN_SEQ != 1
858 /* We need tidy output for type 1 */
859 printk(BIOS_DEBUG, " CTLRMaxDelay=%02x\n", CTLRMaxDelay);
862 return (CTLRMaxDelay==0xae)?1:0;
/* Values of the direction argument of the DQS position routines; the value is
 * also used arithmetically when computing the DQS delay register index. */
866 #define DQS_READDIR 1
867 #define DQS_WRITEDIR 0
/*
 * Programs the DQS delay of a single byte lane.
 * The indexed register is (bytelane>>2) + 1 + channel*0x20 + direction*4
 * (accessed through ctrl->f2 port 0x98); within it the lane's 6-bit field at
 * bit position "shift" (a multiple of 8) is replaced by dqs_delay.
 * NOTE(review): shift is presumably derived from bytelane & 3 before being
 * scaled by 8, and bytelane 8 (ECC) may be special-cased - confirm.
 */
870 static void SetDQSDelayCSR(const struct mem_controller *ctrl, unsigned channel, unsigned bytelane, unsigned direction, unsigned dqs_delay)
871 { //ByteLane could be 0-8, last is for ECC
878 index = (bytelane>>2) + 1 + channel * 0x20 + (direction << 2);
883 shift <<= 3; // 8 bit
885 dword = pci_read_config32_index_wait(ctrl->f2, 0x98, index);
886 dword &= ~(0x3f<<shift);
887 dword |= (dqs_delay<<shift);
888 pci_write_config32_index_wait(ctrl->f2, 0x98, index, dword);
/*
 * Programs the same DQS delay for all byte lanes of a channel/direction.
 * dqs_delay is replicated into the byte positions of a dword (shift i*8) and
 * that dword is written to the indexed registers starting at
 * 1 + channel*0x20 + direction*4.
 * NOTE(review): the number of byte positions filled and of registers written
 * (index + i) is set by loops whose bounds should be confirmed.
 */
892 static void SetDQSDelayAllCSR(const struct mem_controller *ctrl, unsigned channel, unsigned direction, unsigned dqs_delay)
901 dword |= dqs_delay<<(i*8);
904 index = 1 + channel * 0x20 + direction * 4;
907 pci_write_config32_index_wait(ctrl->f2, 0x98, index + i, dword);
/*
 * Returns the midpoint of the delay window [min_d, max_d] as
 * min_d + (window size)/2.
 * NOTE(review): for an odd window size the size is presumably incremented
 * first so the midpoint rounds up - confirm.
 */
912 static unsigned MiddleDQS(unsigned min_d, unsigned max_d)
915 size_d = max_d-min_d;
916 if(size_d & 1) { //need round up
919 return ( min_d + (size_d>>1));
/*
 * Records a trained DQS delay in dqs_delay_a. The array is laid out as
 * [channel][direction][bytelane] with 2 directions and 9 byte lanes per
 * channel, i.e. index = channel*18 + direction*9 + bytelane.
 */
922 static inline void save_dqs_delay(unsigned channel, unsigned bytelane, unsigned direction, uint8_t *dqs_delay_a, uint8_t dqs_delay)
924 dqs_delay_a[channel * 2*9 + direction * 9 + bytelane] = dqs_delay;
/*
 * Writes the DQS test pattern from buf_a to %fs:addr_lo.
 * The length is (pattern+1)*9 cache lines: 9 lines for pattern 0, 18 lines
 * for pattern 1.
 */
927 static void WriteDQSTestPattern(unsigned addr_lo, unsigned pattern , uint8_t *buf_a)
929 WriteLNTestPattern(addr_lo, buf_a, (pattern+1) * 9);
/*
 * Touches 18 consecutive 64-byte lines starting at %fs:addr_lo with one
 * 32-bit load each (results are discarded in eax). Five base registers are
 * preloaded with addr_lo+128 plus 0, 4, 8, 12 and 16 lines, and each is used
 * with displacements -128, -64, 0 and 64 (only -128 and -64 for the last).
 * The FS base must have been set by the caller.
 */
932 static void ReadL18TestPattern(unsigned addr_lo)
934 //set fs and use fs prefix to access the mem
936 "movl %%fs:-128(%%esi), %%eax\n\t" //TestAddr cache line
937 "movl %%fs:-64(%%esi), %%eax\n\t" //+1
938 "movl %%fs:(%%esi), %%eax\n\t" //+2
939 "movl %%fs:64(%%esi), %%eax\n\t" //+3
941 "movl %%fs:-128(%%edi), %%eax\n\t" //+4
942 "movl %%fs:-64(%%edi), %%eax\n\t" //+5
943 "movl %%fs:(%%edi), %%eax\n\t" //+6
944 "movl %%fs:64(%%edi), %%eax\n\t" //+7
946 "movl %%fs:-128(%%ebx), %%eax\n\t" //+8
947 "movl %%fs:-64(%%ebx), %%eax\n\t" //+9
948 "movl %%fs:(%%ebx), %%eax\n\t" //+10
949 "movl %%fs:64(%%ebx), %%eax\n\t" //+11
951 "movl %%fs:-128(%%ecx), %%eax\n\t" //+12
952 "movl %%fs:-64(%%ecx), %%eax\n\t" //+13
953 "movl %%fs:(%%ecx), %%eax\n\t" //+14
954 "movl %%fs:64(%%ecx), %%eax\n\t" //+15
956 "movl %%fs:-128(%%edx), %%eax\n\t" //+16
957 "movl %%fs:-64(%%edx), %%eax\n\t" //+17
959 :: "a"(0), "b" (addr_lo+128+8*64), "c" (addr_lo+128+12*64), "d" (addr_lo +128+16*64), "S"(addr_lo+128), "D"(addr_lo+128+4*64)
/*
 * Touches 9 consecutive 64-byte lines starting at %fs:addr_lo with one
 * 32-bit load each (results are discarded in eax). Base registers hold
 * addr_lo+128 plus 0, 4 and 8 lines and are used with displacements
 * -128, -64, 0 and 64 (only -128 for the last). The FS base must have been
 * set by the caller.
 */
964 static void ReadL9TestPattern(unsigned addr_lo)
967 //set fs and use fs prefix to access the mem
970 "movl %%fs:-128(%%ecx), %%eax\n\t" //TestAddr cache line
971 "movl %%fs:-64(%%ecx), %%eax\n\t" //+1
972 "movl %%fs:(%%ecx), %%eax\n\t" //+2
973 "movl %%fs:64(%%ecx), %%eax\n\t" //+3
975 "movl %%fs:-128(%%edx), %%eax\n\t" //+4
976 "movl %%fs:-64(%%edx), %%eax\n\t" //+5
977 "movl %%fs:(%%edx), %%eax\n\t" //+6
978 "movl %%fs:64(%%edx), %%eax\n\t" //+7
980 "movl %%fs:-128(%%ebx), %%eax\n\t" //+8
982 :: "a"(0), "b" (addr_lo+128+8*64), "c"(addr_lo+128), "d"(addr_lo+128+4*64)
/*
 * Reads back the DQS test pattern at %fs:addr_lo using either the 9-line or
 * the 18-line reader, chosen by pattern.
 * NOTE(review): presumably pattern 0 selects 9 lines and anything else 18,
 * matching the (pattern+1)*9 lines written by WriteDQSTestPattern() -
 * confirm the condition.
 */
988 static void ReadDQSTestPattern(unsigned addr_lo, unsigned pattern)
991 ReadL9TestPattern(addr_lo);
994 ReadL18TestPattern(addr_lo);
/*
 * Issues clflush for the 9 consecutive 64-byte lines starting at
 * %fs:addr_lo, using the same base-register/displacement scheme as
 * ReadL9TestPattern().
 */
998 static void FlushDQSTestPattern_L9(unsigned addr_lo)
1001 "clflush %%fs:-128(%%ecx)\n\t"
1002 "clflush %%fs:-64(%%ecx)\n\t"
1003 "clflush %%fs:(%%ecx)\n\t"
1004 "clflush %%fs:64(%%ecx)\n\t"
1006 "clflush %%fs:-128(%%eax)\n\t"
1007 "clflush %%fs:-64(%%eax)\n\t"
1008 "clflush %%fs:(%%eax)\n\t"
1009 "clflush %%fs:64(%%eax)\n\t"
1011 "clflush %%fs:-128(%%ebx)\n\t"
1013 :: "b" (addr_lo+128+8*64), "c"(addr_lo+128), "a"(addr_lo+128+4*64)
/*
 * Issues clflush for the 18 consecutive 64-byte lines starting at
 * %fs:addr_lo. Five base registers hold addr_lo+128 plus 0, 4, 8, 12 and 16
 * lines; each is used with displacements -128, -64, 0 and 64 (only -128 and
 * -64 for the last). Marked noinline.
 */
1017 static __attribute__((noinline)) void FlushDQSTestPattern_L18(unsigned addr_lo)
1020 "clflush %%fs:-128(%%eax)\n\t"
1021 "clflush %%fs:-64(%%eax)\n\t"
1022 "clflush %%fs:(%%eax)\n\t"
1023 "clflush %%fs:64(%%eax)\n\t"
1025 "clflush %%fs:-128(%%edi)\n\t"
1026 "clflush %%fs:-64(%%edi)\n\t"
1027 "clflush %%fs:(%%edi)\n\t"
1028 "clflush %%fs:64(%%edi)\n\t"
1030 "clflush %%fs:-128(%%ebx)\n\t"
1031 "clflush %%fs:-64(%%ebx)\n\t"
1032 "clflush %%fs:(%%ebx)\n\t"
1033 "clflush %%fs:64(%%ebx)\n\t"
1035 "clflush %%fs:-128(%%ecx)\n\t"
1036 "clflush %%fs:-64(%%ecx)\n\t"
1037 "clflush %%fs:(%%ecx)\n\t"
1038 "clflush %%fs:64(%%ecx)\n\t"
1040 "clflush %%fs:-128(%%edx)\n\t"
1041 "clflush %%fs:-64(%%edx)\n\t"
1043 :: "b" (addr_lo+128+8*64), "c" (addr_lo+128+12*64), "d" (addr_lo +128+16*64), "a"(addr_lo+128), "D"(addr_lo+128+4*64)
/*
 * Flushes the DQS test pattern lines at %fs:addr_lo from the cache using
 * either the 9-line or the 18-line flush routine, chosen by pattern.
 * NOTE(review): presumably pattern 0 selects 9 lines and anything else 18 -
 * confirm the condition.
 */
1047 static void FlushDQSTestPattern(unsigned addr_lo, unsigned pattern )
1051 FlushDQSTestPattern_L9(addr_lo);
1054 FlushDQSTestPattern_L18(addr_lo);
/*
 * Compares the data read back from %fs:addr_lo with the expected pattern in
 * buf_a and reports the outcome per byte lane.
 *
 * bitmap starts as 0xff (all lanes passing); 9*64/4 dwords are compared byte
 * by byte and every mismatching byte clears bit "bytelane" in bitmap. When
 * both pattern and channel are non-zero the read offset starts 8 bytes in
 * (second channel), and for pattern == 1 (dual channel) 8 bytes of the other
 * channel's data are skipped.
 * NOTE(review): presumably returns bitmap, and bytelane/addr_lo/test_buf are
 * advanced inside the loops - confirm.
 */
1058 static unsigned CompareDQSTestPattern(unsigned channel, unsigned addr_lo, unsigned pattern, uint8_t *buf_a)
1061 unsigned bitmap = 0xff;
1066 uint32_t value_test;
1068 test_buf = (uint32_t *)buf_a;
1071 if(pattern && channel) {
1072 addr_lo += 8; //second channel
1077 for(i=0;i<9*64/4;i++) {
1079 "movl %%fs:(%1), %0\n\t"
1080 :"=b"(value): "a" (addr_lo)
1082 value_test = *test_buf;
1084 print_debug_dqs_pair("\t\t\t\t\t\ttest_buf= ", (unsigned)test_buf, " value = ", value_test, 7);
1085 print_debug_dqs_pair("\t\t\t\t\t\ttaddr_lo = ",addr_lo, " value = ", value, 7);
/* compare the dword one byte at a time; each byte belongs to one byte lane */
1087 for(j=0;j<4*8;j+=8) {
1088 if(((value>>j)&0xff) != ((value_test>>j)& 0xff)) {
1089 bitmap &= ~(1<<bytelane);
1095 print_debug_dqs("\t\t\t\t\t\tbitmap = ", bitmap, 7);
1098 if(pattern == 1) { //dual channel
1099 addr_lo += 8; //skip over other channel's data
/*
 * Finds the DQS delay for each of the 8 byte lanes of one channel.
 *
 * Direction is DQS_READDIR or DQS_WRITEDIR. For every present chip select
 * and every delay value 0..47 the pattern in buf_a is written and read back,
 * and the per-byte-lane pass bitmap from CompareDQSTestPattern() is AND-ed
 * into MutualCSPassW[delay]; a delay therefore only counts as passing for a
 * lane if it passed on every tested chip select. For reads the pattern is
 * written once per chip select before the sweep, for writes it is rewritten
 * at every delay setting.
 *
 * Then, per byte lane, the widest run of consecutive passing delays is
 * located. SB_NODQSPOS is OR-ed into Errors when RnkDlySeqPassMax stays 0,
 * SB_SMALLDQS when the window is narrower than MIN_DQS_WNDW; the window
 * midpoint (MiddleDQS) is programmed with SetDQSDelayCSR() and recorded via
 * save_dqs_delay().
 * NOTE(review): the midpoint is presumably programmed only when no error was
 * flagged for the lane, and Errors is presumably the return value - confirm.
 */
1113 static unsigned TrainDQSPos(const struct mem_controller *ctrl, unsigned channel, unsigned Direction, unsigned Pattern, uint8_t *buf_a, uint8_t *dqs_delay_a, struct sys_info *sysinfo)
1117 unsigned BanksPresent;
1119 unsigned MutualCSPassW[48];
1127 unsigned RnkDlyFilterMax, RnkDlyFilterMin = 0;
1128 unsigned RnkDlySeqPassMax, RnkDlySeqPassMin = 0;
1133 print_debug_dqs("\t\t\tTrainDQSPos begin ", 0, 3);
1135 printk(BIOS_DEBUG, "TrainDQSPos: MutualCSPassW[48] :%p\n", MutualCSPassW);
1137 for(DQSDelay=0; DQSDelay<48; DQSDelay++) {
1138 MutualCSPassW[DQSDelay] = 0xff; // Bitmapped status per delay setting, 0xff=All positions passing (1= PASS)
1141 for(ChipSel = 0; ChipSel < 8; ChipSel++) { //logical register chipselects 0..7
1142 print_debug_dqs("\t\t\t\tTrainDQSPos: 11 ChipSel ", ChipSel, 4);
1143 //FIXME: process 64MUXedMode
1144 if(!ChipSelPresent(ctrl, ChipSel, sysinfo)) continue;
1147 TestAddr = Get_MCTSysAddr(ctrl, ChipSel, sysinfo);
1149 print_debug_dqs("\t\t\t\tTrainDQSPos: 12 TestAddr ", TestAddr, 4);
1151 //set fs and use fs prefix to access the mem
1152 set_FSBASE(TestAddr>>24);
1154 if(Direction == DQS_READDIR) {
1155 print_debug_dqs("\t\t\t\tTrainDQSPos: 13 for read so write at first", 0, 4);
1156 WriteDQSTestPattern(TestAddr<<8, Pattern, buf_a);
1159 for(DQSDelay = 0; DQSDelay < 48; DQSDelay++ ){
1160 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 141 DQSDelay ", DQSDelay, 5);
1161 if(MutualCSPassW[DQSDelay] == 0) continue; //skip current delay value if other chipselects have failed all 8 bytelanes
1162 SetDQSDelayAllCSR(ctrl, channel, Direction, DQSDelay);
1163 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 142 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);
1164 if(Direction == DQS_WRITEDIR) {
1165 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 143 for write", 0, 5);
1166 WriteDQSTestPattern(TestAddr<<8, Pattern, buf_a);
1168 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 144 Pattern ", Pattern, 5);
1169 ReadDQSTestPattern(TestAddr<<8, Pattern);
1170 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 145 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);
1171 MutualCSPassW[DQSDelay] &= CompareDQSTestPattern(channel, TestAddr<<8, Pattern, buf_a); //0: fail, 1=pass
1172 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 146 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);
1173 SetTargetWTIO(TestAddr);
1174 FlushDQSTestPattern(TestAddr<<8, Pattern);
1180 for(ByteLane = 0; ByteLane < 8; ByteLane++) {
1181 print_debug_dqs("\t\t\t\tTrainDQSPos: 31 ByteLane ",ByteLane, 4);
1183 LastTest = DQS_FAIL;
1184 RnkDlySeqPassMax = 0;
1185 RnkDlyFilterMax = 0;
1186 RnkDlyFilterMin = 0;
1187 for(DQSDelay=0; DQSDelay<48; DQSDelay++) {
1188 if(MutualCSPassW[DQSDelay] & (1<<ByteLane)) {
1190 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 321 DQSDelay ", DQSDelay, 5);
1191 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 322 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);
1193 RnkDlySeqPassMax = DQSDelay;
1194 if(LastTest == DQS_FAIL) {
1195 RnkDlySeqPassMin = DQSDelay; //start sequential run
/* keep the widest run of consecutive passing delays seen so far */
1197 if((RnkDlySeqPassMax - RnkDlySeqPassMin)>(RnkDlyFilterMax-RnkDlyFilterMin)){
1198 RnkDlyFilterMin = RnkDlySeqPassMin;
1199 RnkDlyFilterMax = RnkDlySeqPassMax;
1201 LastTest = DQS_PASS;
1204 LastTest = DQS_FAIL;
1207 print_debug_dqs("\t\t\t\tTrainDQSPos: 33 RnkDlySeqPassMax ", RnkDlySeqPassMax, 4);
1209 if(RnkDlySeqPassMax == 0) {
1210 Errors |= SB_NODQSPOS; // no passing window
1213 print_debug_dqs("\t\t\t\tTrainDQSPos: 34 RnkDlyFilterMax ", RnkDlyFilterMax, 4);
1214 print_debug_dqs("\t\t\t\tTrainDQSPos: 34 RnkDlyFilterMin ", RnkDlyFilterMin, 4);
1215 if((RnkDlyFilterMax - RnkDlyFilterMin)< MIN_DQS_WNDW){
1216 Errors |= SB_SMALLDQS;
1219 unsigned middle_dqs;
1220 middle_dqs = MiddleDQS(RnkDlyFilterMin, RnkDlyFilterMax);
1221 print_debug_dqs("\t\t\t\tTrainDQSPos: 35 middle_dqs ",middle_dqs, 4);
1222 SetDQSDelayCSR(ctrl, channel, ByteLane, Direction, middle_dqs);
1223 save_dqs_delay(channel, ByteLane, Direction, dqs_delay_a, middle_dqs);
1229 print_debug_dqs("\t\t\tTrainDQSPos: end", 0xff, 3);
/* Trains the read DQS position of channel: forwards to TrainDQSPos() with
 * Direction = DQS_READDIR and returns its result. */
1236 static unsigned TrainReadDQS(const struct mem_controller *ctrl, unsigned channel, unsigned pattern, uint8_t *buf_a, uint8_t *dqs_delay_a, struct sys_info *sysinfo)
1238 print_debug_dqs("\t\tTrainReadPos", 0, 2);
1239 return TrainDQSPos(ctrl, channel, DQS_READDIR, pattern, buf_a, dqs_delay_a, sysinfo);
/* Trains the write DQS position of channel: forwards to TrainDQSPos() with
 * Direction = DQS_WRITEDIR and returns its result. */
1242 static unsigned TrainWriteDQS(const struct mem_controller *ctrl, unsigned channel, unsigned pattern, uint8_t *buf_a, uint8_t *dqs_delay_a, struct sys_info *sysinfo)
1244 print_debug_dqs("\t\tTrainWritePos", 0, 2);
1245 return TrainDQSPos(ctrl, channel, DQS_WRITEDIR, pattern, buf_a, dqs_delay_a, sysinfo);
1250 static unsigned TrainDQSRdWrPos(const struct mem_controller *ctrl, struct sys_info *sysinfo)
1252 static const uint32_t TestPatternJD1a[] = {
1253 0x00000000,0x00000000,0xFFFFFFFF,0xFFFFFFFF, // QW0-1, ALL-EVEN
1254 0x00000000,0x00000000,0x00000000,0x00000000, // QW2-3, ALL-EVEN
1255 0x00000000,0x00000000,0xFFFFFFFF,0xFFFFFFFF, // QW4-5, ALL-EVEN
1256 0x00000000,0x00000000,0x00000000,0x00000000, // QW6-7, ALL-EVEN
1257 0xFeFeFeFe,0xFeFeFeFe,0x01010101,0x01010101, // QW0-1, DQ0-ODD
1258 0xFeFeFeFe,0xFeFeFeFe,0x01010101,0x01010101, // QW2-3, DQ0-ODD
1259 0x01010101,0x01010101,0xFeFeFeFe,0xFeFeFeFe, // QW4-5, DQ0-ODD
1260 0xFeFeFeFe,0xFeFeFeFe,0x01010101,0x01010101, // QW6-7, DQ0-ODD
1261 0x02020202,0x02020202,0x02020202,0x02020202, // QW0-1, DQ1-ODD
1262 0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, // QW2-3, DQ1-ODD
1263 0xFdFdFdFd,0xFdFdFdFd,0x02020202,0x02020202, // QW4-5, DQ1-ODD
1264 0x02020202,0x02020202,0x02020202,0x02020202, // QW6-7, DQ1-ODD
1265 0x04040404,0x04040404,0xfBfBfBfB,0xfBfBfBfB, // QW0-1, DQ2-ODD
1266 0x04040404,0x04040404,0x04040404,0x04040404, // QW2-3, DQ2-ODD
1267 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW4-5, DQ2-ODD
1268 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW6-7, DQ2-ODD
1269 0x08080808,0x08080808,0xF7F7F7F7,0xF7F7F7F7, // QW0-1, DQ3-ODD
1270 0x08080808,0x08080808,0x08080808,0x08080808, // QW2-3, DQ3-ODD
1271 0xF7F7F7F7,0xF7F7F7F7,0x08080808,0x08080808, // QW4-5, DQ3-ODD
1272 0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW6-7, DQ3-ODD
1273 0x10101010,0x10101010,0x10101010,0x10101010, // QW0-1, DQ4-ODD
1274 0xeFeFeFeF,0xeFeFeFeF,0x10101010,0x10101010, // QW2-3, DQ4-ODD
1275 0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW4-5, DQ4-ODD
1276 0xeFeFeFeF,0xeFeFeFeF,0x10101010,0x10101010, // QW6-7, DQ4-ODD
1277 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW0-1, DQ5-ODD
1278 0xdFdFdFdF,0xdFdFdFdF,0x20202020,0x20202020, // QW2-3, DQ5-ODD
1279 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW4-5, DQ5-ODD
1280 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW6-7, DQ5-ODD
1281 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW0-1, DQ6-ODD
1282 0x40404040,0x40404040,0xBfBfBfBf,0xBfBfBfBf, // QW2-3, DQ6-ODD
1283 0x40404040,0x40404040,0xBfBfBfBf,0xBfBfBfBf, // QW4-5, DQ6-ODD
1284 0x40404040,0x40404040,0xBfBfBfBf,0xBfBfBfBf, // QW6-7, DQ6-ODD
1285 0x80808080,0x80808080,0x7F7F7F7F,0x7F7F7F7F, // QW0-1, DQ7-ODD
1286 0x80808080,0x80808080,0x7F7F7F7F,0x7F7F7F7F, // QW2-3, DQ7-ODD
1287 0x80808080,0x80808080,0x7F7F7F7F,0x7F7F7F7F, // QW4-5, DQ7-ODD
1288 0x80808080,0x80808080,0x80808080,0x80808080 // QW6-7, DQ7-ODD
1290 static const uint32_t TestPatternJD1b[] = {
1291 0x00000000,0x00000000,0x00000000,0x00000000, // QW0,CHA-B, ALL-EVEN
1292 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, // QW1,CHA-B, ALL-EVEN
1293 0x00000000,0x00000000,0x00000000,0x00000000, // QW2,CHA-B, ALL-EVEN
1294 0x00000000,0x00000000,0x00000000,0x00000000, // QW3,CHA-B, ALL-EVEN
1295 0x00000000,0x00000000,0x00000000,0x00000000, // QW4,CHA-B, ALL-EVEN
1296 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, // QW5,CHA-B, ALL-EVEN
1297 0x00000000,0x00000000,0x00000000,0x00000000, // QW6,CHA-B, ALL-EVEN
1298 0x00000000,0x00000000,0x00000000,0x00000000, // QW7,CHA-B, ALL-EVEN
1299 0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, // QW0,CHA-B, DQ0-ODD
1300 0x01010101,0x01010101,0x01010101,0x01010101, // QW1,CHA-B, DQ0-ODD
1301 0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, // QW2,CHA-B, DQ0-ODD
1302 0x01010101,0x01010101,0x01010101,0x01010101, // QW3,CHA-B, DQ0-ODD
1303 0x01010101,0x01010101,0x01010101,0x01010101, // QW4,CHA-B, DQ0-ODD
1304 0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, // QW5,CHA-B, DQ0-ODD
1305 0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, // QW6,CHA-B, DQ0-ODD
1306 0x01010101,0x01010101,0x01010101,0x01010101, // QW7,CHA-B, DQ0-ODD
1307 0x02020202,0x02020202,0x02020202,0x02020202, // QW0,CHA-B, DQ1-ODD
1308 0x02020202,0x02020202,0x02020202,0x02020202, // QW1,CHA-B, DQ1-ODD
1309 0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, // QW2,CHA-B, DQ1-ODD
1310 0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, // QW3,CHA-B, DQ1-ODD
1311 0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, // QW4,CHA-B, DQ1-ODD
1312 0x02020202,0x02020202,0x02020202,0x02020202, // QW5,CHA-B, DQ1-ODD
1313 0x02020202,0x02020202,0x02020202,0x02020202, // QW6,CHA-B, DQ1-ODD
1314 0x02020202,0x02020202,0x02020202,0x02020202, // QW7,CHA-B, DQ1-ODD
1315 0x04040404,0x04040404,0x04040404,0x04040404, // QW0,CHA-B, DQ2-ODD
1316 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW1,CHA-B, DQ2-ODD
1317 0x04040404,0x04040404,0x04040404,0x04040404, // QW2,CHA-B, DQ2-ODD
1318 0x04040404,0x04040404,0x04040404,0x04040404, // QW3,CHA-B, DQ2-ODD
1319 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW4,CHA-B, DQ2-ODD
1320 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW5,CHA-B, DQ2-ODD
1321 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW6,CHA-B, DQ2-ODD
1322 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW7,CHA-B, DQ2-ODD
1323 0x08080808,0x08080808,0x08080808,0x08080808, // QW0,CHA-B, DQ3-ODD
1324 0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW1,CHA-B, DQ3-ODD
1325 0x08080808,0x08080808,0x08080808,0x08080808, // QW2,CHA-B, DQ3-ODD
1326 0x08080808,0x08080808,0x08080808,0x08080808, // QW3,CHA-B, DQ3-ODD
1327 0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW4,CHA-B, DQ3-ODD
1328 0x08080808,0x08080808,0x08080808,0x08080808, // QW5,CHA-B, DQ3-ODD
1329 0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW6,CHA-B, DQ3-ODD
1330 0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW7,CHA-B, DQ3-ODD
1331 0x10101010,0x10101010,0x10101010,0x10101010, // QW0,CHA-B, DQ4-ODD
1332 0x10101010,0x10101010,0x10101010,0x10101010, // QW1,CHA-B, DQ4-ODD
1333 0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW2,CHA-B, DQ4-ODD
1334 0x10101010,0x10101010,0x10101010,0x10101010, // QW3,CHA-B, DQ4-ODD
1335 0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW4,CHA-B, DQ4-ODD
1336 0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW5,CHA-B, DQ4-ODD
1337 0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW6,CHA-B, DQ4-ODD
1338 0x10101010,0x10101010,0x10101010,0x10101010, // QW7,CHA-B, DQ4-ODD
1339 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW0,CHA-B, DQ5-ODD
1340 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW1,CHA-B, DQ5-ODD
1341 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW2,CHA-B, DQ5-ODD
1342 0x20202020,0x20202020,0x20202020,0x20202020, // QW3,CHA-B, DQ5-ODD
1343 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW4,CHA-B, DQ5-ODD
1344 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW5,CHA-B, DQ5-ODD
1345 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW6,CHA-B, DQ5-ODD
1346 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW7,CHA-B, DQ5-ODD
1347 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW0,CHA-B, DQ6-ODD
1348 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW1,CHA-B, DQ6-ODD
1349 0x40404040,0x40404040,0x40404040,0x40404040, // QW2,CHA-B, DQ6-ODD
1350 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW3,CHA-B, DQ6-ODD
1351 0x40404040,0x40404040,0x40404040,0x40404040, // QW4,CHA-B, DQ6-ODD
1352 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW5,CHA-B, DQ6-ODD
1353 0x40404040,0x40404040,0x40404040,0x40404040, // QW6,CHA-B, DQ6-ODD
1354 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW7,CHA-B, DQ6-ODD
1355 0x80808080,0x80808080,0x80808080,0x80808080, // QW0,CHA-B, DQ7-ODD
1356 0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F, // QW1,CHA-B, DQ7-ODD
1357 0x80808080,0x80808080,0x80808080,0x80808080, // QW2,CHA-B, DQ7-ODD
1358 0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F, // QW3,CHA-B, DQ7-ODD
1359 0x80808080,0x80808080,0x80808080,0x80808080, // QW4,CHA-B, DQ7-ODD
1360 0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F, // QW5,CHA-B, DQ7-ODD
1361 0x80808080,0x80808080,0x80808080,0x80808080, // QW6,CHA-B, DQ7-ODD
1362 0x80808080,0x80808080,0x80808080,0x80808080 // QW7,CHA-B, DQ7-ODD
1364 uint8_t pattern_buf_x[64 * 18 + 16]; // We need to two cache line So have more 16 bytes to keep 16 byte alignment */
1373 unsigned DQSWrDelay;
1374 unsigned is_Width128 = sysinfo->meminfo[ctrl->node_id].is_Width128;
1375 uint8_t *dqs_delay_a = &sysinfo->dqs_delay_a[ctrl->node_id * 2*2*9]; //channel 2, direction 2 , bytelane *9
1384 dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
1385 ecc_bit = dword & DCL_DimmEccEn;
1386 dword &= ~(DCL_DimmEccEn);
1387 pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dword);
1390 buf_a = (uint8_t *)(((uint32_t)(&pattern_buf_x[0]) + 0x10) & (~0xf));
1394 for(i=0;i<16*18;i++) {
1395 *((uint32_t *)(buf_a + i*4)) = TestPatternJD1b[i];
1400 for(i=0; i<16*9;i++) {
1401 *((uint32_t *)(buf_a + i*4)) = TestPatternJD1a[i];
1406 print_debug_dqs("\nTrainDQSRdWrPos: 0 ctrl ", ctrl->node_id, 0);
1408 printk(BIOS_DEBUG, "TrainDQSRdWrPos: buf_a:%p\n", buf_a);
1413 if (!(sysinfo->meminfo[ctrl->node_id].dimm_mask & 0x0F) &&
1414 (sysinfo->meminfo[ctrl->node_id].dimm_mask & 0xF0)) { /* channelB only? */
1418 while( (channel<2) && (!Errors)) {
1419 print_debug_dqs("\tTrainDQSRdWrPos: 1 channel ",channel, 1);
1420 for(DQSWrDelay = 0; DQSWrDelay < 48; DQSWrDelay++) {
1422 SetDQSDelayAllCSR(ctrl, channel, DQS_WRITEDIR, DQSWrDelay);
1423 print_debug_dqs("\t\tTrainDQSRdWrPos: 21 DQSWrDelay ", DQSWrDelay, 2);
1424 err= TrainReadDQS(ctrl, channel, pattern, buf_a, dqs_delay_a, sysinfo);
1425 print_debug_dqs("\t\tTrainDQSRdWrPos: 22 err ",err, 2);
1430 print_debug_dqs("\tTrainDQSRdWrPos: 3 DQSWrDelay ", DQSWrDelay, 1);
1432 if(DQSWrDelay < 48) {
1433 Errors = TrainWriteDQS(ctrl, channel, pattern, buf_a, dqs_delay_a, sysinfo);
1434 print_debug_dqs("\tTrainDQSRdWrPos: 4 Errors ", Errors, 1);
1439 //FIXME: 64MuxMode??
1440 channel++; // skip channel if 64-bit mode
1445 dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
1446 dword &= ~(DCL_DimmEccEn);
1448 pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dword);
1454 //restore SSE2 setting
1457 print_debug_dqs("TrainDQSRdWrPos: ", 5, 0);
/*
 * Fetch one trained DQS delay from a node's delay table.
 *
 * The table is indexed as [channel][direction][bytelane]: each channel owns
 * 2*9 entries and each direction within a channel owns 9 entries.
 *
 * channel:     channel index
 * bytelane:    byte lane index within the direction
 * direction:   direction index (callers pass DQS_READDIR / DQS_WRITEDIR)
 * dqs_delay_a: start of the node's delay table
 *
 * Returns the stored delay value. No bounds checking is performed.
 */
static inline uint8_t get_dqs_delay(unsigned channel, unsigned bytelane, unsigned direction, uint8_t *dqs_delay_a)
{
	return dqs_delay_a[channel * 2*9 + direction * 9 + bytelane];
}
/*
 * Interpolate a DQS delay between the trained delays of two byte lanes.
 *
 * InterFactor: 0:    100% ByteLane 0
 *              0x80: 50% between ByteLane 0 and 1
 *              0xff: 99.6% ByteLane 1 and 0.4% like 0
 *
 * channel:     channel whose delays are read
 * ByteLane0/1: the two byte lanes to interpolate between
 * Direction:   direction index passed through to get_dqs_delay()
 * dqs_delay_a: start of the node's delay table
 *
 * Returns the interpolated delay. The result always lies between the two
 * lane delays (inclusive) because the weighted difference is added to the
 * smaller of the two.
 */
static unsigned CalcEccDQSPos(unsigned channel,unsigned ByteLane0, unsigned ByteLane1, unsigned InterFactor, unsigned Direction, uint8_t *dqs_delay_a)
{
	unsigned DQSDelay0, DQSDelay1;
	unsigned DQSDelay;

	DQSDelay0 = get_dqs_delay(channel, ByteLane0, Direction, dqs_delay_a);
	DQSDelay1 = get_dqs_delay(channel, ByteLane1, Direction, dqs_delay_a);

	if(DQSDelay0>DQSDelay1) {
		DQSDelay = DQSDelay0 - DQSDelay1;
		/* Measuring from lane 1 upwards, so use the complementary weight. */
		InterFactor = 0xff - InterFactor;
	}
	else {
		DQSDelay = DQSDelay1 - DQSDelay0;
	}

	DQSDelay *= InterFactor;

	DQSDelay >>= 8; // divide by 256 (close to, but not exactly, /255)

	/* Add the weighted difference to the smaller of the two delays. */
	if(DQSDelay0>DQSDelay1) {
		DQSDelay += DQSDelay1;
	}
	else {
		DQSDelay += DQSDelay0;
	}

	return DQSDelay;
}
// Compute and program a DQS delay for byte lane `ByteLane` on each of the two channels,
// once per entry of direction[] (read and write). The function name suggests this is
// the ECC lane; the value assigned to ByteLane is not visible in this listing.
//   ctrl:    memory controller whose delay registers are programmed
//   sysinfo: owns the dqs_delay_a[] table that is read and updated
1502 static void SetEccDQSRdWrPos(const struct mem_controller *ctrl, struct sys_info *sysinfo)
1507 unsigned lane0, lane1, ratio;
// The two transfer directions handled for every channel.
1510 unsigned direction[] = { DQS_READDIR, DQS_WRITEDIR };
// Start of this node's slice of the delay table.
1512 uint8_t *dqs_delay_a = &sysinfo->dqs_delay_a[ctrl->node_id * 2*2*9]; //channel 2, direction 2 , bytelane *9
1516 for(channel = 0; channel < 2; channel++) {
1518 Direction = direction[i];
// ratio == 0 is documented by CalcEccDQSPos() as "100% ByteLane 0", so the result follows lane 4.
1519 lane0 = 4; lane1 = 5; ratio = 0;
1520 dqs_delay = CalcEccDQSPos(channel, lane0, lane1, ratio, Direction, dqs_delay_a);
1521 print_debug_dqs_pair("\t\tSetEccDQSRdWrPos: channel ", channel, Direction==DQS_READDIR? " R dqs_delay":" W dqs_delay", dqs_delay, 2);
// Apply the delay through SetDQSDelayCSR() and record it through save_dqs_delay().
1522 SetDQSDelayCSR(ctrl, channel, ByteLane, Direction, dqs_delay);
1523 save_dqs_delay(channel, ByteLane, Direction, dqs_delay_a, dqs_delay);
// Run TrainRcvrEn() for one controller and training pass, bracketed by level-0 debug traces.
//   ctrl:    controller to train
//   Pass:    pass number forwarded to TrainRcvrEn() (callers in this file use 1 and 2)
//   sysinfo: shared system information forwarded to TrainRcvrEn()
// NOTE(review): the body of the failure branch and the return statements are not visible
// here; callers treat a non-zero result as failure.
1528 static unsigned train_DqsRcvrEn(const struct mem_controller *ctrl, unsigned Pass, struct sys_info *sysinfo)
1530 print_debug_dqs("\ntrain_DqsRcvrEn: begin ctrl ", ctrl->node_id, 0);
1531 if(TrainRcvrEn(ctrl, Pass, sysinfo)) {
1534 print_debug_dqs("\ntrain_DqsRcvrEn: end ctrl ", ctrl->node_id, 0);
// Train the DQS read/write position for one controller, then set the ECC lane delays.
//   ctrl:    controller to train
//   sysinfo: shared system information forwarded to the training helpers
// NOTE(review): return statements are not visible here; callers treat non-zero as failure.
1538 static unsigned train_DqsPos(const struct mem_controller *ctrl, struct sys_info *sysinfo)
1540 print_debug_dqs("\ntrain_DqsPos: begin ctrl ", ctrl->node_id, 0);
// A non-zero result from TrainDQSRdWrPos() is reported as an error for this controller.
1541 if(TrainDQSRdWrPos(ctrl, sysinfo) != 0) {
1542 printk(BIOS_ERR, "\nDQS Training Rd Wr failed ctrl%02x\n", ctrl->node_id);
1546 SetEccDQSRdWrPos(ctrl, sysinfo);
1548 print_debug_dqs("\ntrain_DqsPos: end ctrl ", ctrl->node_id, 0);
1553 #if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
// Workaround applied between receiver-enable pass 1 and DQS position training on
// controllers whose CPU is reported by is_cpu_pre_f2_in_bsp().
// First loop: for each such controller, clear DC_DqsRcvEnTrain, pulse DI_EnDramInit,
// and compute a deadline tsc1[i] by adding tsc0[i] to tsc1[i].
// Second loop: spin until the sampled tsc is no longer below tsc1[i].
//   controllers: number of entries of ctrl[] to visit
//   ctrl:        array of memory controllers
//   tsc0:        per-controller time-stamp values added into the deadline
//   sysinfo:     provides ctrl_present[] and meminfo[].dimm_mask
1554 static void f0_svm_workaround(int controllers, const struct mem_controller *ctrl, tsc_t *tsc0, struct sys_info *sysinfo)
// NOTE(review): fixed size 8 while the loops are bounded by `controllers`; confirm callers never pass more than 8.
1557 unsigned cpu_f0_f1[8];
1560 print_debug_addr("dqs_timing: tsc1[8] :", tsc1);
1562 for(i = 0; i < controllers; i++) {
1563 if (!sysinfo->ctrl_present[i])
1566 /* Skip everything if I don't have any memory on this controller */
1567 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
// Remember the per-controller answer so the second loop can reuse it.
1571 cpu_f0_f1[i] = is_cpu_pre_f2_in_bsp(i);
1573 if(!cpu_f0_f1[i]) continue;
// Clear the DC_DqsRcvEnTrain bit in DRAM_CTRL.
1575 dword = pci_read_config32(ctrl[i].f2, DRAM_CTRL);
1576 dword &= ~DC_DqsRcvEnTrain;
1577 pci_write_config32(ctrl[i].f2, DRAM_CTRL, dword);
// Write DRAM_INIT with DI_EnDramInit set, then immediately again with it cleared.
1579 dword = pci_read_config32(ctrl[i].f2, DRAM_INIT);
1580 dword |= DI_EnDramInit;
1581 pci_write_config32(ctrl[i].f2, DRAM_INIT, dword);
1582 dword &= ~DI_EnDramInit;
1583 pci_write_config32(ctrl[i].f2, DRAM_INIT, dword);
1586 print_debug_dqs_tsc("begin: tsc1", i, tsc1[i].hi, tsc1[i].lo, 2);
// 64-bit add done in 32-bit halves: a wrapped low sum signals a carry into the high half.
1588 dword = tsc1[i].lo + tsc0[i].lo;
1589 if((dword<tsc1[i].lo) || (dword<tsc0[i].lo)) {
1593 tsc1[i].hi+= tsc0[i].hi;
1595 print_debug_dqs_tsc("end : tsc1", i, tsc1[i].hi, tsc1[i].lo, 2);
1599 for(i = 0; i < controllers; i++) {
1600 if (!sysinfo->ctrl_present[i])
1603 /* Skip everything if I don't have any memory on this controller */
1604 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1606 if(!cpu_f0_f1[i]) continue;
// Loop while the deadline tsc1[i] is still greater than tsc (compared as hi:lo pairs).
1612 } while ((tsc1[i].hi>tsc.hi) || ((tsc1[i].hi==tsc.hi) && (tsc1[i].lo>tsc.lo)));
1614 print_debug_dqs_tsc("end : tsc ", i, tsc.hi, tsc.lo, 2);
1622 /* setting variable mtrr, comes from linux kernel source */
// Program one variable MTRR base/mask pair.
//   reg:          variable MTRR index passed to MTRRphysBase_MSR() / MTRRphysMask_MSR()
//   basek, sizek: range start and length in KiB (shifted by 10 to obtain bytes)
//   type:         memory type for the range
//   address_bits: physical address width; used to build the high half of the mask
1623 static void set_var_mtrr_dqs(
1624 unsigned int reg, unsigned long basek, unsigned long sizek,
1625 unsigned char type, unsigned address_bits)
1628 unsigned address_mask_high;
// Ones for the address bits above bit 31.
// NOTE(review): the shift is only well defined for 32 <= address_bits < 64.
1630 address_mask_high = ((1u << (address_bits - 32u)) - 1u);
// Split the byte address (basek * 1024) into high and low 32-bit halves.
1632 base.hi = basek >> 22;
1633 base.lo = basek << 10;
// 4*1024*1024 KiB is 4 GiB: below that the size fits entirely in the low mask half.
1635 if (sizek < 4*1024*1024) {
1636 mask.hi = address_mask_high;
1637 mask.lo = ~((sizek << 10) -1);
// Otherwise the size-dependent part of the mask lives in the high half.
1640 mask.hi = address_mask_high & (~((sizek >> 22) -1));
1649 zero.lo = zero.hi = 0;
1650 /* The invalid bit is kept in the mask, so we simply clear the
1651 relevant mask register to disable a range. */
1652 wrmsr (MTRRphysMask_MSR(reg), zero);
1654 /* Bit 32-35 of MTRRphysMask should be set to 1 */
// The base register is written before the mask register.
1657 wrmsr (MTRRphysBase_MSR(reg), base);
1658 wrmsr (MTRRphysMask_MSR(reg), mask);
1663 /* fms: find most significant bit set, stolen from Linux Kernel Source. */
// Uses the x86 `bsrl` (bit scan reverse) instruction on x; the result is produced in r.
// NOTE(review): the instructions between `bsrl` and label 1 are not visible in this listing.
1664 static inline unsigned int fms(unsigned int x)
1668 __asm__("bsrl %1,%0\n\t"
1671 "1:" : "=r" (r) : "g" (x));
1675 /* fls: find least significant bit set */
// Uses the x86 `bsfl` (bit scan forward) instruction on x; the result is produced in r.
// NOTE(review): the instructions between `bsfl` and label 1 are not visible in this listing.
1676 static inline unsigned int fls(unsigned int x)
1680 __asm__("bsfl %1,%0\n\t"
1683 "1:" : "=r" (r) : "g" (x));
// Cover [range_startk, range_startk + range_sizek) with variable MTRRs, starting at
// index `reg`. Each iteration programs one MTRR of `sizek` KiB and advances the range.
//   reg:               first variable MTRR index to use
//   range_startk:      start of the range in KiB
//   range_sizek:       length of the range in KiB
//   next_range_startk: not referenced by any visible statement
//   type:              memory type handed to set_var_mtrr_dqs()
//   address_bits:      physical address width handed to set_var_mtrr_dqs()
// NOTE(review): the return statements and the computation of sizek are not visible here.
1687 static unsigned int range_to_mtrr(unsigned int reg,
1688 unsigned long range_startk, unsigned long range_sizek,
1689 unsigned long next_range_startk, unsigned char type, unsigned address_bits)
// Nothing to do for an empty range or when reg is already 8 or more.
1691 if (!range_sizek || (reg >= 8)) {
1694 while(range_sizek) {
1695 unsigned long max_align, align;
1696 unsigned long sizek;
1697 /* Compute the maximum size I can make a range */
// max_align comes from the lowest set bit of the start, align from the highest set bit of the size.
1698 max_align = fls(range_startk);
1699 align = fms(range_sizek);
1700 if (align > max_align) {
1704 #if CONFIG_MEM_TRAIN_SEQ != 1
1705 printk(BIOS_DEBUG, "Setting variable MTRR %d, base: %4ldMB, range: %4ldMB, type %s\n",
1706 reg, range_startk >>10, sizek >> 10,
1707 (type==MTRR_TYPE_UNCACHEABLE)?"UC":
1708 ((type==MTRR_TYPE_WRBACK)?"WB":"Other")
// Program this chunk, then move past it.
1711 set_var_mtrr_dqs(reg++, range_startk, sizek, type, address_bits);
1712 range_startk += sizek;
1713 range_sizek -= sizek;
1720 #if CONFIG_MEM_TRAIN_SEQ == 1
// Write the TOP_MEM2 and TOP_MEM MSRs from top-of-memory values given in KiB.
//   tom_k:  value for TOP_MEM, in KiB
//   tom2_k: value for TOP_MEM2, in KiB
// Each KiB value is converted to a byte address split over the MSR's low and high halves:
// the low 22 bits shifted left by 10 go to .lo, the remaining upper bits go to .hi.
1721 static void set_top_mem_ap(unsigned tom_k, unsigned tom2_k)
1725 /* Now set top of memory */
1726 msr.lo = (tom2_k & 0x003fffff) << 10;
1727 msr.hi = (tom2_k & 0xffc00000) >> 22;
1728 wrmsr(TOP_MEM2, msr);
1730 msr.lo = (tom_k & 0x003fffff) << 10;
1731 msr.hi = (tom_k & 0xffc00000) >> 22;
1732 wrmsr(TOP_MEM, msr);
// Set up MTRRs so that DQS training can access its test addresses (see the callers' comment).
//   tom_k:  top of memory in KiB; the range [0, tom_k) is mapped write-back
//   tom2_k: not referenced by any visible statement (a guard around the TOM2 block may be missing here)
1736 static void setup_mtrr_dqs(unsigned tom_k, unsigned tom2_k)
1742 //still enable from cache_as_ram.inc
// Set SYSCFG_MSR_MtrrFixDramModEn in SYSCFG.
1743 msr = rdmsr(SYSCFG_MSR);
1744 msr.lo |= SYSCFG_MSR_MtrrFixDramModEn;
1745 wrmsr(SYSCFG_MSR,msr);
1748 //[0,512k), [512k, 640k)
// NOTE(review): the MSR writes that consume this value are not visible in this listing.
1749 msr.hi = 0x1e1e1e1e;
// Variable MTRRs from index 2 upward cover [0, tom_k) as write-back with 40 address bits.
1755 reg = range_to_mtrr(2, 0, tom_k,4*1024*1024, MTRR_TYPE_WRBACK, 40);
1759 //enable tom2 and type
1760 msr = rdmsr(SYSCFG_MSR);
1761 msr.lo |= (1<<21) | (1<<22); //MtrrTom2En and Tom2ForceMemTypeWB
1762 wrmsr(SYSCFG_MSR, msr);
// Undo the MTRR setup made for DQS training.
//   tom2_k: not referenced by any visible statement (a guard around the TOM2 block may be missing here)
1767 static void clear_mtrr_dqs(unsigned tom2_k)
1772 //still enable from cache_as_ram.inc
// Set SYSCFG_MSR_MtrrFixDramModEn in SYSCFG.
1773 msr = rdmsr(SYSCFG_MSR);
1774 msr.lo |= SYSCFG_MSR_MtrrFixDramModEn;
1775 wrmsr(SYSCFG_MSR,msr);
1777 //[0,512k), [512k, 640k)
// Walk MSRs 0x204 through 0x20f.
// NOTE(review): the loop body is not visible here; presumably it clears each MSR - confirm.
1784 for(i=0x204;i<0x210;i++) {
1790 //disable tom2 and type
1791 msr = rdmsr(SYSCFG_MSR);
1792 msr.lo &= ~((1<<21) | (1<<22)); //MtrrTom2En and Tom2ForceMemTypeWB
1793 wrmsr(SYSCFG_MSR, msr);
// Update one bit of the HT_INIT_CONTROL register of PCI device 0:(0x18+i).0.
//   i:   offset added to device number 0x18
//   val: only its lowest bit is used
//   bit: bit position within the register
// NOTE(review): only the OR step is visible; if the bit is not cleared first, val == 0 cannot reset it.
1797 static void set_htic_bit(unsigned i, unsigned val, unsigned bit)
1800 dword = pci_read_config32(PCI_DEV(0, 0x18+i, 0), HT_INIT_CONTROL);
1802 dword |= ((val & 1) <<bit);
1803 pci_write_config32(PCI_DEV(0, 0x18+i, 0), HT_INIT_CONTROL, dword);
1807 #if CONFIG_MEM_TRAIN_SEQ == 1
// Read the HT_INIT_CONTROL register of PCI device 0:(0x18+i).0 to query bit `bit`.
// NOTE(review): the extraction of the bit and the return statement are not visible here.
1808 static unsigned get_htic_bit(unsigned i, unsigned bit)
1811 dword = pci_read_config32(PCI_DEV(0, 0x18+i, 0), HT_INIT_CONTROL);
// Return once HT_INIT_CONTROL bit 9 of device 0x18 reads as set; this is the same bit
// that set_sysinfo_in_ram() writes.
// NOTE(review): the enclosing loop is not visible here; presumably this check is repeated until it succeeds.
1816 static void wait_till_sysinfo_in_ram(void)
1819 if(get_htic_bit(0, 9)) return;
// Publish the "sysinfo in RAM" flag by writing val to HT_INIT_CONTROL bit 9 of device 0x18,
// the bit polled by wait_till_sysinfo_in_ram().
1824 static void set_sysinfo_in_ram(unsigned val)
1826 set_htic_bit(0, val, 9);
1829 #ifdef S3_NVRAM_EARLY
1830 // Don't define these prototypes as the real functions are already included
1833 //int s3_save_nvram_early(u32 dword, int size, int nvram_pos);
1834 //int s3_load_nvram_early(int size, u32 *old_dword, int nvram_pos);
// Fallback NVRAM saver used when the real implementation is not included.
//   dword:     value that would be stored
//   size:      number of bytes of dword to store
//   nvram_pos: current NVRAM position
// NOTE(review): the body is not visible in this listing; callers use the return value as the next position.
1836 static inline int s3_save_nvram_early(u32 dword, int size, int nvram_pos)
// Fallback NVRAM loader used when the real implementation is not included.
// It cannot restore anything, so it stops via die().
//   size:      number of bytes that would be loaded
//   old_dword: destination for the loaded value (untouched by the visible statements)
//   nvram_pos: current NVRAM position
1841 static inline int s3_load_nvram_early(int size, u32 *old_dword, int nvram_pos)
1843 die("No memory NVRAM loader for DQS data! Unable to restore memory state\n");
1845 return nvram_pos; /* Make GCC happy */
1849 #if CONFIG_MEM_TRAIN_SEQ == 0
// Read one indexed register (index port 0x98) and store `size` bytes of it to NVRAM.
//   dev:       PCI device to read from
//   size:      number of bytes to save
//   index:     register index handed to pci_read_config32_index_wait()
//   nvram_pos: NVRAM position to write at
// Returns the value of s3_save_nvram_early(), which callers use as the next position.
1850 static int save_index_to_pos(unsigned int dev, int size, int index, int nvram_pos)
1852 u32 dword = pci_read_config32_index_wait(dev, 0x98, index);
1854 return s3_save_nvram_early(dword, size, nvram_pos);
// Restore one indexed register (index port 0x98) from NVRAM.
// The current register value is read first and handed to the loader by pointer, then
// the resulting value is written back to the same index.
//   dev:       PCI device to update
//   size:      number of bytes to load
//   index:     register index
//   nvram_pos: NVRAM position to read from
// NOTE(review): the return statement is not visible here; callers use the result as the next position.
1858 static int load_index_to_pos(unsigned int dev, int size, int index, int nvram_pos)
1861 u32 old_dword = pci_read_config32_index_wait(dev, 0x98, index);
1862 nvram_pos = s3_load_nvram_early(size, &old_dword, nvram_pos);
1863 pci_write_config32_index_wait(dev, 0x98, index, old_dword);
// Restore the saved registers of one channel from NVRAM, starting at `pos`.
// The index/size sequence must stay identical to dqs_save_MC_NVRAM_ch().
//   dev: PCI device of the memory controller
//   ch:  value added to every register index (callers pass 0 and 1)
//   pos: NVRAM position to start reading at
// NOTE(review): statements between the comment and the first load (for example an
// adjustment of ch) and the return are not visible in this listing.
1867 static int dqs_load_MC_NVRAM_ch(unsigned int dev, int ch, int pos)
// Sizes below add up to 30: six 4-byte entries plus six 1-byte entries.
1869 /* 30 bytes per channel */
1871 pos = load_index_to_pos(dev, 4, 0x00 + ch, pos);
1872 pos = load_index_to_pos(dev, 4, 0x01 + ch, pos);
1873 pos = load_index_to_pos(dev, 4, 0x02 + ch, pos);
1874 pos = load_index_to_pos(dev, 1, 0x03 + ch, pos);
1875 pos = load_index_to_pos(dev, 4, 0x04 + ch, pos);
1876 pos = load_index_to_pos(dev, 4, 0x05 + ch, pos);
1877 pos = load_index_to_pos(dev, 4, 0x06 + ch, pos);
1878 pos = load_index_to_pos(dev, 1, 0x07 + ch, pos);
1879 pos = load_index_to_pos(dev, 1, 0x10 + ch, pos);
1880 pos = load_index_to_pos(dev, 1, 0x13 + ch, pos);
1881 pos = load_index_to_pos(dev, 1, 0x16 + ch, pos);
1882 pos = load_index_to_pos(dev, 1, 0x19 + ch, pos);
1886 #if CONFIG_MEM_TRAIN_SEQ == 0
// Save the registers of one channel to NVRAM, starting at `pos`.
// The index/size sequence must stay identical to dqs_load_MC_NVRAM_ch().
//   dev: PCI device of the memory controller
//   ch:  value added to every register index (callers pass 0 and 1)
//   pos: NVRAM position to start writing at
// NOTE(review): statements between the comment and the first save (for example an
// adjustment of ch) and the return are not visible in this listing.
1887 static int dqs_save_MC_NVRAM_ch(unsigned int dev, int ch, int pos)
// Sizes below add up to 30: six 4-byte entries plus six 1-byte entries.
1889 /* 30 bytes per channel */
1891 pos = save_index_to_pos(dev, 4, 0x00 + ch, pos);
1892 pos = save_index_to_pos(dev, 4, 0x01 + ch, pos);
1893 pos = save_index_to_pos(dev, 4, 0x02 + ch, pos);
1894 pos = save_index_to_pos(dev, 1, 0x03 + ch, pos);
1895 pos = save_index_to_pos(dev, 4, 0x04 + ch, pos);
1896 pos = save_index_to_pos(dev, 4, 0x05 + ch, pos);
1897 pos = save_index_to_pos(dev, 4, 0x06 + ch, pos);
1898 pos = save_index_to_pos(dev, 1, 0x07 + ch, pos);
1899 pos = save_index_to_pos(dev, 1, 0x10 + ch, pos);
1900 pos = save_index_to_pos(dev, 1, 0x13 + ch, pos);
1901 pos = save_index_to_pos(dev, 1, 0x16 + ch, pos);
1902 pos = save_index_to_pos(dev, 1, 0x19 + ch, pos);
// Save the DQS training state of one memory controller to NVRAM: both channels first,
// then the whole DRAM_CONFIG_HIGH register (4 bytes).
//   dev: PCI device of the memory controller
// NOTE(review): the declaration and initial value of pos are not visible in this listing.
1906 static void dqs_save_MC_NVRAM(unsigned int dev)
1910 printk(BIOS_DEBUG, "DQS SAVE NVRAM: %x\n", dev);
1911 pos = dqs_save_MC_NVRAM_ch(dev, 0, pos);
1912 pos = dqs_save_MC_NVRAM_ch(dev, 1, pos);
1913 /* save the maxasync lat here */
1914 reg = pci_read_config32(dev, DRAM_CONFIG_HIGH);
1915 pos = s3_save_nvram_early(reg, 4, pos);
1919 static void dqs_restore_MC_NVRAM(unsigned int dev)
1924 printk(BIOS_DEBUG, "DQS RESTORE FROM NVRAM: %x\n", dev);
1925 pos = dqs_load_MC_NVRAM_ch(dev, 0, pos);
1926 pos = dqs_load_MC_NVRAM_ch(dev, 1, pos);
1927 /* load the maxasync lat here */
1928 pos = s3_load_nvram_early(4, ®, pos);
1929 reg &= (DCH_MaxAsyncLat_MASK <<DCH_MaxAsyncLat_SHIFT);
1930 reg |= pci_read_config32(dev, DRAM_CONFIG_HIGH);
1931 pci_write_config32(dev, DRAM_CONFIG_HIGH, reg);
1934 #if CONFIG_MEM_TRAIN_SEQ == 0
// DQS training driver for all controllers (variant built when CONFIG_MEM_TRAIN_SEQ == 0).
// Visible sequence: set up MTRRs, fill chip-select info, receiver-enable pass 1 on every
// controller, optional F0/F1 workaround, DQS position training, receiver-enable pass 2,
// save the results to NVRAM, then clear the MTRR setup.
// Controllers that are absent or have no DIMMs (dimm_mask == 0) are skipped in every loop.
// A failing training step jumps to the `out` label, which is not visible in this listing.
//   controllers: number of entries in ctrl[]
//   ctrl:        array of memory controllers
//   tsc0:        only in the workaround build; forwarded to f0_svm_workaround()
//   sysinfo:     shared system information
1935 #if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
1936 static void dqs_timing(int controllers, const struct mem_controller *ctrl, tsc_t *tsc0, struct sys_info *sysinfo)
1938 static void dqs_timing(int controllers, const struct mem_controller *ctrl, struct sys_info *sysinfo)
1945 //need to enable mtrr, so dqs training could access the test address
1946 setup_mtrr_dqs(sysinfo->tom_k, sysinfo->tom2_k);
// Step 0: record memory base and chip-select bases for each populated controller.
1948 for(i = 0; i < controllers; i++) {
1949 if (!sysinfo->ctrl_present[ i ])
1952 /* Skip everything if I don't have any memory on this controller */
1953 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1955 fill_mem_cs_sysinfo(i, ctrl+i, sysinfo);
// Step 1: receiver-enable training, pass 1.
1959 for(i = 0; i < controllers; i++) {
1960 if (!sysinfo->ctrl_present[ i ])
1963 /* Skip everything if I don't have any memory on this controller */
1964 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1966 printk(BIOS_DEBUG, "DQS Training:RcvrEn:Pass1: %02x\n", i);
1967 if(train_DqsRcvrEn(ctrl+i, 1, sysinfo)) goto out;
1968 printk(BIOS_DEBUG, " done\n");
1972 #if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
1973 f0_svm_workaround(controllers, ctrl, tsc0, sysinfo);
// Step 2: DQS read/write position training.
1977 for(i = 0; i < controllers; i++) {
1978 if (!sysinfo->ctrl_present[i])
1981 /* Skip everything if I don't have any memory on this controller */
1982 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1984 printk(BIOS_DEBUG, "DQS Training:DQSPos: %02x\n", i);
1985 if(train_DqsPos(ctrl+i, sysinfo)) goto out;
1986 printk(BIOS_DEBUG, " done\n");
// Step 3: receiver-enable training, pass 2; on success mark the node trained and save to NVRAM.
1990 for(i = 0; i < controllers; i++) {
1991 if (!sysinfo->ctrl_present[i])
1994 /* Skip everything if I don't have any memory on this controller */
1995 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1997 printk(BIOS_DEBUG, "DQS Training:RcvrEn:Pass2: %02x\n", i);
1998 if(train_DqsRcvrEn(ctrl+i, 2, sysinfo)) goto out;
1999 printk(BIOS_DEBUG, " done\n");
2000 sysinfo->mem_trained[i]=1;
2001 dqs_save_MC_NVRAM((ctrl+i)->f2);
2006 clear_mtrr_dqs(sysinfo->tom2_k);
// Timing report; the loop and the tsc[] bookkeeping around it are not visible in this listing.
2010 print_debug_dqs_tsc_x("DQS Training:tsc", i, tsc[i].hi, tsc[i].lo);
2020 #if CONFIG_MEM_TRAIN_SEQ > 0
// DQS training driver for a single node (variant built when CONFIG_MEM_TRAIN_SEQ > 0).
//   i:       node index into sysinfo->mem_trained[]
//   ctrl:    the memory controller of that node
//   sysinfo: shared system information
//   v:       not referenced by any visible statement
// mem_trained[i] is used as a state code: 0x80 means training is pending, 0x81/0x82/0x83
// record a failure in receiver-enable pass 1 / DQS position / receiver-enable pass 2,
// and 1 means trained.
2022 static void dqs_timing(int i, const struct mem_controller *ctrl, struct sys_info *sysinfo, unsigned v)
// Only nodes marked as pending are trained.
2029 if(sysinfo->mem_trained[i] != 0x80) return;
2031 #if CONFIG_MEM_TRAIN_SEQ == 1
2032 //need to enable mtrr, so dqs training could access the test address
2033 setup_mtrr_dqs(sysinfo->tom_k, sysinfo->tom2_k);
2036 fill_mem_cs_sysinfo(i, ctrl, sysinfo);
2041 printk(BIOS_DEBUG, "set DQS timing:RcvrEn:Pass1: %02x\n", i);
2043 if(train_DqsRcvrEn(ctrl, 1, sysinfo)) {
2044 sysinfo->mem_trained[i]=0x81; //
2049 printk(BIOS_DEBUG, " done\n");
2051 printk(BIOS_DEBUG, "set DQS timing:DQSPos: %02x\n", i);
2054 if(train_DqsPos(ctrl, sysinfo)) {
2055 sysinfo->mem_trained[i]=0x82; //
2060 printk(BIOS_DEBUG, " done\n");
2063 printk(BIOS_DEBUG, "set DQS timing:RcvrEn:Pass2: %02x\n", i);
2065 if(train_DqsRcvrEn(ctrl, 2, sysinfo)){
2066 sysinfo->mem_trained[i]=0x83; //
2071 printk(BIOS_DEBUG, " done\n");
2077 #if CONFIG_MEM_TRAIN_SEQ == 1
2078 clear_mtrr_dqs(sysinfo->tom2_k);
// Timing report for four tsc[] slots; how tsc[] is filled is not visible in this listing.
2082 for(ii=0;ii<4;ii++) {
2083 print_debug_dqs_tsc_x("Total DQS Training : tsc ", ii, tsc[ii].hi, tsc[ii].lo);
// Still pending at this point means no step recorded a failure: mark the node trained.
2087 if(sysinfo->mem_trained[i] == 0x80) {
2088 sysinfo->mem_trained[i]=1;
2094 #if CONFIG_MEM_TRAIN_SEQ == 1
// Train the memory of one node using the local sysinfo copy, then publish the resulting
// mem_trained state into the second sysinfo structure.
//   nodeid:   node to train
//   sysinfo:  working copy used during training
//   sysinfox: copy that receives the mem_trained result
2095 static void train_ram(unsigned nodeid, struct sys_info *sysinfo, struct sys_info *sysinfox)
2097 dqs_timing(nodeid, &sysinfo->ctrl[nodeid], sysinfo, 0); // keep the output tidy
2098 // memcpy(&sysinfox->dqs_rcvr_dly_a[nodeid * 2 * 8],&sysinfo->dqs_rcvr_dly_a[nodeid * 2 * 8], 2*8);
2099 // memcpy(&sysinfox->dqs_delay_a[nodeid * 2 * 2 * 9], &sysinfo->dqs_delay_a[nodeid * 2 * 2 * 9], 2 * 2 * 9);
2100 sysinfox->mem_trained[nodeid] = sysinfo->mem_trained[nodeid];
// Entry point for training a node's memory; only core 0 does any work.
// It waits until the sysinfo structure located just below CONFIG_RAMTOP is flagged as
// available, copies what it needs into the local sysinfo, sets the top-of-memory MSRs,
// and then either trains directly or hands over to copy_and_run_ap_code_in_car().
//   nodeid:  node to train
//   coreid:  core index; non-zero cores return immediately
//   sysinfo: local sysinfo copy to fill and use
//   retcall: forwarded to copy_and_run_ap_code_in_car()
// NOTE(review): the preprocessor/else structure that selects between the field-by-field
// copy and the full memcpy is not visible in this listing.
2104 static inline void train_ram_on_node(unsigned nodeid, unsigned coreid, struct sys_info *sysinfo, unsigned retcall)
2106 if(coreid) return; // only do it on core0
2107 struct sys_info *sysinfox = (void*)((CONFIG_RAMTOP) - CONFIG_DCACHE_RAM_GLOBAL_VAR_SIZE);
2108 wait_till_sysinfo_in_ram(); // use pci to get it
// 0x80 is the state that dqs_timing() treats as pending training.
2110 if(sysinfox->mem_trained[nodeid] == 0x80) {
2112 sysinfo->tom_k = sysinfox->tom_k;
2113 sysinfo->tom2_k = sysinfox->tom2_k;
2114 sysinfo->meminfo[nodeid].is_Width128 = sysinfox->meminfo[nodeid].is_Width128;
2115 sysinfo->mem_trained[nodeid] = sysinfox->mem_trained[nodeid];
2116 memcpy(&sysinfo->ctrl[nodeid], &sysinfox->ctrl[nodeid], sizeof(struct mem_controller));
2118 memcpy(sysinfo, sysinfox, CONFIG_DCACHE_RAM_GLOBAL_VAR_SIZE);
2120 set_top_mem_ap(sysinfo->tom_k, sysinfo->tom2_k); // keep the ap's tom consistent with bsp's
2121 #if CONFIG_AP_CODE_IN_CAR == 0
2122 printk(BIOS_DEBUG, "CODE IN ROM AND RUN ON NODE: %02x\n", nodeid);
2123 train_ram(nodeid, sysinfo, sysinfox);
2125 /* Can copy dqs_timing to ap cache and run from cache?
2126 * we need coreboot_ap_car.rom? and treat it as coreboot_ram.rom for ap ?
2128 copy_and_run_ap_code_in_car(retcall);
2129 // will go back by jump