2 yhlu 2005.10 dqs training
4 // 0 means no debug info; a message tagged with level N is printed only when DQS_TRAIN_DEBUG > N
5 #define DQS_TRAIN_DEBUG 0
/*
 * Debug print helper: outputs `str`, then `val` in hex, then "\r\n".
 * The visible body is compiled only when DQS_TRAIN_DEBUG > 0 and prints only
 * when DQS_TRAIN_DEBUG is greater than `level`.
 * With CONFIG_USE_PRINTK_IN_CAR set, the printk_debug() form is used.
 * NOTE(review): the print_debug()/print_debug_hex32() line is presumably the
 * alternative branch; the preprocessor line separating the two is not visible
 * here -- confirm.
 */
7 static inline void print_debug_dqs(const char *str, unsigned val, unsigned level)
9 #if DQS_TRAIN_DEBUG > 0
10 if(DQS_TRAIN_DEBUG > level) {
11 #if CONFIG_USE_PRINTK_IN_CAR
12 printk_debug("%s%x\r\n", str, val);
14 print_debug(str); print_debug_hex32(val); print_debug("\r\n");
/*
 * Debug print helper for two labelled values: outputs `str`, `val`, `str2`,
 * `val2` and "\r\n". In the printk_debug() form both values are printed as
 * eight zero-padded hex digits.
 * Prints only when DQS_TRAIN_DEBUG is greater than `level`.
 */
20 static inline void print_debug_dqs_pair(const char *str, unsigned val, const char *str2, unsigned val2, unsigned level)
22 #if DQS_TRAIN_DEBUG > 0
23 if(DQS_TRAIN_DEBUG > level) {
24 #if CONFIG_USE_PRINTK_IN_CAR
25 printk_debug("%s%08x%s%08x\r\n", str, val, str2, val2);
27 print_debug(str); print_debug_hex32(val); print_debug(str2); print_debug_hex32(val2); print_debug("\r\n");
/*
 * Debug print helper producing "<str>[<i>]=<val><val2>" followed by "\r\n".
 * In the printk_debug() form `i` is printed as two hex digits and `val`,
 * `val2` as eight hex digits each, back to back.
 * Prints only when DQS_TRAIN_DEBUG is greater than `level`.
 * NOTE(review): by the name, val/val2 are presumably the two halves of a
 * time-stamp counter reading -- confirm against the callers.
 */
33 static inline void print_debug_dqs_tsc(const char *str, unsigned i, unsigned val, unsigned val2, unsigned level)
35 #if DQS_TRAIN_DEBUG > 0
36 if(DQS_TRAIN_DEBUG > level) {
37 #if CONFIG_USE_PRINTK_IN_CAR
38 printk_debug("%s[%02x]=%08x%08x\r\n", str, i, val, val2);
40 print_debug(str); print_debug("["); print_debug_hex8(i); print_debug("]="); print_debug_hex32(val); print_debug_hex32(val2); print_debug("\r\n");
/*
 * Same output as print_debug_dqs_tsc() ("<str>[<i>]=<val><val2>" + "\r\n"),
 * but it takes no `level` argument and the visible lines contain no
 * DQS_TRAIN_DEBUG check.
 */
46 static inline void print_debug_dqs_tsc_x(const char *str, unsigned i, unsigned val, unsigned val2)
48 #if CONFIG_USE_PRINTK_IN_CAR
49 printk_debug("%s[%02x]=%08x%08x\r\n", str, i, val, val2);
51 print_debug(str); print_debug("["); print_debug_hex8(i); print_debug("]="); print_debug_hex32(val); print_debug_hex32(val2); print_debug("\r\n");
/*
 * Snapshots memory-map registers of node `nodeid` into `sysinfo`:
 *   mem_base[nodeid]      <- ctrl->f1 config register 0x40 + nodeid*8
 *   cs_base[nodeid*8 + i] <- ctrl->f2 config register 0x40 + i*4
 *   hole_reg[nodeid]      <- ctrl->f1 config register 0xf0
 * NOTE(review): the loop that drives `i` is not visible here; the nodeid*8
 * stride suggests i covers 8 chip selects -- confirm.
 */
56 static void fill_mem_cs_sysinfo(unsigned nodeid, const struct mem_controller *ctrl, struct sys_info *sysinfo)
60 sysinfo->mem_base[nodeid] = pci_read_config32(ctrl->f1, 0x40 + (nodeid<<3));
63 sysinfo->cs_base[nodeid*8+i] = pci_read_config32(ctrl->f2, 0x40 + (i<<2));
66 sysinfo->hole_reg[nodeid] = pci_read_config32(ctrl->f1, 0xf0);
/*
 * Computes the test address for chip select `cs_idx` on the node of `ctrl`,
 * using the register values cached in `sysinfo` (cs_base, mem_base, hole_reg).
 *
 * The value is handled in units of 256 bytes: the 1MB offset is added as
 * 1<<(20-8), and callers in this file use addr>>24 for the FS base and
 * addr<<8 as the low 32-bit offset.
 *
 * When HW_MEM_HOLE_SIZEK != 0, an address at or above the hole start and
 * below 4GB is moved up by (4GB - hole start).
 *
 * NOTE(review): the statements that extract the chip-select base bits, add
 * the node base and return the result are not visible here -- confirm the
 * exact bit layout before relying on it.
 */
69 static unsigned Get_MCTSysAddr(const struct mem_controller *ctrl, unsigned cs_idx, struct sys_info *sysinfo)
73 unsigned nodeid = ctrl->node_id;
75 #if HW_MEM_HOLE_SIZEK != 0
79 //get the local base addr of the chipselect
80 dword = sysinfo->cs_base[nodeid * 8 + cs_idx];
83 //sys addr= node base + local cs base
84 mem_base = sysinfo->mem_base[nodeid];
85 mem_base &= 0xffff0000;
88 #if HW_MEM_HOLE_SIZEK != 0
89 hole_reg = sysinfo->hole_reg[nodeid];
/* hole start in KB taken from bits 31:24 of the hole register; <<2 below converts KB to 256-byte units */
92 hole_startk = (hole_reg & (0xff<<24)) >> 10;
93 if( (dword >= (hole_startk<<2)) && (dword < ((4*1024*1024)<<2))) {
94 dword += ((4*1024*1024 - hole_startk)<<2);
99 //add 1MB offset to avoid compat area
100 dword += (1<<(20-8));
102 //So final result is upper 32 bit addr
/*
 * Returns the test address used for receiver training of chip select `cs_idx`.
 * Simply forwards to Get_MCTSysAddr(); `channel` is not used on the visible line.
 */
108 static unsigned Get_RcvrSysAddr(const struct mem_controller * ctrl, unsigned channel, unsigned cs_idx, struct sys_info *sysinfo)
110 return Get_MCTSysAddr(ctrl, cs_idx, sysinfo);
/*
 * Reads control register CR4 into the local `cr4` with a movl.
 * NOTE(review): the return statement is not visible here; presumably the
 * value read is returned.
 */
114 static inline unsigned long read_cr4(void)
117 asm volatile ("movl %%cr4, %0" : "=r" (cr4));
/* Loads the value `cr4` into control register CR4. */
121 static inline void write_cr4(unsigned long cr4)
123 asm volatile ("movl %0, %%cr4" : : "r" (cr4));
/*
 * NOTE(review): body not visible here. Presumably enables SSE/SSE2 instruction
 * use (via read_cr4()/write_cr4()) so that the movdqa/movntdq based pattern
 * writer can run -- confirm.
 */
127 static inline void enable_sse2()
/*
 * NOTE(review): body not visible here. Presumably the counterpart of
 * enable_sse2() that turns the SSE/SSE2 enable back off -- confirm.
 */
135 static inline void disable_sse2()
/*
 * Read-modify-write of MSR 0xc0010015.
 * NOTE(review): the modification between rdmsr and wrmsr is not visible here;
 * by the function name it presumably sets the Wrap32Dis bit -- confirm the
 * bit position against the AMD BKDG.
 */
144 static void set_wrap32dis(void) {
147 msr = rdmsr(0xc0010015);
150 wrmsr(0xc0010015, msr);
/*
 * Read-modify-write of MSR 0xc0010015.
 * NOTE(review): the modification between rdmsr and wrmsr is not visible here;
 * by the function name it presumably clears the Wrap32Dis bit again -- confirm.
 */
154 static void clear_wrap32dis(void) {
157 msr = rdmsr(0xc0010015);
160 wrmsr(0xc0010015, msr);
/*
 * Programs the FS segment base MSR (0xc0000100) so that the fs-prefixed
 * loads/stores/flushes in the test routines reach the memory under test.
 * Callers in this file pass addr>>24, where addr is in 256-byte units.
 * NOTE(review): how `addr_hi` is placed into the msr value is not visible
 * here; presumably it becomes the upper 32 bits of the base -- confirm.
 */
164 static void set_FSBASE(uint32_t addr_hi)
168 //set fs and use fs prefix to access the mem
171 wrmsr(0xc0000100, msr); //FS_BASE
/*
 * Looks up the cached chip-select base register of `cs_idx` on the node of
 * `ctrl` (sysinfo->cs_base[nodeid*8 + cs_idx]).
 * NOTE(review): the test applied to `enabled` and the return are not visible
 * here; presumably returns non-zero when the chip select's enable bit is set.
 */
175 static unsigned ChipSelPresent(const struct mem_controller *ctrl, unsigned cs_idx, struct sys_info *sysinfo)
178 unsigned nodeid = ctrl->node_id;
181 enabled = sysinfo->cs_base[nodeid * 8 + cs_idx];
/*
 * Tells whether receiver rank `cs_idx` on `channel` should be trained.
 * For a non-zero channel 0 is returned ("no channel b"); otherwise the result
 * of ChipSelPresent() is returned.
 * NOTE(review): the condition guarding the channel check is not visible here;
 * presumably it applies only when is_Width128 is zero -- confirm.
 */
188 static unsigned RcvrRankEnabled(const struct mem_controller *ctrl, int channel, int cs_idx, unsigned is_Width128, struct sys_info *sysinfo)
190 /* FIXME: process 64Muxed */
192 if(channel) return 0; // no channel b
195 return ChipSelPresent(ctrl, cs_idx, sysinfo);
/*
 * Copies test data from `buf_a` to the FS-relative address `addr_lo` using
 * 16-byte aligned SSE2 loads (movdqa) and non-temporal stores (movntdq).
 * Register setup: eax = addr_lo, edx = 16, ecx = line_num * 4, ebx = buf_a,
 * i.e. four 16-byte transfers per 64-byte line.
 * movdqa faults on a source that is not 16-byte aligned, so `buf_a` must be
 * 16-byte aligned.
 * NOTE(review): the loop instructions that advance the pointers and count
 * down ecx are not visible here -- confirm.
 */
198 static void WriteLNTestPattern(unsigned addr_lo, uint8_t *buf_a, unsigned line_num)
202 "movdqa (%3), %%xmm0\n\t"
203 "movntdq %%xmm0, %%fs:(%0)\n\t" /* xmm0 is 128 bit */
208 :: "a" (addr_lo), "d" (16), "c" (line_num * 4), "b"(buf_a)
/*
 * Writes one line of test data to the address `addr` (in 256-byte units).
 * `p` selects the source buffer: buf_b when p == 1, buf_a otherwise.
 * The FS base is set from addr>>24 and the low offset passed on is addr<<8.
 */
214 static void Write1LTestPattern(unsigned addr, unsigned p, uint8_t *buf_a, uint8_t *buf_b)
217 if(p==1) { buf = buf_b; }
218 else { buf = buf_a; }
220 set_FSBASE (addr>>24);
222 WriteLNTestPattern(addr<<8, buf, 1);
/*
 * Performs one dword load from the address `addr` (in 256-byte units):
 * FS base is set from addr>>24 and the load uses offset addr<<8.
 * The loaded value lands in `value`; no use of it is visible here.
 */
225 static void Read1LTestPattern(unsigned addr)
229 set_FSBASE(addr>>24);
231 /* 1st move causes read fill (to exclusive or shared)*/
233 "movl %%fs:(%1), %0\n\t"
234 :"=b"(value): "a" (addr<<8)
/* Training pass selectors, compared against the `Pass` argument of the training routines */
242 #define DQS_FIRST_PASS 1
243 #define DQS_SECOND_PASS 2
/*
 * Error codes and limits used by the training routines.
 * NOTE(review): in the visible code the SB_* values are ORed into `Errors` as
 * plain numbers (e.g. `Errors |= SB_NORCVREN`), not used as bit positions.
 */
/* no passing receiver-enable delay found (RcvrEnDlyRmin still 0xaf) */
245 #define SB_NORCVREN 11
/* only referenced in commented-out expressions in the visible code */
246 #define RCVREN_MARGIN 6
/* receiver-enable delay above 0xae: "passing window too narrow, too far delayed" */
247 #define SB_SmallRCVR 13
/* set in the branch that compares RcvrEnDly against the saved channel A delay */
248 #define SB_CHA2BRCVREN 12
/* "no passing window" for a byte lane in TrainDQSPos */
249 #define SB_NODQSPOS 14
/* smallest accepted width (in delay steps) of a DQS passing window */
250 #define MIN_DQS_WNDW 3
/* DQS passing window narrower than MIN_DQS_WNDW */
251 #define SB_SMALLDQS 15
/*
 * Compares the first quadword read back from the test address with the
 * expected pattern; `result` starts as DQS_FAIL.
 *
 * - Expected data: on DQS_FIRST_PASS either TestPattern1 or TestPattern0,
 *   otherwise TestPattern2.
 * - `addr` is in 256-byte units; addr>>24 is programmed as the FS base.
 * - For channel 1 of a 128-bit wide configuration the read offset is advanced
 *   by 8 bytes ("second channel").
 * - The low dword is compared first; the high dword is read and compared only
 *   when the low dword matched.
 * - On DQS_SECOND_PASS the result gets extra handling ("need to be inverted").
 *
 * NOTE(review): the condition selecting between TestPattern1 and TestPattern0
 * (presumably `pattern`), the stepping of addr_lo/test_buf between the two
 * dword reads, the second-pass handling and the return are not visible here.
 */
254 static unsigned CompareTestPatternQW0(unsigned channel, unsigned addr, unsigned pattern, const uint32_t *TestPattern0, const uint32_t *TestPattern1, const uint32_t *TestPattern2, unsigned Pass, unsigned is_Width128)
260 unsigned result = DQS_FAIL;
262 if(Pass == DQS_FIRST_PASS) {
264 test_buf = (uint32_t *)TestPattern1;
267 test_buf = (uint32_t *)TestPattern0;
271 test_buf = (uint32_t *)TestPattern2;
274 set_FSBASE(addr>>24);
278 if(is_Width128 && (channel == 1)) {
279 addr_lo += 8; //second channel
284 "movl %%fs:(%1), %0\n\t"
285 :"=b"(value): "a" (addr_lo)
288 value_test = *test_buf;
291 print_debug_dqs_pair("\t\t\t\t\t\tQW0.lo : test_buf= ", (unsigned)test_buf, " value = ", value_test, 4);
292 print_debug_dqs_pair("\t\t\t\t\t\tQW0.lo : addr_lo = ", addr_lo, " value = ", value, 4);
294 if(value == value_test) {
298 "movl %%fs:(%1), %0\n\t"
299 :"=b"(value): "a" (addr_lo)
301 value_test = *test_buf;
302 print_debug_dqs_pair("\t\t\t\t\t\tQW0.hi : test_buf= ", (unsigned)test_buf, " value = ", value_test, 4);
303 print_debug_dqs_pair("\t\t\t\t\t\tQW0.hi : addr_lo = ", addr_lo, " value = ", value, 4);
305 if(value == value_test){
310 if(Pass == DQS_SECOND_PASS) { // second pass need to be inverted
311 if(result==DQS_PASS) {
/*
 * Programs the MaxAsyncLat field of DRAM_CONFIG_HIGH from a receiver-enable
 * delay `dly`.
 * `dly` is divided by 20 with rounding up (per the in-line comments: from
 * 50ps units to 1ns), then (dly - DCH_MaxAsyncLat_BASE) is placed into the
 * field after the old field contents are cleared.
 * NOTE(review): lines between the conversion and the register update are not
 * visible here (an extra margin or clamp may be applied there). On the visible
 * line the field value is not masked with DCH_MaxAsyncLat_MASK -- confirm it
 * cannot exceed the field width.
 */
323 static void SetMaxAL_RcvrDly(const struct mem_controller *ctrl, unsigned dly)
327 dly += (20-1); // round it
328 dly /= 20; // convert from unit 50ps to 1ns
333 reg = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH);
334 reg &= ~(DCH_MaxAsyncLat_MASK <<DCH_MaxAsyncLat_SHIFT);
335 reg |= ((dly - DCH_MaxAsyncLat_BASE) << DCH_MaxAsyncLat_SHIFT);
336 pci_write_config32(ctrl->f2, DRAM_CONFIG_HIGH, reg);
341 Set the Target range to WT IO (using an IORR overlapping the already existing
342 WB dram type). Use IORR0
/*
 * Programs IORR0 for the test range: writes the IORR0 base MSR (0xc0010016)
 * and the IORR0 mask MSR (0xc0010017), the latter with a low dword of
 * 0xfc000800 ("64MB Mask").
 * `addr` is in the same 256-byte units used by the other helpers in this file.
 * NOTE(review): how `addr` is split into the base MSR value, and the high
 * dword of the mask, are not visible here -- confirm.
 */
344 static void SetTargetWTIO(unsigned addr)
349 wrmsr(0xc0010016, msr); //IORR0 BASE
352 msr.lo = 0xfc000800; // 64MB Mask
353 wrmsr(0xc0010017, msr); // IORR0 Mask
/*
 * Rewrites the IORR0 mask MSR (0xc0010017).
 * NOTE(review): the value written is not visible here; presumably it disables
 * the range set up by SetTargetWTIO() -- confirm.
 */
356 static void ResetTargetWTIO(void)
362 wrmsr(0xc0010017, msr); // IORR0 Mask
/*
 * Flushes the cache line containing the address `addr` (in 256-byte units):
 * FS base is set from addr>>24 and clflush is issued on an fs-relative operand.
 * NOTE(review): the asm operand list is not visible here; presumably the
 * offset is addr<<8 as in Read1LTestPattern() -- confirm.
 */
365 static void proc_CLFLUSH(unsigned addr)
368 set_FSBASE(addr>>24);
370 /* 1st move causes read fill (to exclusive or shared)*/
372 /* clflush fs:[eax] */
373 "clflush %%fs:(%0)\n\t"
/*
 * Called in TrainRcvrEn after each compare to flush the test address.
 * NOTE(review): body not visible here. Presumably wraps proc_CLFLUSH() with
 * SetTargetWTIO()/ResetTargetWTIO() -- confirm.
 */
378 static void proc_IOCLFLUSH(unsigned addr)
/*
 * Reads the indexed register 0x10 (through ctrl->f2, offset 0x98) and writes
 * the same value straight back.
 * NOTE(review): by the function name, the write itself presumably resets the
 * controller's write pointer -- confirm against the BKDG.
 */
385 static void ResetDCTWrPtr(const struct mem_controller *ctrl)
388 unsigned index = 0x10;
390 dword = pci_read_config32_index_wait(ctrl->f2, 0x98, index);
391 pci_write_config32_index_wait(ctrl->f2, 0x98, index, dword);
/*
 * Returns a cycle-time figure for column `i` (0..3) of the tables below.
 * The caller passes the masked memory-clock field of DRAM_CONFIG_HIGH as `i`
 * and labels the result "cycle time (ns) * 1K".
 *
 * The current FID is read from MSR 0xc0010042 (low 6 bits). For index > 12
 * the default T1000_a[i] is returned, otherwise the FID-specific entry
 * TT_a[index*4 + i].
 *
 * NOTE(review): the computation of `index` from fid_cur is not visible here.
 * The visible TT_a rows are labelled 4..15, i.e. 12 rows / 48 entries; with
 * the guard `index>12`, index 12 would read TT_a[48..51]. Confirm whether a
 * further row exists on a line not visible here, otherwise the guard allows an
 * out-of-bounds read.
 */
396 static uint16_t get_exact_T1000(unsigned i)
399 static const uint16_t T1000_a[]= { 5000, 3759, 3003, 2500 };
401 static const uint16_t TT_a[] = {
403 /*4 */ 6250, 6250, 6250, 6250,
404 /*5 */ 5000, 5000, 5000, 2500,
405 /*6 */ 5000, 4166, 4166, 2500,
406 /*7 */ 5000, 4285, 3571, 2500,
408 /*8 */ 5000, 3750, 3125, 2500,
409 /*9 */ 5000, 3888, 3333, 2500,
410 /*10*/ 5000, 4000, 3000, 2500,
411 /*11*/ 5000, 4090, 3181, 2500,
413 /*12*/ 5000, 3750, 3333, 2500,
414 /*13*/ 5000, 3846, 3076, 2500,
415 /*14*/ 5000, 3928, 3214, 2500,
416 /*15*/ 5000, 4000, 3000, 2500,
423 msr = rdmsr(0xc0010042);
424 fid_cur = msr.lo & 0x3f;
428 if(index>12) return T1000_a[i];
430 return TT_a[index * 4+i];
/*
 * Presets the DQS timing registers before receiver-enable training.
 * Writes `dword` to indexed registers 1..3 and 0x21..0x23, then (with a
 * second value) to 5..7 and 0x25..0x27, all through ctrl->f2 offset 0x98.
 * Per the in-line comments the values are 0x00 resp. 0x2f "for all bytes".
 * NOTE(review): the assignments to `dword` are not visible here. Both in-line
 * comments say "Write Timing"; SetDQSDelayCSR() in this file uses the +4 index
 * offset for the read direction, so the second group is presumably the read
 * timing registers -- confirm.
 */
434 static void InitDQSPos4RcvrEn(const struct mem_controller *ctrl)
440 for(i=1; i<=3; i++) {
441 /* Program the DQS Write Timing Control Registers (Function 2:Offset 0x9c, index 0x01-0x03, 0x21-0x23) to 0x00 for all bytes */
442 pci_write_config32_index_wait(ctrl->f2, 0x98, i, dword);
443 pci_write_config32_index_wait(ctrl->f2, 0x98, i+0x20, dword);
447 for(i=5; i<=7; i++) {
448 /* Program the DQS Write Timing Control Registers (Function 2:Offset 0x9c, index 0x05-0x07, 0x25-0x27) to 0x2f for all bytes */
449 pci_write_config32_index_wait(ctrl->f2, 0x98, i, dword);
450 pci_write_config32_index_wait(ctrl->f2, 0x98, i+0x20, dword);
/*
 * Default for the F0/F1 workaround switch: set to 1 unless already defined.
 * When it is 1, TrainRcvrEn queries is_cpu_pre_f2_in_bsp() on the first pass.
 */
455 #ifndef K8_REV_F_SUPPORT_F0_F1_WORKAROUND
456 #define K8_REV_F_SUPPORT_F0_F1_WORKAROUND 1
/*
 * Receiver-enable delay training for one memory controller.
 *
 * Flow, as far as visible in this view:
 *  - First pass only: InitDQSPos4RcvrEn() is run and DC_DqsRcvEnTrain is set
 *    in DRAM_CTRL for the duration of training, then cleared at the end.
 *  - DimmEccEn is cleared in DRAM_CONFIG_LOW while training (the previous bit
 *    is kept in ecc_bit).
 *  - Test data is staged in 16-byte aligned stack buffers buf_a/buf_b:
 *    TestPattern0/TestPattern1 on the first pass, TestPattern2 otherwise.
 *  - For each channel and each receiver pair (receiver = 0, 2, 4, 6), as long
 *    as no error is flagged: the test lines are written, then RcvrEnDly is
 *    swept up to 0xaf. Each step programs the delay and MaxAsyncLat, reads
 *    the test lines back and compares their first quadword. RcvrEnDlyRmin
 *    records the delay at which a pass follows a fail.
 *  - The chosen delay is stored in sysinfo->dqs_rcvr_dly_a and written to the
 *    controller; the largest one (CTLRMaxDelay) is finally handed to
 *    SetMaxAL_RcvrDly().
 *
 * Returns 1 when CTLRMaxDelay == 0xae, otherwise 0.
 *
 * NOTE(review): many statements (declarations, else branches, loop increments,
 * the SSE2/FS setup and teardown) are not visible here; treat the outline
 * above as a reading aid and confirm details against the full function.
 */
459 static unsigned TrainRcvrEn(const struct mem_controller *ctrl, unsigned Pass, struct sys_info *sysinfo)
462 static const uint32_t TestPattern0[] = {
463 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
464 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
465 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
466 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
468 static const uint32_t TestPattern1[] = {
469 0x55555555, 0x55555555, 0x55555555, 0x55555555,
470 0x55555555, 0x55555555, 0x55555555, 0x55555555,
471 0x55555555, 0x55555555, 0x55555555, 0x55555555,
472 0x55555555, 0x55555555, 0x55555555, 0x55555555,
474 static const uint32_t TestPattern2[] = {
475 0x12345678, 0x87654321, 0x23456789, 0x98765432,
476 0x59385824, 0x30496724, 0x24490795, 0x99938733,
477 0x40385642, 0x38465245, 0x29432163, 0x05067894,
478 0x12349045, 0x98723467, 0x12387634, 0x34587623,
481 uint8_t pattern_buf_x[64 * 4 + 16]; // We need to two cache line So have more 16 bytes to keep 16 byte alignment */
482 uint8_t *buf_a, *buf_b;
/* per-node slice of the saved receiver delays: 2 channels x 8 receivers per node */
485 uint8_t *dqs_rcvr_dly_a = &sysinfo->dqs_rcvr_dly_a[ctrl->node_id * 2* 8] ; //8 node, channel 2, receiver 8
489 unsigned channel, receiver;
492 unsigned CTLRMaxDelay;
497 unsigned Test0, Test1;
499 unsigned RcvrEnDlyRmin;
507 unsigned TestAddr0, TestAddr0B, TestAddr1, TestAddr1B;
509 unsigned CurrRcvrCHADelay;
513 unsigned is_Width128 = sysinfo->meminfo[ctrl->node_id].is_Width128;
517 if(Pass == DQS_FIRST_PASS) {
518 InitDQSPos4RcvrEn(ctrl);
/* remember the current DimmEccEn setting in ecc_bit and clear it for the training */
528 dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
529 ecc_bit = dword & DCL_DimmEccEn;
530 dword &= ~(DCL_DimmEccEn);
531 pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dword);
534 if(Pass == DQS_FIRST_PASS) {
535 #if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
536 cpu_f0_f1 = is_cpu_pre_f2_in_bsp(ctrl->node_id);
541 /* Set the DqsRcvEnTrain bit */
542 dword = pci_read_config32(ctrl->f2, DRAM_CTRL);
543 dword |= DC_DqsRcvEnTrain;
544 pci_write_config32(ctrl->f2, DRAM_CTRL, dword);
549 //get T1000 figures (cycle time (ns)) * 1K
550 dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH);
551 dword &= DCH_MemClkFreq_MASK;
553 T1000 = get_exact_T1000(dword);
/* buf_a: next 16-byte boundary strictly above pattern_buf_x (add 0x10, clear low 4 bits); buf_b follows 128 bytes later */
556 buf_a = (uint8_t *)(((uint32_t)(&pattern_buf_x[0]) + 0x10) & (0xfffffff0));
557 buf_b = buf_a + 128; //??
558 if(Pass==DQS_FIRST_PASS) {
560 *((uint32_t *)(buf_a + i*4)) = TestPattern0[i];
561 *((uint32_t *)(buf_b + i*4)) = TestPattern1[i];
566 *((uint32_t *)(buf_a + i*4)) = TestPattern2[i];
567 *((uint32_t *)(buf_b + i*4)) = TestPattern2[i];
571 print_debug_dqs("\r\nTrainRcvEn: 0 ctrl", ctrl->node_id, 0);
573 print_debug_addr("TrainRcvEn: buf_a:", buf_a);
576 /* for each channel */
578 for(channel = 0; (channel < 2) && (!Errors); channel++)
580 print_debug_dqs("\tTrainRcvEn51: channel ",channel, 1);
583 /* there are four receiver pairs, loosely associated with CS */
584 for( receiver = 0; (receiver < 8) && (!Errors); receiver+=2)
/* indexed register of this receiver pair: 0x10, 0x13, 0x16, 0x19 (the writes below use +0x20 for channel B) */
587 unsigned index=(receiver>>1) * 3 + 0x10;
589 print_debug_dqs("\t\tTrainRcvEn52: index ", index, 2);
593 dword = pci_read_config32_index_wait(ctrl->f2, 0x98, index);
594 CurrRcvrCHADelay= dword & 0xff;
/* 0xaf doubles as the "no passing delay found" marker, checked after the sweep */
604 RcvrEnDlyRmin = 0xaf;
606 if(!RcvrRankEnabled(ctrl, channel, receiver, is_Width128, sysinfo)) continue;
608 /* for each DQS receiver enable setting */
610 TestAddr0 = Get_RcvrSysAddr(ctrl, channel, receiver, sysinfo);
/* addresses are in 256-byte units, hence the -8 in the shift for the 4MB offset */
612 TestAddr0B = TestAddr0 + (1<<(20+2-8)); // 4MB
614 if(RcvrRankEnabled(ctrl, channel, receiver+1, is_Width128, sysinfo)) {
615 TestAddr1 = Get_RcvrSysAddr(ctrl, channel, receiver+1, sysinfo);
616 TestAddr1B = TestAddr1 + (1<<(20+2-8)); //4MB
623 print_debug_dqs("\t\tTrainRcvEn53: TestAddr0B ", TestAddr0B, 2);
625 Write1LTestPattern(TestAddr0, 0, buf_a, buf_b); // rank0 of dimm, test p0
626 Write1LTestPattern(TestAddr0B, 1, buf_a, buf_b); //rank0 of dimm, test p1
629 Write1LTestPattern(TestAddr1, 0, buf_a, buf_b); //rank 1 of dimm
630 Write1LTestPattern(TestAddr1B, 1, buf_a, buf_b);//rank 1 of dimm
633 if(Pass == DQS_FIRST_PASS) {
636 RcvrEnDly = dqs_rcvr_dly_a[channel * 8 + receiver];
639 while ( RcvrEnDly < 0xaf) { // Sweep Delay value here
640 print_debug_dqs("\t\t\tTrainRcvEn541: RcvrEnDly ", RcvrEnDly, 3);
643 /* Odd steps get another pattern such that even
644 and odd steps alternate.
645 The pointers to the patterns will be swapped
646 at the end of the loop so they are correspond
657 /* Program current Receiver enable delay */
658 pci_write_config32_index_wait(ctrl->f2, 0x98, index, RcvrEnDly);
659 /* FIXME: 64bit MUX */
662 /* Program current Receiver enable delay channel b */
663 pci_write_config32_index_wait(ctrl->f2, 0x98, index+ 0x20, RcvrEnDly);
666 /* Program the MaxAsyncLat filed with the
667 current DQS receiver enable setting plus 6ns
669 /*Porgram MaxAsyncLat to correspond with current delay */
670 SetMaxAL_RcvrDly(ctrl, RcvrEnDly);
674 Read1LTestPattern(TestAddr0); //Cache Fill
675 /* ROM vs cache compare */
676 Test0 = CompareTestPatternQW0(channel, TestAddr0, PatternA, TestPattern0, TestPattern1, TestPattern2, Pass, is_Width128);
677 proc_IOCLFLUSH(TestAddr0);
681 print_debug_dqs("\t\t\tTrainRcvEn542: Test0 ", Test0, 3);
683 if(Test0 == DQS_PASS) {
685 Read1LTestPattern(TestAddr0B);
686 Test1 = CompareTestPatternQW0(channel, TestAddr0B, PatternB, TestPattern0, TestPattern1, TestPattern2, Pass, is_Width128);
687 proc_IOCLFLUSH(TestAddr0B);
691 print_debug_dqs("\t\t\tTrainRcvEn543: Test1 ", Test1, 3);
693 if(Test1 == DQS_PASS) {
695 Read1LTestPattern(TestAddr1);
696 Test0 = CompareTestPatternQW0(channel, TestAddr1, PatternA, TestPattern0, TestPattern1, TestPattern2, Pass, is_Width128);
697 proc_IOCLFLUSH(TestAddr1);
700 if(Test0 == DQS_PASS) {
701 Read1LTestPattern(TestAddr1B);
702 Test1 = CompareTestPatternQW0(channel, TestAddr1B, PatternB, TestPattern0, TestPattern1, TestPattern2, Pass, is_Width128);
703 proc_IOCLFLUSH(TestAddr1B);
706 if(Test1 == DQS_PASS) {
710 print_debug_dqs("\t\t\tTrainRcvEn544: Test0 ", Test0, 3);
718 print_debug_dqs("\t\t\tTrainRcvEn55: RcvrEnDly ", RcvrEnDly, 3);
/* a pass right after a fail marks the start of the passing region */
720 if(CurrTest == DQS_PASS) {
721 if(LastTest == DQS_FAIL) {
722 RcvrEnDlyRmin = RcvrEnDly;
729 /* swap the rank 0 pointers */
731 TestAddr0 = TestAddr0B;
734 /* swap the rank 1 pointers */
736 TestAddr1 = TestAddr1B;
739 print_debug_dqs("\t\t\tTrainRcvEn56: RcvrEnDly ", RcvrEnDly, 3);
745 print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDly ", RcvrEnDly, 2);
/* RcvrEnDlyRmin untouched since its 0xaf initialisation: no fail-to-pass transition was seen */
747 if(RcvrEnDlyRmin == 0xaf) {
749 Errors |= SB_NORCVREN;
752 if(Pass == DQS_FIRST_PASS) {
753 // We need a better value for DQSPos training
754 RcvrEnDly = RcvrEnDlyRmin /* + RCVREN_MARGIN * T1000/64/50 */;
756 RcvrEnDly = RcvrEnDlyRmin;
759 if(RcvrEnDly > 0xae) {
760 //passing window too narrow, too far delayed
761 Errors |= SB_SmallRCVR;
765 if(Pass == DQS_SECOND_PASS) { //second pass must average values
766 RcvrEnDly += dqs_rcvr_dly_a[channel * 8 + receiver] /* - (RCVREN_MARGIN * T1000/64/50)*/;
770 dqs_rcvr_dly_a[channel * 8 + receiver] = RcvrEnDly;
772 //Set final RcvrEnDly for this DIMM and Channel
773 pci_write_config32_index_wait(ctrl->f2, 0x98, index, RcvrEnDly);
776 pci_write_config32_index_wait(ctrl->f2, 0x98, index+0x20, RcvrEnDly); // channel B
778 pci_write_config32_index_wait(ctrl->f2, 0x98, index, CurrRcvrCHADelay);
/* dword becomes the absolute difference between this delay and the saved channel A delay */
779 if(RcvrEnDly > CurrRcvrCHADelay) {
780 dword = RcvrEnDly - CurrRcvrCHADelay;
783 dword = CurrRcvrCHADelay - RcvrEnDly;
787 Errors |= SB_CHA2BRCVREN;
792 print_debug_dqs("\t\tTrainRcvEn63: RcvrEnDly ", RcvrEnDly, 2);
794 if(RcvrEnDly > CTLRMaxDelay) {
795 CTLRMaxDelay = RcvrEnDly;
798 print_debug_dqs("\t\tTrainRcvEn64: CTLRMaxDelay ", CTLRMaxDelay, 2);
803 print_debug_dqs("\tTrainRcvEn65: CTLRMaxDelay ", CTLRMaxDelay, 1);
805 /* Program the MaxAsyncLat field with the largest DQS Receiver Enable setting */
806 SetMaxAL_RcvrDly(ctrl, CTLRMaxDelay);
/* NOTE(review): DimmEccEn is cleared again here; restoring ecc_bit presumably happens on a line not visible in this view */
810 dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
811 dword &= ~(DCL_DimmEccEn);
813 pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dword);
815 if(Pass == DQS_FIRST_PASS) {
816 #if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
/* leave receiver-enable training mode again */
820 dword = pci_read_config32(ctrl->f2, DRAM_CTRL);
821 dword &= ~DC_DqsRcvEnTrain;
822 pci_write_config32(ctrl->f2, DRAM_CTRL, dword);
830 //restore SSE2 setting
833 #if MEM_TRAIN_SEQ != 1
834 /* We need tidy output for type 1 */
835 #if CONFIG_USE_PRINTK_IN_CAR
836 printk_debug(" CTLRMaxDelay=%02x", CTLRMaxDelay);
838 print_debug(" CTLRMaxDelay="); print_debug_hex8(CTLRMaxDelay);
/* 1 is returned only when the largest trained delay is exactly 0xae */
842 return (CTLRMaxDelay==0xae)?1:0;
/*
 * Direction selectors for DQS position training. The value is also used as
 * an index offset: SetDQSDelayCSR()/SetDQSDelayAllCSR() add direction*4 to
 * the register index.
 */
846 #define DQS_READDIR 1
847 #define DQS_WRITEDIR 0
/*
 * Programs the DQS delay of a single byte lane.
 * Register index = (bytelane/4) + 1 + channel*0x20 + direction*4, accessed
 * through ctrl->f2 offset 0x98. Inside that register the 6-bit field at bit
 * position `shift` (a multiple of 8) is replaced with `dqs_delay`.
 * NOTE(review): the initial assignment of `shift` is not visible here
 * (presumably bytelane modulo 4). `dqs_delay` is not masked to 6 bits before
 * being ORed in -- callers must keep it below 0x40.
 */
850 static void SetDQSDelayCSR(const struct mem_controller *ctrl, unsigned channel, unsigned bytelane, unsigned direction, unsigned dqs_delay)
851 { //ByteLane could be 0-8, last is for ECC
858 index = (bytelane>>2) + 1 + channel * 0x20 + (direction << 2);
863 shift <<= 3; // 8 bit
865 dword = pci_read_config32_index_wait(ctrl->f2, 0x98, index);
866 dword &= ~(0x3f<<shift);
867 dword |= (dqs_delay<<shift);
868 pci_write_config32_index_wait(ctrl->f2, 0x98, index, dword);
/*
 * Programs the same DQS delay on all byte lanes of `channel` for `direction`.
 * A dword is built with `dqs_delay` replicated into its bytes, then written
 * to the registers starting at index 1 + channel*0x20 + direction*4 (index + i).
 * NOTE(review): the loop bounds for building the dword and for the register
 * writes are not visible here -- confirm how many registers are written.
 */
872 static void SetDQSDelayAllCSR(const struct mem_controller *ctrl, unsigned channel, unsigned direction, unsigned dqs_delay)
881 dword |= dqs_delay<<(i*8);
884 index = 1 + channel * 0x20 + direction * 4;
887 pci_write_config32_index_wait(ctrl->f2, 0x98, index + i, dword);
/*
 * Returns the midpoint of the delay window [min_d, max_d]: min_d plus half of
 * the window size.
 * NOTE(review): the statement executed for an odd window size ("need round
 * up") is not visible here -- confirm how the rounding is applied.
 */
892 static unsigned MiddleDQS(unsigned min_d, unsigned max_d)
895 size_d = max_d-min_d;
896 if(size_d & 1) { //need round up
899 return ( min_d + (size_d>>1));
/*
 * Records a trained DQS delay in `dqs_delay_a`.
 * Layout: 18 entries per channel, 9 per direction, one per byte lane
 * (index = channel*2*9 + direction*9 + bytelane).
 */
902 static inline void save_dqs_delay(unsigned channel, unsigned bytelane, unsigned direction, uint8_t *dqs_delay_a, uint8_t dqs_delay)
904 dqs_delay_a[channel * 2*9 + direction * 9 + bytelane] = dqs_delay;
/*
 * Writes the DQS test data from `buf_a` to FS-relative address `addr_lo`:
 * (pattern+1)*9 lines, i.e. 9 lines for pattern 0 and 18 lines for pattern 1.
 */
907 static void WriteDQSTestPattern(unsigned addr_lo, unsigned pattern , uint8_t *buf_a)
909 WriteLNTestPattern(addr_lo, buf_a, (pattern+1) * 9);
/*
 * Touches 18 consecutive 64-byte lines starting at FS-relative `addr_lo` with
 * one dword load each; the loaded values all go to eax and are not used.
 * Five base registers are preloaded at addr_lo+128 plus 0, 4, 8, 12 and 16
 * lines, and each is used with displacements -128, -64, 0 and 64 (only -128
 * and -64 for the last one).
 */
912 static void ReadL18TestPattern(unsigned addr_lo)
914 //set fs and use fs prefix to access the mem
916 "movl %%fs:-128(%%esi), %%eax\n\t" //TestAddr cache line
917 "movl %%fs:-64(%%esi), %%eax\n\t" //+1
918 "movl %%fs:(%%esi), %%eax\n\t" //+2
919 "movl %%fs:64(%%esi), %%eax\n\t" //+3
921 "movl %%fs:-128(%%edi), %%eax\n\t" //+4
922 "movl %%fs:-64(%%edi), %%eax\n\t" //+5
923 "movl %%fs:(%%edi), %%eax\n\t" //+6
924 "movl %%fs:64(%%edi), %%eax\n\t" //+7
926 "movl %%fs:-128(%%ebx), %%eax\n\t" //+8
927 "movl %%fs:-64(%%ebx), %%eax\n\t" //+9
928 "movl %%fs:(%%ebx), %%eax\n\t" //+10
929 "movl %%fs:64(%%ebx), %%eax\n\t" //+11
931 "movl %%fs:-128(%%ecx), %%eax\n\t" //+12
932 "movl %%fs:-64(%%ecx), %%eax\n\t" //+13
933 "movl %%fs:(%%ecx), %%eax\n\t" //+14
934 "movl %%fs:64(%%ecx), %%eax\n\t" //+15
936 "movl %%fs:-128(%%edx), %%eax\n\t" //+16
937 "movl %%fs:-64(%%edx), %%eax\n\t" //+17
939 :: "a"(0), "b" (addr_lo+128+8*64), "c" (addr_lo+128+12*64), "d" (addr_lo +128+16*64), "S"(addr_lo+128), "D"(addr_lo+128+4*64)
/*
 * Touches 9 consecutive 64-byte lines starting at FS-relative `addr_lo` with
 * one dword load each; the loaded values all go to eax and are not used.
 * Base registers: ecx = addr_lo+128 (lines 0-3), edx = four lines further
 * (lines 4-7), ebx = eight lines further (line 8).
 */
944 static void ReadL9TestPattern(unsigned addr_lo)
947 //set fs and use fs prefix to access the mem
950 "movl %%fs:-128(%%ecx), %%eax\n\t" //TestAddr cache line
951 "movl %%fs:-64(%%ecx), %%eax\n\t" //+1
952 "movl %%fs:(%%ecx), %%eax\n\t" //+2
953 "movl %%fs:64(%%ecx), %%eax\n\t" //+3
955 "movl %%fs:-128(%%edx), %%eax\n\t" //+4
956 "movl %%fs:-64(%%edx), %%eax\n\t" //+5
957 "movl %%fs:(%%edx), %%eax\n\t" //+6
958 "movl %%fs:64(%%edx), %%eax\n\t" //+7
960 "movl %%fs:-128(%%ebx), %%eax\n\t" //+8
962 :: "a"(0), "b" (addr_lo+128+8*64), "c"(addr_lo+128), "d"(addr_lo+128+4*64)
/*
 * Reads back the DQS test area at FS-relative `addr_lo`, using either the
 * 9-line or the 18-line reader.
 * NOTE(review): the selecting condition is not visible here; presumably
 * pattern 0 uses ReadL9TestPattern() and pattern 1 ReadL18TestPattern(),
 * matching the (pattern+1)*9 lines written by WriteDQSTestPattern() -- confirm.
 */
968 static void ReadDQSTestPattern(unsigned addr_lo, unsigned pattern)
971 ReadL9TestPattern(addr_lo);
974 ReadL18TestPattern(addr_lo);
/*
 * Flushes (clflush) 9 consecutive 64-byte lines starting at FS-relative
 * `addr_lo`. Base registers: ecx = addr_lo+128 (lines 0-3), eax = four lines
 * further (lines 4-7), ebx = eight lines further (line 8).
 */
978 static void FlushDQSTestPattern_L9(unsigned addr_lo)
981 "clflush %%fs:-128(%%ecx)\n\t"
982 "clflush %%fs:-64(%%ecx)\n\t"
983 "clflush %%fs:(%%ecx)\n\t"
984 "clflush %%fs:64(%%ecx)\n\t"
986 "clflush %%fs:-128(%%eax)\n\t"
987 "clflush %%fs:-64(%%eax)\n\t"
988 "clflush %%fs:(%%eax)\n\t"
989 "clflush %%fs:64(%%eax)\n\t"
991 "clflush %%fs:-128(%%ebx)\n\t"
993 :: "b" (addr_lo+128+8*64), "c"(addr_lo+128), "a"(addr_lo+128+4*64)
/*
 * Flushes (clflush) 18 consecutive 64-byte lines starting at FS-relative
 * `addr_lo`. Five base registers (eax, edi, ebx, ecx, edx) are preloaded at
 * addr_lo+128 plus 0, 4, 8, 12 and 16 lines and used with displacements
 * -128, -64, 0 and 64 (only -128 and -64 for the last one).
 * The function is marked noinline.
 */
997 static __attribute__((noinline)) void FlushDQSTestPattern_L18(unsigned addr_lo)
1000 "clflush %%fs:-128(%%eax)\n\t"
1001 "clflush %%fs:-64(%%eax)\n\t"
1002 "clflush %%fs:(%%eax)\n\t"
1003 "clflush %%fs:64(%%eax)\n\t"
1005 "clflush %%fs:-128(%%edi)\n\t"
1006 "clflush %%fs:-64(%%edi)\n\t"
1007 "clflush %%fs:(%%edi)\n\t"
1008 "clflush %%fs:64(%%edi)\n\t"
1010 "clflush %%fs:-128(%%ebx)\n\t"
1011 "clflush %%fs:-64(%%ebx)\n\t"
1012 "clflush %%fs:(%%ebx)\n\t"
1013 "clflush %%fs:64(%%ebx)\n\t"
1015 "clflush %%fs:-128(%%ecx)\n\t"
1016 "clflush %%fs:-64(%%ecx)\n\t"
1017 "clflush %%fs:(%%ecx)\n\t"
1018 "clflush %%fs:64(%%ecx)\n\t"
1020 "clflush %%fs:-128(%%edx)\n\t"
1021 "clflush %%fs:-64(%%edx)\n\t"
1023 :: "b" (addr_lo+128+8*64), "c" (addr_lo+128+12*64), "d" (addr_lo +128+16*64), "a"(addr_lo+128), "D"(addr_lo+128+4*64)
/*
 * Flushes the DQS test area at FS-relative `addr_lo` from the cache, using
 * either the 9-line or the 18-line flush routine.
 * NOTE(review): the selecting condition is not visible here; presumably
 * pattern 0 uses the 9-line and pattern 1 the 18-line variant -- confirm.
 */
1027 static void FlushDQSTestPattern(unsigned addr_lo, unsigned pattern )
1031 FlushDQSTestPattern_L9(addr_lo);
1034 FlushDQSTestPattern_L18(addr_lo);
/*
 * Compares the data read back through FS-relative `addr_lo` with the
 * reference data in `buf_a` and builds a per-byte-lane pass bitmap.
 *
 * - `bitmap` starts as 0xff (all eight lanes passing); a mismatching byte
 *   clears bit `bytelane`.
 * - 9*64/4 dwords (nine 64-byte lines) are checked, each one byte by byte.
 * - When both `pattern` and `channel` are non-zero the start offset is moved
 *   up by 8 bytes ("second channel"); with pattern == 1 a further 8 bytes are
 *   skipped inside the loop ("other channel's data").
 *
 * NOTE(review): the stepping of bytelane, test_buf and addr_lo and the return
 * statement are not visible here; presumably `bitmap` is returned, as
 * TrainDQSPos() ANDs the result into its per-delay pass mask.
 */
1038 static unsigned CompareDQSTestPattern(unsigned channel, unsigned addr_lo, unsigned pattern, uint8_t *buf_a)
1041 unsigned bitmap = 0xff;
1046 uint32_t value_test;
1048 test_buf = (uint32_t *)buf_a;
1051 if(pattern && channel) {
1052 addr_lo += 8; //second channel
1057 for(i=0;i<9*64/4;i++) {
1059 "movl %%fs:(%1), %0\n\t"
1060 :"=b"(value): "a" (addr_lo)
1062 value_test = *test_buf;
1064 print_debug_dqs_pair("\t\t\t\t\t\ttest_buf= ", (unsigned)test_buf, " value = ", value_test, 7);
1065 print_debug_dqs_pair("\t\t\t\t\t\ttaddr_lo = ",addr_lo, " value = ", value, 7);
/* compare the four bytes of the dword one at a time */
1067 for(j=0;j<4*8;j+=8) {
1068 if(((value>>j)&0xff) != ((value_test>>j)& 0xff)) {
1069 bitmap &= ~(1<<bytelane);
1075 print_debug_dqs("\t\t\t\t\t\tbitmap = ", bitmap, 7);
1078 if(pattern == 1) { //dual channel
1079 addr_lo += 8; //skip over other channel's data
/*
 * Finds and programs the DQS delay of each of the 8 byte lanes of `channel`
 * for the given `Direction` (DQS_READDIR or DQS_WRITEDIR).
 *
 * Phase 1 - collect results: MutualCSPassW[delay] starts as 0xff for all 48
 * delay settings. For every present chip select (0..7) and every delay, the
 * delay is programmed on all lanes, the test data is written (read direction:
 * once up front; write direction: at every delay step), read back and
 * compared. The per-lane result is ANDed into MutualCSPassW[delay]. Delays
 * that already failed on all lanes are skipped.
 *
 * Phase 2 - pick a delay per lane: the longest run of consecutive passing
 * delays is tracked in RnkDlyFilterMin..RnkDlyFilterMax. SB_NODQSPOS is
 * flagged when RnkDlySeqPassMax is 0, SB_SMALLDQS when the window is narrower
 * than MIN_DQS_WNDW. The MiddleDQS() of the window is programmed with
 * SetDQSDelayCSR() and recorded with save_dqs_delay().
 *
 * NOTE(review): the else branches around the error checks and the return
 * statement are not visible here; presumably the accumulated `Errors` value
 * is returned -- confirm.
 */
1093 static unsigned TrainDQSPos(const struct mem_controller *ctrl, unsigned channel, unsigned Direction, unsigned Pattern, uint8_t *buf_a, uint8_t *dqs_delay_a, struct sys_info *sysinfo)
1097 unsigned BanksPresent;
1099 unsigned MutualCSPassW[48];
1107 unsigned RnkDlyFilterMax, RnkDlyFilterMin;
1108 unsigned RnkDlySeqPassMax, RnkDlySeqPassMin;
1113 print_debug_dqs("\t\t\tTrainDQSPos begin ", 0, 3);
1115 print_debug_addr("TrainDQSPos: MutualCSPassW[48] :", MutualCSPassW);
1117 for(DQSDelay=0; DQSDelay<48; DQSDelay++) {
1118 MutualCSPassW[DQSDelay] = 0xff; // Bitmapped status per delay setting, 0xff=All positions passing (1= PASS)
1121 for(ChipSel = 0; ChipSel < 8; ChipSel++) { //logical register chipselects 0..7
1122 print_debug_dqs("\t\t\t\tTrainDQSPos: 11 ChipSel ", ChipSel, 4);
1123 //FIXME: process 64MUXedMode
1124 if(!ChipSelPresent(ctrl, ChipSel, sysinfo)) continue;
1127 TestAddr = Get_MCTSysAddr(ctrl, ChipSel, sysinfo);
1129 print_debug_dqs("\t\t\t\tTrainDQSPos: 12 TestAddr ", TestAddr, 4);
1131 //set fs and use fs prefix to access the mem
1132 set_FSBASE(TestAddr>>24);
1134 if(Direction == DQS_READDIR) {
1135 print_debug_dqs("\t\t\t\tTrainDQSPos: 13 for read so write at first", 0, 4);
1136 WriteDQSTestPattern(TestAddr<<8, Pattern, buf_a);
1139 for(DQSDelay = 0; DQSDelay < 48; DQSDelay++ ){
1140 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 141 DQSDelay ", DQSDelay, 5);
1141 if(MutualCSPassW[DQSDelay] == 0) continue; //skip current delay value if other chipselects have failed all 8 bytelanes
1142 SetDQSDelayAllCSR(ctrl, channel, Direction, DQSDelay);
1143 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 142 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);
1144 if(Direction == DQS_WRITEDIR) {
1145 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 143 for write", 0, 5);
1146 WriteDQSTestPattern(TestAddr<<8, Pattern, buf_a);
1148 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 144 Pattern ", Pattern, 5);
1149 ReadDQSTestPattern(TestAddr<<8, Pattern);
1150 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 145 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);
1151 MutualCSPassW[DQSDelay] &= CompareDQSTestPattern(channel, TestAddr<<8, Pattern, buf_a); //0: fail, 1=pass
1152 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 146 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);
1153 SetTargetWTIO(TestAddr);
1154 FlushDQSTestPattern(TestAddr<<8, Pattern);
/* second phase: per byte lane, find the widest run of consecutive passing delays */
1160 for(ByteLane = 0; ByteLane < 8; ByteLane++) {
1161 print_debug_dqs("\t\t\t\tTrainDQSPos: 31 ByteLane ",ByteLane, 4);
1163 LastTest = DQS_FAIL;
1164 RnkDlySeqPassMax = 0;
1165 RnkDlyFilterMax = 0;
1166 RnkDlyFilterMin = 0;
1167 for(DQSDelay=0; DQSDelay<48; DQSDelay++) {
1168 if(MutualCSPassW[DQSDelay] & (1<<ByteLane)) {
1170 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 321 DQSDelay ", DQSDelay, 5);
1171 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 322 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);
1173 RnkDlySeqPassMax = DQSDelay;
1174 if(LastTest == DQS_FAIL) {
1175 RnkDlySeqPassMin = DQSDelay; //start sequential run
/* keep the current run as the best window when it is wider than the best so far */
1177 if((RnkDlySeqPassMax - RnkDlySeqPassMin)>(RnkDlyFilterMax-RnkDlyFilterMin)){
1178 RnkDlyFilterMin = RnkDlySeqPassMin;
1179 RnkDlyFilterMax = RnkDlySeqPassMax;
1181 LastTest = DQS_PASS;
1184 LastTest = DQS_FAIL;
1187 print_debug_dqs("\t\t\t\tTrainDQSPos: 33 RnkDlySeqPassMax ", RnkDlySeqPassMax, 4);
/* NOTE(review): RnkDlySeqPassMax is also 0 when delay 0 is the only passing setting */
1189 if(RnkDlySeqPassMax == 0) {
1190 Errors |= SB_NODQSPOS; // no passing window
1193 print_debug_dqs("\t\t\t\tTrainDQSPos: 34 RnkDlyFilterMax ", RnkDlyFilterMax, 4);
1194 print_debug_dqs("\t\t\t\tTrainDQSPos: 34 RnkDlyFilterMin ", RnkDlyFilterMin, 4);
1195 if((RnkDlyFilterMax - RnkDlyFilterMin)< MIN_DQS_WNDW){
1196 Errors |= SB_SMALLDQS;
1199 unsigned middle_dqs;
1200 middle_dqs = MiddleDQS(RnkDlyFilterMin, RnkDlyFilterMax);
1201 print_debug_dqs("\t\t\t\tTrainDQSPos: 35 middle_dqs ",middle_dqs, 4);
1202 SetDQSDelayCSR(ctrl, channel, ByteLane, Direction, middle_dqs);
1203 save_dqs_delay(channel, ByteLane, Direction, dqs_delay_a, middle_dqs);
1209 print_debug_dqs("\t\t\tTrainDQSPos: end", 0xff, 3);
/*
 * Read-direction DQS position training for `channel`: forwards to
 * TrainDQSPos() with DQS_READDIR and returns its result.
 */
1216 static unsigned TrainReadDQS(const struct mem_controller *ctrl, unsigned channel, unsigned pattern, uint8_t *buf_a, uint8_t *dqs_delay_a, struct sys_info *sysinfo)
1218 print_debug_dqs("\t\tTrainReadPos", 0, 2);
1219 return TrainDQSPos(ctrl, channel, DQS_READDIR, pattern, buf_a, dqs_delay_a, sysinfo);
/*
 * Write-direction DQS position training for `channel`: forwards to
 * TrainDQSPos() with DQS_WRITEDIR and returns its result.
 */
1222 static unsigned TrainWriteDQS(const struct mem_controller *ctrl, unsigned channel, unsigned pattern, uint8_t *buf_a, uint8_t *dqs_delay_a, struct sys_info *sysinfo)
1224 print_debug_dqs("\t\tTrainWritePos", 0, 2);
1225 return TrainDQSPos(ctrl, channel, DQS_WRITEDIR, pattern, buf_a, dqs_delay_a, sysinfo);
1230 static unsigned TrainDQSRdWrPos(const struct mem_controller *ctrl, struct sys_info *sysinfo)
1232 static const uint32_t TestPatternJD1a[] = {
1233 0x00000000,0x00000000,0xFFFFFFFF,0xFFFFFFFF, // QW0-1, ALL-EVEN
1234 0x00000000,0x00000000,0x00000000,0x00000000, // QW2-3, ALL-EVEN
1235 0x00000000,0x00000000,0xFFFFFFFF,0xFFFFFFFF, // QW4-5, ALL-EVEN
1236 0x00000000,0x00000000,0x00000000,0x00000000, // QW6-7, ALL-EVEN
1237 0xFeFeFeFe,0xFeFeFeFe,0x01010101,0x01010101, // QW0-1, DQ0-ODD
1238 0xFeFeFeFe,0xFeFeFeFe,0x01010101,0x01010101, // QW2-3, DQ0-ODD
1239 0x01010101,0x01010101,0xFeFeFeFe,0xFeFeFeFe, // QW4-5, DQ0-ODD
1240 0xFeFeFeFe,0xFeFeFeFe,0x01010101,0x01010101, // QW6-7, DQ0-ODD
1241 0x02020202,0x02020202,0x02020202,0x02020202, // QW0-1, DQ1-ODD
1242 0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, // QW2-3, DQ1-ODD
1243 0xFdFdFdFd,0xFdFdFdFd,0x02020202,0x02020202, // QW4-5, DQ1-ODD
1244 0x02020202,0x02020202,0x02020202,0x02020202, // QW6-7, DQ1-ODD
1245 0x04040404,0x04040404,0xfBfBfBfB,0xfBfBfBfB, // QW0-1, DQ2-ODD
1246 0x04040404,0x04040404,0x04040404,0x04040404, // QW2-3, DQ2-ODD
1247 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW4-5, DQ2-ODD
1248 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW6-7, DQ2-ODD
1249 0x08080808,0x08080808,0xF7F7F7F7,0xF7F7F7F7, // QW0-1, DQ3-ODD
1250 0x08080808,0x08080808,0x08080808,0x08080808, // QW2-3, DQ3-ODD
1251 0xF7F7F7F7,0xF7F7F7F7,0x08080808,0x08080808, // QW4-5, DQ3-ODD
1252 0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW6-7, DQ3-ODD
1253 0x10101010,0x10101010,0x10101010,0x10101010, // QW0-1, DQ4-ODD
1254 0xeFeFeFeF,0xeFeFeFeF,0x10101010,0x10101010, // QW2-3, DQ4-ODD
1255 0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW4-5, DQ4-ODD
1256 0xeFeFeFeF,0xeFeFeFeF,0x10101010,0x10101010, // QW6-7, DQ4-ODD
1257 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW0-1, DQ5-ODD
1258 0xdFdFdFdF,0xdFdFdFdF,0x20202020,0x20202020, // QW2-3, DQ5-ODD
1259 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW4-5, DQ5-ODD
1260 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW6-7, DQ5-ODD
1261 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW0-1, DQ6-ODD
1262 0x40404040,0x40404040,0xBfBfBfBf,0xBfBfBfBf, // QW2-3, DQ6-ODD
1263 0x40404040,0x40404040,0xBfBfBfBf,0xBfBfBfBf, // QW4-5, DQ6-ODD
1264 0x40404040,0x40404040,0xBfBfBfBf,0xBfBfBfBf, // QW6-7, DQ6-ODD
1265 0x80808080,0x80808080,0x7F7F7F7F,0x7F7F7F7F, // QW0-1, DQ7-ODD
1266 0x80808080,0x80808080,0x7F7F7F7F,0x7F7F7F7F, // QW2-3, DQ7-ODD
1267 0x80808080,0x80808080,0x7F7F7F7F,0x7F7F7F7F, // QW4-5, DQ7-ODD
1268 0x80808080,0x80808080,0x80808080,0x80808080 // QW6-7, DQ7-ODD
1270 static const uint32_t TestPatternJD1b[] = {
1271 0x00000000,0x00000000,0x00000000,0x00000000, // QW0,CHA-B, ALL-EVEN
1272 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, // QW1,CHA-B, ALL-EVEN
1273 0x00000000,0x00000000,0x00000000,0x00000000, // QW2,CHA-B, ALL-EVEN
1274 0x00000000,0x00000000,0x00000000,0x00000000, // QW3,CHA-B, ALL-EVEN
1275 0x00000000,0x00000000,0x00000000,0x00000000, // QW4,CHA-B, ALL-EVEN
1276 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, // QW5,CHA-B, ALL-EVEN
1277 0x00000000,0x00000000,0x00000000,0x00000000, // QW6,CHA-B, ALL-EVEN
1278 0x00000000,0x00000000,0x00000000,0x00000000, // QW7,CHA-B, ALL-EVEN
1279 0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, // QW0,CHA-B, DQ0-ODD
1280 0x01010101,0x01010101,0x01010101,0x01010101, // QW1,CHA-B, DQ0-ODD
1281 0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, // QW2,CHA-B, DQ0-ODD
1282 0x01010101,0x01010101,0x01010101,0x01010101, // QW3,CHA-B, DQ0-ODD
1283 0x01010101,0x01010101,0x01010101,0x01010101, // QW4,CHA-B, DQ0-ODD
1284 0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, // QW5,CHA-B, DQ0-ODD
1285 0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, // QW6,CHA-B, DQ0-ODD
1286 0x01010101,0x01010101,0x01010101,0x01010101, // QW7,CHA-B, DQ0-ODD
1287 0x02020202,0x02020202,0x02020202,0x02020202, // QW0,CHA-B, DQ1-ODD
1288 0x02020202,0x02020202,0x02020202,0x02020202, // QW1,CHA-B, DQ1-ODD
1289 0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, // QW2,CHA-B, DQ1-ODD
1290 0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, // QW3,CHA-B, DQ1-ODD
1291 0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, // QW4,CHA-B, DQ1-ODD
1292 0x02020202,0x02020202,0x02020202,0x02020202, // QW5,CHA-B, DQ1-ODD
1293 0x02020202,0x02020202,0x02020202,0x02020202, // QW6,CHA-B, DQ1-ODD
1294 0x02020202,0x02020202,0x02020202,0x02020202, // QW7,CHA-B, DQ1-ODD
1295 0x04040404,0x04040404,0x04040404,0x04040404, // QW0,CHA-B, DQ2-ODD
1296 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW1,CHA-B, DQ2-ODD
1297 0x04040404,0x04040404,0x04040404,0x04040404, // QW2,CHA-B, DQ2-ODD
1298 0x04040404,0x04040404,0x04040404,0x04040404, // QW3,CHA-B, DQ2-ODD
1299 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW4,CHA-B, DQ2-ODD
1300 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW5,CHA-B, DQ2-ODD
1301 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW6,CHA-B, DQ2-ODD
1302 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW7,CHA-B, DQ2-ODD
1303 0x08080808,0x08080808,0x08080808,0x08080808, // QW0,CHA-B, DQ3-ODD
1304 0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW1,CHA-B, DQ3-ODD
1305 0x08080808,0x08080808,0x08080808,0x08080808, // QW2,CHA-B, DQ3-ODD
1306 0x08080808,0x08080808,0x08080808,0x08080808, // QW3,CHA-B, DQ3-ODD
1307 0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW4,CHA-B, DQ3-ODD
1308 0x08080808,0x08080808,0x08080808,0x08080808, // QW5,CHA-B, DQ3-ODD
1309 0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW6,CHA-B, DQ3-ODD
1310 0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW7,CHA-B, DQ3-ODD
1311 0x10101010,0x10101010,0x10101010,0x10101010, // QW0,CHA-B, DQ4-ODD
1312 0x10101010,0x10101010,0x10101010,0x10101010, // QW1,CHA-B, DQ4-ODD
1313 0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW2,CHA-B, DQ4-ODD
1314 0x10101010,0x10101010,0x10101010,0x10101010, // QW3,CHA-B, DQ4-ODD
1315 0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW4,CHA-B, DQ4-ODD
1316 0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW5,CHA-B, DQ4-ODD
1317 0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW6,CHA-B, DQ4-ODD
1318 0x10101010,0x10101010,0x10101010,0x10101010, // QW7,CHA-B, DQ4-ODD
1319 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW0,CHA-B, DQ5-ODD
1320 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW1,CHA-B, DQ5-ODD
1321 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW2,CHA-B, DQ5-ODD
1322 0x20202020,0x20202020,0x20202020,0x20202020, // QW3,CHA-B, DQ5-ODD
1323 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW4,CHA-B, DQ5-ODD
1324 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW5,CHA-B, DQ5-ODD
1325 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW6,CHA-B, DQ5-ODD
1326 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW7,CHA-B, DQ5-ODD
1327 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW0,CHA-B, DQ6-ODD
1328 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW1,CHA-B, DQ6-ODD
1329 0x40404040,0x40404040,0x40404040,0x40404040, // QW2,CHA-B, DQ6-ODD
1330 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW3,CHA-B, DQ6-ODD
1331 0x40404040,0x40404040,0x40404040,0x40404040, // QW4,CHA-B, DQ6-ODD
1332 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW5,CHA-B, DQ6-ODD
1333 0x40404040,0x40404040,0x40404040,0x40404040, // QW6,CHA-B, DQ6-ODD
1334 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW7,CHA-B, DQ6-ODD
1335 0x80808080,0x80808080,0x80808080,0x80808080, // QW0,CHA-B, DQ7-ODD
1336 0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F, // QW1,CHA-B, DQ7-ODD
1337 0x80808080,0x80808080,0x80808080,0x80808080, // QW2,CHA-B, DQ7-ODD
1338 0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F, // QW3,CHA-B, DQ7-ODD
1339 0x80808080,0x80808080,0x80808080,0x80808080, // QW4,CHA-B, DQ7-ODD
1340 0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F, // QW5,CHA-B, DQ7-ODD
1341 0x80808080,0x80808080,0x80808080,0x80808080, // QW6,CHA-B, DQ7-ODD
1342 0x80808080,0x80808080,0x80808080,0x80808080 // QW7,CHA-B, DQ7-ODD
1344 uint8_t pattern_buf_x[64 * 18 + 16]; // We need to two cache line So have more 16 bytes to keep 16 byte alignment */
1353 unsigned DQSWrDelay;
1354 unsigned is_Width128 = sysinfo->meminfo[ctrl->node_id].is_Width128;
1355 uint8_t *dqs_delay_a = &sysinfo->dqs_delay_a[ctrl->node_id * 2*2*9]; //channel 2, direction 2 , bytelane *9
1364 dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
1365 ecc_bit = dword & DCL_DimmEccEn;
1366 dword &= ~(DCL_DimmEccEn);
1367 pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dword);
1370 buf_a = (uint8_t *)(((uint32_t)(&pattern_buf_x[0]) + 0x10) & (~0xf));
1374 for(i=0;i<16*18;i++) {
1375 *((uint32_t *)(buf_a + i*4)) = TestPatternJD1b[i];
1380 for(i=0; i<16*9;i++) {
1381 *((uint32_t *)(buf_a + i*4)) = TestPatternJD1a[i];
1386 print_debug_dqs("\r\nTrainDQSRdWrPos: 0 ctrl ", ctrl->node_id, 0);
1388 print_debug_addr("TrainDQSRdWrPos: buf_a:", buf_a);
1393 while( (channel<2) && (!Errors)) {
1394 print_debug_dqs("\tTrainDQSRdWrPos: 1 channel ",channel, 1);
1395 for(DQSWrDelay = 0; DQSWrDelay < 48; DQSWrDelay++) {
1397 SetDQSDelayAllCSR(ctrl, channel, DQS_WRITEDIR, DQSWrDelay);
1398 print_debug_dqs("\t\tTrainDQSRdWrPos: 21 DQSWrDelay ", DQSWrDelay, 2);
1399 err= TrainReadDQS(ctrl, channel, pattern, buf_a, dqs_delay_a, sysinfo);
1400 print_debug_dqs("\t\tTrainDQSRdWrPos: 22 err ",err, 2);
1405 print_debug_dqs("\tTrainDQSRdWrPos: 3 DQSWrDelay ", DQSWrDelay, 1);
1407 if(DQSWrDelay < 48) {
1408 Errors = TrainWriteDQS(ctrl, channel, pattern, buf_a, dqs_delay_a, sysinfo);
1409 print_debug_dqs("\tTrainDQSRdWrPos: 4 Errors ", Errors, 1);
1414 //FIXME: 64MuxMode??
1415 channel++; // skip channel if 64-bit mode
1420 dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
1421 dword &= ~(DCL_DimmEccEn);
1423 pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dword);
1429 //restore SSE2 setting
1432 print_debug_dqs("TrainDQSRdWrPos: ", 5, 0);
/*
 * Fetch one stored DQS delay from a per-node delay table.
 *
 * The table is indexed as [channel][direction][bytelane] with 2 directions
 * and 9 byte lanes per channel, one byte per entry.
 */
static inline uint8_t get_dqs_delay(unsigned channel, unsigned bytelane, unsigned direction, uint8_t *dqs_delay_a)
{
	return dqs_delay_a[channel * 2*9 + direction * 9 + bytelane];
}

/*
 * Interpolate a DQS delay between the stored delays of two byte lanes.
 *
 * InterFactor (expected range 0..0xff) selects the blend, in 1/256 steps:
 *	0:    100% ByteLane0
 *	0x80: 50% between ByteLane0 and ByteLane1
 *	0xff: 99.6% ByteLane1 and 0.4% ByteLane0
 *
 * Returns the interpolated delay (fraction truncated).
 *
 * The weight applied when ByteLane0 has the larger delay is the exact
 * complement (0x100 - InterFactor).  Using 0xff - InterFactor here made
 * InterFactor == 0 return ByteLane0's delay minus one and made the result
 * depend on which of the two lanes happened to be larger.
 */
static unsigned CalcEccDQSPos(unsigned channel,unsigned ByteLane0, unsigned ByteLane1, unsigned InterFactor, unsigned Direction, uint8_t *dqs_delay_a)
{
	unsigned DQSDelay0, DQSDelay1;
	unsigned DQSDelay;

	DQSDelay0 = get_dqs_delay(channel, ByteLane0, Direction, dqs_delay_a);
	DQSDelay1 = get_dqs_delay(channel, ByteLane1, Direction, dqs_delay_a);

	if(DQSDelay0>DQSDelay1) {
		/* Measure from the smaller delay (lane 1), so weight lane 0's share. */
		DQSDelay = DQSDelay0 - DQSDelay1;
		InterFactor = 0x100 - InterFactor;
	}
	else {
		DQSDelay = DQSDelay1 - DQSDelay0;
	}

	DQSDelay *= InterFactor;

	DQSDelay >>= 8; // /256

	/* Add the scaled distance back onto the smaller of the two delays. */
	if(DQSDelay0>DQSDelay1) {
		DQSDelay += DQSDelay1;
	}
	else {
		DQSDelay += DQSDelay0;
	}

	return DQSDelay;
}
/*
 * Compute a DQS delay with CalcEccDQSPos() and program it for byte lane
 * `ByteLane` on channels 0 and 1 of this controller.
 *
 * For each channel and for the direction taken from direction[] (read, write),
 * the delay is derived from byte lanes 4 and 5 with ratio 0, written to the
 * controller with SetDQSDelayCSR() and recorded in the node's slice of
 * sysinfo->dqs_delay_a via save_dqs_delay().
 *
 * NOTE(review): the loop over `i` and the value assigned to ByteLane are not
 * visible in this listing; from the function name it is presumably the ECC
 * lane -- confirm against the full source.
 */
1477 static void SetEccDQSRdWrPos(const struct mem_controller *ctrl, struct sys_info *sysinfo)
1482 unsigned lane0, lane1, ratio;
1485 unsigned direction[] = { DQS_READDIR, DQS_WRITEDIR };
/* Each node owns 2 channels * 2 directions * 9 byte lanes of delay entries. */
1487 uint8_t *dqs_delay_a = &sysinfo->dqs_delay_a[ctrl->node_id * 2*2*9]; //channel 2, direction 2 , bytelane *9
1491 for(channel = 0; channel < 2; channel++) {
1493 Direction = direction[i];
/* ratio 0 selects 100% of lane0 (byte lane 4) per CalcEccDQSPos()'s contract. */
1494 lane0 = 4; lane1 = 5; ratio = 0;
1495 dqs_delay = CalcEccDQSPos(channel, lane0, lane1, ratio, Direction, dqs_delay_a);
1496 print_debug_dqs_pair("\t\tSetEccDQSRdWrPos: channel ", channel, Direction==DQS_READDIR? " R dqs_delay":" W dqs_delay", dqs_delay, 2);
1497 SetDQSDelayCSR(ctrl, channel, ByteLane, Direction, dqs_delay);
1498 save_dqs_delay(channel, ByteLane, Direction, dqs_delay_a, dqs_delay);
/*
 * Run TrainRcvrEn() for one controller and pass number `Pass`, bracketed by
 * begin/end debug traces that print the controller's node id.
 *
 * Callers in this file abort training when the return value is non-zero.
 * NOTE(review): the return statements are not visible in this listing.
 */
1503 static unsigned train_DqsRcvrEn(const struct mem_controller *ctrl, unsigned Pass, struct sys_info *sysinfo)
1505 print_debug_dqs("\r\ntrain_DqsRcvrEn: begin ctrl ", ctrl->node_id, 0);
1506 if(TrainRcvrEn(ctrl, Pass, sysinfo)) {
1509 print_debug_dqs("\r\ntrain_DqsRcvrEn: end ctrl ", ctrl->node_id, 0);
/*
 * Train the DQS read/write positions of one controller.
 *
 * Calls TrainDQSRdWrPos(); a non-zero result is reported on the error console
 * together with the node id.  SetEccDQSRdWrPos() is then called to program the
 * delay derived from the trained lanes.
 *
 * Callers in this file abort training when the return value is non-zero.
 * NOTE(review): the return statements (and whether the failure path skips
 * SetEccDQSRdWrPos()) are not visible in this listing.
 */
1513 static unsigned train_DqsPos(const struct mem_controller *ctrl, struct sys_info *sysinfo)
1515 print_debug_dqs("\r\ntrain_DqsPos: begin ctrl ", ctrl->node_id, 0);
1516 if(TrainDQSRdWrPos(ctrl, sysinfo) != 0) {
1517 print_err("\r\nDQS Training Rd Wr failed ctrl"); print_err_hex8(ctrl->node_id); print_err("\r\n");
1521 SetEccDQSRdWrPos(ctrl, sysinfo);
1523 print_debug_dqs("\r\ntrain_DqsPos: end ctrl ", ctrl->node_id, 0);
/*
 * Workaround that is only compiled when K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1.
 *
 * First pass, per present controller with DIMMs for which
 * is_cpu_pre_f2_in_bsp(i) is non-zero: clear DC_DqsRcvEnTrain in DRAM_CTRL,
 * set and then clear DI_EnDramInit in DRAM_INIT, and add tsc0[i] to tsc1[i]
 * as a 64-bit value split into hi/lo halves.
 * Second pass, for the same controllers: loop until the timestamp held in
 * `tsc` is no longer below tsc1[i].
 *
 * NOTE(review): the declarations of tsc1/tsc and the statements that fill
 * them are not visible in this listing -- presumably TSC reads; confirm.
 */
1528 #if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
1529 static void f0_svm_workaround(int controllers, const struct mem_controller *ctrl, tsc_t *tsc0, struct sys_info *sysinfo)
/* Per-controller flag remembered from the first loop for use in the second. */
1532 unsigned cpu_f0_f1[8];
1535 print_debug_addr("dqs_timing: tsc1[8] :", tsc1);
1537 for(i = 0; i < controllers; i++) {
1538 if (!sysinfo->ctrl_present[i])
1541 /* Skip everything if I don't have any memory on this controller */
1542 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1546 cpu_f0_f1[i] = is_cpu_pre_f2_in_bsp(i);
1548 if(!cpu_f0_f1[i]) continue;
/* Take the controller out of receiver-enable training mode. */
1550 dword = pci_read_config32(ctrl[i].f2, DRAM_CTRL);
1551 dword &= ~DC_DqsRcvEnTrain;
1552 pci_write_config32(ctrl[i].f2, DRAM_CTRL, dword);
/* Pulse DI_EnDramInit: write it set, then write it cleared again. */
1554 dword = pci_read_config32(ctrl[i].f2, DRAM_INIT);
1555 dword |= DI_EnDramInit;
1556 pci_write_config32(ctrl[i].f2, DRAM_INIT, dword);
1557 dword &= ~DI_EnDramInit;
1558 pci_write_config32(ctrl[i].f2, DRAM_INIT, dword);
1561 print_debug_dqs_tsc("begin: tsc1", i, tsc1[i].hi, tsc1[i].lo, 2);
/* 64-bit add done in 32-bit halves: a wrapped low sum signals a carry. */
1563 dword = tsc1[i].lo + tsc0[i].lo;
1564 if((dword<tsc1[i].lo) || (dword<tsc0[i].lo)) {
1568 tsc1[i].hi+= tsc0[i].hi;
1570 print_debug_dqs_tsc("end : tsc1", i, tsc1[i].hi, tsc1[i].lo, 2);
1574 for(i = 0; i < controllers; i++) {
1575 if (!sysinfo->ctrl_present[i])
1578 /* Skip everything if I don't have any memory on this controller */
1579 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1581 if(!cpu_f0_f1[i]) continue;
/* Keep looping while tsc1[i] is still greater than tsc (hi half first, then lo). */
1587 } while ((tsc1[i].hi>tsc.hi) || ((tsc1[i].hi==tsc.hi) && (tsc1[i].lo>tsc.lo)));
1589 print_debug_dqs_tsc("end : tsc ", i, tsc.hi, tsc.lo, 2);
1597 /* Program one variable MTRR; derived from the Linux kernel source.
 *
 * reg          index of the variable MTRR pair to program
 * basek, sizek base and size of the range, in KiB
 * type         memory type for the range
 * address_bits physical address width, used to build the upper mask bits;
 *              the shift below is only defined for 32 <= address_bits < 64
 *
 * NOTE(review): the lines that merge `type` and the valid bit into base/mask,
 * and the branch for sizek >= 4 GiB, are only partly visible in this listing.
 */
1598 static void set_var_mtrr_dqs(
1599 unsigned int reg, unsigned long basek, unsigned long sizek,
1600 unsigned char type, unsigned address_bits)
1603 unsigned address_mask_high;
/* Ones for every physical address bit above bit 31. */
1605 address_mask_high = ((1u << (address_bits - 32u)) - 1u);
/* KiB -> bytes is a shift by 10; the 64-bit result is split into hi/lo. */
1607 base.hi = basek >> 22;
1608 base.lo = basek << 10;
/* Below 4 GiB (4*1024*1024 KiB) the size only affects the low mask half. */
1610 if (sizek < 4*1024*1024) {
1611 mask.hi = address_mask_high;
1612 mask.lo = ~((sizek << 10) -1);
1615 mask.hi = address_mask_high & (~((sizek >> 22) -1));
1624 zero.lo = zero.hi = 0;
1625 /* The invalid bit is kept in the mask, so we simply clear the
1626 relevant mask register to disable a range. */
1627 wrmsr (MTRRphysMask_MSR(reg), zero);
1629 /* Bit 32-35 of MTRRphysMask should be set to 1 */
/* The range was disabled above, so base is written before the mask re-enables it. */
1632 wrmsr (MTRRphysBase_MSR(reg), base);
1633 wrmsr (MTRRphysMask_MSR(reg), mask);
1638 /* fms: find most significant bit set, stolen from Linux Kernel Source.
 * Uses the x86 BSR (bit scan reverse) instruction on x.
 * NOTE(review): the asm lines that handle x == 0 are not visible in this
 * listing; check the full source for the value returned in that case. */
1639 static inline unsigned int fms(unsigned int x)
1643 __asm__("bsrl %1,%0\n\t"
1646 "1:" : "=r" (r) : "g" (x));
1650 /* fls: find least significant bit set.
 * Uses the x86 BSF (bit scan forward) instruction on x.  Despite the name,
 * this is not the Linux kernel's fls() ("find last set").
 * NOTE(review): the asm lines that handle x == 0 are not visible in this
 * listing; check the full source for the value returned in that case. */
1651 static inline unsigned int fls(unsigned int x)
1655 __asm__("bsfl %1,%0\n\t"
1658 "1:" : "=r" (r) : "g" (x));
/*
 * Cover [range_startk, range_startk + range_sizek) (both in KiB) with
 * variable MTRRs of the given type, starting at MTRR index `reg`.
 *
 * Each iteration programs one MTRR through set_var_mtrr_dqs(), then advances
 * the start and shrinks the remaining size by the chunk just covered.
 * Nothing is programmed when the range is empty or reg is already >= 8.
 *
 * NOTE(review): the computation of `sizek` from align/max_align, any check of
 * `reg` inside the loop, and the return value (presumably the next free MTRR
 * index, given how setup_mtrr_dqs() stores it) are not visible in this listing.
 */
1662 static unsigned int range_to_mtrr(unsigned int reg,
1663 unsigned long range_startk, unsigned long range_sizek,
1664 unsigned long next_range_startk, unsigned char type, unsigned address_bits)
1666 if (!range_sizek || (reg >= 8)) {
1669 while(range_sizek) {
1670 unsigned long max_align, align;
1671 unsigned long sizek;
1672 /* Compute the maximum size I can make a range */
/* max_align: lowest set bit of the start address; align: highest set bit of the size. */
1673 max_align = fls(range_startk);
1674 align = fms(range_sizek);
1675 if (align > max_align) {
/* Progress message; compiled out when MEM_TRAIN_SEQ == 1. */
1679 #if MEM_TRAIN_SEQ != 1
1680 #if CONFIG_USE_PRINTK_IN_CAR
1681 printk_debug("Setting variable MTRR %d, base: %4dMB, range: %4dMB, type %s\r\n",
1682 reg, range_startk >>10, sizek >> 10,
1683 (type==MTRR_TYPE_UNCACHEABLE)?"UC":
1684 ((type==MTRR_TYPE_WRBACK)?"WB":"Other")
1687 print_debug("Setting variable MTRR "); print_debug_hex8(reg); print_debug(", base: "); print_debug_hex16(range_startk>>10);
1688 print_debug("MB, range: "); print_debug_hex16(sizek >> 10); print_debug("MB, type ");
1689 print_debug( (type==MTRR_TYPE_UNCACHEABLE)?"UC\r\n":
1690 ((type==MTRR_TYPE_WRBACK)?"WB\r\n":"Other\r\n")
1694 set_var_mtrr_dqs(reg++, range_startk, sizek, type, address_bits);
1695 range_startk += sizek;
1696 range_sizek -= sizek;
/*
 * Write the TOP_MEM2 and TOP_MEM MSRs from tom2_k and tom_k (both in KiB).
 *
 * Each KiB value is converted to a byte address split over the MSR halves:
 * the low 22 bits shifted left by 10 go into .lo, the remaining upper bits
 * shifted right by 22 go into .hi.
 */
1703 static void set_top_mem_ap(unsigned tom_k, unsigned tom2_k)
1707 /* Now set top of memory */
1708 msr.lo = (tom2_k & 0x003fffff) << 10;
1709 msr.hi = (tom2_k & 0xffc00000) >> 22;
1710 wrmsr(TOP_MEM2, msr);
1712 msr.lo = (tom_k & 0x003fffff) << 10;
1713 msr.hi = (tom_k & 0xffc00000) >> 22;
1714 wrmsr(TOP_MEM, msr);
/*
 * Set up MTRRs so that DQS training can access its test addresses.
 *
 * Sets SYSCFG_MSR_MtrrFixDramModEn, prepares a fixed-range MTRR value,
 * maps [0, tom_k) as write-back with variable MTRRs starting at index 2
 * (40 address bits), and finally sets SYSCFG bits 21 and 22.
 *
 * NOTE(review): the MSR writes that consume the 0x1e1e1e1e value and any use
 * of tom2_k are not visible in this listing.
 */
1717 static void setup_mtrr_dqs(unsigned tom_k, unsigned tom2_k){
1722 //still enable from cache_as_ram.inc
1723 msr = rdmsr(SYSCFG_MSR);
1724 msr.lo |= SYSCFG_MSR_MtrrFixDramModEn;
1725 wrmsr(SYSCFG_MSR,msr);
1728 //[0,512k), [512k, 640k)
1729 msr.hi = 0x1e1e1e1e;
/* Variable MTRRs from index 2 upward describe RAM below tom_k as WB. */
1735 reg = range_to_mtrr(2, 0, tom_k,4*1024*1024, MTRR_TYPE_WRBACK, 40);
1739 //enable tom2 and type
1740 msr = rdmsr(SYSCFG_MSR);
1741 msr.lo |= (1<<21) | (1<<22); //MtrrTom2En and Tom2ForceMemTypeWB
1742 wrmsr(SYSCFG_MSR, msr);
/*
 * Undo the MTRR setup done for DQS training.
 *
 * Sets SYSCFG_MSR_MtrrFixDramModEn, walks MSR numbers 0x204..0x20f, and
 * clears SYSCFG bits 21 and 22 again.
 *
 * NOTE(review): the statements executed for the fixed-range MTRRs and inside
 * the 0x204..0x20f loop are not visible in this listing (presumably they
 * write zero to each MSR -- confirm).  tom2_k is not used in the visible lines.
 */
1747 static void clear_mtrr_dqs(unsigned tom2_k){
1751 //still enable from cache_as_ram.inc
1752 msr = rdmsr(SYSCFG_MSR);
1753 msr.lo |= SYSCFG_MSR_MtrrFixDramModEn;
1754 wrmsr(SYSCFG_MSR,msr);
1756 //[0,512k), [512k, 640k)
1763 for(i=0x204;i<0x210;i++) {
1769 //disable tom2 and type
1770 msr = rdmsr(SYSCFG_MSR);
1771 msr.lo &= ~((1<<21) | (1<<22)); //MtrrTom2En and Tom2ForceMemTypeWB
1772 wrmsr(SYSCFG_MSR, msr);
/*
 * Read-modify-write one bit of HT_INIT_CONTROL on PCI device 0:(0x18+i).0.
 *
 * Only bit 0 of `val` is used; it is shifted to position `bit` and OR-ed into
 * the register value before it is written back.
 * NOTE(review): a line between the read and the OR is missing from this
 * listing; presumably it clears the target bit first -- confirm.
 */
1776 static void set_htic_bit(unsigned i, unsigned val, unsigned bit)
1779 dword = pci_read_config32(PCI_DEV(0, 0x18+i, 0), HT_INIT_CONTROL);
1781 dword |= ((val & 1) <<bit);
1782 pci_write_config32(PCI_DEV(0, 0x18+i, 0), HT_INIT_CONTROL, dword);
/*
 * Read HT_INIT_CONTROL of PCI device 0:(0x18+i).0.
 * NOTE(review): the extraction of `bit` and the return statement are not
 * visible in this listing; callers test the result for non-zero.
 */
1786 static unsigned get_htic_bit(unsigned i, unsigned bit)
1789 dword = pci_read_config32(PCI_DEV(0, 0x18+i, 0), HT_INIT_CONTROL);
/*
 * Return once bit 9 of node 0's HT_INIT_CONTROL register reads as set.
 * That bit is the flag written by set_sysinfo_in_ram().
 * NOTE(review): the enclosing loop is not visible in this listing.
 */
1794 static void wait_till_sysinfo_in_ram(void)
1797 if(get_htic_bit(0, 9)) return;
/*
 * Publish the "sysinfo is in RAM" flag: store bit 0 of `val` in bit 9 of
 * node 0's HT_INIT_CONTROL register, where wait_till_sysinfo_in_ram() polls it.
 */
1801 static void set_sysinfo_in_ram(unsigned val)
1803 set_htic_bit(0, val, 9);
/*
 * DQS training for all controllers (variant built when MEM_TRAIN_SEQ == 0).
 *
 * Sequence, each step looping over every present controller with DIMMs:
 *   1. fill_mem_cs_sysinfo()
 *   2. receiver-enable training, pass 1
 *   3. f0_svm_workaround() (only with K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1,
 *      which also adds the tsc0 parameter to the signature)
 *   4. DQS read/write position training
 *   5. receiver-enable training, pass 2; on success mem_trained[i] is set to 1
 * MTRRs are set up before step 1 and cleared afterwards.  A failing training
 * step jumps to the `out` label.
 *
 * NOTE(review): the `out` label, the tsc[] bookkeeping and the loop that
 * surrounds the final print are not visible in this listing.
 */
1807 #if MEM_TRAIN_SEQ == 0
1810 #if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
1811 static void dqs_timing(int controllers, const struct mem_controller *ctrl, tsc_t *tsc0, struct sys_info *sysinfo)
1813 static void dqs_timing(int controllers, const struct mem_controller *ctrl, struct sys_info *sysinfo)
1820 //need to enable mtrr, so dqs training could access the test address
1821 setup_mtrr_dqs(sysinfo->tom_k, sysinfo->tom2_k);
1823 for(i = 0; i < controllers; i++) {
1824 if (!sysinfo->ctrl_present[ i ])
1827 /* Skip everything if I don't have any memory on this controller */
1828 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1830 fill_mem_cs_sysinfo(i, ctrl+i, sysinfo);
1834 for(i = 0; i < controllers; i++) {
1835 if (!sysinfo->ctrl_present[ i ])
1838 /* Skip everything if I don't have any memory on this controller */
1839 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1841 print_debug("DQS Training:RcvrEn:Pass1: ");
1842 print_debug_hex8(i);
1843 if(train_DqsRcvrEn(ctrl+i, 1, sysinfo)) goto out;
1844 print_debug(" done\r\n");
1848 #if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
1849 f0_svm_workaround(controllers, ctrl, tsc0, sysinfo);
1853 for(i = 0; i < controllers; i++) {
1854 if (!sysinfo->ctrl_present[i])
1857 /* Skip everything if I don't have any memory on this controller */
1858 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1860 print_debug("DQS Training:DQSPos: ");
1861 print_debug_hex8(i);
1862 if(train_DqsPos(ctrl+i, sysinfo)) goto out;
1863 print_debug(" done\r\n");
1867 for(i = 0; i < controllers; i++) {
1868 if (!sysinfo->ctrl_present[i])
1871 /* Skip everything if I don't have any memory on this controller */
1872 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1874 print_debug("DQS Training:RcvrEn:Pass2: ");
1875 print_debug_hex8(i);
1876 if(train_DqsRcvrEn(ctrl+i, 2, sysinfo)) goto out;
1877 print_debug(" done\r\n");
/* Only controllers that completed all three training steps are marked trained. */
1878 sysinfo->mem_trained[i]=1;
1883 clear_mtrr_dqs(sysinfo->tom2_k);
1887 print_debug_dqs_tsc_x("DQS Training:tsc", i, tsc[i].hi, tsc[i].lo);
/*
 * DQS training for a single controller (variant built when MEM_TRAIN_SEQ > 0).
 *
 * Does nothing unless sysinfo->mem_trained[i] is 0x80.  Runs receiver-enable
 * pass 1, DQS position training and receiver-enable pass 2 in that order; a
 * failing step records 0x81, 0x82 or 0x83 respectively in mem_trained[i].
 * If the state is still 0x80 at the end it is changed to 1.
 * The MTRR setup/teardown is only compiled when MEM_TRAIN_SEQ == 1.
 *
 * NOTE(review): what follows each failure assignment, the tsc[] bookkeeping
 * and any use of the `v` parameter are not visible in this listing.
 */
1897 #if MEM_TRAIN_SEQ > 0
1899 static void dqs_timing(int i, const struct mem_controller *ctrl, struct sys_info *sysinfo, unsigned v)
1906 if(sysinfo->mem_trained[i] != 0x80) return;
1908 #if MEM_TRAIN_SEQ == 1
1909 //need to enable mtrr, so dqs training could access the test address
1910 setup_mtrr_dqs(sysinfo->tom_k, sysinfo->tom2_k);
1913 fill_mem_cs_sysinfo(i, ctrl, sysinfo);
1918 print_debug("set DQS timing:RcvrEn:Pass1: ");
1919 print_debug_hex8(i);
1921 if(train_DqsRcvrEn(ctrl, 1, sysinfo)) {
1922 sysinfo->mem_trained[i]=0x81; // receiver-enable pass 1 failed
1927 print_debug(" done\r\n");
1929 print_debug("set DQS timing:DQSPos: ");
1930 print_debug_hex8(i);
1933 if(train_DqsPos(ctrl, sysinfo)) {
1934 sysinfo->mem_trained[i]=0x82; // DQS position training failed
1939 print_debug(" done\r\n");
1942 print_debug("set DQS timing:RcvrEn:Pass2: ");
1943 print_debug_hex8(i);
1945 if(train_DqsRcvrEn(ctrl, 2, sysinfo)){
1946 sysinfo->mem_trained[i]=0x83; // receiver-enable pass 2 failed
1951 print_debug(" done\r\n");
1957 #if MEM_TRAIN_SEQ == 1
1958 clear_mtrr_dqs(sysinfo->tom2_k);
1962 for(ii=0;ii<4;ii++) {
1963 print_debug_dqs_tsc_x("Total DQS Training : tsc ", ii, tsc[ii].hi, tsc[ii].lo);
/* Still 0x80 means no step recorded a failure code: mark the controller trained. */
1967 if(sysinfo->mem_trained[i] == 0x80) {
1968 sysinfo->mem_trained[i]=1;
/*
 * Train one node's memory (only built when MEM_TRAIN_SEQ == 1).
 *
 * Runs dqs_timing() for `nodeid` using the local `sysinfo`, then copies the
 * resulting mem_trained[nodeid] state into `sysinfox`.  The memcpy calls that
 * would also copy the trained delay tables are commented out.
 */
1974 #if MEM_TRAIN_SEQ == 1
1975 static void train_ram(unsigned nodeid, struct sys_info *sysinfo, struct sys_info *sysinfox)
1977 dqs_timing(nodeid, &sysinfo->ctrl[nodeid], sysinfo, 0); // keep the output tidy
1978 // memcpy(&sysinfox->dqs_rcvr_dly_a[nodeid * 2 * 8],&sysinfo->dqs_rcvr_dly_a[nodeid * 2 * 8], 2*8);
1979 // memcpy(&sysinfox->dqs_delay_a[nodeid * 2 * 2 * 9], &sysinfo->dqs_delay_a[nodeid * 2 * 2 * 9], 2 * 2 * 9);
1980 sysinfox->mem_trained[nodeid] = sysinfo->mem_trained[nodeid];
1983 static void copy_and_run_ap_code_in_car(unsigned ret_addr);
1984 static inline void train_ram_on_node(unsigned nodeid, unsigned coreid, struct sys_info *sysinfo, unsigned retcall)
1986 if(coreid) return; // only do it on core0
1987 struct sys_info *sysinfox = ((CONFIG_LB_MEM_TOPK<<10) - DCACHE_RAM_GLOBAL_VAR_SIZE);
1988 wait_till_sysinfo_in_ram(); // use pci to get it
1990 if(sysinfox->mem_trained[nodeid] == 0x80) {
1992 sysinfo->tom_k = sysinfox->tom_k;
1993 sysinfo->tom2_k = sysinfox->tom2_k;
1994 sysinfo->meminfo[nodeid].is_Width128 = sysinfox->meminfo[nodeid].is_Width128;
1995 sysinfo->mem_trained[nodeid] = sysinfox->mem_trained[nodeid];
1996 memcpy(&sysinfo->ctrl[nodeid], &sysinfox->ctrl[nodeid], sizeof(struct mem_controller));
1998 memcpy(sysinfo, sysinfox, DCACHE_RAM_GLOBAL_VAR_SIZE);
2000 set_top_mem_ap(sysinfo->tom_k, sysinfo->tom2_k); // keep the ap's tom consistent with bsp's
2001 #if CONFIG_AP_CODE_IN_CAR == 0
2002 print_debug("CODE IN ROM AND RUN ON NODE:"); print_debug_hex8(nodeid); print_debug("\r\n");
2003 train_ram(nodeid, sysinfo, sysinfox);
2005 /* Can copy dqs_timing to ap cache and run from cache?
2006 * we need coreboot_ap_car.rom? and treat it as coreboot_ram.rom for ap ?
2008 copy_and_run_ap_code_in_car(retcall);
2009 // will go back by jump