2 * This file is part of the coreboot project.
4 * Copyright (C) 2005 YingHai Lu
5 * Copyright (C) 2008 Advanced Micro Devices, Inc.
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; version 2 of the License.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 //0: means no debug info. The level-taking print_debug_dqs* helpers below
// only print when DQS_TRAIN_DEBUG is greater than the level they are given.
22 #define DQS_TRAIN_DEBUG 0
// The #error text below states the requirement: this file needs
// CONFIG_USE_PRINTK_IN_CAR.
24 #if CONFIG_USE_PRINTK_IN_CAR
26 #error This file needs CONFIG_USE_PRINTK_IN_CAR
/*
 * Debug helper: prints str followed by val in hex through printk_debug.
 * Output is produced only when DQS_TRAIN_DEBUG is greater than zero
 * (preprocessor check) and greater than level (run-time check).
 */
29 static inline void print_debug_dqs(const char *str, unsigned val, unsigned level)
31 #if DQS_TRAIN_DEBUG > 0
32 if(DQS_TRAIN_DEBUG > level) {
33 printk_debug("%s%x\r\n", str, val);
/*
 * Debug helper: prints two label/value pairs on one line, each value as
 * eight hex digits. Gated by DQS_TRAIN_DEBUG in the same way as
 * print_debug_dqs: it must be non-zero and greater than level.
 */
38 static inline void print_debug_dqs_pair(const char *str, unsigned val, const char *str2, unsigned val2, unsigned level)
40 #if DQS_TRAIN_DEBUG > 0
41 if(DQS_TRAIN_DEBUG > level) {
42 printk_debug("%s%08x%s%08x\r\n", str, val, str2, val2);
/*
 * Debug helper: prints "str[i]=" followed by val and val2 as two
 * back-to-back eight-digit hex values. Gated by DQS_TRAIN_DEBUG being
 * non-zero and greater than level.
 */
47 static inline void print_debug_dqs_tsc(const char *str, unsigned i, unsigned val, unsigned val2, unsigned level)
49 #if DQS_TRAIN_DEBUG > 0
50 if(DQS_TRAIN_DEBUG > level) {
51 printk_debug("%s[%02x]=%08x%08x\r\n", str, i, val, val2);
/*
 * Same output format as print_debug_dqs_tsc ("str[i]=" plus two eight-digit
 * hex values) but takes no level argument.
 */
56 static inline void print_debug_dqs_tsc_x(const char *str, unsigned i, unsigned val, unsigned val2)
58 printk_debug("%s[%02x]=%08x%08x\r\n", str, i, val, val2);
/*
 * Caches memory-map registers of one node in sysinfo so that later code
 * (Get_MCTSysAddr, ChipSelPresent) can use them without PCI config reads:
 *   - sysinfo->mem_base[nodeid]     <- ctrl->f1, offset 0x40 + nodeid*8
 *   - sysinfo->cs_base[nodeid*8+i]  <- ctrl->f2, offset 0x40 + i*4
 *   - sysinfo->hole_reg[nodeid]     <- ctrl->f1, offset 0xf0
 */
62 static void fill_mem_cs_sysinfo(unsigned nodeid, const struct mem_controller *ctrl, struct sys_info *sysinfo)
66 sysinfo->mem_base[nodeid] = pci_read_config32(ctrl->f1, 0x40 + (nodeid<<3));
69 sysinfo->cs_base[nodeid*8+i] = pci_read_config32(ctrl->f2, 0x40 + (i<<2));
72 sysinfo->hole_reg[nodeid] = pci_read_config32(ctrl->f1, 0xf0);
/*
 * Computes a test address for chip select cs_idx on ctrl's node from the
 * values cached in sysinfo (cs_base, mem_base and, when HW_MEM_HOLE_SIZEK
 * is non-zero, hole_reg).
 *
 * The value is kept in units of 256 bytes (address >> 8): the 1MB offset
 * below is added as 1<<(20-8), and callers in this file use (addr >> 24)
 * as the FS base high dword and (addr << 8) as the low 32-bit offset.
 *
 * NOTE(review): (0xff<<24) shifts a signed int constant into the sign bit;
 * 0xffu<<24 would avoid relying on that -- confirm before changing.
 */
75 static unsigned Get_MCTSysAddr(const struct mem_controller *ctrl, unsigned cs_idx, struct sys_info *sysinfo)
79 unsigned nodeid = ctrl->node_id;
81 #if HW_MEM_HOLE_SIZEK != 0
85 //get the local base addr of the chipselect
86 dword = sysinfo->cs_base[nodeid * 8 + cs_idx];
89 //sys addr= node base + local cs base
90 mem_base = sysinfo->mem_base[nodeid];
91 mem_base &= 0xffff0000;
94 #if HW_MEM_HOLE_SIZEK != 0
95 hole_reg = sysinfo->hole_reg[nodeid];
98 hole_startk = (hole_reg & (0xff<<24)) >> 10;
// Addresses from the hole start up to 4GB are moved up by the hole size.
99 if( (dword >= (hole_startk<<2)) && (dword < ((4*1024*1024)<<2))) {
100 dword += ((4*1024*1024 - hole_startk)<<2);
105 //add 1MB offset to avoid compat area
106 dword += (1<<(20-8));
108 //So final result is upper 32 bit addr
/*
 * Returns the test address for a receiver: forwards to Get_MCTSysAddr for
 * chip select cs_idx. The channel argument is not used by the visible call.
 */
114 static unsigned Get_RcvrSysAddr(const struct mem_controller * ctrl, unsigned channel, unsigned cs_idx, struct sys_info *sysinfo)
116 return Get_MCTSysAddr(ctrl, cs_idx, sysinfo);
/* Reads control register CR4 into a local with a single movl. */
120 static inline unsigned long read_cr4(void)
123 asm volatile ("movl %%cr4, %0" : "=r" (cr4));
/* Loads control register CR4 with the given value using a single movl. */
127 static inline void write_cr4(unsigned long cr4)
129 asm volatile ("movl %0, %%cr4" : : "r" (cr4));
/*
 * NOTE(review): presumably enables SSE/SSE2 instruction use, which
 * WriteLNTestPattern needs for movdqa/movntdq -- confirm against the body.
 */
133 static inline void enable_sse2()
/*
 * NOTE(review): presumably the counterpart of enable_sse2 that turns
 * SSE/SSE2 use back off -- confirm against the body.
 */
141 static inline void disable_sse2()
/*
 * Read-modify-write of MSR 0xc0010015: the MSR is read, changed and
 * written back. NOTE(review): going by the name this sets the Wrap32Dis
 * bit -- confirm which bit is changed.
 */
150 static void set_wrap32dis(void) {
153 msr = rdmsr(0xc0010015);
156 wrmsr(0xc0010015, msr);
/*
 * Read-modify-write of MSR 0xc0010015, the counterpart of set_wrap32dis.
 * NOTE(review): going by the name this clears the Wrap32Dis bit -- confirm
 * which bit is changed.
 */
160 static void clear_wrap32dis(void) {
163 msr = rdmsr(0xc0010015);
166 wrmsr(0xc0010015, msr);
/*
 * Programs the FS segment base MSR (0xc0000100) so that later accesses
 * using the %fs prefix reach the memory under test. Callers in this file
 * pass the upper part of a test address (addr >> 24) as addr_hi.
 */
170 static void set_FSBASE(uint32_t addr_hi)
174 //set fs and use fs prefix to access the mem
177 wrmsr(0xc0000100, msr); //FS_BASE
/*
 * Reports whether chip select cs_idx of ctrl's node is present, using the
 * chip-select base value cached in sysinfo->cs_base[nodeid*8 + cs_idx].
 * NOTE(review): presumably the result is the enable bit of that value --
 * confirm the mask applied before returning.
 */
181 static unsigned ChipSelPresent(const struct mem_controller *ctrl, unsigned cs_idx, struct sys_info *sysinfo)
184 unsigned nodeid = ctrl->node_id;
187 enabled = sysinfo->cs_base[nodeid * 8 + cs_idx];
/*
 * Reports whether the rank behind chip select cs_idx is usable for
 * receiver training: forwards to ChipSelPresent. channel and is_Width128
 * are not used by the visible call.
 */
194 static unsigned RcvrRankEnabled(const struct mem_controller *ctrl, int channel, int cs_idx, unsigned is_Width128, struct sys_info *sysinfo)
196 return ChipSelPresent(ctrl, cs_idx, sysinfo);
/*
 * Copies test pattern data from buf_a to memory at %fs:addr_lo, moving 16
 * bytes at a time through xmm0: an aligned load from the buffer (movdqa)
 * followed by a non-temporal store (movntdq).
 *
 * Asm inputs: eax = addr_lo, edx = 16, ecx = line_num * 4, ebx = buf_a.
 * NOTE(review): ecx looks like the count of 16-byte moves (four per
 * 64-byte line) and edx like the step -- confirm against the loop body.
 * set_FSBASE() must have been called first so %fs points at the target.
 */
199 static void WriteLNTestPattern(unsigned addr_lo, uint8_t *buf_a, unsigned line_num)
203 "movdqa (%3), %%xmm0\n\t"
204 "movntdq %%xmm0, %%fs:(%0)\n\t" /* xmm0 is 128 bit */
209 :: "a" (addr_lo), "d" (16), "c" (line_num * 4), "b"(buf_a)
/*
 * Writes one line of test pattern to the test address addr (a value in
 * the address >> 8 form returned by Get_MCTSysAddr).
 *
 * p selects the source buffer: buf_b when p == 1, buf_a otherwise.
 * The FS base is set from addr >> 24 and the low offset is addr << 8.
 */
215 static void Write1LTestPattern(unsigned addr, unsigned p, uint8_t *buf_a, uint8_t *buf_b)
218 if(p==1) { buf = buf_b; }
219 else { buf = buf_a; }
221 set_FSBASE (addr>>24);
223 WriteLNTestPattern(addr<<8, buf, 1);
/*
 * Performs one 32-bit load from the test address addr through the %fs
 * prefix (FS base = addr >> 24, offset = addr << 8). The loaded value
 * lands in a local via ebx; the callers in this file use the call as a
 * cache fill before comparing.
 */
226 static void Read1LTestPattern(unsigned addr)
230 set_FSBASE(addr>>24);
232 /* 1st move causes read fill (to exclusive or shared)*/
234 "movl %%fs:(%1), %0\n\t"
235 :"=b"(value): "a" (addr<<8)
// Values of the Pass argument taken by TrainRcvrEn and CompareTestPatternQW0.
243 #define DQS_FIRST_PASS 1
244 #define DQS_SECOND_PASS 2
// Status and tuning constants. In this file the SB_* values are ORed
// directly into the Errors variable of the training routines, and
// MIN_DQS_WNDW is the smallest accepted passing-window width in TrainDQSPos.
246 #define SB_NORCVREN 11
247 #define RCVREN_MARGIN 6
248 #define SB_SmallRCVR 13
249 #define SB_CHA2BRCVREN 12
250 #define SB_NODQSPOS 14
251 #define MIN_DQS_WNDW 3
252 #define SB_SMALLDQS 15
/*
 * Compares the first quadword read back from test address addr with the
 * expected test pattern and returns DQS_PASS or DQS_FAIL.
 *
 * The expected data comes from TestPattern0/TestPattern1 on the first pass
 * and from TestPattern2 otherwise. Memory is read through %fs
 * (FS base = addr >> 24). In 128-bit mode, channel 1 data sits 8 bytes
 * further on. The low dword is checked first and the high dword only if
 * the low one matched. On the second pass the result is inverted.
 */
255 static unsigned CompareTestPatternQW0(unsigned channel, unsigned addr, unsigned pattern, const uint32_t *TestPattern0, const uint32_t *TestPattern1, const uint32_t *TestPattern2, unsigned Pass, unsigned is_Width128)
261 unsigned result = DQS_FAIL;
263 if(Pass == DQS_FIRST_PASS) {
265 test_buf = (uint32_t *)TestPattern1;
268 test_buf = (uint32_t *)TestPattern0;
272 test_buf = (uint32_t *)TestPattern2;
275 set_FSBASE(addr>>24);
279 if(is_Width128 && (channel == 1)) {
280 addr_lo += 8; //second channel
285 "movl %%fs:(%1), %0\n\t"
286 :"=b"(value): "a" (addr_lo)
289 value_test = *test_buf;
292 print_debug_dqs_pair("\t\t\t\t\t\tQW0.lo : test_buf= ", (unsigned)test_buf, " value = ", value_test, 4);
293 print_debug_dqs_pair("\t\t\t\t\t\tQW0.lo : addr_lo = ", addr_lo, " value = ", value, 4);
295 if(value == value_test) {
299 "movl %%fs:(%1), %0\n\t"
300 :"=b"(value): "a" (addr_lo)
302 value_test = *test_buf;
303 print_debug_dqs_pair("\t\t\t\t\t\tQW0.hi : test_buf= ", (unsigned)test_buf, " value = ", value_test, 4);
304 print_debug_dqs_pair("\t\t\t\t\t\tQW0.hi : addr_lo = ", addr_lo, " value = ", value, 4);
306 if(value == value_test){
311 if(Pass == DQS_SECOND_PASS) { // second pass need to be inverted
312 if(result==DQS_PASS) {
/*
 * Programs the MaxAsyncLat field of DRAM_CONFIG_HIGH from a receiver
 * enable delay.
 *
 * dly arrives in 50ps units and is rounded up to whole nanoseconds. The
 * field is then replaced with (dly - DCH_MaxAsyncLat_BASE) using a
 * read-modify-write of the register.
 */
324 static void SetMaxAL_RcvrDly(const struct mem_controller *ctrl, unsigned dly)
328 dly += (20-1); // round it
329 dly /= 20; // convert from unit 50ps to 1ns
334 reg = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH);
335 reg &= ~(DCH_MaxAsyncLat_MASK <<DCH_MaxAsyncLat_SHIFT);
336 reg |= ((dly - DCH_MaxAsyncLat_BASE) << DCH_MaxAsyncLat_SHIFT);
337 pci_write_config32(ctrl->f2, DRAM_CONFIG_HIGH, reg);
/*
 * SetTargetWTIO programs the IORR0 pair of MSRs: the base register
 * (0xc0010016) first, then the mask register (0xc0010017) with a low dword
 * of 0xfc000800, which the inline comment describes as a 64MB mask.
 * ResetTargetWTIO undoes this.
 */
342 Set the Target range to WT IO (using an IORR overlapping the already existing
343 WB dram type). Use IORR0
345 static void SetTargetWTIO(unsigned addr)
350 wrmsr(0xc0010016, msr); //IORR0 BASE
353 msr.lo = 0xfc000800; // 64MB Mask
354 wrmsr(0xc0010017, msr); // IORR0 Mask
/*
 * Rewrites the IORR0 mask MSR (0xc0010017). NOTE(review): presumably with
 * a cleared value so the range set up by SetTargetWTIO is disabled again
 * -- confirm the value written.
 */
357 static void ResetTargetWTIO(void)
363 wrmsr(0xc0010017, msr); // IORR0 Mask
/*
 * Flushes the cache line of test address addr: sets the FS base from
 * addr >> 24 and issues a clflush through the %fs prefix.
 */
366 static void proc_CLFLUSH(unsigned addr)
369 set_FSBASE(addr>>24);
371 /* 1st move causes read fill (to exclusive or shared)*/
373 /* clflush fs:[eax] */
374 "clflush %%fs:(%0)\n\t"
/*
 * Flush helper that TrainRcvrEn calls after each pattern compare.
 * NOTE(review): presumably wraps proc_CLFLUSH between SetTargetWTIO and
 * ResetTargetWTIO -- confirm against the body.
 */
379 static void proc_IOCLFLUSH(unsigned addr)
/*
 * Reads an indexed DCT register through function 2, offset 0x98 (index
 * starts at 0x10) and writes the same value straight back, then repeats
 * the read/write-back once more. NOTE(review): the function name suggests
 * the write-back is what resets the DCT write pointer -- confirm, and
 * confirm which index the second access uses.
 */
386 static void ResetDCTWrPtr(const struct mem_controller *ctrl)
389 unsigned index = 0x10;
391 dword = pci_read_config32_index_wait(ctrl->f2, 0x98, index);
392 pci_write_config32_index_wait(ctrl->f2, 0x98, index, dword);
395 dword = pci_read_config32_index_wait(ctrl->f2, 0x98, index);
396 pci_write_config32_index_wait(ctrl->f2, 0x98, index, dword);
401 static uint16_t get_exact_T1000(unsigned i)
404 static const uint16_t T1000_a[]= { 5000, 3759, 3003, 2500 };
406 static const uint16_t TT_a[] = {
408 /*4 */ 6250, 6250, 6250, 6250,
409 /*5 */ 5000, 5000, 5000, 2500,
410 /*6 */ 5000, 4166, 4166, 2500,
411 /*7 */ 5000, 4285, 3571, 2500,
413 /*8 */ 5000, 3750, 3125, 2500,
414 /*9 */ 5000, 3888, 3333, 2500,
415 /*10*/ 5000, 4000, 3000, 2500,
416 /*11*/ 5000, 4090, 3181, 2500,
418 /*12*/ 5000, 3750, 3333, 2500,
419 /*13*/ 5000, 3846, 3076, 2500,
420 /*14*/ 5000, 3928, 3214, 2500,
421 /*15*/ 5000, 4000, 3000, 2500,
427 /* Check for FID control support */
428 struct cpuid_result cpuid1;
429 cpuid1 = cpuid(0x8000007);
430 if( cpuid1.edx & 0x02 ) {
431 /* Use current FID */
433 msr = rdmsr(0xc0010042);
434 fid_cur = msr.lo & 0x3f;
438 /* Use startup FID */
440 msr = rdmsr(0xc0010015);
441 fid_start = (msr.lo & (0x3f << 24));
443 index = fid_start>>25;
446 if(index>12) return T1000_a[i];
448 return TT_a[index * 4+i];
/*
 * Puts the DQS timing registers into a known state before receiver enable
 * training. Per the comments in the body, indexes 0x01-0x03 and 0x21-0x23
 * are written with 0x00 for all bytes and indexes 0x05-0x07 and 0x25-0x27
 * with 0x2f for all bytes. All writes go through function 2, offset 0x98.
 */
452 static void InitDQSPos4RcvrEn(const struct mem_controller *ctrl)
458 for(i=1; i<=3; i++) {
459 /* Program the DQS Write Timing Control Registers (Function 2:Offset 0x9c, index 0x01-0x03, 0x21-0x23) to 0x00 for all bytes */
460 pci_write_config32_index_wait(ctrl->f2, 0x98, i, dword);
461 pci_write_config32_index_wait(ctrl->f2, 0x98, i+0x20, dword);
465 for(i=5; i<=7; i++) {
466 /* Program the DQS Write Timing Control Registers (Function 2:Offset 0x9c, index 0x05-0x07, 0x25-0x27) to 0x2f for all bytes */
467 pci_write_config32_index_wait(ctrl->f2, 0x98, i, dword);
468 pci_write_config32_index_wait(ctrl->f2, 0x98, i+0x20, dword);
// Default the F0/F1 workaround to enabled (1) unless it was already defined.
// TrainRcvrEn tests this value with #if ... == 1.
473 #ifndef K8_REV_F_SUPPORT_F0_F1_WORKAROUND
474 #define K8_REV_F_SUPPORT_F0_F1_WORKAROUND 1
/*
 * DQS receiver enable training for the memory controller described by ctrl.
 *
 * Pass is DQS_FIRST_PASS or DQS_SECOND_PASS. Steps visible in this function:
 *  - first pass only: InitDQSPos4RcvrEn() and setting DC_DqsRcvEnTrain in
 *    DRAM_CTRL (cleared again at the end);
 *  - DCL_DimmEccEn is saved in ecc_bit and cleared in DRAM_CONFIG_LOW;
 *  - two 16-byte aligned pattern buffers are filled from TestPattern0/1
 *    (first pass) or TestPattern2 (otherwise);
 *  - for each channel and each receiver pair (receiver = 0, 2, 4, 6) the
 *    patterns are written, then RcvrEnDly is swept towards 0xaf; for each
 *    step the delay is programmed, MaxAsyncLat is adjusted, and the
 *    patterns are read back and compared;
 *  - RcvrEnDlyRmin records the delay at a fail-to-pass transition; the
 *    final delay is stored in sysinfo->dqs_rcvr_dly_a and programmed;
 *  - the largest delay (CTLRMaxDelay) is programmed through
 *    SetMaxAL_RcvrDly.
 *
 * Returns 1 when CTLRMaxDelay equals 0xae, otherwise 0.
 */
477 static unsigned TrainRcvrEn(const struct mem_controller *ctrl, unsigned Pass, struct sys_info *sysinfo)
480 static const uint32_t TestPattern0[] = {
481 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
482 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
483 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
484 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
486 static const uint32_t TestPattern1[] = {
487 0x55555555, 0x55555555, 0x55555555, 0x55555555,
488 0x55555555, 0x55555555, 0x55555555, 0x55555555,
489 0x55555555, 0x55555555, 0x55555555, 0x55555555,
490 0x55555555, 0x55555555, 0x55555555, 0x55555555,
492 static const uint32_t TestPattern2[] = {
493 0x12345678, 0x87654321, 0x23456789, 0x98765432,
494 0x59385824, 0x30496724, 0x24490795, 0x99938733,
495 0x40385642, 0x38465245, 0x29432163, 0x05067894,
496 0x12349045, 0x98723467, 0x12387634, 0x34587623,
499 uint8_t pattern_buf_x[64 * 4 + 16]; // We need to two cache line So have more 16 bytes to keep 16 byte alignment */
500 uint8_t *buf_a, *buf_b;
503 uint8_t *dqs_rcvr_dly_a = &sysinfo->dqs_rcvr_dly_a[ctrl->node_id * 2* 8] ; //8 node, channel 2, receiver 8
507 unsigned channel, receiver;
510 unsigned CTLRMaxDelay;
515 unsigned Test0, Test1;
517 unsigned RcvrEnDlyRmin;
525 unsigned TestAddr0, TestAddr0B, TestAddr1, TestAddr1B = 0;
527 unsigned CurrRcvrCHADelay = 0;
531 unsigned is_Width128 = sysinfo->meminfo[ctrl->node_id].is_Width128;
535 if(Pass == DQS_FIRST_PASS) {
536 InitDQSPos4RcvrEn(ctrl);
// Remember the DIMM ECC enable bit and clear it in DRAM_CONFIG_LOW
546 dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
547 ecc_bit = dword & DCL_DimmEccEn;
548 dword &= ~(DCL_DimmEccEn);
549 pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dword);
552 if(Pass == DQS_FIRST_PASS) {
553 #if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
554 cpu_f0_f1 = is_cpu_pre_f2_in_bsp(ctrl->node_id);
559 /* Set the DqsRcvEnTrain bit */
560 dword = pci_read_config32(ctrl->f2, DRAM_CTRL);
561 dword |= DC_DqsRcvEnTrain;
562 pci_write_config32(ctrl->f2, DRAM_CTRL, dword);
567 //get T1000 figures (cycle time (ns)) * 1K
568 dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH);
569 dword &= DCH_MemClkFreq_MASK;
571 T1000 = get_exact_T1000(dword);
// Carve two 16-byte aligned pattern buffers out of pattern_buf_x
574 buf_a = (uint8_t *)(((uint32_t)(&pattern_buf_x[0]) + 0x10) & (0xfffffff0));
575 buf_b = buf_a + 128; //??
576 if(Pass==DQS_FIRST_PASS) {
578 *((uint32_t *)(buf_a + i*4)) = TestPattern0[i];
579 *((uint32_t *)(buf_b + i*4)) = TestPattern1[i];
584 *((uint32_t *)(buf_a + i*4)) = TestPattern2[i];
585 *((uint32_t *)(buf_b + i*4)) = TestPattern2[i];
589 print_debug_dqs("\r\nTrainRcvEn: 0 ctrl", ctrl->node_id, 0);
591 print_debug_addr("TrainRcvEn: buf_a:", buf_a);
594 /* for each channel */
598 if (!(sysinfo->meminfo[ctrl->node_id].dimm_mask & 0x0F) &&
599 (sysinfo->meminfo[ctrl->node_id].dimm_mask & 0xF0)) { /* channelB only? */
603 for ( ; (channel < 2) && (!Errors); channel++)
605 print_debug_dqs("\tTrainRcvEn51: channel ",channel, 1);
608 /* there are four receiver pairs, loosely associated with CS */
609 for( receiver = 0; (receiver < 8) && (!Errors); receiver+=2)
// Each receiver pair uses its own indexed register: 0x10, 0x13, 0x16, 0x19
612 unsigned index=(receiver>>1) * 3 + 0x10;
614 print_debug_dqs("\t\tTrainRcvEn52: index ", index, 2);
618 dword = pci_read_config32_index_wait(ctrl->f2, 0x98, index);
619 CurrRcvrCHADelay= dword & 0xff;
// 0xaf doubles as the "no passing delay found" marker checked after the sweep
629 RcvrEnDlyRmin = 0xaf;
631 if(!RcvrRankEnabled(ctrl, channel, receiver, is_Width128, sysinfo)) continue;
633 /* for each DQS receiver enable setting */
635 TestAddr0 = Get_RcvrSysAddr(ctrl, channel, receiver, sysinfo);
637 TestAddr0B = TestAddr0 + (1<<(20+2-8)); // 4MB
639 if(RcvrRankEnabled(ctrl, channel, receiver+1, is_Width128, sysinfo)) {
640 TestAddr1 = Get_RcvrSysAddr(ctrl, channel, receiver+1, sysinfo);
641 TestAddr1B = TestAddr1 + (1<<(20+2-8)); //4MB
648 print_debug_dqs("\t\tTrainRcvEn53: TestAddr0B ", TestAddr0B, 2);
650 Write1LTestPattern(TestAddr0, 0, buf_a, buf_b); // rank0 of dimm, test p0
651 Write1LTestPattern(TestAddr0B, 1, buf_a, buf_b); //rank0 of dimm, test p1
654 Write1LTestPattern(TestAddr1, 0, buf_a, buf_b); //rank 1 of dimm
655 Write1LTestPattern(TestAddr1B, 1, buf_a, buf_b);//rank 1 of dimm
658 if(Pass == DQS_FIRST_PASS) {
661 RcvrEnDly = dqs_rcvr_dly_a[channel * 8 + receiver];
664 while ( RcvrEnDly < 0xaf) { // Sweep Delay value here
665 print_debug_dqs("\t\t\tTrainRcvEn541: RcvrEnDly ", RcvrEnDly, 3);
668 /* Odd steps get another pattern such that even
669 and odd steps alternate.
670 The pointers to the patterns will be swapped
671 at the end of the loop so they are correspond
682 /* Program current Receiver enable delay */
683 pci_write_config32_index_wait(ctrl->f2, 0x98, index, RcvrEnDly);
684 /* FIXME: 64bit MUX */
687 /* Program current Receiver enable delay channel b */
688 pci_write_config32_index_wait(ctrl->f2, 0x98, index+ 0x20, RcvrEnDly);
691 /* Program the MaxAsyncLat filed with the
692 current DQS receiver enable setting plus 6ns
694 /*Porgram MaxAsyncLat to correspond with current delay */
695 SetMaxAL_RcvrDly(ctrl, RcvrEnDly);
699 Read1LTestPattern(TestAddr0); //Cache Fill
700 /* ROM vs cache compare */
701 Test0 = CompareTestPatternQW0(channel, TestAddr0, PatternA, TestPattern0, TestPattern1, TestPattern2, Pass, is_Width128);
702 proc_IOCLFLUSH(TestAddr0);
706 print_debug_dqs("\t\t\tTrainRcvEn542: Test0 ", Test0, 3);
708 if(Test0 == DQS_PASS) {
710 Read1LTestPattern(TestAddr0B);
711 Test1 = CompareTestPatternQW0(channel, TestAddr0B, PatternB, TestPattern0, TestPattern1, TestPattern2, Pass, is_Width128);
712 proc_IOCLFLUSH(TestAddr0B);
716 print_debug_dqs("\t\t\tTrainRcvEn543: Test1 ", Test1, 3);
718 if(Test1 == DQS_PASS) {
720 Read1LTestPattern(TestAddr1);
721 Test0 = CompareTestPatternQW0(channel, TestAddr1, PatternA, TestPattern0, TestPattern1, TestPattern2, Pass, is_Width128);
722 proc_IOCLFLUSH(TestAddr1);
725 if(Test0 == DQS_PASS) {
726 Read1LTestPattern(TestAddr1B);
727 Test1 = CompareTestPatternQW0(channel, TestAddr1B, PatternB, TestPattern0, TestPattern1, TestPattern2, Pass, is_Width128);
728 proc_IOCLFLUSH(TestAddr1B);
731 if(Test1 == DQS_PASS) {
735 print_debug_dqs("\t\t\tTrainRcvEn544: Test0 ", Test0, 3);
743 print_debug_dqs("\t\t\tTrainRcvEn55: RcvrEnDly ", RcvrEnDly, 3);
// A fail-to-pass transition marks the start of the passing window
745 if(CurrTest == DQS_PASS) {
746 if(LastTest == DQS_FAIL) {
747 RcvrEnDlyRmin = RcvrEnDly;
754 /* swap the rank 0 pointers */
756 TestAddr0 = TestAddr0B;
759 /* swap the rank 1 pointers */
761 TestAddr1 = TestAddr1B;
764 print_debug_dqs("\t\t\tTrainRcvEn56: RcvrEnDly ", RcvrEnDly, 3);
770 print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDly ", RcvrEnDly, 2);
// Still at the initial 0xaf: no fail-to-pass transition was recorded
772 if(RcvrEnDlyRmin == 0xaf) {
774 Errors |= SB_NORCVREN;
777 if(Pass == DQS_FIRST_PASS) {
778 // We need a better value for DQSPos training
779 RcvrEnDly = RcvrEnDlyRmin /* + RCVREN_MARGIN * T1000/64/50 */;
781 RcvrEnDly = RcvrEnDlyRmin;
784 if(RcvrEnDly > 0xae) {
785 //passing window too narrow, too far delayed
786 Errors |= SB_SmallRCVR;
790 if(Pass == DQS_SECOND_PASS) { //second pass must average values
791 RcvrEnDly += dqs_rcvr_dly_a[channel * 8 + receiver] /* - (RCVREN_MARGIN * T1000/64/50)*/;
795 dqs_rcvr_dly_a[channel * 8 + receiver] = RcvrEnDly;
797 //Set final RcvrEnDly for this DIMM and Channel
798 pci_write_config32_index_wait(ctrl->f2, 0x98, index, RcvrEnDly);
801 pci_write_config32_index_wait(ctrl->f2, 0x98, index+0x20, RcvrEnDly); // channel B
803 pci_write_config32_index_wait(ctrl->f2, 0x98, index, CurrRcvrCHADelay);
804 if(RcvrEnDly > CurrRcvrCHADelay) {
805 dword = RcvrEnDly - CurrRcvrCHADelay;
808 dword = CurrRcvrCHADelay - RcvrEnDly;
812 Errors |= SB_CHA2BRCVREN;
817 print_debug_dqs("\t\tTrainRcvEn63: RcvrEnDly ", RcvrEnDly, 2);
819 if(RcvrEnDly > CTLRMaxDelay) {
820 CTLRMaxDelay = RcvrEnDly;
823 print_debug_dqs("\t\tTrainRcvEn64: CTLRMaxDelay ", CTLRMaxDelay, 2);
828 print_debug_dqs("\tTrainRcvEn65: CTLRMaxDelay ", CTLRMaxDelay, 1);
830 /* Program the MaxAsyncLat field with the largest DQS Receiver Enable setting */
831 SetMaxAL_RcvrDly(ctrl, CTLRMaxDelay);
835 dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
836 dword &= ~(DCL_DimmEccEn);
838 pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dword);
840 if(Pass == DQS_FIRST_PASS) {
841 #if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
845 dword = pci_read_config32(ctrl->f2, DRAM_CTRL);
846 dword &= ~DC_DqsRcvEnTrain;
847 pci_write_config32(ctrl->f2, DRAM_CTRL, dword);
855 //restore SSE2 setting
858 #if MEM_TRAIN_SEQ != 1
859 /* We need tidy output for type 1 */
860 printk_debug(" CTLRMaxDelay=%02x\n", CTLRMaxDelay);
// 1 when the largest delay equals 0xae, otherwise 0
863 return (CTLRMaxDelay==0xae)?1:0;
// Direction values for the DQS position helpers. The value is also used
// arithmetically when register indexes are computed (direction * 4).
867 #define DQS_READDIR 1
868 #define DQS_WRITEDIR 0
/*
 * Programs the DQS delay of a single byte lane.
 *
 * The indexed register (function 2, offset 0x98) is selected from the byte
 * lane group (bytelane >> 2), the channel (0x20 per channel) and the
 * direction (4 per direction). Inside that register each lane owns an
 * 8-bit slot, of which the low 6 bits are replaced with dqs_delay by a
 * read-modify-write.
 */
871 static void SetDQSDelayCSR(const struct mem_controller *ctrl, unsigned channel, unsigned bytelane, unsigned direction, unsigned dqs_delay)
872 { //ByteLane could be 0-8, last is for ECC
879 index = (bytelane>>2) + 1 + channel * 0x20 + (direction << 2);
884 shift <<= 3; // 8 bit
886 dword = pci_read_config32_index_wait(ctrl->f2, 0x98, index);
887 dword &= ~(0x3f<<shift);
888 dword |= (dqs_delay<<shift);
889 pci_write_config32_index_wait(ctrl->f2, 0x98, index, dword);
/*
 * Programs the same DQS delay for all byte lanes of one channel and
 * direction: dqs_delay is replicated into each 8-bit slot of a dword,
 * which is then written to consecutive indexed registers starting at
 * index 1 + channel * 0x20 + direction * 4.
 */
893 static void SetDQSDelayAllCSR(const struct mem_controller *ctrl, unsigned channel, unsigned direction, unsigned dqs_delay)
902 dword |= dqs_delay<<(i*8);
905 index = 1 + channel * 0x20 + direction * 4;
908 pci_write_config32_index_wait(ctrl->f2, 0x98, index + i, dword);
/*
 * Returns the middle of the range [min_d, max_d]: min_d plus half of the
 * range size. When the size is odd, the branch marked "need round up"
 * adjusts the result upwards.
 */
913 static unsigned MiddleDQS(unsigned min_d, unsigned max_d)
916 size_d = max_d-min_d;
917 if(size_d & 1) { //need round up
920 return ( min_d + (size_d>>1));
/*
 * Stores a trained delay in dqs_delay_a. The array is laid out as
 * [channel][direction][bytelane] with 2 directions and 9 byte lanes per
 * channel.
 */
923 static inline void save_dqs_delay(unsigned channel, unsigned bytelane, unsigned direction, uint8_t *dqs_delay_a, uint8_t dqs_delay)
925 dqs_delay_a[channel * 2*9 + direction * 9 + bytelane] = dqs_delay;
/*
 * Writes the DQS test pattern from buf_a to %fs:addr_lo. The line count
 * passed on is 9 when pattern is 0 and 18 when it is 1
 * ((pattern + 1) * 9).
 */
928 static void WriteDQSTestPattern(unsigned addr_lo, unsigned pattern , uint8_t *buf_a)
930 WriteLNTestPattern(addr_lo, buf_a, (pattern+1) * 9);
/*
 * Reads 18 consecutive 64-byte lines starting at %fs:addr_lo, one 32-bit
 * load per line. The loaded values all go to eax and are not used.
 *
 * Five base registers (esi, edi, ebx, ecx, edx) are preloaded 4 lines
 * apart, each biased by +128, so that displacements -128/-64/0/+64 address
 * four lines per register.
 */
933 static void ReadL18TestPattern(unsigned addr_lo)
935 //set fs and use fs prefix to access the mem
937 "movl %%fs:-128(%%esi), %%eax\n\t" //TestAddr cache line
938 "movl %%fs:-64(%%esi), %%eax\n\t" //+1
939 "movl %%fs:(%%esi), %%eax\n\t" //+2
940 "movl %%fs:64(%%esi), %%eax\n\t" //+3
942 "movl %%fs:-128(%%edi), %%eax\n\t" //+4
943 "movl %%fs:-64(%%edi), %%eax\n\t" //+5
944 "movl %%fs:(%%edi), %%eax\n\t" //+6
945 "movl %%fs:64(%%edi), %%eax\n\t" //+7
947 "movl %%fs:-128(%%ebx), %%eax\n\t" //+8
948 "movl %%fs:-64(%%ebx), %%eax\n\t" //+9
949 "movl %%fs:(%%ebx), %%eax\n\t" //+10
950 "movl %%fs:64(%%ebx), %%eax\n\t" //+11
952 "movl %%fs:-128(%%ecx), %%eax\n\t" //+12
953 "movl %%fs:-64(%%ecx), %%eax\n\t" //+13
954 "movl %%fs:(%%ecx), %%eax\n\t" //+14
955 "movl %%fs:64(%%ecx), %%eax\n\t" //+15
957 "movl %%fs:-128(%%edx), %%eax\n\t" //+16
958 "movl %%fs:-64(%%edx), %%eax\n\t" //+17
960 :: "a"(0), "b" (addr_lo+128+8*64), "c" (addr_lo+128+12*64), "d" (addr_lo +128+16*64), "S"(addr_lo+128), "D"(addr_lo+128+4*64)
/*
 * Reads 9 consecutive 64-byte lines starting at %fs:addr_lo, one 32-bit
 * load per line. The loaded values all go to eax and are not used.
 *
 * Base registers ecx, edx and ebx are preloaded 4 lines apart, each biased
 * by +128, so that displacements -128/-64/0/+64 address four lines per
 * register.
 */
965 static void ReadL9TestPattern(unsigned addr_lo)
968 //set fs and use fs prefix to access the mem
971 "movl %%fs:-128(%%ecx), %%eax\n\t" //TestAddr cache line
972 "movl %%fs:-64(%%ecx), %%eax\n\t" //+1
973 "movl %%fs:(%%ecx), %%eax\n\t" //+2
974 "movl %%fs:64(%%ecx), %%eax\n\t" //+3
976 "movl %%fs:-128(%%edx), %%eax\n\t" //+4
977 "movl %%fs:-64(%%edx), %%eax\n\t" //+5
978 "movl %%fs:(%%edx), %%eax\n\t" //+6
979 "movl %%fs:64(%%edx), %%eax\n\t" //+7
981 "movl %%fs:-128(%%ebx), %%eax\n\t" //+8
983 :: "a"(0), "b" (addr_lo+128+8*64), "c"(addr_lo+128), "d"(addr_lo+128+4*64)
/*
 * Reads back the DQS test pattern at %fs:addr_lo using either the 9-line
 * or the 18-line reader, chosen by pattern. NOTE(review): presumably
 * pattern 0 uses 9 lines and pattern 1 uses 18, matching
 * WriteDQSTestPattern -- confirm against the selecting condition.
 */
989 static void ReadDQSTestPattern(unsigned addr_lo, unsigned pattern)
992 ReadL9TestPattern(addr_lo);
995 ReadL18TestPattern(addr_lo);
/*
 * Flushes 9 consecutive 64-byte cache lines starting at %fs:addr_lo with
 * clflush. Base registers ecx, eax and ebx are preloaded 4 lines apart,
 * each biased by +128, and addressed with displacements -128/-64/0/+64.
 */
999 static void FlushDQSTestPattern_L9(unsigned addr_lo)
1002 "clflush %%fs:-128(%%ecx)\n\t"
1003 "clflush %%fs:-64(%%ecx)\n\t"
1004 "clflush %%fs:(%%ecx)\n\t"
1005 "clflush %%fs:64(%%ecx)\n\t"
1007 "clflush %%fs:-128(%%eax)\n\t"
1008 "clflush %%fs:-64(%%eax)\n\t"
1009 "clflush %%fs:(%%eax)\n\t"
1010 "clflush %%fs:64(%%eax)\n\t"
1012 "clflush %%fs:-128(%%ebx)\n\t"
1014 :: "b" (addr_lo+128+8*64), "c"(addr_lo+128), "a"(addr_lo+128+4*64)
/*
 * Flushes 18 consecutive 64-byte cache lines starting at %fs:addr_lo with
 * clflush. Base registers eax, edi, ebx, ecx and edx are preloaded 4 lines
 * apart, each biased by +128, and addressed with displacements
 * -128/-64/0/+64. The function is explicitly marked noinline.
 */
1018 static __attribute__((noinline)) void FlushDQSTestPattern_L18(unsigned addr_lo)
1021 "clflush %%fs:-128(%%eax)\n\t"
1022 "clflush %%fs:-64(%%eax)\n\t"
1023 "clflush %%fs:(%%eax)\n\t"
1024 "clflush %%fs:64(%%eax)\n\t"
1026 "clflush %%fs:-128(%%edi)\n\t"
1027 "clflush %%fs:-64(%%edi)\n\t"
1028 "clflush %%fs:(%%edi)\n\t"
1029 "clflush %%fs:64(%%edi)\n\t"
1031 "clflush %%fs:-128(%%ebx)\n\t"
1032 "clflush %%fs:-64(%%ebx)\n\t"
1033 "clflush %%fs:(%%ebx)\n\t"
1034 "clflush %%fs:64(%%ebx)\n\t"
1036 "clflush %%fs:-128(%%ecx)\n\t"
1037 "clflush %%fs:-64(%%ecx)\n\t"
1038 "clflush %%fs:(%%ecx)\n\t"
1039 "clflush %%fs:64(%%ecx)\n\t"
1041 "clflush %%fs:-128(%%edx)\n\t"
1042 "clflush %%fs:-64(%%edx)\n\t"
1044 :: "b" (addr_lo+128+8*64), "c" (addr_lo+128+12*64), "d" (addr_lo +128+16*64), "a"(addr_lo+128), "D"(addr_lo+128+4*64)
/*
 * Flushes the cache lines that hold the DQS test pattern at %fs:addr_lo,
 * using either the 9-line or the 18-line flush, chosen by pattern.
 * NOTE(review): presumably pattern 0 flushes 9 lines and pattern 1 flushes
 * 18 -- confirm against the selecting condition.
 */
1048 static void FlushDQSTestPattern(unsigned addr_lo, unsigned pattern )
1052 FlushDQSTestPattern_L9(addr_lo);
1055 FlushDQSTestPattern_L18(addr_lo);
/*
 * Compares memory at %fs:addr_lo with the expected pattern in buf_a and
 * builds a per-byte-lane pass bitmap.
 *
 * bitmap starts at 0xff (all lanes passing). 9*64/4 dwords are read and
 * each is compared byte by byte against the buffer; a mismatching byte
 * clears the bit of its byte lane. When both pattern and channel are
 * non-zero the comparison starts 8 bytes in (second channel), and with
 * pattern == 1 the other channel's 8 bytes are skipped while walking.
 * The caller ANDs the result into its per-delay status
 * (1 = pass, 0 = fail).
 */
1059 static unsigned CompareDQSTestPattern(unsigned channel, unsigned addr_lo, unsigned pattern, uint8_t *buf_a)
1062 unsigned bitmap = 0xff;
1067 uint32_t value_test;
1069 test_buf = (uint32_t *)buf_a;
1072 if(pattern && channel) {
1073 addr_lo += 8; //second channel
1078 for(i=0;i<9*64/4;i++) {
1080 "movl %%fs:(%1), %0\n\t"
1081 :"=b"(value): "a" (addr_lo)
1083 value_test = *test_buf;
1085 print_debug_dqs_pair("\t\t\t\t\t\ttest_buf= ", (unsigned)test_buf, " value = ", value_test, 7);
1086 print_debug_dqs_pair("\t\t\t\t\t\ttaddr_lo = ",addr_lo, " value = ", value, 7);
// Check the four bytes of this dword one at a time
1088 for(j=0;j<4*8;j+=8) {
1089 if(((value>>j)&0xff) != ((value_test>>j)& 0xff)) {
1090 bitmap &= ~(1<<bytelane);
1096 print_debug_dqs("\t\t\t\t\t\tbitmap = ", bitmap, 7);
1099 if(pattern == 1) { //dual channel
1100 addr_lo += 8; //skip over other channel's data
/*
 * DQS position training for one channel and one direction (DQS_READDIR or
 * DQS_WRITEDIR).
 *
 * Phase 1: for every present chip select and every delay value 0..47 the
 * delay is programmed for all byte lanes, the test pattern is written
 * (once up front for reads, per delay for writes), read back and compared.
 * MutualCSPassW[delay] accumulates, by AND, the byte lanes that passed on
 * all chip selects.
 *
 * Phase 2: for each byte lane 0..7 the longest run of consecutive passing
 * delays is located; its middle is programmed with SetDQSDelayCSR and
 * recorded with save_dqs_delay. SB_NODQSPOS is flagged when
 * RnkDlySeqPassMax stays 0, SB_SMALLDQS when the window is narrower than
 * MIN_DQS_WNDW.
 */
1114 static unsigned TrainDQSPos(const struct mem_controller *ctrl, unsigned channel, unsigned Direction, unsigned Pattern, uint8_t *buf_a, uint8_t *dqs_delay_a, struct sys_info *sysinfo)
1118 unsigned BanksPresent;
1120 unsigned MutualCSPassW[48];
1128 unsigned RnkDlyFilterMax, RnkDlyFilterMin = 0;
1129 unsigned RnkDlySeqPassMax, RnkDlySeqPassMin = 0;
1134 print_debug_dqs("\t\t\tTrainDQSPos begin ", 0, 3);
1136 printk_debug("TrainDQSPos: MutualCSPassW[48] :%p\n", MutualCSPassW);
// Start with every byte lane marked as passing for every delay value
1138 for(DQSDelay=0; DQSDelay<48; DQSDelay++) {
1139 MutualCSPassW[DQSDelay] = 0xff; // Bitmapped status per delay setting, 0xff=All positions passing (1= PASS)
1142 for(ChipSel = 0; ChipSel < 8; ChipSel++) { //logical register chipselects 0..7
1143 print_debug_dqs("\t\t\t\tTrainDQSPos: 11 ChipSel ", ChipSel, 4);
1144 //FIXME: process 64MUXedMode
1145 if(!ChipSelPresent(ctrl, ChipSel, sysinfo)) continue;
1148 TestAddr = Get_MCTSysAddr(ctrl, ChipSel, sysinfo);
1150 print_debug_dqs("\t\t\t\tTrainDQSPos: 12 TestAddr ", TestAddr, 4);
1152 //set fs and use fs prefix to access the mem
1153 set_FSBASE(TestAddr>>24);
1155 if(Direction == DQS_READDIR) {
1156 print_debug_dqs("\t\t\t\tTrainDQSPos: 13 for read so write at first", 0, 4);
1157 WriteDQSTestPattern(TestAddr<<8, Pattern, buf_a);
1160 for(DQSDelay = 0; DQSDelay < 48; DQSDelay++ ){
1161 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 141 DQSDelay ", DQSDelay, 5);
1162 if(MutualCSPassW[DQSDelay] == 0) continue; //skip current delay value if other chipselects have failed all 8 bytelanes
1163 SetDQSDelayAllCSR(ctrl, channel, Direction, DQSDelay);
1164 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 142 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);
1165 if(Direction == DQS_WRITEDIR) {
1166 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 143 for write", 0, 5);
1167 WriteDQSTestPattern(TestAddr<<8, Pattern, buf_a);
1169 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 144 Pattern ", Pattern, 5);
1170 ReadDQSTestPattern(TestAddr<<8, Pattern);
1171 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 145 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);
1172 MutualCSPassW[DQSDelay] &= CompareDQSTestPattern(channel, TestAddr<<8, Pattern, buf_a); //0: fail, 1=pass
1173 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 146 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);
1174 SetTargetWTIO(TestAddr);
1175 FlushDQSTestPattern(TestAddr<<8, Pattern);
// Per byte lane: find the longest run of passing delays and use its middle
1181 for(ByteLane = 0; ByteLane < 8; ByteLane++) {
1182 print_debug_dqs("\t\t\t\tTrainDQSPos: 31 ByteLane ",ByteLane, 4);
1184 LastTest = DQS_FAIL;
1185 RnkDlySeqPassMax = 0;
1186 RnkDlyFilterMax = 0;
1187 RnkDlyFilterMin = 0;
1188 for(DQSDelay=0; DQSDelay<48; DQSDelay++) {
1189 if(MutualCSPassW[DQSDelay] & (1<<ByteLane)) {
1191 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 321 DQSDelay ", DQSDelay, 5);
1192 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 322 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);
1194 RnkDlySeqPassMax = DQSDelay;
1195 if(LastTest == DQS_FAIL) {
1196 RnkDlySeqPassMin = DQSDelay; //start sequential run
// Keep the widest run seen so far as the filtered window
1198 if((RnkDlySeqPassMax - RnkDlySeqPassMin)>(RnkDlyFilterMax-RnkDlyFilterMin)){
1199 RnkDlyFilterMin = RnkDlySeqPassMin;
1200 RnkDlyFilterMax = RnkDlySeqPassMax;
1202 LastTest = DQS_PASS;
1205 LastTest = DQS_FAIL;
1208 print_debug_dqs("\t\t\t\tTrainDQSPos: 33 RnkDlySeqPassMax ", RnkDlySeqPassMax, 4);
1210 if(RnkDlySeqPassMax == 0) {
1211 Errors |= SB_NODQSPOS; // no passing window
1214 print_debug_dqs("\t\t\t\tTrainDQSPos: 34 RnkDlyFilterMax ", RnkDlyFilterMax, 4);
1215 print_debug_dqs("\t\t\t\tTrainDQSPos: 34 RnkDlyFilterMin ", RnkDlyFilterMin, 4);
1216 if((RnkDlyFilterMax - RnkDlyFilterMin)< MIN_DQS_WNDW){
1217 Errors |= SB_SMALLDQS;
1220 unsigned middle_dqs;
1221 middle_dqs = MiddleDQS(RnkDlyFilterMin, RnkDlyFilterMax);
1222 print_debug_dqs("\t\t\t\tTrainDQSPos: 35 middle_dqs ",middle_dqs, 4);
1223 SetDQSDelayCSR(ctrl, channel, ByteLane, Direction, middle_dqs);
1224 save_dqs_delay(channel, ByteLane, Direction, dqs_delay_a, middle_dqs);
1230 print_debug_dqs("\t\t\tTrainDQSPos: end", 0xff, 3);
/*
 * Read-direction DQS position training for one channel: logs a debug
 * marker and returns the result of TrainDQSPos with DQS_READDIR.
 */
1237 static unsigned TrainReadDQS(const struct mem_controller *ctrl, unsigned channel, unsigned pattern, uint8_t *buf_a, uint8_t *dqs_delay_a, struct sys_info *sysinfo)
1239 print_debug_dqs("\t\tTrainReadPos", 0, 2);
1240 return TrainDQSPos(ctrl, channel, DQS_READDIR, pattern, buf_a, dqs_delay_a, sysinfo);
/*
 * Write-direction DQS position training for one channel: logs a debug
 * marker and returns the result of TrainDQSPos with DQS_WRITEDIR.
 */
1243 static unsigned TrainWriteDQS(const struct mem_controller *ctrl, unsigned channel, unsigned pattern, uint8_t *buf_a, uint8_t *dqs_delay_a, struct sys_info *sysinfo)
1245 print_debug_dqs("\t\tTrainWritePos", 0, 2);
1246 return TrainDQSPos(ctrl, channel, DQS_WRITEDIR, pattern, buf_a, dqs_delay_a, sysinfo);
1251 static unsigned TrainDQSRdWrPos(const struct mem_controller *ctrl, struct sys_info *sysinfo)
1253 static const uint32_t TestPatternJD1a[] = {
1254 0x00000000,0x00000000,0xFFFFFFFF,0xFFFFFFFF, // QW0-1, ALL-EVEN
1255 0x00000000,0x00000000,0x00000000,0x00000000, // QW2-3, ALL-EVEN
1256 0x00000000,0x00000000,0xFFFFFFFF,0xFFFFFFFF, // QW4-5, ALL-EVEN
1257 0x00000000,0x00000000,0x00000000,0x00000000, // QW6-7, ALL-EVEN
1258 0xFeFeFeFe,0xFeFeFeFe,0x01010101,0x01010101, // QW0-1, DQ0-ODD
1259 0xFeFeFeFe,0xFeFeFeFe,0x01010101,0x01010101, // QW2-3, DQ0-ODD
1260 0x01010101,0x01010101,0xFeFeFeFe,0xFeFeFeFe, // QW4-5, DQ0-ODD
1261 0xFeFeFeFe,0xFeFeFeFe,0x01010101,0x01010101, // QW6-7, DQ0-ODD
1262 0x02020202,0x02020202,0x02020202,0x02020202, // QW0-1, DQ1-ODD
1263 0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, // QW2-3, DQ1-ODD
1264 0xFdFdFdFd,0xFdFdFdFd,0x02020202,0x02020202, // QW4-5, DQ1-ODD
1265 0x02020202,0x02020202,0x02020202,0x02020202, // QW6-7, DQ1-ODD
1266 0x04040404,0x04040404,0xfBfBfBfB,0xfBfBfBfB, // QW0-1, DQ2-ODD
1267 0x04040404,0x04040404,0x04040404,0x04040404, // QW2-3, DQ2-ODD
1268 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW4-5, DQ2-ODD
1269 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW6-7, DQ2-ODD
1270 0x08080808,0x08080808,0xF7F7F7F7,0xF7F7F7F7, // QW0-1, DQ3-ODD
1271 0x08080808,0x08080808,0x08080808,0x08080808, // QW2-3, DQ3-ODD
1272 0xF7F7F7F7,0xF7F7F7F7,0x08080808,0x08080808, // QW4-5, DQ3-ODD
1273 0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW6-7, DQ3-ODD
1274 0x10101010,0x10101010,0x10101010,0x10101010, // QW0-1, DQ4-ODD
1275 0xeFeFeFeF,0xeFeFeFeF,0x10101010,0x10101010, // QW2-3, DQ4-ODD
1276 0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW4-5, DQ4-ODD
1277 0xeFeFeFeF,0xeFeFeFeF,0x10101010,0x10101010, // QW6-7, DQ4-ODD
1278 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW0-1, DQ5-ODD
1279 0xdFdFdFdF,0xdFdFdFdF,0x20202020,0x20202020, // QW2-3, DQ5-ODD
1280 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW4-5, DQ5-ODD
1281 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW6-7, DQ5-ODD
1282 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW0-1, DQ6-ODD
1283 0x40404040,0x40404040,0xBfBfBfBf,0xBfBfBfBf, // QW2-3, DQ6-ODD
1284 0x40404040,0x40404040,0xBfBfBfBf,0xBfBfBfBf, // QW4-5, DQ6-ODD
1285 0x40404040,0x40404040,0xBfBfBfBf,0xBfBfBfBf, // QW6-7, DQ6-ODD
1286 0x80808080,0x80808080,0x7F7F7F7F,0x7F7F7F7F, // QW0-1, DQ7-ODD
1287 0x80808080,0x80808080,0x7F7F7F7F,0x7F7F7F7F, // QW2-3, DQ7-ODD
1288 0x80808080,0x80808080,0x7F7F7F7F,0x7F7F7F7F, // QW4-5, DQ7-ODD
1289 0x80808080,0x80808080,0x80808080,0x80808080 // QW6-7, DQ7-ODD
1291 static const uint32_t TestPatternJD1b[] = {
1292 0x00000000,0x00000000,0x00000000,0x00000000, // QW0,CHA-B, ALL-EVEN
1293 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, // QW1,CHA-B, ALL-EVEN
1294 0x00000000,0x00000000,0x00000000,0x00000000, // QW2,CHA-B, ALL-EVEN
1295 0x00000000,0x00000000,0x00000000,0x00000000, // QW3,CHA-B, ALL-EVEN
1296 0x00000000,0x00000000,0x00000000,0x00000000, // QW4,CHA-B, ALL-EVEN
1297 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, // QW5,CHA-B, ALL-EVEN
1298 0x00000000,0x00000000,0x00000000,0x00000000, // QW6,CHA-B, ALL-EVEN
1299 0x00000000,0x00000000,0x00000000,0x00000000, // QW7,CHA-B, ALL-EVEN
1300 0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, // QW0,CHA-B, DQ0-ODD
1301 0x01010101,0x01010101,0x01010101,0x01010101, // QW1,CHA-B, DQ0-ODD
1302 0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, // QW2,CHA-B, DQ0-ODD
1303 0x01010101,0x01010101,0x01010101,0x01010101, // QW3,CHA-B, DQ0-ODD
1304 0x01010101,0x01010101,0x01010101,0x01010101, // QW4,CHA-B, DQ0-ODD
1305 0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, // QW5,CHA-B, DQ0-ODD
1306 0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, // QW6,CHA-B, DQ0-ODD
1307 0x01010101,0x01010101,0x01010101,0x01010101, // QW7,CHA-B, DQ0-ODD
1308 0x02020202,0x02020202,0x02020202,0x02020202, // QW0,CHA-B, DQ1-ODD
1309 0x02020202,0x02020202,0x02020202,0x02020202, // QW1,CHA-B, DQ1-ODD
1310 0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, // QW2,CHA-B, DQ1-ODD
1311 0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, // QW3,CHA-B, DQ1-ODD
1312 0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, // QW4,CHA-B, DQ1-ODD
1313 0x02020202,0x02020202,0x02020202,0x02020202, // QW5,CHA-B, DQ1-ODD
1314 0x02020202,0x02020202,0x02020202,0x02020202, // QW6,CHA-B, DQ1-ODD
1315 0x02020202,0x02020202,0x02020202,0x02020202, // QW7,CHA-B, DQ1-ODD
1316 0x04040404,0x04040404,0x04040404,0x04040404, // QW0,CHA-B, DQ2-ODD
1317 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW1,CHA-B, DQ2-ODD
1318 0x04040404,0x04040404,0x04040404,0x04040404, // QW2,CHA-B, DQ2-ODD
1319 0x04040404,0x04040404,0x04040404,0x04040404, // QW3,CHA-B, DQ2-ODD
1320 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW4,CHA-B, DQ2-ODD
1321 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW5,CHA-B, DQ2-ODD
1322 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW6,CHA-B, DQ2-ODD
1323 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW7,CHA-B, DQ2-ODD
1324 0x08080808,0x08080808,0x08080808,0x08080808, // QW0,CHA-B, DQ3-ODD
1325 0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW1,CHA-B, DQ3-ODD
1326 0x08080808,0x08080808,0x08080808,0x08080808, // QW2,CHA-B, DQ3-ODD
1327 0x08080808,0x08080808,0x08080808,0x08080808, // QW3,CHA-B, DQ3-ODD
1328 0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW4,CHA-B, DQ3-ODD
1329 0x08080808,0x08080808,0x08080808,0x08080808, // QW5,CHA-B, DQ3-ODD
1330 0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW6,CHA-B, DQ3-ODD
1331 0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW7,CHA-B, DQ3-ODD
1332 0x10101010,0x10101010,0x10101010,0x10101010, // QW0,CHA-B, DQ4-ODD
1333 0x10101010,0x10101010,0x10101010,0x10101010, // QW1,CHA-B, DQ4-ODD
1334 0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW2,CHA-B, DQ4-ODD
1335 0x10101010,0x10101010,0x10101010,0x10101010, // QW3,CHA-B, DQ4-ODD
1336 0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW4,CHA-B, DQ4-ODD
1337 0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW5,CHA-B, DQ4-ODD
1338 0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW6,CHA-B, DQ4-ODD
1339 0x10101010,0x10101010,0x10101010,0x10101010, // QW7,CHA-B, DQ4-ODD
1340 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW0,CHA-B, DQ5-ODD
1341 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW1,CHA-B, DQ5-ODD
1342 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW2,CHA-B, DQ5-ODD
1343 0x20202020,0x20202020,0x20202020,0x20202020, // QW3,CHA-B, DQ5-ODD
1344 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW4,CHA-B, DQ5-ODD
1345 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW5,CHA-B, DQ5-ODD
1346 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW6,CHA-B, DQ5-ODD
1347 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW7,CHA-B, DQ5-ODD
1348 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW0,CHA-B, DQ6-ODD
1349 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW1,CHA-B, DQ6-ODD
1350 0x40404040,0x40404040,0x40404040,0x40404040, // QW2,CHA-B, DQ6-ODD
1351 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW3,CHA-B, DQ6-ODD
1352 0x40404040,0x40404040,0x40404040,0x40404040, // QW4,CHA-B, DQ6-ODD
1353 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW5,CHA-B, DQ6-ODD
1354 0x40404040,0x40404040,0x40404040,0x40404040, // QW6,CHA-B, DQ6-ODD
1355 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW7,CHA-B, DQ6-ODD
1356 0x80808080,0x80808080,0x80808080,0x80808080, // QW0,CHA-B, DQ7-ODD
1357 0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F, // QW1,CHA-B, DQ7-ODD
1358 0x80808080,0x80808080,0x80808080,0x80808080, // QW2,CHA-B, DQ7-ODD
1359 0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F, // QW3,CHA-B, DQ7-ODD
1360 0x80808080,0x80808080,0x80808080,0x80808080, // QW4,CHA-B, DQ7-ODD
1361 0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F, // QW5,CHA-B, DQ7-ODD
1362 0x80808080,0x80808080,0x80808080,0x80808080, // QW6,CHA-B, DQ7-ODD
1363 0x80808080,0x80808080,0x80808080,0x80808080 // QW7,CHA-B, DQ7-ODD
1365 uint8_t pattern_buf_x[64 * 18 + 16]; // We need to two cache line So have more 16 bytes to keep 16 byte alignment */
1374 unsigned DQSWrDelay;
1375 unsigned is_Width128 = sysinfo->meminfo[ctrl->node_id].is_Width128;
1376 uint8_t *dqs_delay_a = &sysinfo->dqs_delay_a[ctrl->node_id * 2*2*9]; //channel 2, direction 2 , bytelane *9
1385 dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
1386 ecc_bit = dword & DCL_DimmEccEn;
1387 dword &= ~(DCL_DimmEccEn);
1388 pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dword);
1391 buf_a = (uint8_t *)(((uint32_t)(&pattern_buf_x[0]) + 0x10) & (~0xf));
1395 for(i=0;i<16*18;i++) {
1396 *((uint32_t *)(buf_a + i*4)) = TestPatternJD1b[i];
1401 for(i=0; i<16*9;i++) {
1402 *((uint32_t *)(buf_a + i*4)) = TestPatternJD1a[i];
1407 print_debug_dqs("\r\nTrainDQSRdWrPos: 0 ctrl ", ctrl->node_id, 0);
1409 printk_debug("TrainDQSRdWrPos: buf_a:%p\n", buf_a);
1414 if (!(sysinfo->meminfo[ctrl->node_id].dimm_mask & 0x0F) &&
1415 (sysinfo->meminfo[ctrl->node_id].dimm_mask & 0xF0)) { /* channelB only? */
1419 while( (channel<2) && (!Errors)) {
1420 print_debug_dqs("\tTrainDQSRdWrPos: 1 channel ",channel, 1);
1421 for(DQSWrDelay = 0; DQSWrDelay < 48; DQSWrDelay++) {
1423 SetDQSDelayAllCSR(ctrl, channel, DQS_WRITEDIR, DQSWrDelay);
1424 print_debug_dqs("\t\tTrainDQSRdWrPos: 21 DQSWrDelay ", DQSWrDelay, 2);
1425 err= TrainReadDQS(ctrl, channel, pattern, buf_a, dqs_delay_a, sysinfo);
1426 print_debug_dqs("\t\tTrainDQSRdWrPos: 22 err ",err, 2);
1431 print_debug_dqs("\tTrainDQSRdWrPos: 3 DQSWrDelay ", DQSWrDelay, 1);
1433 if(DQSWrDelay < 48) {
1434 Errors = TrainWriteDQS(ctrl, channel, pattern, buf_a, dqs_delay_a, sysinfo);
1435 print_debug_dqs("\tTrainDQSRdWrPos: 4 Errors ", Errors, 1);
1440 //FIXME: 64MuxMode??
1441 channel++; // skip channel if 64-bit mode
1446 dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
1447 dword &= ~(DCL_DimmEccEn);
1449 pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dword);
1455 //restore SSE2 setting
1458 print_debug_dqs("TrainDQSRdWrPos: ", 5, 0);
// Fetch one saved DQS delay from the table dqs_delay_a.
// The table is indexed as channel * 2*9 + direction * 9 + bytelane, i.e.
// 9 byte-lane entries per direction and 2 directions per channel.
1463 static inline uint8_t get_dqs_delay(unsigned channel, unsigned bytelane, unsigned direction, uint8_t *dqs_delay_a)
1465 return dqs_delay_a[channel * 2*9 + direction * 9 + bytelane];
// Interpolate a DQS delay between two byte lanes of one channel and direction.
// Both lane delays are read with get_dqs_delay(). The absolute difference
// between them is scaled by InterFactor (an 8-bit fraction, see the table
// below) and the visible code then adds a lane delay back as the base.
// The declaration of DQSDelay, the else lines and the return statement are
// not visible in this view.
1468 static unsigned CalcEccDQSPos(unsigned channel,unsigned ByteLane0, unsigned ByteLane1, unsigned InterFactor, unsigned Direction, uint8_t *dqs_delay_a)
1469 /* InterFactor: 0: 100% ByteLane 0
1470 0x80: 50% between ByteLane 0 and 1
1471 0xff: 99.6% ByteLane 1 and 0.4% like 0
1474 unsigned DQSDelay0, DQSDelay1;
1477 DQSDelay0 = get_dqs_delay(channel, ByteLane0, Direction, dqs_delay_a);
1478 DQSDelay1 = get_dqs_delay(channel, ByteLane1, Direction, dqs_delay_a);
// When lane 0 has the larger delay, the distance is measured from lane 1
// and the factor is mirrored (0xff - InterFactor) so that a small
// InterFactor still yields a result close to lane 0.
1480 if(DQSDelay0>DQSDelay1) {
1481 DQSDelay = DQSDelay0 - DQSDelay1;
1482 InterFactor = 0xff - InterFactor;
// Presumably the else branch (its opening line is not visible here).
1485 DQSDelay = DQSDelay1 - DQSDelay0;
1488 DQSDelay *= InterFactor;
1490 DQSDelay >>= 8; // scale back down: the shift divides by 256 (not 255)
// Add the base delay back: lane 1 when lane 0 is larger, otherwise
// presumably lane 0 (the else line is not visible here).
1492 if(DQSDelay0>DQSDelay1) {
1493 DQSDelay += DQSDelay1;
1496 DQSDelay += DQSDelay0;
// For each of the two channels and each entry of direction[] (read, write),
// compute a DQS delay from byte lanes 4 and 5 with CalcEccDQSPos() and pass
// it to SetDQSDelayCSR() and save_dqs_delay() for ByteLane.
// ByteLane, i, Direction, channel and dqs_delay are declared or set on lines
// not visible in this view.
// NOTE(review): going by the function name, ByteLane is presumably the ECC
// byte lane - confirm against the full source.
1503 static void SetEccDQSRdWrPos(const struct mem_controller *ctrl, struct sys_info *sysinfo)
1508 unsigned lane0, lane1, ratio;
1511 unsigned direction[] = { DQS_READDIR, DQS_WRITEDIR };
// Start of this node's slice of the delay table; each node owns 2*2*9 bytes.
1513 uint8_t *dqs_delay_a = &sysinfo->dqs_delay_a[ctrl->node_id * 2*2*9]; //channel 2, direction 2 , bytelane *9
1517 for(channel = 0; channel < 2; channel++) {
1519 Direction = direction[i];
// ratio is passed as InterFactor; 0 means 100% ByteLane 0, i.e. lane0.
1520 lane0 = 4; lane1 = 5; ratio = 0;
1521 dqs_delay = CalcEccDQSPos(channel, lane0, lane1, ratio, Direction, dqs_delay_a);
1522 print_debug_dqs_pair("\t\tSetEccDQSRdWrPos: channel ", channel, Direction==DQS_READDIR? " R dqs_delay":" W dqs_delay", dqs_delay, 2);
1523 SetDQSDelayCSR(ctrl, channel, ByteLane, Direction, dqs_delay);
1524 save_dqs_delay(channel, ByteLane, Direction, dqs_delay_a, dqs_delay);
// Wrapper around TrainRcvrEn() for one memory controller and the given Pass.
// Emits a level-0 "begin" debug message before the call and an "end" message
// after the check of its result.
// A non-zero TrainRcvrEn() result enters the branch below; the branch body
// and the return statements are not visible in this view. Callers in this
// file treat a non-zero return from this function as failure.
1529 static unsigned train_DqsRcvrEn(const struct mem_controller *ctrl, unsigned Pass, struct sys_info *sysinfo)
1531 print_debug_dqs("\r\ntrain_DqsRcvrEn: begin ctrl ", ctrl->node_id, 0);
1532 if(TrainRcvrEn(ctrl, Pass, sysinfo)) {
1535 print_debug_dqs("\r\ntrain_DqsRcvrEn: end ctrl ", ctrl->node_id, 0);
// DQS position training for one controller: runs TrainDQSRdWrPos() and then
// SetEccDQSRdWrPos().
// A non-zero TrainDQSRdWrPos() result is reported through printk_err; the
// rest of that branch and the return statements are not visible in this view.
// Callers in this file treat a non-zero return from this function as failure.
1539 static unsigned train_DqsPos(const struct mem_controller *ctrl, struct sys_info *sysinfo)
1541 print_debug_dqs("\r\ntrain_DqsPos: begin ctrl ", ctrl->node_id, 0);
1542 if(TrainDQSRdWrPos(ctrl, sysinfo) != 0) {
1543 printk_err("\r\nDQS Training Rd Wr failed ctrl%02x\r\n", ctrl->node_id);
1547 SetEccDQSRdWrPos(ctrl, sysinfo);
1549 print_debug_dqs("\r\ntrain_DqsPos: end ctrl ", ctrl->node_id, 0);
1554 #if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
// Workaround for controllers on which is_cpu_pre_f2_in_bsp() reports non-zero
// (presumably CPU revisions before F2, going by the helper name - confirm).
// First loop, per present controller with DIMMs that needs the workaround:
//   - clear DC_DqsRcvEnTrain in DRAM_CTRL,
//   - set and then clear DI_EnDramInit in DRAM_INIT,
//   - add tsc0[i] to tsc1[i], hi and lo words separately, with a wrap-around
//     check on the low sum.
// Second loop, for the same controllers: repeat while tsc1[i] is still
// greater than tsc.
// The declarations of i, dword, tsc and tsc1, the code that fills them, the
// carry handling and the loop bodies' short lines are not visible here.
1555 static void f0_svm_workaround(int controllers, const struct mem_controller *ctrl, tsc_t *tsc0, struct sys_info *sysinfo)
// Per-controller flag: non-zero when the workaround applies to controller i.
1558 unsigned cpu_f0_f1[8];
1561 print_debug_addr("dqs_timing: tsc1[8] :", tsc1);
1563 for(i = 0; i < controllers; i++) {
1564 if (!sysinfo->ctrl_present[i])
1567 /* Skip everything if I don't have any memory on this controller */
1568 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1572 cpu_f0_f1[i] = is_cpu_pre_f2_in_bsp(i);
1574 if(!cpu_f0_f1[i]) continue;
1576 dword = pci_read_config32(ctrl[i].f2, DRAM_CTRL);
1577 dword &= ~DC_DqsRcvEnTrain;
1578 pci_write_config32(ctrl[i].f2, DRAM_CTRL, dword);
// Write DRAM_INIT twice: once with DI_EnDramInit set, once with it cleared.
1580 dword = pci_read_config32(ctrl[i].f2, DRAM_INIT);
1581 dword |= DI_EnDramInit;
1582 pci_write_config32(ctrl[i].f2, DRAM_INIT, dword);
1583 dword &= ~DI_EnDramInit;
1584 pci_write_config32(ctrl[i].f2, DRAM_INIT, dword);
1587 print_debug_dqs_tsc("begin: tsc1", i, tsc1[i].hi, tsc1[i].lo, 2);
// Low-word sum; a result smaller than either operand means it wrapped.
1589 dword = tsc1[i].lo + tsc0[i].lo;
1590 if((dword<tsc1[i].lo) || (dword<tsc0[i].lo)) {
1594 tsc1[i].hi+= tsc0[i].hi;
1596 print_debug_dqs_tsc("end : tsc1", i, tsc1[i].hi, tsc1[i].lo, 2);
1600 for(i = 0; i < controllers; i++) {
1601 if (!sysinfo->ctrl_present[i])
1604 /* Skip everything if I don't have any memory on this controller */
1605 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1607 if(!cpu_f0_f1[i]) continue;
// 64-bit comparison done as hi word first, then lo word.
1613 } while ((tsc1[i].hi>tsc.hi) || ((tsc1[i].hi==tsc.hi) && (tsc1[i].lo>tsc.lo)));
1615 print_debug_dqs_tsc("end : tsc ", i, tsc.hi, tsc.lo, 2);
1623 /* setting variable mtrr, comes from linux kernel source */
// Program variable MTRR number reg with a base and mask built from basek and
// sizek. Both are in KiB: shifting left by 10 yields the low word of the
// byte value, shifting right by 22 yields the word above bit 31.
// The mask register is first written with zero and then the base and mask
// registers are written with the new values.
// The declarations of base, mask and zero, the else line, and the lines that
// fold in type and the remaining mask bits are not visible in this view.
// NOTE(review): address_bits below 32 or of 64 and above would make the
// shift count in the address_mask_high computation invalid; the only call in
// this file passes 40.
1624 static void set_var_mtrr_dqs(
1625 unsigned int reg, unsigned long basek, unsigned long sizek,
1626 unsigned char type, unsigned address_bits)
1629 unsigned address_mask_high;
// One bit set for every physical address bit from 32 up to address_bits - 1.
1631 address_mask_high = ((1u << (address_bits - 32u)) - 1u);
1633 base.hi = basek >> 22;
1634 base.lo = basek << 10;
// 4*1024*1024 KiB is 4 GiB: smaller sizes are expressed in the low mask word.
1636 if (sizek < 4*1024*1024) {
1637 mask.hi = address_mask_high;
1638 mask.lo = ~((sizek << 10) -1);
// Presumably the else branch for sizes of 4 GiB and above (its opening line
// is not visible here).
1641 mask.hi = address_mask_high & (~((sizek >> 22) -1));
1650 zero.lo = zero.hi = 0;
1651 /* The invalid bit is kept in the mask, so we simply clear the
1652 relevant mask register to disable a range. */
1653 wrmsr (MTRRphysMask_MSR(reg), zero);
1655 /* Bit 32-35 of MTRRphysMask should be set to 1 */
1658 wrmsr (MTRRphysBase_MSR(reg), base);
1659 wrmsr (MTRRphysMask_MSR(reg), mask);
1664 /* fms: find most significant bit set, stolen from Linux Kernel Source. */
// Built on the x86 bsrl (bit scan reverse) instruction.
// The declaration of r, the middle asm lines (including whatever handles
// x == 0 before the "1:" label) and the return are not visible in this view.
1665 static inline unsigned int fms(unsigned int x)
1669 __asm__("bsrl %1,%0\n\t"
1672 "1:" : "=r" (r) : "g" (x));
1676 /* fls: find least significant bit set */
// Built on the x86 bsfl (bit scan forward) instruction.
// Mind the name: in the Linux kernel fls() means "find last set" (the most
// significant bit); this helper scans for the lowest set bit instead.
// The declaration of r, the middle asm lines (including whatever handles
// x == 0 before the "1:" label) and the return are not visible in this view.
1677 static inline unsigned int fls(unsigned int x)
1681 __asm__("bsfl %1,%0\n\t"
1684 "1:" : "=r" (r) : "g" (x));
// Cover the range starting at range_startk with length range_sizek (both in
// KiB) using variable MTRRs, beginning at index reg.
// Each loop iteration programs one MTRR of sizek KiB through
// set_var_mtrr_dqs(), increments reg and moves the start forward by sizek
// until the whole range is consumed.
// sizek is derived from align and max_align on lines not visible in this
// view; the return statements are not visible either (the caller stores the
// result in a variable named reg).
// next_range_startk is not referenced in the visible lines.
1688 static unsigned int range_to_mtrr(unsigned int reg,
1689 unsigned long range_startk, unsigned long range_sizek,
1690 unsigned long next_range_startk, unsigned char type, unsigned address_bits)
// Guard for an empty range or reg of 8 and above (body not visible here).
1692 if (!range_sizek || (reg >= 8)) {
1695 while(range_sizek) {
1696 unsigned long max_align, align;
1697 unsigned long sizek;
1698 /* Compute the maximum size I can make a range */
// fls() is the lowest set bit of the start address, fms() the highest set
// bit of the remaining size.
1699 max_align = fls(range_startk);
1700 align = fms(range_sizek);
1701 if (align > max_align) {
// The progress message is compiled out when MEM_TRAIN_SEQ is 1.
1705 #if MEM_TRAIN_SEQ != 1
1706 printk_debug("Setting variable MTRR %d, base: %4dMB, range: %4dMB, type %s\r\n",
1707 reg, range_startk >>10, sizek >> 10,
1708 (type==MTRR_TYPE_UNCACHEABLE)?"UC":
1709 ((type==MTRR_TYPE_WRBACK)?"WB":"Other")
1712 set_var_mtrr_dqs(reg++, range_startk, sizek, type, address_bits);
1713 range_startk += sizek;
1714 range_sizek -= sizek;
// Write the TOP_MEM2 and TOP_MEM MSRs from tom2_k and tom_k, both in KiB.
// Each value is converted to a byte address split over two words: the low
// 22 bits of the KiB value shifted left by 10 go to msr.lo, and the bits
// from 22 upwards shifted right by 22 go to msr.hi.
// The declaration of msr is not visible in this view.
1721 static void set_top_mem_ap(unsigned tom_k, unsigned tom2_k)
1725 /* Now set top of memory */
1726 msr.lo = (tom2_k & 0x003fffff) << 10;
1727 msr.hi = (tom2_k & 0xffc00000) >> 22;
1728 wrmsr(TOP_MEM2, msr);
1730 msr.lo = (tom_k & 0x003fffff) << 10;
1731 msr.hi = (tom_k & 0xffc00000) >> 22;
1732 wrmsr(TOP_MEM, msr);
// Set up MTRRs ahead of DQS training (the caller notes that training needs
// them to reach its test addresses).
// Visible steps: set SYSCFG_MSR_MtrrFixDramModEn in SYSCFG_MSR, prepare a
// fixed-range value, map 0 up to tom_k as write-back with variable MTRRs
// starting at index 2 (40 address bits), then set bits 21 and 22 of
// SYSCFG_MSR.
// The declarations of msr and reg, the fixed-range MSR writes and any use of
// tom2_k are on lines not visible in this view.
1735 static void setup_mtrr_dqs(unsigned tom_k, unsigned tom2_k){
1740 //still enable from cache_as_ram.inc
1741 msr = rdmsr(SYSCFG_MSR);
1742 msr.lo |= SYSCFG_MSR_MtrrFixDramModEn;
1743 wrmsr(SYSCFG_MSR,msr);
1746 //[0,512k), [512k, 640k)
1747 msr.hi = 0x1e1e1e1e;
// Range is 0 .. tom_k KiB; 4*1024*1024 is passed as next_range_startk.
1753 reg = range_to_mtrr(2, 0, tom_k,4*1024*1024, MTRR_TYPE_WRBACK, 40);
1757 //enable tom2 and type
1758 msr = rdmsr(SYSCFG_MSR);
1759 msr.lo |= (1<<21) | (1<<22); //MtrrTom2En and Tom2ForceMemTypeWB
1760 wrmsr(SYSCFG_MSR, msr);
// Counterpart of setup_mtrr_dqs(), called once training is finished.
// Visible steps: set SYSCFG_MSR_MtrrFixDramModEn in SYSCFG_MSR, loop over
// MSR numbers 0x204 to 0x20f, then clear bits 21 and 22 of SYSCFG_MSR.
// The declarations of msr and i, the fixed-range handling, the loop body and
// any use of tom2_k are on lines not visible in this view.
1765 static void clear_mtrr_dqs(unsigned tom2_k){
1769 //still enable from cache_as_ram.inc
1770 msr = rdmsr(SYSCFG_MSR);
1771 msr.lo |= SYSCFG_MSR_MtrrFixDramModEn;
1772 wrmsr(SYSCFG_MSR,msr);
1774 //[0,512k), [512k, 640k)
// NOTE(review): 0x204 .. 0x20f are presumably the variable MTRR base/mask
// MSRs from index 2 upwards, matching the start index used in
// setup_mtrr_dqs() - confirm.
1781 for(i=0x204;i<0x210;i++) {
1787 //enable tom2 and type
// Despite the inherited "enable" comment above, this clears both bits.
1788 msr = rdmsr(SYSCFG_MSR);
1789 msr.lo &= ~((1<<21) | (1<<22)); //MtrrTom2En and Tom2ForceMemTypeWB
1790 wrmsr(SYSCFG_MSR, msr);
// Read-modify-write of HT_INIT_CONTROL on PCI device 0x18+i, function 0:
// the lowest bit of val is OR-ed in at position bit.
// The declaration of dword and one line between the read and the OR are not
// visible in this view.
// NOTE(review): that hidden line presumably clears the target bit first,
// which is required for val == 0 to have any effect - confirm.
1794 static void set_htic_bit(unsigned i, unsigned val, unsigned bit)
1797 dword = pci_read_config32(PCI_DEV(0, 0x18+i, 0), HT_INIT_CONTROL);
1799 dword |= ((val & 1) <<bit);
1800 pci_write_config32(PCI_DEV(0, 0x18+i, 0), HT_INIT_CONTROL, dword);
// Read HT_INIT_CONTROL on PCI device 0x18+i, function 0.
// The expression that extracts position bit and the return statement are not
// visible in this view; the caller in this file tests the result for
// non-zero.
1804 static unsigned get_htic_bit(unsigned i, unsigned bit)
1807 dword = pci_read_config32(PCI_DEV(0, 0x18+i, 0), HT_INIT_CONTROL);
// Returns as soon as get_htic_bit(0, 9) is non-zero, i.e. the flag that
// set_sysinfo_in_ram() writes.
// The enclosing wait loop is presumably on lines not visible in this view.
1812 static void wait_till_sysinfo_in_ram(void)
1815 if(get_htic_bit(0, 9)) return;
// Store the lowest bit of val in bit 9 of HT_INIT_CONTROL for index 0 through
// set_htic_bit(); wait_till_sysinfo_in_ram() polls the same bit.
1819 static void set_sysinfo_in_ram(unsigned val)
1821 set_htic_bit(0, val, 9);
1825 #if MEM_TRAIN_SEQ == 0
// dqs_timing, variant built when MEM_TRAIN_SEQ == 0: trains all controllers
// in one call. The signature carries an extra tsc0 argument when
// K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1.
// Sequence, skipping controllers that are absent or have no DIMMs:
//   1. setup_mtrr_dqs()
//   2. fill_mem_cs_sysinfo() for every controller
//   3. train_DqsRcvrEn() pass 1 for every controller
//   4. f0_svm_workaround() (only when the workaround is compiled in)
//   5. train_DqsPos() for every controller
//   6. train_DqsRcvrEn() pass 2 for every controller, then mem_trained[i] = 1
//   7. clear_mtrr_dqs()
// Any non-zero training result jumps to the out label. The label itself, the
// declarations of i and tsc, and the timing code are not visible in this view.
1828 #if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
1829 static void dqs_timing(int controllers, const struct mem_controller *ctrl, tsc_t *tsc0, struct sys_info *sysinfo)
1831 static void dqs_timing(int controllers, const struct mem_controller *ctrl, struct sys_info *sysinfo)
1838 //need to enable mtrr, so dqs training could access the test address
1839 setup_mtrr_dqs(sysinfo->tom_k, sysinfo->tom2_k);
1841 for(i = 0; i < controllers; i++) {
1842 if (!sysinfo->ctrl_present[ i ])
1845 /* Skip everything if I don't have any memory on this controller */
1846 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1848 fill_mem_cs_sysinfo(i, ctrl+i, sysinfo);
// Receiver enable training, first pass.
1852 for(i = 0; i < controllers; i++) {
1853 if (!sysinfo->ctrl_present[ i ])
1856 /* Skip everything if I don't have any memory on this controller */
1857 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1859 printk_debug("DQS Training:RcvrEn:Pass1: %02x\n", i);
1860 if(train_DqsRcvrEn(ctrl+i, 1, sysinfo)) goto out;
1861 printk_debug(" done\r\n");
1865 #if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
1866 f0_svm_workaround(controllers, ctrl, tsc0, sysinfo);
// DQS read/write position training.
1870 for(i = 0; i < controllers; i++) {
1871 if (!sysinfo->ctrl_present[i])
1874 /* Skip everything if I don't have any memory on this controller */
1875 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1877 printk_debug("DQS Training:DQSPos: %02x\n", i);
1878 if(train_DqsPos(ctrl+i, sysinfo)) goto out;
1879 printk_debug(" done\r\n");
// Receiver enable training, second pass; success marks the controller trained.
1883 for(i = 0; i < controllers; i++) {
1884 if (!sysinfo->ctrl_present[i])
1887 /* Skip everything if I don't have any memory on this controller */
1888 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1890 printk_debug("DQS Training:RcvrEn:Pass2: %02x\n", i);
1891 if(train_DqsRcvrEn(ctrl+i, 2, sysinfo)) goto out;
1892 printk_debug(" done\r\n");
1893 sysinfo->mem_trained[i]=1;
1898 clear_mtrr_dqs(sysinfo->tom2_k);
1902 print_debug_dqs_tsc_x("DQS Training:tsc", i, tsc[i].hi, tsc[i].lo);
1912 #if MEM_TRAIN_SEQ > 0
// dqs_timing, variant built when MEM_TRAIN_SEQ > 0: trains the single
// controller ctrl, whose index is i.
// Returns immediately unless mem_trained[i] is 0x80 on entry.
// mem_trained[i] records the outcome:
//   0x81  train_DqsRcvrEn() pass 1 failed
//   0x82  train_DqsPos() failed
//   0x83  train_DqsRcvrEn() pass 2 failed
//   1     set at the end when the value is still 0x80
// MTRR setup and teardown are compiled in only when MEM_TRAIN_SEQ == 1.
// The parameter v is not referenced in the visible lines; the declarations of
// ii and tsc, the timing code and the bodies of the failure branches beyond
// the status assignment are not visible in this view.
1914 static void dqs_timing(int i, const struct mem_controller *ctrl, struct sys_info *sysinfo, unsigned v)
1921 if(sysinfo->mem_trained[i] != 0x80) return;
1923 #if MEM_TRAIN_SEQ == 1
1924 //need to enable mtrr, so dqs training could access the test address
1925 setup_mtrr_dqs(sysinfo->tom_k, sysinfo->tom2_k);
1928 fill_mem_cs_sysinfo(i, ctrl, sysinfo);
1933 printk_debug("set DQS timing:RcvrEn:Pass1: %02x\n", i);
1935 if(train_DqsRcvrEn(ctrl, 1, sysinfo)) {
1936 sysinfo->mem_trained[i]=0x81; //
1941 printk_debug(" done\r\n");
1943 printk_debug("set DQS timing:DQSPos: %02x\n", i);
1946 if(train_DqsPos(ctrl, sysinfo)) {
1947 sysinfo->mem_trained[i]=0x82; //
1952 printk_debug(" done\r\n");
1955 printk_debug("set DQS timing:RcvrEn:Pass2: %02x\n", i);
1957 if(train_DqsRcvrEn(ctrl, 2, sysinfo)){
1958 sysinfo->mem_trained[i]=0x83; //
1963 printk_debug(" done\r\n");
1969 #if MEM_TRAIN_SEQ == 1
1970 clear_mtrr_dqs(sysinfo->tom2_k);
1974 for(ii=0;ii<4;ii++) {
1975 print_debug_dqs_tsc_x("Total DQS Training : tsc ", ii, tsc[ii].hi, tsc[ii].lo);
// Still 0x80 means no stage above recorded a failure code.
1979 if(sysinfo->mem_trained[i] == 0x80) {
1980 sysinfo->mem_trained[i]=1;
1986 #if MEM_TRAIN_SEQ == 1
// Train one node: run dqs_timing() on sysinfo->ctrl[nodeid] with v = 0, then
// copy the resulting mem_trained[nodeid] status from sysinfo to sysinfox.
// The two memcpy calls that would also copy the trained delay tables to
// sysinfox are commented out, so only the status is propagated here.
1987 static void train_ram(unsigned nodeid, struct sys_info *sysinfo, struct sys_info *sysinfox)
1989 dqs_timing(nodeid, &sysinfo->ctrl[nodeid], sysinfo, 0); // keep the output tidy
1990 // memcpy(&sysinfox->dqs_rcvr_dly_a[nodeid * 2 * 8],&sysinfo->dqs_rcvr_dly_a[nodeid * 2 * 8], 2*8);
1991 // memcpy(&sysinfox->dqs_delay_a[nodeid * 2 * 2 * 9], &sysinfo->dqs_delay_a[nodeid * 2 * 2 * 9], 2 * 2 * 9);
1992 sysinfox->mem_trained[nodeid] = sysinfo->mem_trained[nodeid];
1995 static void copy_and_run_ap_code_in_car(unsigned ret_addr);
// Entry point for training the memory of node nodeid; only core 0 proceeds,
// any other coreid returns immediately.
// sysinfox points at a fixed address computed from CONFIG_LB_MEM_TOPK and
// DCACHE_RAM_GLOBAL_VAR_SIZE. After wait_till_sysinfo_in_ram() the node is
// handled only when sysinfox->mem_trained[nodeid] is 0x80: data is copied
// from sysinfox into sysinfo, the TOP_MEM MSRs are set, and then either
// train_ram() is called (CONFIG_AP_CODE_IN_CAR == 0) or
// copy_and_run_ap_code_in_car(retcall) is used.
// Both a field-by-field copy and a whole-block memcpy are visible; they are
// presumably alternatives under a preprocessor condition on lines not shown.
// The end of this function is not visible in this view.
// NOTE(review): sysinfox is initialised from an integer expression without a
// pointer cast.
1996 static inline void train_ram_on_node(unsigned nodeid, unsigned coreid, struct sys_info *sysinfo, unsigned retcall)
1998 if(coreid) return; // only do it on core0
1999 struct sys_info *sysinfox = ((CONFIG_LB_MEM_TOPK<<10) - DCACHE_RAM_GLOBAL_VAR_SIZE);
2000 wait_till_sysinfo_in_ram(); // use pci to get it
2002 if(sysinfox->mem_trained[nodeid] == 0x80) {
2004 sysinfo->tom_k = sysinfox->tom_k;
2005 sysinfo->tom2_k = sysinfox->tom2_k;
2006 sysinfo->meminfo[nodeid].is_Width128 = sysinfox->meminfo[nodeid].is_Width128;
2007 sysinfo->mem_trained[nodeid] = sysinfox->mem_trained[nodeid];
2008 memcpy(&sysinfo->ctrl[nodeid], &sysinfox->ctrl[nodeid], sizeof(struct mem_controller));
2010 memcpy(sysinfo, sysinfox, DCACHE_RAM_GLOBAL_VAR_SIZE);
2012 set_top_mem_ap(sysinfo->tom_k, sysinfo->tom2_k); // keep the ap's tom consistent with bsp's
2013 #if CONFIG_AP_CODE_IN_CAR == 0
2014 printk_debug("CODE IN ROM AND RUN ON NODE: %02x\n", nodeid);
2015 train_ram(nodeid, sysinfo, sysinfox);
2017 /* Can copy dqs_timing to ap cache and run from cache?
2018 * we need coreboot_ap_car.rom? and treat it as coreboot_ram.rom for ap ?
2020 copy_and_run_ap_code_in_car(retcall);
2021 // will go back by jump