2 * This file is part of the coreboot project.
4 * Copyright (C) 2005 YingHai Lu
5 * Copyright (C) 2008 Advanced Micro Devices, Inc.
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; version 2 of the License.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
/* Debug verbosity for the DQS training code: the print_debug_dqs helpers
 * emit a message only when DQS_TRAIN_DEBUG is greater than the message's level. */
21 //0: means no debug info
22 #define DQS_TRAIN_DEBUG 0
/*
 * Debug helper: print "str", then "val" in hex, then CR/LF.
 * Output is produced only when DQS_TRAIN_DEBUG is non-zero and greater than
 * "level". With CONFIG_USE_PRINTK_IN_CAR the message goes through
 * printk_debug(); the print_debug()/print_debug_hex32() sequence is the
 * alternative output path.
 */
24 static inline void print_debug_dqs(const char *str, unsigned val, unsigned level)
26 #if DQS_TRAIN_DEBUG > 0
27 if(DQS_TRAIN_DEBUG > level) {
28 #if CONFIG_USE_PRINTK_IN_CAR
29 printk_debug("%s%x\r\n", str, val);
31 print_debug(str); print_debug_hex32(val); print_debug("\r\n");
/*
 * Debug helper: print two labelled values on one line, as
 * "str" val "str2" val2 CR/LF, each value as 8 hex digits.
 * Only active when DQS_TRAIN_DEBUG is non-zero and greater than "level".
 */
37 static inline void print_debug_dqs_pair(const char *str, unsigned val, const char *str2, unsigned val2, unsigned level)
39 #if DQS_TRAIN_DEBUG > 0
40 if(DQS_TRAIN_DEBUG > level) {
41 #if CONFIG_USE_PRINTK_IN_CAR
42 printk_debug("%s%08x%s%08x\r\n", str, val, str2, val2);
44 print_debug(str); print_debug_hex32(val); print_debug(str2); print_debug_hex32(val2); print_debug("\r\n");
/*
 * Debug helper: print "str[i]=" followed by "val" and "val2" back to back
 * (8 hex digits each, i as 2 hex digits), then CR/LF.
 * Only active when DQS_TRAIN_DEBUG is non-zero and greater than "level".
 * NOTE(review): the name suggests val/val2 are the two halves of a TSC
 * reading -- confirm against the callers.
 */
50 static inline void print_debug_dqs_tsc(const char *str, unsigned i, unsigned val, unsigned val2, unsigned level)
52 #if DQS_TRAIN_DEBUG > 0
53 if(DQS_TRAIN_DEBUG > level) {
54 #if CONFIG_USE_PRINTK_IN_CAR
55 printk_debug("%s[%02x]=%08x%08x\r\n", str, i, val, val2);
57 print_debug(str); print_debug("["); print_debug_hex8(i); print_debug("]="); print_debug_hex32(val); print_debug_hex32(val2); print_debug("\r\n");
/*
 * Same output format as print_debug_dqs_tsc() ("str[i]=" val val2 CR/LF),
 * but it takes no level argument and the visible code has no
 * DQS_TRAIN_DEBUG check around the print.
 */
63 static inline void print_debug_dqs_tsc_x(const char *str, unsigned i, unsigned val, unsigned val2)
65 #if CONFIG_USE_PRINTK_IN_CAR
66 printk_debug("%s[%02x]=%08x%08x\r\n", str, i, val, val2);
68 print_debug(str); print_debug("["); print_debug_hex8(i); print_debug("]="); print_debug_hex32(val); print_debug_hex32(val2); print_debug("\r\n");
/*
 * Cache per-node memory-map registers in sysinfo so later code can look them
 * up without PCI config reads:
 *   - mem_base[nodeid]     <- function 1, offset 0x40 + nodeid*8
 *   - cs_base[nodeid*8+i]  <- function 2, offset 0x40 + i*4
 *   - hole_reg[nodeid]     <- function 1, offset 0xf0
 * NOTE(review): the cs_base indexing implies up to 8 chip selects per node;
 * confirm the range of i.
 */
73 static void fill_mem_cs_sysinfo(unsigned nodeid, const struct mem_controller *ctrl, struct sys_info *sysinfo)
77 sysinfo->mem_base[nodeid] = pci_read_config32(ctrl->f1, 0x40 + (nodeid<<3));
80 sysinfo->cs_base[nodeid*8+i] = pci_read_config32(ctrl->f2, 0x40 + (i<<2));
83 sysinfo->hole_reg[nodeid] = pci_read_config32(ctrl->f1, 0xf0);
/*
 * Compute a system address inside chip select "cs_idx" of the controller's
 * node, using the register values cached in sysinfo (cs_base, mem_base and,
 * when HW_MEM_HOLE_SIZEK != 0, hole_reg).
 *
 * The working value is kept in units of 256 bytes (address >> 8): callers
 * pass result>>24 to set_FSBASE() and use result<<8 as the low 32 bits, and
 * the "1MB offset" below is written as 1<<(20-8).
 *
 * If the address lies at or above the hardware memory hole start and below
 * 4GB, it is moved up by the hole size (4GB - hole start).
 */
86 static unsigned Get_MCTSysAddr(const struct mem_controller *ctrl, unsigned cs_idx, struct sys_info *sysinfo)
90 unsigned nodeid = ctrl->node_id;
92 #if HW_MEM_HOLE_SIZEK != 0
96 //get the local base addr of the chipselect
97 dword = sysinfo->cs_base[nodeid * 8 + cs_idx];
100 //sys addr= node base + local cs base
101 mem_base = sysinfo->mem_base[nodeid];
102 mem_base &= 0xffff0000;
105 #if HW_MEM_HOLE_SIZEK != 0
106 hole_reg = sysinfo->hole_reg[nodeid];
108 unsigned hole_startk;
/* bits 31:24 of the hole register, converted to a start address in KB */
109 hole_startk = (hole_reg & (0xff<<24)) >> 10;
/* KB << 2 converts to the 256-byte units used by dword */
110 if( (dword >= (hole_startk<<2)) && (dword < ((4*1024*1024)<<2))) {
111 dword += ((4*1024*1024 - hole_startk)<<2);
116 //add 1MB offset to avoid compat area
117 dword += (1<<(20-8));
119 //So final result is upper 32 bit addr
/*
 * Return the test address for a receiver / chip select. This simply forwards
 * to Get_MCTSysAddr(); the "channel" argument is not used by the call.
 */
125 static unsigned Get_RcvrSysAddr(const struct mem_controller * ctrl, unsigned channel, unsigned cs_idx, struct sys_info *sysinfo)
127 return Get_MCTSysAddr(ctrl, cs_idx, sysinfo);
/* Read the CR4 control register into a local variable via inline asm. */
131 static inline unsigned long read_cr4(void)
134 asm volatile ("movl %%cr4, %0" : "=r" (cr4));
/* Load "cr4" into the CR4 control register via inline asm. */
138 static inline void write_cr4(unsigned long cr4)
140 asm volatile ("movl %0, %%cr4" : : "r" (cr4));
/*
 * NOTE(review): presumably turns on SSE/SSE2 instruction support (the
 * test-pattern writer in this file uses movdqa/movntdq) -- confirm against
 * the function body.
 */
144 static inline void enable_sse2()
/*
 * NOTE(review): presumably the counterpart of enable_sse2(), turning SSE/SSE2
 * support back off -- confirm against the function body.
 */
152 static inline void disable_sse2()
/*
 * Read-modify-write of MSR 0xc0010015.
 * NOTE(review): going by the name this sets the Wrap32Dis bit so 32-bit code
 * can reach memory above 4GB through the FS base -- confirm which bit is set.
 */
161 static void set_wrap32dis(void) {
164 msr = rdmsr(0xc0010015);
167 wrmsr(0xc0010015, msr);
/*
 * Read-modify-write of MSR 0xc0010015.
 * NOTE(review): going by the name this clears the bit that set_wrap32dis()
 * sets -- confirm which bit is cleared.
 */
171 static void clear_wrap32dis(void) {
174 msr = rdmsr(0xc0010015);
177 wrmsr(0xc0010015, msr);
/*
 * Program the FS segment base MSR (0xc0000100) so that subsequent
 * FS-prefixed accesses reach the memory under test.
 * Callers in this file pass (address >> 24) as addr_hi, where the address is
 * kept in 256-byte units.
 * NOTE(review): presumably addr_hi ends up in the high half of the MSR --
 * confirm how the msr fields are filled.
 */
181 static void set_FSBASE(uint32_t addr_hi)
185 //set fs and use fs prefix to access the mem
188 wrmsr(0xc0000100, msr); //FS_BASE
/*
 * Look up the cached chip-select base register for "cs_idx" on the
 * controller's node (sysinfo->cs_base[nodeid * 8 + cs_idx]).
 * Callers treat a zero return as "chip select not present".
 * NOTE(review): presumably only the enable bit of the register is returned --
 * confirm the masking applied to "enabled".
 */
192 static unsigned ChipSelPresent(const struct mem_controller *ctrl, unsigned cs_idx, struct sys_info *sysinfo)
195 unsigned nodeid = ctrl->node_id;
198 enabled = sysinfo->cs_base[nodeid * 8 + cs_idx];
/*
 * Report whether the rank behind chip select "cs_idx" is usable for receiver
 * training. This forwards to ChipSelPresent(); "channel" and "is_Width128"
 * are not used by the call.
 */
205 static unsigned RcvrRankEnabled(const struct mem_controller *ctrl, int channel, int cs_idx, unsigned is_Width128, struct sys_info *sysinfo)
207 return ChipSelPresent(ctrl, cs_idx, sysinfo);
/*
 * Copy test-pattern data from buf_a to FS:addr_lo using 16-byte SSE2 moves:
 * movdqa loads 16 bytes from the buffer and movntdq stores them
 * (non-temporal) to the FS-relative destination.
 * Register setup: eax = destination offset, ebx = source buffer,
 * edx = 16, ecx = line_num * 4.
 * NOTE(review): presumably ecx counts 16-byte chunks (4 per 64-byte cache
 * line) and edx is the per-iteration step -- confirm against the loop body.
 * movdqa requires buf_a to be 16-byte aligned; the FS base must have been set
 * by the caller.
 */
210 static void WriteLNTestPattern(unsigned addr_lo, uint8_t *buf_a, unsigned line_num)
214 "movdqa (%3), %%xmm0\n\t"
215 "movntdq %%xmm0, %%fs:(%0)\n\t" /* xmm0 is 128 bit */
220 :: "a" (addr_lo), "d" (16), "c" (line_num * 4), "b"(buf_a)
/*
 * Write one cache line of test pattern to "addr" (256-byte units).
 * p == 1 selects buf_b as the source, any other value selects buf_a.
 * The FS base is set from addr>>24 and the low part of the address is
 * addr<<8.
 */
226 static void Write1LTestPattern(unsigned addr, unsigned p, uint8_t *buf_a, uint8_t *buf_b)
229 if(p==1) { buf = buf_b; }
230 else { buf = buf_a; }
232 set_FSBASE (addr>>24);
234 WriteLNTestPattern(addr<<8, buf, 1);
/*
 * Touch the cache line at "addr" (256-byte units) with a single 32-bit
 * FS-relative load so the line is brought into the cache; the loaded value
 * lands in a local variable.
 */
237 static void Read1LTestPattern(unsigned addr)
241 set_FSBASE(addr>>24);
243 /* 1st move causes read fill (to exclusive or shared)*/
245 "movl %%fs:(%1), %0\n\t"
246 :"=b"(value): "a" (addr<<8)
/* Values of the "Pass" argument used by the training routines. */
254 #define DQS_FIRST_PASS 1
255 #define DQS_SECOND_PASS 2
/*
 * Training error codes (SB_*) and tuning constants.
 * NOTE(review): the SB_* values are OR'ed directly into "Errors"
 * (e.g. Errors |= SB_NORCVREN), not used as 1<<SB_*; that only works as a
 * zero / non-zero flag. Confirm whether bit positions were intended.
 */
257 #define SB_NORCVREN 11
/* RCVREN_MARGIN only appears inside commented-out expressions in this file. */
258 #define RCVREN_MARGIN 6
259 #define SB_SmallRCVR 13
260 #define SB_CHA2BRCVREN 12
261 #define SB_NODQSPOS 14
/* Smallest acceptable passing window (max - min delay) in TrainDQSPos(). */
262 #define MIN_DQS_WNDW 3
263 #define SB_SMALLDQS 15
/*
 * Compare the first quadword at test address "addr" (256-byte units) against
 * the expected test pattern, one 32-bit half at a time.
 *
 * The expected data is chosen from TestPattern0/TestPattern1 on the first
 * pass (depending on "pattern") and from TestPattern2 otherwise. For a
 * 128-bit wide interface, channel 1 data is read 8 bytes further on.
 * The high half is only checked when the low half matched.
 *
 * The result starts out as DQS_FAIL; on DQS_SECOND_PASS the outcome is
 * inverted before being returned (see the comment at the end of the body).
 */
266 static unsigned CompareTestPatternQW0(unsigned channel, unsigned addr, unsigned pattern, const uint32_t *TestPattern0, const uint32_t *TestPattern1, const uint32_t *TestPattern2, unsigned Pass, unsigned is_Width128)
272 unsigned result = DQS_FAIL;
274 if(Pass == DQS_FIRST_PASS) {
276 test_buf = (uint32_t *)TestPattern1;
279 test_buf = (uint32_t *)TestPattern0;
283 test_buf = (uint32_t *)TestPattern2;
286 set_FSBASE(addr>>24);
290 if(is_Width128 && (channel == 1)) {
291 addr_lo += 8; //second channel
296 "movl %%fs:(%1), %0\n\t"
297 :"=b"(value): "a" (addr_lo)
300 value_test = *test_buf;
303 print_debug_dqs_pair("\t\t\t\t\t\tQW0.lo : test_buf= ", (unsigned)test_buf, " value = ", value_test, 4);
304 print_debug_dqs_pair("\t\t\t\t\t\tQW0.lo : addr_lo = ", addr_lo, " value = ", value, 4);
306 if(value == value_test) {
310 "movl %%fs:(%1), %0\n\t"
311 :"=b"(value): "a" (addr_lo)
313 value_test = *test_buf;
314 print_debug_dqs_pair("\t\t\t\t\t\tQW0.hi : test_buf= ", (unsigned)test_buf, " value = ", value_test, 4);
315 print_debug_dqs_pair("\t\t\t\t\t\tQW0.hi : addr_lo = ", addr_lo, " value = ", value, 4);
317 if(value == value_test){
322 if(Pass == DQS_SECOND_PASS) { // second pass need to be inverted
323 if(result==DQS_PASS) {
/*
 * Program the MaxAsyncLat field of DRAM_CONFIG_HIGH from a receiver enable
 * delay "dly".
 * The delay is converted from 50ps units to whole nanoseconds, rounding up
 * ((dly + 19) / 20), and stored relative to DCH_MaxAsyncLat_BASE.
 * NOTE(review): the value is shifted into the register without a visible
 * range check or mask; confirm it cannot exceed the field width or fall
 * below DCH_MaxAsyncLat_BASE.
 */
335 static void SetMaxAL_RcvrDly(const struct mem_controller *ctrl, unsigned dly)
339 dly += (20-1); // round it
340 dly /= 20; // convert from unit 50ps to 1ns
345 reg = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH);
346 reg &= ~(DCH_MaxAsyncLat_MASK <<DCH_MaxAsyncLat_SHIFT);
347 reg |= ((dly - DCH_MaxAsyncLat_BASE) << DCH_MaxAsyncLat_SHIFT);
348 pci_write_config32(ctrl->f2, DRAM_CONFIG_HIGH, reg);
353 Set the Target range to WT IO (using an IORR overlapping the already existing
354 WB dram type). Use IORR0
/*
 * Cover the test target with IORR0: writes the IORR0 base MSR (0xc0010016)
 * and then the IORR0 mask MSR (0xc0010017) with a low word of 0xfc000800,
 * which the code labels as a 64MB mask.
 * NOTE(review): presumably the base is derived from "addr" (256-byte units)
 * -- confirm how the base msr fields are filled.
 */
356 static void SetTargetWTIO(unsigned addr)
361 wrmsr(0xc0010016, msr); //IORR0 BASE
364 msr.lo = 0xfc000800; // 64MB Mask
365 wrmsr(0xc0010017, msr); // IORR0 Mask
/*
 * Rewrite the IORR0 mask MSR (0xc0010017).
 * NOTE(review): presumably this clears the mask to disable the range set up
 * by SetTargetWTIO() -- confirm the value written.
 */
368 static void ResetTargetWTIO(void)
374 wrmsr(0xc0010017, msr); // IORR0 Mask
/*
 * Flush the cache line at "addr" (256-byte units): the FS base is set from
 * addr>>24 and an FS-relative clflush is issued.
 */
377 static void proc_CLFLUSH(unsigned addr)
380 set_FSBASE(addr>>24);
382 /* 1st move causes read fill (to exclusive or shared)*/
384 /* clflush fs:[eax] */
385 "clflush %%fs:(%0)\n\t"
/*
 * Used by TrainRcvrEn() after each pattern compare to get rid of the cached
 * test line.
 * NOTE(review): presumably wraps proc_CLFLUSH() between SetTargetWTIO() and
 * ResetTargetWTIO() -- confirm against the function body.
 */
390 static void proc_IOCLFLUSH(unsigned addr)
/*
 * Read an indexed DCT register through the 0x98 index port (starting with
 * index 0x10) and write the same value straight back, twice.
 * NOTE(review): presumably the write-back itself is what resets the DCT
 * write pointer, and the second read/write targets the other channel's
 * index -- confirm how "index" changes between the two pairs.
 */
397 static void ResetDCTWrPtr(const struct mem_controller *ctrl)
400 unsigned index = 0x10;
402 dword = pci_read_config32_index_wait(ctrl->f2, 0x98, index);
403 pci_write_config32_index_wait(ctrl->f2, 0x98, index, dword);
406 dword = pci_read_config32_index_wait(ctrl->f2, 0x98, index);
407 pci_write_config32_index_wait(ctrl->f2, 0x98, index, dword);
/*
 * Return the memory clock cycle time scaled by 1000 for memory clock
 * selector "i" (0..3, one column of the tables below).
 *
 * T1000_a holds the nominal value per selector. TT_a holds one row of four
 * values per table index; the row is chosen from the current CPU FID
 * (low 6 bits of MSR 0xc0010042).
 * NOTE(review): confirm how "index" is derived from fid_cur.
 * NOTE(review): the rows visible in TT_a are labelled 4..15 (12 rows,
 * 48 entries), yet only index > 12 falls back to T1000_a, so index == 12
 * would read TT_a[48 + i], past the end of those rows. Confirm whether the
 * check should be index > 11.
 */
412 static uint16_t get_exact_T1000(unsigned i)
415 static const uint16_t T1000_a[]= { 5000, 3759, 3003, 2500 };
417 static const uint16_t TT_a[] = {
419 /*4 */ 6250, 6250, 6250, 6250,
420 /*5 */ 5000, 5000, 5000, 2500,
421 /*6 */ 5000, 4166, 4166, 2500,
422 /*7 */ 5000, 4285, 3571, 2500,
424 /*8 */ 5000, 3750, 3125, 2500,
425 /*9 */ 5000, 3888, 3333, 2500,
426 /*10*/ 5000, 4000, 3000, 2500,
427 /*11*/ 5000, 4090, 3181, 2500,
429 /*12*/ 5000, 3750, 3333, 2500,
430 /*13*/ 5000, 3846, 3076, 2500,
431 /*14*/ 5000, 3928, 3214, 2500,
432 /*15*/ 5000, 4000, 3000, 2500,
439 msr = rdmsr(0xc0010042);
440 fid_cur = msr.lo & 0x3f;
444 if(index>12) return T1000_a[i];
446 return TT_a[index * 4+i];
/*
 * Preset the DQS timing registers before receiver enable training.
 * Through the function 2 index port at 0x98 it writes one value to indices
 * 0x01-0x03 and 0x21-0x23, and another value to indices 0x05-0x07 and
 * 0x25-0x27. Per the comments in the body these values are 0x00 and 0x2f
 * for all bytes respectively.
 * NOTE(review): both loop comments say "DQS Write Timing Control"; the
 * second group (0x05-0x07) is probably the read timing registers -- confirm.
 */
450 static void InitDQSPos4RcvrEn(const struct mem_controller *ctrl)
456 for(i=1; i<=3; i++) {
457 /* Program the DQS Write Timing Control Registers (Function 2:Offset 0x9c, index 0x01-0x03, 0x21-0x23) to 0x00 for all bytes */
458 pci_write_config32_index_wait(ctrl->f2, 0x98, i, dword);
459 pci_write_config32_index_wait(ctrl->f2, 0x98, i+0x20, dword);
463 for(i=5; i<=7; i++) {
464 /* Program the DQS Write Timing Control Registers (Function 2:Offset 0x9c, index 0x05-0x07, 0x25-0x27) to 0x2f for all bytes */
465 pci_write_config32_index_wait(ctrl->f2, 0x98, i, dword);
466 pci_write_config32_index_wait(ctrl->f2, 0x98, i+0x20, dword);
/* Default to enabling the rev F0/F1 workaround unless the build already
 * defined K8_REV_F_SUPPORT_F0_F1_WORKAROUND. */
471 #ifndef K8_REV_F_SUPPORT_F0_F1_WORKAROUND
472 #define K8_REV_F_SUPPORT_F0_F1_WORKAROUND 1
/*
 * DQS receiver enable training for one memory controller.
 *
 * Outline, as far as the code below shows:
 *   - on the first pass, preset the DQS position registers;
 *   - save the DIMM ECC enable bit and switch ECC off while training;
 *   - on the first pass, set DqsRcvEnTrain in DRAM_CTRL (cleared again at
 *     the end);
 *   - build two 16-byte aligned pattern buffers on the stack
 *     (TestPattern0/TestPattern1 on the first pass, TestPattern2 otherwise);
 *   - for each channel and each receiver pair (receiver = 0, 2, 4, 6) whose
 *     rank is enabled: write the patterns to two addresses 4MB apart per
 *     rank, sweep the receiver enable delay up to 0xaf, and record the first
 *     delay at which the read-back compare goes from fail to pass
 *     (RcvrEnDlyRmin);
 *   - store the result in sysinfo->dqs_rcvr_dly_a and program it into the
 *     per-receiver index register (index and index+0x20);
 *   - program MaxAsyncLat from the largest delay found (CTLRMaxDelay).
 *
 * "Pass" is DQS_FIRST_PASS or DQS_SECOND_PASS; the second pass starts from
 * the stored first-pass delay and combines both results.
 *
 * Returns 1 when CTLRMaxDelay equals 0xae, 0 otherwise. The "Errors" value
 * collected along the way only terminates the loops; it is not part of the
 * return value.
 */
475 static unsigned TrainRcvrEn(const struct mem_controller *ctrl, unsigned Pass, struct sys_info *sysinfo)
478 static const uint32_t TestPattern0[] = {
479 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
480 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
481 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
482 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
484 static const uint32_t TestPattern1[] = {
485 0x55555555, 0x55555555, 0x55555555, 0x55555555,
486 0x55555555, 0x55555555, 0x55555555, 0x55555555,
487 0x55555555, 0x55555555, 0x55555555, 0x55555555,
488 0x55555555, 0x55555555, 0x55555555, 0x55555555,
490 static const uint32_t TestPattern2[] = {
491 0x12345678, 0x87654321, 0x23456789, 0x98765432,
492 0x59385824, 0x30496724, 0x24490795, 0x99938733,
493 0x40385642, 0x38465245, 0x29432163, 0x05067894,
494 0x12349045, 0x98723467, 0x12387634, 0x34587623,
497 uint8_t pattern_buf_x[64 * 4 + 16]; // Room for the test-pattern cache lines plus 16 extra bytes so the buffers can be 16-byte aligned
498 uint8_t *buf_a, *buf_b;
501 uint8_t *dqs_rcvr_dly_a = &sysinfo->dqs_rcvr_dly_a[ctrl->node_id * 2* 8] ; //8 node, channel 2, receiver 8
505 unsigned channel, receiver;
508 unsigned CTLRMaxDelay;
513 unsigned Test0, Test1;
515 unsigned RcvrEnDlyRmin;
523 unsigned TestAddr0, TestAddr0B, TestAddr1, TestAddr1B = 0;
525 unsigned CurrRcvrCHADelay = 0;
529 unsigned is_Width128 = sysinfo->meminfo[ctrl->node_id].is_Width128;
533 if(Pass == DQS_FIRST_PASS) {
534 InitDQSPos4RcvrEn(ctrl);
/* Remember the DIMM ECC enable bit, then turn ECC off for the duration of training. */
544 dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
545 ecc_bit = dword & DCL_DimmEccEn;
546 dword &= ~(DCL_DimmEccEn);
547 pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dword);
550 if(Pass == DQS_FIRST_PASS) {
551 #if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
552 cpu_f0_f1 = is_cpu_pre_f2_in_bsp(ctrl->node_id);
557 /* Set the DqsRcvEnTrain bit */
558 dword = pci_read_config32(ctrl->f2, DRAM_CTRL);
559 dword |= DC_DqsRcvEnTrain;
560 pci_write_config32(ctrl->f2, DRAM_CTRL, dword);
565 //get T1000 figures (cycle time (ns)) * 1K
566 dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH);
567 dword &= DCH_MemClkFreq_MASK;
569 T1000 = get_exact_T1000(dword);
/* Round the buffer start up to the next 16-byte boundary; buf_b follows 128 bytes after buf_a. */
572 buf_a = (uint8_t *)(((uint32_t)(&pattern_buf_x[0]) + 0x10) & (0xfffffff0));
573 buf_b = buf_a + 128; //??
574 if(Pass==DQS_FIRST_PASS) {
576 *((uint32_t *)(buf_a + i*4)) = TestPattern0[i];
577 *((uint32_t *)(buf_b + i*4)) = TestPattern1[i];
582 *((uint32_t *)(buf_a + i*4)) = TestPattern2[i];
583 *((uint32_t *)(buf_b + i*4)) = TestPattern2[i];
587 print_debug_dqs("\r\nTrainRcvEn: 0 ctrl", ctrl->node_id, 0);
589 print_debug_addr("TrainRcvEn: buf_a:", buf_a);
592 /* for each channel */
596 if (!(sysinfo->meminfo[ctrl->node_id].dimm_mask & 0x0F) &&
597 (sysinfo->meminfo[ctrl->node_id].dimm_mask & 0xF0)) { /* channelB only? */
601 for ( ; (channel < 2) && (!Errors); channel++)
603 print_debug_dqs("\tTrainRcvEn51: channel ",channel, 1);
606 /* there are four receiver pairs, loosely associated with CS */
607 for( receiver = 0; (receiver < 8) && (!Errors); receiver+=2)
/* one index register per receiver pair: 0x10, 0x13, 0x16, 0x19 */
610 unsigned index=(receiver>>1) * 3 + 0x10;
612 print_debug_dqs("\t\tTrainRcvEn52: index ", index, 2);
616 dword = pci_read_config32_index_wait(ctrl->f2, 0x98, index);
617 CurrRcvrCHADelay= dword & 0xff;
/* 0xaf doubles as the "no passing delay found" marker checked after the sweep */
627 RcvrEnDlyRmin = 0xaf;
629 if(!RcvrRankEnabled(ctrl, channel, receiver, is_Width128, sysinfo)) continue;
631 /* for each DQS receiver enable setting */
633 TestAddr0 = Get_RcvrSysAddr(ctrl, channel, receiver, sysinfo);
635 TestAddr0B = TestAddr0 + (1<<(20+2-8)); // 4MB
637 if(RcvrRankEnabled(ctrl, channel, receiver+1, is_Width128, sysinfo)) {
638 TestAddr1 = Get_RcvrSysAddr(ctrl, channel, receiver+1, sysinfo);
639 TestAddr1B = TestAddr1 + (1<<(20+2-8)); //4MB
646 print_debug_dqs("\t\tTrainRcvEn53: TestAddr0B ", TestAddr0B, 2);
648 Write1LTestPattern(TestAddr0, 0, buf_a, buf_b); // rank0 of dimm, test p0
649 Write1LTestPattern(TestAddr0B, 1, buf_a, buf_b); //rank0 of dimm, test p1
652 Write1LTestPattern(TestAddr1, 0, buf_a, buf_b); //rank 1 of dimm
653 Write1LTestPattern(TestAddr1B, 1, buf_a, buf_b);//rank 1 of dimm
656 if(Pass == DQS_FIRST_PASS) {
659 RcvrEnDly = dqs_rcvr_dly_a[channel * 8 + receiver];
662 while ( RcvrEnDly < 0xaf) { // Sweep Delay value here
663 print_debug_dqs("\t\t\tTrainRcvEn541: RcvrEnDly ", RcvrEnDly, 3);
666 /* Odd steps get another pattern such that even
667 and odd steps alternate.
668 The pointers to the patterns will be swapped
669 at the end of the loop so they are correspond
680 /* Program current Receiver enable delay */
681 pci_write_config32_index_wait(ctrl->f2, 0x98, index, RcvrEnDly);
682 /* FIXME: 64bit MUX */
685 /* Program current Receiver enable delay channel b */
686 pci_write_config32_index_wait(ctrl->f2, 0x98, index+ 0x20, RcvrEnDly);
689 /* Program the MaxAsyncLat filed with the
690 current DQS receiver enable setting plus 6ns
692 /*Porgram MaxAsyncLat to correspond with current delay */
693 SetMaxAL_RcvrDly(ctrl, RcvrEnDly);
697 Read1LTestPattern(TestAddr0); //Cache Fill
698 /* ROM vs cache compare */
699 Test0 = CompareTestPatternQW0(channel, TestAddr0, PatternA, TestPattern0, TestPattern1, TestPattern2, Pass, is_Width128);
700 proc_IOCLFLUSH(TestAddr0);
704 print_debug_dqs("\t\t\tTrainRcvEn542: Test0 ", Test0, 3);
706 if(Test0 == DQS_PASS) {
708 Read1LTestPattern(TestAddr0B);
709 Test1 = CompareTestPatternQW0(channel, TestAddr0B, PatternB, TestPattern0, TestPattern1, TestPattern2, Pass, is_Width128);
710 proc_IOCLFLUSH(TestAddr0B);
714 print_debug_dqs("\t\t\tTrainRcvEn543: Test1 ", Test1, 3);
716 if(Test1 == DQS_PASS) {
718 Read1LTestPattern(TestAddr1);
719 Test0 = CompareTestPatternQW0(channel, TestAddr1, PatternA, TestPattern0, TestPattern1, TestPattern2, Pass, is_Width128);
720 proc_IOCLFLUSH(TestAddr1);
723 if(Test0 == DQS_PASS) {
724 Read1LTestPattern(TestAddr1B);
725 Test1 = CompareTestPatternQW0(channel, TestAddr1B, PatternB, TestPattern0, TestPattern1, TestPattern2, Pass, is_Width128);
726 proc_IOCLFLUSH(TestAddr1B);
729 if(Test1 == DQS_PASS) {
733 print_debug_dqs("\t\t\tTrainRcvEn544: Test0 ", Test0, 3);
741 print_debug_dqs("\t\t\tTrainRcvEn55: RcvrEnDly ", RcvrEnDly, 3);
743 if(CurrTest == DQS_PASS) {
744 if(LastTest == DQS_FAIL) {
745 RcvrEnDlyRmin = RcvrEnDly;
752 /* swap the rank 0 pointers */
754 TestAddr0 = TestAddr0B;
757 /* swap the rank 1 pointers */
759 TestAddr1 = TestAddr1B;
762 print_debug_dqs("\t\t\tTrainRcvEn56: RcvrEnDly ", RcvrEnDly, 3);
768 print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDly ", RcvrEnDly, 2);
770 if(RcvrEnDlyRmin == 0xaf) {
772 Errors |= SB_NORCVREN;
775 if(Pass == DQS_FIRST_PASS) {
776 // We need a better value for DQSPos training
777 RcvrEnDly = RcvrEnDlyRmin /* + RCVREN_MARGIN * T1000/64/50 */;
779 RcvrEnDly = RcvrEnDlyRmin;
782 if(RcvrEnDly > 0xae) {
783 //passing window too narrow, too far delayed
784 Errors |= SB_SmallRCVR;
788 if(Pass == DQS_SECOND_PASS) { //second pass must average values
789 RcvrEnDly += dqs_rcvr_dly_a[channel * 8 + receiver] /* - (RCVREN_MARGIN * T1000/64/50)*/;
793 dqs_rcvr_dly_a[channel * 8 + receiver] = RcvrEnDly;
795 //Set final RcvrEnDly for this DIMM and Channel
796 pci_write_config32_index_wait(ctrl->f2, 0x98, index, RcvrEnDly);
799 pci_write_config32_index_wait(ctrl->f2, 0x98, index+0x20, RcvrEnDly); // channel B
801 pci_write_config32_index_wait(ctrl->f2, 0x98, index, CurrRcvrCHADelay);
802 if(RcvrEnDly > CurrRcvrCHADelay) {
803 dword = RcvrEnDly - CurrRcvrCHADelay;
806 dword = CurrRcvrCHADelay - RcvrEnDly;
810 Errors |= SB_CHA2BRCVREN;
815 print_debug_dqs("\t\tTrainRcvEn63: RcvrEnDly ", RcvrEnDly, 2);
817 if(RcvrEnDly > CTLRMaxDelay) {
818 CTLRMaxDelay = RcvrEnDly;
821 print_debug_dqs("\t\tTrainRcvEn64: CTLRMaxDelay ", CTLRMaxDelay, 2);
826 print_debug_dqs("\tTrainRcvEn65: CTLRMaxDelay ", CTLRMaxDelay, 1);
828 /* Program the MaxAsyncLat field with the largest DQS Receiver Enable setting */
829 SetMaxAL_RcvrDly(ctrl, CTLRMaxDelay);
/* NOTE(review): presumably the ecc_bit saved at the start is OR'ed back in before this write -- confirm. */
833 dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
834 dword &= ~(DCL_DimmEccEn);
836 pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dword);
838 if(Pass == DQS_FIRST_PASS) {
839 #if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
843 dword = pci_read_config32(ctrl->f2, DRAM_CTRL);
844 dword &= ~DC_DqsRcvEnTrain;
845 pci_write_config32(ctrl->f2, DRAM_CTRL, dword);
853 //restore SSE2 setting
856 #if MEM_TRAIN_SEQ != 1
857 /* We need tidy output for type 1 */
858 #if CONFIG_USE_PRINTK_IN_CAR
859 printk_debug(" CTLRMaxDelay=%02x", CTLRMaxDelay);
861 print_debug(" CTLRMaxDelay="); print_debug_hex8(CTLRMaxDelay);
/* 1 when the largest delay found equals 0xae, 0 otherwise */
865 return (CTLRMaxDelay==0xae)?1:0;
/* Values of the "direction" argument of the DQS position routines; the value
 * also selects the register group (index offset direction * 4). */
869 #define DQS_READDIR 1
870 #define DQS_WRITEDIR 0
/*
 * Program the DQS delay of a single byte lane.
 * The register index is 1 + bytelane/4 + channel*0x20 + direction*4, accessed
 * through the function 2 index port at 0x98. Within that register the lane's
 * 6-bit field (at a multiple of 8 bits) is cleared and replaced by dqs_delay;
 * the other lanes in the register are left unchanged.
 * NOTE(review): dqs_delay is not masked to 6 bits before being OR'ed in --
 * callers must pass values below 0x40.
 */
873 static void SetDQSDelayCSR(const struct mem_controller *ctrl, unsigned channel, unsigned bytelane, unsigned direction, unsigned dqs_delay)
874 { //ByteLane could be 0-8, last is for ECC
881 index = (bytelane>>2) + 1 + channel * 0x20 + (direction << 2);
886 shift <<= 3; // 8 bit
888 dword = pci_read_config32_index_wait(ctrl->f2, 0x98, index);
889 dword &= ~(0x3f<<shift);
890 dword |= (dqs_delay<<shift);
891 pci_write_config32_index_wait(ctrl->f2, 0x98, index, dword);
/*
 * Program the same DQS delay into all byte lanes of a channel/direction.
 * A 32-bit value is built with dqs_delay placed at byte positions (i*8) and
 * written to consecutive index registers starting at
 * 1 + channel*0x20 + direction*4 (function 2, index port 0x98).
 * NOTE(review): presumably four bytes per register and three registers are
 * written -- confirm the loop bounds.
 */
895 static void SetDQSDelayAllCSR(const struct mem_controller *ctrl, unsigned channel, unsigned direction, unsigned dqs_delay)
904 dword |= dqs_delay<<(i*8);
907 index = 1 + channel * 0x20 + direction * 4;
910 pci_write_config32_index_wait(ctrl->f2, 0x98, index + i, dword);
/*
 * Return the midpoint of the passing window [min_d, max_d]:
 * min_d plus half of the window size. An odd window size gets special
 * handling first (rounding up, per the comment in the body).
 */
915 static unsigned MiddleDQS(unsigned min_d, unsigned max_d)
918 size_d = max_d-min_d;
919 if(size_d & 1) { //need round up
922 return ( min_d + (size_d>>1));
/*
 * Record a trained DQS delay in the caller's table.
 * Layout of dqs_delay_a: [channel][direction][bytelane] with 2 directions
 * and 9 byte lanes per direction, i.e. index channel*18 + direction*9 + bytelane.
 */
925 static inline void save_dqs_delay(unsigned channel, unsigned bytelane, unsigned direction, uint8_t *dqs_delay_a, uint8_t dqs_delay)
927 dqs_delay_a[channel * 2*9 + direction * 9 + bytelane] = dqs_delay;
/*
 * Write the DQS test pattern from buf_a to FS:addr_lo.
 * The length is (pattern + 1) * 9 cache lines: 9 lines for pattern 0,
 * 18 lines for pattern 1. The FS base must already be set by the caller.
 */
930 static void WriteDQSTestPattern(unsigned addr_lo, unsigned pattern , uint8_t *buf_a)
932 WriteLNTestPattern(addr_lo, buf_a, (pattern+1) * 9);
/*
 * Touch 18 consecutive 64-byte cache lines starting at FS:addr_lo with one
 * 32-bit load each. Five base registers are preloaded 4 lines apart
 * (esi = addr_lo+128, edi, ebx, ecx, edx) and each is used with the
 * displacements -128, -64, 0 and +64 (edx only with -128 and -64).
 * The loaded data is discarded.
 * NOTE(review): eax is passed as an input ("a"(0)) yet is overwritten by
 * every movl; confirm it is declared as an output or clobber so the compiler
 * does not assume it still holds 0 afterwards.
 */
935 static void ReadL18TestPattern(unsigned addr_lo)
937 //set fs and use fs prefix to access the mem
939 "movl %%fs:-128(%%esi), %%eax\n\t" //TestAddr cache line
940 "movl %%fs:-64(%%esi), %%eax\n\t" //+1
941 "movl %%fs:(%%esi), %%eax\n\t" //+2
942 "movl %%fs:64(%%esi), %%eax\n\t" //+3
944 "movl %%fs:-128(%%edi), %%eax\n\t" //+4
945 "movl %%fs:-64(%%edi), %%eax\n\t" //+5
946 "movl %%fs:(%%edi), %%eax\n\t" //+6
947 "movl %%fs:64(%%edi), %%eax\n\t" //+7
949 "movl %%fs:-128(%%ebx), %%eax\n\t" //+8
950 "movl %%fs:-64(%%ebx), %%eax\n\t" //+9
951 "movl %%fs:(%%ebx), %%eax\n\t" //+10
952 "movl %%fs:64(%%ebx), %%eax\n\t" //+11
954 "movl %%fs:-128(%%ecx), %%eax\n\t" //+12
955 "movl %%fs:-64(%%ecx), %%eax\n\t" //+13
956 "movl %%fs:(%%ecx), %%eax\n\t" //+14
957 "movl %%fs:64(%%ecx), %%eax\n\t" //+15
959 "movl %%fs:-128(%%edx), %%eax\n\t" //+16
960 "movl %%fs:-64(%%edx), %%eax\n\t" //+17
962 :: "a"(0), "b" (addr_lo+128+8*64), "c" (addr_lo+128+12*64), "d" (addr_lo +128+16*64), "S"(addr_lo+128), "D"(addr_lo+128+4*64)
/*
 * Touch 9 consecutive 64-byte cache lines starting at FS:addr_lo with one
 * 32-bit load each. Base registers: ecx = addr_lo+128, edx = four lines
 * further, ebx = eight lines further; the loaded data is discarded.
 * NOTE(review): eax is passed as an input ("a"(0)) yet is overwritten by
 * every movl; confirm it is declared as an output or clobber.
 */
967 static void ReadL9TestPattern(unsigned addr_lo)
970 //set fs and use fs prefix to access the mem
973 "movl %%fs:-128(%%ecx), %%eax\n\t" //TestAddr cache line
974 "movl %%fs:-64(%%ecx), %%eax\n\t" //+1
975 "movl %%fs:(%%ecx), %%eax\n\t" //+2
976 "movl %%fs:64(%%ecx), %%eax\n\t" //+3
978 "movl %%fs:-128(%%edx), %%eax\n\t" //+4
979 "movl %%fs:-64(%%edx), %%eax\n\t" //+5
980 "movl %%fs:(%%edx), %%eax\n\t" //+6
981 "movl %%fs:64(%%edx), %%eax\n\t" //+7
983 "movl %%fs:-128(%%ebx), %%eax\n\t" //+8
985 :: "a"(0), "b" (addr_lo+128+8*64), "c"(addr_lo+128), "d"(addr_lo+128+4*64)
/*
 * Read back the DQS test pattern at FS:addr_lo so it is pulled into the
 * cache, using either the 9-line or the 18-line reader depending on
 * "pattern".
 * NOTE(review): presumably pattern 0 selects 9 lines and any other value 18
 * lines, matching WriteDQSTestPattern() -- confirm the condition.
 */
991 static void ReadDQSTestPattern(unsigned addr_lo, unsigned pattern)
994 ReadL9TestPattern(addr_lo);
997 ReadL18TestPattern(addr_lo);
/*
 * Flush 9 consecutive 64-byte cache lines starting at FS:addr_lo with
 * clflush. Base registers: ecx = addr_lo+128, eax = four lines further,
 * ebx = eight lines further.
 */
1001 static void FlushDQSTestPattern_L9(unsigned addr_lo)
1004 "clflush %%fs:-128(%%ecx)\n\t"
1005 "clflush %%fs:-64(%%ecx)\n\t"
1006 "clflush %%fs:(%%ecx)\n\t"
1007 "clflush %%fs:64(%%ecx)\n\t"
1009 "clflush %%fs:-128(%%eax)\n\t"
1010 "clflush %%fs:-64(%%eax)\n\t"
1011 "clflush %%fs:(%%eax)\n\t"
1012 "clflush %%fs:64(%%eax)\n\t"
1014 "clflush %%fs:-128(%%ebx)\n\t"
1016 :: "b" (addr_lo+128+8*64), "c"(addr_lo+128), "a"(addr_lo+128+4*64)
/*
 * Flush 18 consecutive 64-byte cache lines starting at FS:addr_lo with
 * clflush. Five base registers are preloaded four lines apart
 * (eax = addr_lo+128, edi, ebx, ecx, edx). The function is marked noinline.
 * NOTE(review): noinline is presumably there because the asm ties up five
 * general-purpose registers -- confirm.
 */
1020 static __attribute__((noinline)) void FlushDQSTestPattern_L18(unsigned addr_lo)
1023 "clflush %%fs:-128(%%eax)\n\t"
1024 "clflush %%fs:-64(%%eax)\n\t"
1025 "clflush %%fs:(%%eax)\n\t"
1026 "clflush %%fs:64(%%eax)\n\t"
1028 "clflush %%fs:-128(%%edi)\n\t"
1029 "clflush %%fs:-64(%%edi)\n\t"
1030 "clflush %%fs:(%%edi)\n\t"
1031 "clflush %%fs:64(%%edi)\n\t"
1033 "clflush %%fs:-128(%%ebx)\n\t"
1034 "clflush %%fs:-64(%%ebx)\n\t"
1035 "clflush %%fs:(%%ebx)\n\t"
1036 "clflush %%fs:64(%%ebx)\n\t"
1038 "clflush %%fs:-128(%%ecx)\n\t"
1039 "clflush %%fs:-64(%%ecx)\n\t"
1040 "clflush %%fs:(%%ecx)\n\t"
1041 "clflush %%fs:64(%%ecx)\n\t"
1043 "clflush %%fs:-128(%%edx)\n\t"
1044 "clflush %%fs:-64(%%edx)\n\t"
1046 :: "b" (addr_lo+128+8*64), "c" (addr_lo+128+12*64), "d" (addr_lo +128+16*64), "a"(addr_lo+128), "D"(addr_lo+128+4*64)
/*
 * Flush the DQS test pattern at FS:addr_lo from the cache, using either the
 * 9-line or the 18-line flush depending on "pattern".
 * NOTE(review): presumably pattern 0 selects 9 lines and any other value 18
 * lines, matching WriteDQSTestPattern() -- confirm the condition.
 */
1050 static void FlushDQSTestPattern(unsigned addr_lo, unsigned pattern )
1054 FlushDQSTestPattern_L9(addr_lo);
1057 FlushDQSTestPattern_L18(addr_lo);
/*
 * Compare the data at FS:addr_lo with the expected pattern in buf_a and
 * build a per-byte-lane pass bitmap.
 *
 * The bitmap starts as 0xff (all lanes passing). 9*64/4 dwords are read;
 * each is compared byte by byte with the expected dword and the bit of the
 * corresponding byte lane is cleared on a mismatch.
 * For pattern 1 (dual channel) the two channels' data are interleaved in
 * 8-byte units: channel 1 starts 8 bytes in, and the other channel's data is
 * skipped while walking the buffer.
 * TrainDQSPos() ANDs the result into its per-delay pass mask (1 = pass).
 */
1061 static unsigned CompareDQSTestPattern(unsigned channel, unsigned addr_lo, unsigned pattern, uint8_t *buf_a)
1064 unsigned bitmap = 0xff;
1069 uint32_t value_test;
1071 test_buf = (uint32_t *)buf_a;
1074 if(pattern && channel) {
1075 addr_lo += 8; //second channel
1080 for(i=0;i<9*64/4;i++) {
1082 "movl %%fs:(%1), %0\n\t"
1083 :"=b"(value): "a" (addr_lo)
1085 value_test = *test_buf;
1087 print_debug_dqs_pair("\t\t\t\t\t\ttest_buf= ", (unsigned)test_buf, " value = ", value_test, 7);
1088 print_debug_dqs_pair("\t\t\t\t\t\ttaddr_lo = ",addr_lo, " value = ", value, 7);
/* check the four bytes of this dword one at a time */
1090 for(j=0;j<4*8;j+=8) {
1091 if(((value>>j)&0xff) != ((value_test>>j)& 0xff)) {
1092 bitmap &= ~(1<<bytelane);
1098 print_debug_dqs("\t\t\t\t\t\tbitmap = ", bitmap, 7);
1101 if(pattern == 1) { //dual channel
1102 addr_lo += 8; //skip over other channel's data
/*
 * Find and program the DQS delay for every byte lane of one channel, for
 * either the read or the write direction.
 *
 * Phase 1: for each present chip select and each of the 48 delay settings,
 * program the delay on all lanes, write (write direction) or reuse the
 * previously written (read direction) test pattern, read it back and AND the
 * per-lane pass bitmap into MutualCSPassW[delay]. A delay whose mask is
 * already 0 is skipped.
 *
 * Phase 2: for each byte lane 0..7, find the longest run of consecutive
 * passing delays (RnkDlyFilterMin..RnkDlyFilterMax), flag an error if there
 * is no passing delay or the window is smaller than MIN_DQS_WNDW, otherwise
 * program the middle of the window and store it in dqs_delay_a.
 *
 * NOTE(review): "no passing window" is detected as RnkDlySeqPassMax == 0,
 * which is also the value when only delay 0 passes -- confirm this is
 * intended.
 */
1116 static unsigned TrainDQSPos(const struct mem_controller *ctrl, unsigned channel, unsigned Direction, unsigned Pattern, uint8_t *buf_a, uint8_t *dqs_delay_a, struct sys_info *sysinfo)
1120 unsigned BanksPresent;
1122 unsigned MutualCSPassW[48];
1130 unsigned RnkDlyFilterMax, RnkDlyFilterMin = 0;
1131 unsigned RnkDlySeqPassMax, RnkDlySeqPassMin = 0;
1136 print_debug_dqs("\t\t\tTrainDQSPos begin ", 0, 3);
1138 print_debug_addr("TrainDQSPos: MutualCSPassW[48] :", MutualCSPassW);
1140 for(DQSDelay=0; DQSDelay<48; DQSDelay++) {
1141 MutualCSPassW[DQSDelay] = 0xff; // Bitmapped status per delay setting, 0xff=All positions passing (1= PASS)
1144 for(ChipSel = 0; ChipSel < 8; ChipSel++) { //logical register chipselects 0..7
1145 print_debug_dqs("\t\t\t\tTrainDQSPos: 11 ChipSel ", ChipSel, 4);
1146 //FIXME: process 64MUXedMode
1147 if(!ChipSelPresent(ctrl, ChipSel, sysinfo)) continue;
1150 TestAddr = Get_MCTSysAddr(ctrl, ChipSel, sysinfo);
1152 print_debug_dqs("\t\t\t\tTrainDQSPos: 12 TestAddr ", TestAddr, 4);
1154 //set fs and use fs prefix to access the mem
1155 set_FSBASE(TestAddr>>24);
/* read training: the pattern is written once, before the delay sweep */
1157 if(Direction == DQS_READDIR) {
1158 print_debug_dqs("\t\t\t\tTrainDQSPos: 13 for read so write at first", 0, 4);
1159 WriteDQSTestPattern(TestAddr<<8, Pattern, buf_a);
1162 for(DQSDelay = 0; DQSDelay < 48; DQSDelay++ ){
1163 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 141 DQSDelay ", DQSDelay, 5);
1164 if(MutualCSPassW[DQSDelay] == 0) continue; //skip current delay value if other chipselects have failed all 8 bytelanes
1165 SetDQSDelayAllCSR(ctrl, channel, Direction, DQSDelay);
1166 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 142 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);
/* write training: the pattern is rewritten at every delay setting */
1167 if(Direction == DQS_WRITEDIR) {
1168 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 143 for write", 0, 5);
1169 WriteDQSTestPattern(TestAddr<<8, Pattern, buf_a);
1171 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 144 Pattern ", Pattern, 5);
1172 ReadDQSTestPattern(TestAddr<<8, Pattern);
1173 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 145 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);
1174 MutualCSPassW[DQSDelay] &= CompareDQSTestPattern(channel, TestAddr<<8, Pattern, buf_a); //0: fail, 1=pass
1175 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 146 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);
1176 SetTargetWTIO(TestAddr);
1177 FlushDQSTestPattern(TestAddr<<8, Pattern);
1183 for(ByteLane = 0; ByteLane < 8; ByteLane++) {
1184 print_debug_dqs("\t\t\t\tTrainDQSPos: 31 ByteLane ",ByteLane, 4);
1186 LastTest = DQS_FAIL;
1187 RnkDlySeqPassMax = 0;
1188 RnkDlyFilterMax = 0;
1189 RnkDlyFilterMin = 0;
1190 for(DQSDelay=0; DQSDelay<48; DQSDelay++) {
1191 if(MutualCSPassW[DQSDelay] & (1<<ByteLane)) {
1193 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 321 DQSDelay ", DQSDelay, 5);
1194 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 322 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);
1196 RnkDlySeqPassMax = DQSDelay;
1197 if(LastTest == DQS_FAIL) {
1198 RnkDlySeqPassMin = DQSDelay; //start sequential run
/* keep the widest run of consecutive passing delays seen so far */
1200 if((RnkDlySeqPassMax - RnkDlySeqPassMin)>(RnkDlyFilterMax-RnkDlyFilterMin)){
1201 RnkDlyFilterMin = RnkDlySeqPassMin;
1202 RnkDlyFilterMax = RnkDlySeqPassMax;
1204 LastTest = DQS_PASS;
1207 LastTest = DQS_FAIL;
1210 print_debug_dqs("\t\t\t\tTrainDQSPos: 33 RnkDlySeqPassMax ", RnkDlySeqPassMax, 4);
1212 if(RnkDlySeqPassMax == 0) {
1213 Errors |= SB_NODQSPOS; // no passing window
1216 print_debug_dqs("\t\t\t\tTrainDQSPos: 34 RnkDlyFilterMax ", RnkDlyFilterMax, 4);
1217 print_debug_dqs("\t\t\t\tTrainDQSPos: 34 RnkDlyFilterMin ", RnkDlyFilterMin, 4);
1218 if((RnkDlyFilterMax - RnkDlyFilterMin)< MIN_DQS_WNDW){
1219 Errors |= SB_SMALLDQS;
1222 unsigned middle_dqs;
1223 middle_dqs = MiddleDQS(RnkDlyFilterMin, RnkDlyFilterMax);
1224 print_debug_dqs("\t\t\t\tTrainDQSPos: 35 middle_dqs ",middle_dqs, 4);
1225 SetDQSDelayCSR(ctrl, channel, ByteLane, Direction, middle_dqs);
1226 save_dqs_delay(channel, ByteLane, Direction, dqs_delay_a, middle_dqs);
1232 print_debug_dqs("\t\t\tTrainDQSPos: end", 0xff, 3);
/* Train the read DQS position of "channel": TrainDQSPos() with DQS_READDIR;
 * returns its result unchanged. */
1239 static unsigned TrainReadDQS(const struct mem_controller *ctrl, unsigned channel, unsigned pattern, uint8_t *buf_a, uint8_t *dqs_delay_a, struct sys_info *sysinfo)
1241 print_debug_dqs("\t\tTrainReadPos", 0, 2);
1242 return TrainDQSPos(ctrl, channel, DQS_READDIR, pattern, buf_a, dqs_delay_a, sysinfo);
/* Train the write DQS position of "channel": TrainDQSPos() with DQS_WRITEDIR;
 * returns its result unchanged. */
1245 static unsigned TrainWriteDQS(const struct mem_controller *ctrl, unsigned channel, unsigned pattern, uint8_t *buf_a, uint8_t *dqs_delay_a, struct sys_info *sysinfo)
1247 print_debug_dqs("\t\tTrainWritePos", 0, 2);
1248 return TrainDQSPos(ctrl, channel, DQS_WRITEDIR, pattern, buf_a, dqs_delay_a, sysinfo);
1253 static unsigned TrainDQSRdWrPos(const struct mem_controller *ctrl, struct sys_info *sysinfo)
1255 static const uint32_t TestPatternJD1a[] = {
1256 0x00000000,0x00000000,0xFFFFFFFF,0xFFFFFFFF, // QW0-1, ALL-EVEN
1257 0x00000000,0x00000000,0x00000000,0x00000000, // QW2-3, ALL-EVEN
1258 0x00000000,0x00000000,0xFFFFFFFF,0xFFFFFFFF, // QW4-5, ALL-EVEN
1259 0x00000000,0x00000000,0x00000000,0x00000000, // QW6-7, ALL-EVEN
1260 0xFeFeFeFe,0xFeFeFeFe,0x01010101,0x01010101, // QW0-1, DQ0-ODD
1261 0xFeFeFeFe,0xFeFeFeFe,0x01010101,0x01010101, // QW2-3, DQ0-ODD
1262 0x01010101,0x01010101,0xFeFeFeFe,0xFeFeFeFe, // QW4-5, DQ0-ODD
1263 0xFeFeFeFe,0xFeFeFeFe,0x01010101,0x01010101, // QW6-7, DQ0-ODD
1264 0x02020202,0x02020202,0x02020202,0x02020202, // QW0-1, DQ1-ODD
1265 0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, // QW2-3, DQ1-ODD
1266 0xFdFdFdFd,0xFdFdFdFd,0x02020202,0x02020202, // QW4-5, DQ1-ODD
1267 0x02020202,0x02020202,0x02020202,0x02020202, // QW6-7, DQ1-ODD
1268 0x04040404,0x04040404,0xfBfBfBfB,0xfBfBfBfB, // QW0-1, DQ2-ODD
1269 0x04040404,0x04040404,0x04040404,0x04040404, // QW2-3, DQ2-ODD
1270 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW4-5, DQ2-ODD
1271 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW6-7, DQ2-ODD
1272 0x08080808,0x08080808,0xF7F7F7F7,0xF7F7F7F7, // QW0-1, DQ3-ODD
1273 0x08080808,0x08080808,0x08080808,0x08080808, // QW2-3, DQ3-ODD
1274 0xF7F7F7F7,0xF7F7F7F7,0x08080808,0x08080808, // QW4-5, DQ3-ODD
1275 0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW6-7, DQ3-ODD
1276 0x10101010,0x10101010,0x10101010,0x10101010, // QW0-1, DQ4-ODD
1277 0xeFeFeFeF,0xeFeFeFeF,0x10101010,0x10101010, // QW2-3, DQ4-ODD
1278 0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW4-5, DQ4-ODD
1279 0xeFeFeFeF,0xeFeFeFeF,0x10101010,0x10101010, // QW6-7, DQ4-ODD
1280 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW0-1, DQ5-ODD
1281 0xdFdFdFdF,0xdFdFdFdF,0x20202020,0x20202020, // QW2-3, DQ5-ODD
1282 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW4-5, DQ5-ODD
1283 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW6-7, DQ5-ODD
1284 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW0-1, DQ6-ODD
1285 0x40404040,0x40404040,0xBfBfBfBf,0xBfBfBfBf, // QW2-3, DQ6-ODD
1286 0x40404040,0x40404040,0xBfBfBfBf,0xBfBfBfBf, // QW4-5, DQ6-ODD
1287 0x40404040,0x40404040,0xBfBfBfBf,0xBfBfBfBf, // QW6-7, DQ6-ODD
1288 0x80808080,0x80808080,0x7F7F7F7F,0x7F7F7F7F, // QW0-1, DQ7-ODD
1289 0x80808080,0x80808080,0x7F7F7F7F,0x7F7F7F7F, // QW2-3, DQ7-ODD
1290 0x80808080,0x80808080,0x7F7F7F7F,0x7F7F7F7F, // QW4-5, DQ7-ODD
1291 0x80808080,0x80808080,0x80808080,0x80808080 // QW6-7, DQ7-ODD
1293 static const uint32_t TestPatternJD1b[] = {
1294 0x00000000,0x00000000,0x00000000,0x00000000, // QW0,CHA-B, ALL-EVEN
1295 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, // QW1,CHA-B, ALL-EVEN
1296 0x00000000,0x00000000,0x00000000,0x00000000, // QW2,CHA-B, ALL-EVEN
1297 0x00000000,0x00000000,0x00000000,0x00000000, // QW3,CHA-B, ALL-EVEN
1298 0x00000000,0x00000000,0x00000000,0x00000000, // QW4,CHA-B, ALL-EVEN
1299 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, // QW5,CHA-B, ALL-EVEN
1300 0x00000000,0x00000000,0x00000000,0x00000000, // QW6,CHA-B, ALL-EVEN
1301 0x00000000,0x00000000,0x00000000,0x00000000, // QW7,CHA-B, ALL-EVEN
1302 0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, // QW0,CHA-B, DQ0-ODD
1303 0x01010101,0x01010101,0x01010101,0x01010101, // QW1,CHA-B, DQ0-ODD
1304 0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, // QW2,CHA-B, DQ0-ODD
1305 0x01010101,0x01010101,0x01010101,0x01010101, // QW3,CHA-B, DQ0-ODD
1306 0x01010101,0x01010101,0x01010101,0x01010101, // QW4,CHA-B, DQ0-ODD
1307 0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, // QW5,CHA-B, DQ0-ODD
1308 0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, // QW6,CHA-B, DQ0-ODD
1309 0x01010101,0x01010101,0x01010101,0x01010101, // QW7,CHA-B, DQ0-ODD
1310 0x02020202,0x02020202,0x02020202,0x02020202, // QW0,CHA-B, DQ1-ODD
1311 0x02020202,0x02020202,0x02020202,0x02020202, // QW1,CHA-B, DQ1-ODD
1312 0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, // QW2,CHA-B, DQ1-ODD
1313 0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, // QW3,CHA-B, DQ1-ODD
1314 0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, // QW4,CHA-B, DQ1-ODD
1315 0x02020202,0x02020202,0x02020202,0x02020202, // QW5,CHA-B, DQ1-ODD
1316 0x02020202,0x02020202,0x02020202,0x02020202, // QW6,CHA-B, DQ1-ODD
1317 0x02020202,0x02020202,0x02020202,0x02020202, // QW7,CHA-B, DQ1-ODD
1318 0x04040404,0x04040404,0x04040404,0x04040404, // QW0,CHA-B, DQ2-ODD
1319 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW1,CHA-B, DQ2-ODD
1320 0x04040404,0x04040404,0x04040404,0x04040404, // QW2,CHA-B, DQ2-ODD
1321 0x04040404,0x04040404,0x04040404,0x04040404, // QW3,CHA-B, DQ2-ODD
1322 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW4,CHA-B, DQ2-ODD
1323 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW5,CHA-B, DQ2-ODD
1324 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW6,CHA-B, DQ2-ODD
1325 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW7,CHA-B, DQ2-ODD
1326 0x08080808,0x08080808,0x08080808,0x08080808, // QW0,CHA-B, DQ3-ODD
1327 0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW1,CHA-B, DQ3-ODD
1328 0x08080808,0x08080808,0x08080808,0x08080808, // QW2,CHA-B, DQ3-ODD
1329 0x08080808,0x08080808,0x08080808,0x08080808, // QW3,CHA-B, DQ3-ODD
1330 0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW4,CHA-B, DQ3-ODD
1331 0x08080808,0x08080808,0x08080808,0x08080808, // QW5,CHA-B, DQ3-ODD
1332 0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW6,CHA-B, DQ3-ODD
1333 0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW7,CHA-B, DQ3-ODD
1334 0x10101010,0x10101010,0x10101010,0x10101010, // QW0,CHA-B, DQ4-ODD
1335 0x10101010,0x10101010,0x10101010,0x10101010, // QW1,CHA-B, DQ4-ODD
1336 0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW2,CHA-B, DQ4-ODD
1337 0x10101010,0x10101010,0x10101010,0x10101010, // QW3,CHA-B, DQ4-ODD
1338 0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW4,CHA-B, DQ4-ODD
1339 0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW5,CHA-B, DQ4-ODD
1340 0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW6,CHA-B, DQ4-ODD
1341 0x10101010,0x10101010,0x10101010,0x10101010, // QW7,CHA-B, DQ4-ODD
1342 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW0,CHA-B, DQ5-ODD
1343 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW1,CHA-B, DQ5-ODD
1344 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW2,CHA-B, DQ5-ODD
1345 0x20202020,0x20202020,0x20202020,0x20202020, // QW3,CHA-B, DQ5-ODD
1346 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW4,CHA-B, DQ5-ODD
1347 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW5,CHA-B, DQ5-ODD
1348 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW6,CHA-B, DQ5-ODD
1349 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW7,CHA-B, DQ5-ODD
1350 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW0,CHA-B, DQ6-ODD
1351 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW1,CHA-B, DQ6-ODD
1352 0x40404040,0x40404040,0x40404040,0x40404040, // QW2,CHA-B, DQ6-ODD
1353 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW3,CHA-B, DQ6-ODD
1354 0x40404040,0x40404040,0x40404040,0x40404040, // QW4,CHA-B, DQ6-ODD
1355 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW5,CHA-B, DQ6-ODD
1356 0x40404040,0x40404040,0x40404040,0x40404040, // QW6,CHA-B, DQ6-ODD
1357 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW7,CHA-B, DQ6-ODD
1358 0x80808080,0x80808080,0x80808080,0x80808080, // QW0,CHA-B, DQ7-ODD
1359 0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F, // QW1,CHA-B, DQ7-ODD
1360 0x80808080,0x80808080,0x80808080,0x80808080, // QW2,CHA-B, DQ7-ODD
1361 0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F, // QW3,CHA-B, DQ7-ODD
1362 0x80808080,0x80808080,0x80808080,0x80808080, // QW4,CHA-B, DQ7-ODD
1363 0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F, // QW5,CHA-B, DQ7-ODD
1364 0x80808080,0x80808080,0x80808080,0x80808080, // QW6,CHA-B, DQ7-ODD
1365 0x80808080,0x80808080,0x80808080,0x80808080 // QW7,CHA-B, DQ7-ODD
1367 uint8_t pattern_buf_x[64 * 18 + 16]; // We need to two cache line So have more 16 bytes to keep 16 byte alignment */
1376 unsigned DQSWrDelay;
1377 unsigned is_Width128 = sysinfo->meminfo[ctrl->node_id].is_Width128;
1378 uint8_t *dqs_delay_a = &sysinfo->dqs_delay_a[ctrl->node_id * 2*2*9]; //channel 2, direction 2 , bytelane *9
1387 dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
1388 ecc_bit = dword & DCL_DimmEccEn;
1389 dword &= ~(DCL_DimmEccEn);
1390 pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dword);
1393 buf_a = (uint8_t *)(((uint32_t)(&pattern_buf_x[0]) + 0x10) & (~0xf));
1397 for(i=0;i<16*18;i++) {
1398 *((uint32_t *)(buf_a + i*4)) = TestPatternJD1b[i];
1403 for(i=0; i<16*9;i++) {
1404 *((uint32_t *)(buf_a + i*4)) = TestPatternJD1a[i];
1409 print_debug_dqs("\r\nTrainDQSRdWrPos: 0 ctrl ", ctrl->node_id, 0);
1411 print_debug_addr("TrainDQSRdWrPos: buf_a:", buf_a);
1416 if (!(sysinfo->meminfo[ctrl->node_id].dimm_mask & 0x0F) &&
1417 (sysinfo->meminfo[ctrl->node_id].dimm_mask & 0xF0)) { /* channelB only? */
1421 while( (channel<2) && (!Errors)) {
1422 print_debug_dqs("\tTrainDQSRdWrPos: 1 channel ",channel, 1);
1423 for(DQSWrDelay = 0; DQSWrDelay < 48; DQSWrDelay++) {
1425 SetDQSDelayAllCSR(ctrl, channel, DQS_WRITEDIR, DQSWrDelay);
1426 print_debug_dqs("\t\tTrainDQSRdWrPos: 21 DQSWrDelay ", DQSWrDelay, 2);
1427 err= TrainReadDQS(ctrl, channel, pattern, buf_a, dqs_delay_a, sysinfo);
1428 print_debug_dqs("\t\tTrainDQSRdWrPos: 22 err ",err, 2);
1433 print_debug_dqs("\tTrainDQSRdWrPos: 3 DQSWrDelay ", DQSWrDelay, 1);
1435 if(DQSWrDelay < 48) {
1436 Errors = TrainWriteDQS(ctrl, channel, pattern, buf_a, dqs_delay_a, sysinfo);
1437 print_debug_dqs("\tTrainDQSRdWrPos: 4 Errors ", Errors, 1);
1442 //FIXME: 64MuxMode??
1443 channel++; // skip channel if 64-bit mode
1448 dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
1449 dword &= ~(DCL_DimmEccEn);
1451 pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dword);
1457 //restore SSE2 setting
1460 print_debug_dqs("TrainDQSRdWrPos: ", 5, 0);
// Fetch the stored DQS delay for one byte lane of one channel and direction.
// dqs_delay_a is treated as a flat table laid out as
// [channel][direction][bytelane] with 2 directions and 9 byte lanes per
// channel, so the index is channel*18 + direction*9 + bytelane.
// No bounds checking is done on any of the indices.
1465 static inline uint8_t get_dqs_delay(unsigned channel, unsigned bytelane, unsigned direction, uint8_t *dqs_delay_a)
1467 return dqs_delay_a[channel * 2*9 + direction * 9 + bytelane];
// Compute a DQS delay that lies between the stored delays of two byte lanes.
//   channel, Direction   - select which part of dqs_delay_a is read
//   ByteLane0, ByteLane1 - the two lanes whose delays are blended
//   InterFactor          - blend weight, scaled by a right shift of 8 below
//   dqs_delay_a          - delay table read through get_dqs_delay
// The absolute difference of the two delays is scaled by InterFactor and
// added to the smaller of the two delays.
1470 static unsigned CalcEccDQSPos(unsigned channel,unsigned ByteLane0, unsigned ByteLane1, unsigned InterFactor, unsigned Direction, uint8_t *dqs_delay_a)
1471 /* InterFactor: 0: 100% ByteLane 0
1472 0x80: 50% between ByteLane 0 and 1
1473 0xff: 99.6% ByteLane 1 and 0.4% like 0
1476 unsigned DQSDelay0, DQSDelay1;
1479 DQSDelay0 = get_dqs_delay(channel, ByteLane0, Direction, dqs_delay_a);
1480 DQSDelay1 = get_dqs_delay(channel, ByteLane1, Direction, dqs_delay_a);
// Work with the absolute difference so the unsigned subtraction cannot wrap.
1482 if(DQSDelay0>DQSDelay1) {
1483 DQSDelay = DQSDelay0 - DQSDelay1;
// Lane 0 is the larger one here and the result is built up from lane 1
// further down, so the weight is mirrored to keep its meaning.
1484 InterFactor = 0xff - InterFactor;
1487 DQSDelay = DQSDelay1 - DQSDelay0;
1490 DQSDelay *= InterFactor;
1492 DQSDelay >>= 8; // divide by 256 (not 255): the weight is in 1/256 steps
// Add the scaled difference onto whichever delay is the smaller one.
1494 if(DQSDelay0>DQSDelay1) {
1495 DQSDelay += DQSDelay1;
1498 DQSDelay += DQSDelay0;
// Derive a DQS delay from byte lanes 4 and 5 and apply it to the lane held in
// ByteLane (presumably the ECC lane, going by the function name - confirm).
// This is done for both channels and for each entry of direction[]. Each
// computed value is written to hardware with SetDQSDelayCSR and recorded in
// the per-node delay table with save_dqs_delay.
1505 static void SetEccDQSRdWrPos(const struct mem_controller *ctrl, struct sys_info *sysinfo)
1510 unsigned lane0, lane1, ratio;
1513 unsigned direction[] = { DQS_READDIR, DQS_WRITEDIR };
1515 uint8_t *dqs_delay_a = &sysinfo->dqs_delay_a[ctrl->node_id * 2*2*9]; // per-node slice: 2 channels x 2 directions x 9 byte lanes
1519 for(channel = 0; channel < 2; channel++) {
1521 Direction = direction[i];
// ratio 0 is passed as InterFactor, which the note on CalcEccDQSPos
// describes as 100 percent ByteLane 0, i.e. lane 4 here.
1522 lane0 = 4; lane1 = 5; ratio = 0;
1523 dqs_delay = CalcEccDQSPos(channel, lane0, lane1, ratio, Direction, dqs_delay_a);
1524 print_debug_dqs_pair("\t\tSetEccDQSRdWrPos: channel ", channel, Direction==DQS_READDIR? " R dqs_delay":" W dqs_delay", dqs_delay, 2);
// Program the hardware first, then mirror the value into the saved table.
1525 SetDQSDelayCSR(ctrl, channel, ByteLane, Direction, dqs_delay);
1526 save_dqs_delay(channel, ByteLane, Direction, dqs_delay_a, dqs_delay);
// Run one DQS receiver-enable training pass on a controller.
// Wraps TrainRcvrEn(ctrl, Pass, sysinfo) with begin and end debug output.
// Callers in this file abandon training when this returns nonzero.
1531 static unsigned train_DqsRcvrEn(const struct mem_controller *ctrl, unsigned Pass, struct sys_info *sysinfo)
1533 print_debug_dqs("\r\ntrain_DqsRcvrEn: begin ctrl ", ctrl->node_id, 0);
// Nonzero from TrainRcvrEn takes the branch below.
1534 if(TrainRcvrEn(ctrl, Pass, sysinfo)) {
1537 print_debug_dqs("\r\ntrain_DqsRcvrEn: end ctrl ", ctrl->node_id, 0);
// Run DQS read/write position training on a controller.
// Calls TrainDQSRdWrPos and reports a nonzero result through print_err with
// the node id. SetEccDQSRdWrPos(ctrl, sysinfo) follows the training call.
// Callers in this file abandon training when this returns nonzero.
1541 static unsigned train_DqsPos(const struct mem_controller *ctrl, struct sys_info *sysinfo)
1543 print_debug_dqs("\r\ntrain_DqsPos: begin ctrl ", ctrl->node_id, 0);
1544 if(TrainDQSRdWrPos(ctrl, sysinfo) != 0) {
1545 print_err("\r\nDQS Training Rd Wr failed ctrl"); print_err_hex8(ctrl->node_id); print_err("\r\n");
// Set the delay of the remaining lane from the per-lane results.
1549 SetEccDQSRdWrPos(ctrl, sysinfo);
1551 print_debug_dqs("\r\ntrain_DqsPos: end ctrl ", ctrl->node_id, 0);
1556 #if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
// Workaround applied only to controllers for which is_cpu_pre_f2_in_bsp(i)
// returns nonzero.
//   controllers - number of entries in ctrl to walk
//   ctrl        - controller array, indexed by i
//   tsc0        - per-controller time stamp values added onto tsc1[i]
//   sysinfo     - supplies ctrl_present[] and meminfo[].dimm_mask
// First loop: clear DC_DqsRcvEnTrain in DRAM_CTRL, pulse DI_EnDramInit in
// DRAM_INIT, and form a 64-bit target time tsc1[i] + tsc0[i].
// Second loop: for the same controllers, spin until tsc is no longer behind
// the target time.
1557 static void f0_svm_workaround(int controllers, const struct mem_controller *ctrl, tsc_t *tsc0, struct sys_info *sysinfo)
// One flag per controller; NOTE(review): sized for 8, so controllers is
// assumed not to exceed 8 - confirm against callers.
1560 unsigned cpu_f0_f1[8];
1563 print_debug_addr("dqs_timing: tsc1[8] :", tsc1);
1565 for(i = 0; i < controllers; i++) {
1566 if (!sysinfo->ctrl_present[i])
1569 /* Skip everything if I don't have any memory on this controller */
1570 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
// Remember the result so the second loop can reuse it.
1574 cpu_f0_f1[i] = is_cpu_pre_f2_in_bsp(i);
1576 if(!cpu_f0_f1[i]) continue;
// Clear the receiver-enable training bit in DRAM_CTRL.
1578 dword = pci_read_config32(ctrl[i].f2, DRAM_CTRL);
1579 dword &= ~DC_DqsRcvEnTrain;
1580 pci_write_config32(ctrl[i].f2, DRAM_CTRL, dword);
// Pulse DI_EnDramInit: write it set, then write it cleared again.
1582 dword = pci_read_config32(ctrl[i].f2, DRAM_INIT);
1583 dword |= DI_EnDramInit;
1584 pci_write_config32(ctrl[i].f2, DRAM_INIT, dword);
1585 dword &= ~DI_EnDramInit;
1586 pci_write_config32(ctrl[i].f2, DRAM_INIT, dword);
1589 print_debug_dqs_tsc("begin: tsc1", i, tsc1[i].hi, tsc1[i].lo, 2);
// 64-bit add done in 32-bit halves: a low-word sum smaller than either
// operand means the addition wrapped, i.e. there is a carry.
1591 dword = tsc1[i].lo + tsc0[i].lo;
1592 if((dword<tsc1[i].lo) || (dword<tsc0[i].lo)) {
1596 tsc1[i].hi+= tsc0[i].hi;
1598 print_debug_dqs_tsc("end : tsc1", i, tsc1[i].hi, tsc1[i].lo, 2);
// Second pass: same skip conditions as above, then wait on each target.
1602 for(i = 0; i < controllers; i++) {
1603 if (!sysinfo->ctrl_present[i])
1606 /* Skip everything if I don't have any memory on this controller */
1607 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1609 if(!cpu_f0_f1[i]) continue;
// 64-bit comparison in halves: keep looping while tsc1[i] is ahead of tsc.
1615 } while ((tsc1[i].hi>tsc.hi) || ((tsc1[i].hi==tsc.hi) && (tsc1[i].lo>tsc.lo)));
1617 print_debug_dqs_tsc("end : tsc ", i, tsc.hi, tsc.lo, 2);
1625 /* setting variable mtrr, comes from linux kernel source */
// Program the base and mask MSRs of variable MTRR number reg.
//   reg          - variable MTRR index, used by the MTRRphys*_MSR macros
//   basek, sizek - start and length of the range in KiB
//   type         - MTRR memory type for the range
//   address_bits - physical address width; bits above 32 form the high mask
1626 static void set_var_mtrr_dqs(
1627 unsigned int reg, unsigned long basek, unsigned long sizek,
1628 unsigned char type, unsigned address_bits)
1631 unsigned address_mask_high;
// NOTE(review): the shift is only defined for address_bits from 32 to 63;
// the caller visible in this file passes 40.
1633 address_mask_high = ((1u << (address_bits - 32u)) - 1u);
// KiB to bytes is a shift left by 10; the upper 32 bits of that 64-bit
// byte address are therefore basek >> 22.
1635 base.hi = basek >> 22;
1636 base.lo = basek << 10;
// 4*1024*1024 KiB is 4 GiB: below that the size fits in the low word.
1638 if (sizek < 4*1024*1024) {
1639 mask.hi = address_mask_high;
1640 mask.lo = ~((sizek << 10) -1);
1643 mask.hi = address_mask_high & (~((sizek >> 22) -1));
1652 zero.lo = zero.hi = 0;
1653 /* The invalid bit is kept in the mask, so we simply clear the
1654 relevant mask register to disable a range. */
1655 wrmsr (MTRRphysMask_MSR(reg), zero);
1657 /* Bit 32-35 of MTRRphysMask should be set to 1 */
// The range was disabled above; now write the new base, then the new mask.
1660 wrmsr (MTRRphysBase_MSR(reg), base);
1661 wrmsr (MTRRphysMask_MSR(reg), mask);
1666 /* fms: find most significant bit set, stolen from Linux Kernel Source. */
// Built on the x86 bsrl instruction; x is the input operand and the bit
// index is delivered in r.
1667 static inline unsigned int fms(unsigned int x)
1671 __asm__("bsrl %1,%0\n\t"
1674 "1:" : "=r" (r) : "g" (x));
1678 /* fls: find least significant bit set */
// Built on the x86 bsfl instruction; x is the input operand and the bit
// index is delivered in r.
// NOTE(review): the Linux kernel helper of the same name finds the last
// (most significant) set bit, the opposite of this one - easy to confuse.
1679 static inline unsigned int fls(unsigned int x)
1683 __asm__("bsfl %1,%0\n\t"
1686 "1:" : "=r" (r) : "g" (x));
// Cover the range that starts at range_startk and is range_sizek long (both
// in KiB) with variable MTRRs, one chunk per register, starting at register
// index reg. Each chunk is programmed through set_var_mtrr_dqs with the given
// type and address_bits, and reg is advanced for every chunk.
1690 static unsigned int range_to_mtrr(unsigned int reg,
1691 unsigned long range_startk, unsigned long range_sizek,
1692 unsigned long next_range_startk, unsigned char type, unsigned address_bits)
// Bail-out condition: empty range, or register index 8 and above.
1694 if (!range_sizek || (reg >= 8)) {
1697 while(range_sizek) {
1698 unsigned long max_align, align;
1699 unsigned long sizek;
1700 /* Compute the maximum size I can make a range */
// max_align: lowest set bit of the start, i.e. the alignment of the start.
// align: highest set bit of the remaining size.
1701 max_align = fls(range_startk);
1702 align = fms(range_sizek);
// A chunk cannot be larger than the alignment of its start address.
1703 if (align > max_align) {
1707 #if MEM_TRAIN_SEQ != 1
1708 #if CONFIG_USE_PRINTK_IN_CAR
// NOTE(review): the d conversions below are fed unsigned int and unsigned
// long arguments; the specifiers do not match the argument types.
1709 printk_debug("Setting variable MTRR %d, base: %4dMB, range: %4dMB, type %s\r\n",
1710 reg, range_startk >>10, sizek >> 10,
1711 (type==MTRR_TYPE_UNCACHEABLE)?"UC":
1712 ((type==MTRR_TYPE_WRBACK)?"WB":"Other")
1715 print_debug("Setting variable MTRR "); print_debug_hex8(reg); print_debug(", base: "); print_debug_hex16(range_startk>>10);
1716 print_debug("MB, range: "); print_debug_hex16(sizek >> 10); print_debug("MB, type ");
1717 print_debug( (type==MTRR_TYPE_UNCACHEABLE)?"UC\r\n":
1718 ((type==MTRR_TYPE_WRBACK)?"WB\r\n":"Other\r\n")
// Program this chunk, then move the window past it.
1722 set_var_mtrr_dqs(reg++, range_startk, sizek, type, address_bits);
1723 range_startk += sizek;
1724 range_sizek -= sizek;
// Program the TOP_MEM2 and TOP_MEM MSRs from values given in KiB.
// Each KiB value is turned into a 64-bit byte address split over two 32-bit
// halves: the low 22 bits shifted up by 10 go into lo, and bits 22 and up
// go into hi.
1731 static void set_top_mem_ap(unsigned tom_k, unsigned tom2_k)
1735 /* Now set top of memory */
1736 msr.lo = (tom2_k & 0x003fffff) << 10;
1737 msr.hi = (tom2_k & 0xffc00000) >> 22;
1738 wrmsr(TOP_MEM2, msr);
// Same conversion for the TOP_MEM value.
1740 msr.lo = (tom_k & 0x003fffff) << 10;
1741 msr.hi = (tom_k & 0xffc00000) >> 22;
1742 wrmsr(TOP_MEM, msr);
// Set up MTRRs so that DQS training can access its test addresses.
// Visible steps: the range from 0 to tom_k KiB is covered with write-back
// variable MTRRs starting at register 2 (40 address bits), and bits 21 and 22
// of SYSCFG are set.
// NOTE(review): some preprocessor guards around the statements below may not
// be visible here; confirm which of them are actually compiled in.
1745 static void setup_mtrr_dqs(unsigned tom_k, unsigned tom2_k){
1750 //still enable from cache_as_ram.inc
1751 msr = rdmsr(SYSCFG_MSR);
1752 msr.lo |= SYSCFG_MSR_MtrrFixDramModEn;
1753 wrmsr(SYSCFG_MSR,msr);
1756 //[0,512k), [512k, 640k)
1757 msr.hi = 0x1e1e1e1e;
// Write-back from 0 up to tom_k; 4*1024*1024 KiB (4 GiB) is passed as the
// start of the next range.
1763 reg = range_to_mtrr(2, 0, tom_k,4*1024*1024, MTRR_TYPE_WRBACK, 40);
1767 //enable tom2 and type
1768 msr = rdmsr(SYSCFG_MSR);
1769 msr.lo |= (1<<21) | (1<<22); //MtrrTom2En and Tom2ForceMemTypeWB
1770 wrmsr(SYSCFG_MSR, msr);
// Undo setup_mtrr_dqs once training is finished.
// Visible steps: a loop over MSR numbers 0x204 to 0x20f and clearing bits 21
// and 22 of SYSCFG.
// NOTE(review): some preprocessor guards around the statements below may not
// be visible here; confirm which of them are actually compiled in.
1775 static void clear_mtrr_dqs(unsigned tom2_k){
1779 //still enable from cache_as_ram.inc
1780 msr = rdmsr(SYSCFG_MSR);
1781 msr.lo |= SYSCFG_MSR_MtrrFixDramModEn;
1782 wrmsr(SYSCFG_MSR,msr);
1784 //[0,512k), [512k, 640k)
// Presumably the base and mask MSRs of variable MTRRs 2 through 7, matching
// the register index setup_mtrr_dqs starts from - confirm.
1791 for(i=0x204;i<0x210;i++) {
1797 //disable tom2 and its forced type
1798 msr = rdmsr(SYSCFG_MSR);
1799 msr.lo &= ~((1<<21) | (1<<22)); //MtrrTom2En and Tom2ForceMemTypeWB
1800 wrmsr(SYSCFG_MSR, msr);
// Update one bit of the HT_INIT_CONTROL register of node i, addressed as
// PCI_DEV(0, 0x18+i, 0), using a read-modify-write.
//   val - only its lowest bit is used
//   bit - bit position inside the register
1804 static void set_htic_bit(unsigned i, unsigned val, unsigned bit)
1807 dword = pci_read_config32(PCI_DEV(0, 0x18+i, 0), HT_INIT_CONTROL);
// NOTE(review): an OR can only set the bit; confirm the bit is cleared
// beforehand so that val 0 takes effect.
1809 dword |= ((val & 1) <<bit);
1810 pci_write_config32(PCI_DEV(0, 0x18+i, 0), HT_INIT_CONTROL, dword);
// Query one bit of the HT_INIT_CONTROL register of node i, addressed as
// PCI_DEV(0, 0x18+i, 0). Counterpart of set_htic_bit.
1814 static unsigned get_htic_bit(unsigned i, unsigned bit)
1817 dword = pci_read_config32(PCI_DEV(0, 0x18+i, 0), HT_INIT_CONTROL);
// Wait for the flag that set_sysinfo_in_ram controls: returns as soon as
// bit 9 of HT_INIT_CONTROL on node 0 reads as set.
1822 static void wait_till_sysinfo_in_ram(void)
1825 if(get_htic_bit(0, 9)) return;
// Publish the flag polled by wait_till_sysinfo_in_ram: passes val on to
// bit 9 of HT_INIT_CONTROL on node 0.
1829 static void set_sysinfo_in_ram(unsigned val)
1831 set_htic_bit(0, val, 9);
// dqs_timing, variant built when MEM_TRAIN_SEQ is 0: trains all controllers
// from one caller. The signature gains a tsc0 parameter when
// K8_REV_F_SUPPORT_F0_F1_WORKAROUND is 1.
// Sequence: enable MTRRs, fill chip-select info, receiver-enable pass 1,
// optional F0/F1 workaround, DQS position training, receiver-enable pass 2,
// then clear the MTRRs again. Controllers that are absent or have an empty
// dimm_mask are skipped in every loop. Any failing step jumps to out.
1835 #if MEM_TRAIN_SEQ == 0
1838 #if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
1839 static void dqs_timing(int controllers, const struct mem_controller *ctrl, tsc_t *tsc0, struct sys_info *sysinfo)
1841 static void dqs_timing(int controllers, const struct mem_controller *ctrl, struct sys_info *sysinfo)
1848 //need to enable mtrr, so dqs training could access the test address
1849 setup_mtrr_dqs(sysinfo->tom_k, sysinfo->tom2_k);
// Step 1: collect chip-select information for every populated controller.
1851 for(i = 0; i < controllers; i++) {
1852 if (!sysinfo->ctrl_present[ i ])
1855 /* Skip everything if I don't have any memory on this controller */
1856 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1858 fill_mem_cs_sysinfo(i, ctrl+i, sysinfo);
// Step 2: receiver-enable training, pass 1.
1862 for(i = 0; i < controllers; i++) {
1863 if (!sysinfo->ctrl_present[ i ])
1866 /* Skip everything if I don't have any memory on this controller */
1867 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1869 print_debug("DQS Training:RcvrEn:Pass1: ");
1870 print_debug_hex8(i);
1871 if(train_DqsRcvrEn(ctrl+i, 1, sysinfo)) goto out;
1872 print_debug(" done\r\n");
// Step 3: workaround, only built in when the option is enabled.
1876 #if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
1877 f0_svm_workaround(controllers, ctrl, tsc0, sysinfo);
// Step 4: DQS read/write position training.
1881 for(i = 0; i < controllers; i++) {
1882 if (!sysinfo->ctrl_present[i])
1885 /* Skip everything if I don't have any memory on this controller */
1886 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1888 print_debug("DQS Training:DQSPos: ");
1889 print_debug_hex8(i);
1890 if(train_DqsPos(ctrl+i, sysinfo)) goto out;
1891 print_debug(" done\r\n");
// Step 5: receiver-enable training, pass 2; success marks the controller.
1895 for(i = 0; i < controllers; i++) {
1896 if (!sysinfo->ctrl_present[i])
1899 /* Skip everything if I don't have any memory on this controller */
1900 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1902 print_debug("DQS Training:RcvrEn:Pass2: ");
1903 print_debug_hex8(i);
1904 if(train_DqsRcvrEn(ctrl+i, 2, sysinfo)) goto out;
1905 print_debug(" done\r\n");
// 1 in mem_trained marks this controller as fully trained.
1906 sysinfo->mem_trained[i]=1;
// Training no longer needs the MTRR setup made at the start.
1911 clear_mtrr_dqs(sysinfo->tom2_k);
1915 print_debug_dqs_tsc_x("DQS Training:tsc", i, tsc[i].hi, tsc[i].lo);
// dqs_timing, variant built when MEM_TRAIN_SEQ is above 0: trains the single
// controller ctrl, whose index is i.
// Only runs when sysinfo->mem_trained[i] is 0x80. On return mem_trained[i] is
// 1 after success, or 0x81, 0x82 or 0x83 to tell which step failed.
// The MTRR setup and teardown calls are only built when MEM_TRAIN_SEQ is 1.
1925 #if MEM_TRAIN_SEQ > 0
1927 static void dqs_timing(int i, const struct mem_controller *ctrl, struct sys_info *sysinfo, unsigned v)
// 0x80 is the only state from which training is started.
1934 if(sysinfo->mem_trained[i] != 0x80) return;
1936 #if MEM_TRAIN_SEQ == 1
1937 //need to enable mtrr, so dqs training could access the test address
1938 setup_mtrr_dqs(sysinfo->tom_k, sysinfo->tom2_k);
1941 fill_mem_cs_sysinfo(i, ctrl, sysinfo);
1946 print_debug("set DQS timing:RcvrEn:Pass1: ");
1947 print_debug_hex8(i);
1949 if(train_DqsRcvrEn(ctrl, 1, sysinfo)) {
1950 sysinfo->mem_trained[i]=0x81; // receiver-enable pass 1 failed
1955 print_debug(" done\r\n");
1957 print_debug("set DQS timing:DQSPos: ");
1958 print_debug_hex8(i);
1961 if(train_DqsPos(ctrl, sysinfo)) {
1962 sysinfo->mem_trained[i]=0x82; // DQS position training failed
1967 print_debug(" done\r\n");
1970 print_debug("set DQS timing:RcvrEn:Pass2: ");
1971 print_debug_hex8(i);
1973 if(train_DqsRcvrEn(ctrl, 2, sysinfo)){
1974 sysinfo->mem_trained[i]=0x83; // receiver-enable pass 2 failed
1979 print_debug(" done\r\n");
1985 #if MEM_TRAIN_SEQ == 1
1986 clear_mtrr_dqs(sysinfo->tom2_k);
1990 for(ii=0;ii<4;ii++) {
1991 print_debug_dqs_tsc_x("Total DQS Training : tsc ", ii, tsc[ii].hi, tsc[ii].lo);
// Still 0x80 means no step recorded a failure code: mark as trained.
1995 if(sysinfo->mem_trained[i] == 0x80) {
1996 sysinfo->mem_trained[i]=1;
2002 #if MEM_TRAIN_SEQ == 1
// Train the memory of one node and publish the outcome.
//   nodeid   - node whose controller is trained
//   sysinfo  - working copy used during training
//   sysinfox - second copy that receives the resulting mem_trained value
// Only the mem_trained entry is copied back; the two memcpy calls that would
// copy the delay tables are commented out.
2003 static void train_ram(unsigned nodeid, struct sys_info *sysinfo, struct sys_info *sysinfox)
2005 dqs_timing(nodeid, &sysinfo->ctrl[nodeid], sysinfo, 0); // keep the output tidy
2006 // memcpy(&sysinfox->dqs_rcvr_dly_a[nodeid * 2 * 8],&sysinfo->dqs_rcvr_dly_a[nodeid * 2 * 8], 2*8);
2007 // memcpy(&sysinfox->dqs_delay_a[nodeid * 2 * 2 * 9], &sysinfo->dqs_delay_a[nodeid * 2 * 2 * 9], 2 * 2 * 9);
2008 sysinfox->mem_trained[nodeid] = sysinfo->mem_trained[nodeid];
// Forward declaration: called from train_ram_on_node, which passes its
// retcall argument as ret_addr.
2011 static void copy_and_run_ap_code_in_car(unsigned ret_addr);
2012 static inline void train_ram_on_node(unsigned nodeid, unsigned coreid, struct sys_info *sysinfo, unsigned retcall)
2014 if(coreid) return; // only do it on core0
2015 struct sys_info *sysinfox = ((CONFIG_LB_MEM_TOPK<<10) - DCACHE_RAM_GLOBAL_VAR_SIZE);
2016 wait_till_sysinfo_in_ram(); // use pci to get it
2018 if(sysinfox->mem_trained[nodeid] == 0x80) {
2020 sysinfo->tom_k = sysinfox->tom_k;
2021 sysinfo->tom2_k = sysinfox->tom2_k;
2022 sysinfo->meminfo[nodeid].is_Width128 = sysinfox->meminfo[nodeid].is_Width128;
2023 sysinfo->mem_trained[nodeid] = sysinfox->mem_trained[nodeid];
2024 memcpy(&sysinfo->ctrl[nodeid], &sysinfox->ctrl[nodeid], sizeof(struct mem_controller));
2026 memcpy(sysinfo, sysinfox, DCACHE_RAM_GLOBAL_VAR_SIZE);
2028 set_top_mem_ap(sysinfo->tom_k, sysinfo->tom2_k); // keep the ap's tom consistent with bsp's
2029 #if CONFIG_AP_CODE_IN_CAR == 0
2030 print_debug("CODE IN ROM AND RUN ON NODE:"); print_debug_hex8(nodeid); print_debug("\r\n");
2031 train_ram(nodeid, sysinfo, sysinfox);
2033 /* Can copy dqs_timing to ap cache and run from cache?
2034 * we need coreboot_ap_car.rom? and treat it as coreboot_ram.rom for ap ?
2036 copy_and_run_ap_code_in_car(retcall);
2037 // will go back by jump