2 * This file is part of the coreboot project.
4 * Copyright (C) 2005 YingHai Lu
5 * Copyright (C) 2008 Advanced Micro Devices, Inc.
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; version 2 of the License.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 //0: means no debug info; a message is printed only when DQS_TRAIN_DEBUG > its level argument
22 #define DQS_TRAIN_DEBUG 0
/*
 * Debug helper: print 'str' followed by 'val' in hex and a CRLF.
 *
 * str   - text printed in front of the value
 * val   - value printed in hex
 * level - verbosity threshold of this message
 *
 * Output is produced only when DQS_TRAIN_DEBUG is greater than 0 and greater
 * than 'level'. printk_debug() is used when CONFIG_USE_PRINTK_IN_CAR is set;
 * the print_debug() + print_debug_hex32() sequence is the alternative path.
 */
24 static inline void print_debug_dqs(const char *str, unsigned val, unsigned level)
26 #if DQS_TRAIN_DEBUG > 0
27 if(DQS_TRAIN_DEBUG > level) {
28 #if CONFIG_USE_PRINTK_IN_CAR
29 printk_debug("%s%x\r\n", str, val);
31 print_debug(str); print_debug_hex32(val); print_debug("\r\n");
/*
 * Debug helper: print two labelled values on one line, in the order
 * str, val, str2, val2, followed by a CRLF. Both values are printed as
 * 8 hex digits.
 *
 * Output is produced only when DQS_TRAIN_DEBUG is greater than 0 and greater
 * than 'level'. printk_debug() is used when CONFIG_USE_PRINTK_IN_CAR is set;
 * the print_debug() + print_debug_hex32() sequence is the alternative path.
 */
37 static inline void print_debug_dqs_pair(const char *str, unsigned val, const char *str2, unsigned val2, unsigned level)
39 #if DQS_TRAIN_DEBUG > 0
40 if(DQS_TRAIN_DEBUG > level) {
41 #if CONFIG_USE_PRINTK_IN_CAR
42 printk_debug("%s%08x%s%08x\r\n", str, val, str2, val2);
44 print_debug(str); print_debug_hex32(val); print_debug(str2); print_debug_hex32(val2); print_debug("\r\n");
/*
 * Debug helper: print "str[i]=" followed by 'val' and 'val2' back to back
 * and a CRLF. 'i' is printed as 2 hex digits, 'val' and 'val2' as 8 hex
 * digits each.
 *
 * Output is produced only when DQS_TRAIN_DEBUG is greater than 0 and greater
 * than 'level'.
 */
50 static inline void print_debug_dqs_tsc(const char *str, unsigned i, unsigned val, unsigned val2, unsigned level)
52 #if DQS_TRAIN_DEBUG > 0
53 if(DQS_TRAIN_DEBUG > level) {
54 #if CONFIG_USE_PRINTK_IN_CAR
55 printk_debug("%s[%02x]=%08x%08x\r\n", str, i, val, val2);
57 print_debug(str); print_debug("["); print_debug_hex8(i); print_debug("]="); print_debug_hex32(val); print_debug_hex32(val2); print_debug("\r\n");
/*
 * Print "str[i]=" followed by 'val' and 'val2' back to back and a CRLF,
 * using the same output format as print_debug_dqs_tsc(). This variant takes
 * no 'level' argument.
 */
63 static inline void print_debug_dqs_tsc_x(const char *str, unsigned i, unsigned val, unsigned val2)
65 #if CONFIG_USE_PRINTK_IN_CAR
66 printk_debug("%s[%02x]=%08x%08x\r\n", str, i, val, val2);
68 print_debug(str); print_debug("["); print_debug_hex8(i); print_debug("]="); print_debug_hex32(val); print_debug_hex32(val2); print_debug("\r\n");
/*
 * Copy memory-map registers of node 'nodeid' from PCI config space into
 * sysinfo:
 *   mem_base[nodeid]        <- function 1 (ctrl->f1), offset 0x40 + nodeid*8
 *   cs_base[nodeid*8 + i]   <- function 2 (ctrl->f2), offset 0x40 + i*4
 *   hole_reg[nodeid]        <- function 1 (ctrl->f1), offset 0xf0
 *
 * These cached values are what Get_MCTSysAddr() and ChipSelPresent() read.
 */
73 static void fill_mem_cs_sysinfo(unsigned nodeid, const struct mem_controller *ctrl, struct sys_info *sysinfo)
77 sysinfo->mem_base[nodeid] = pci_read_config32(ctrl->f1, 0x40 + (nodeid<<3));
80 sysinfo->cs_base[nodeid*8+i] = pci_read_config32(ctrl->f2, 0x40 + (i<<2));
83 sysinfo->hole_reg[nodeid] = pci_read_config32(ctrl->f1, 0xf0);
/*
 * Compute the system address used to test chip select 'cs_idx' on the node
 * of 'ctrl', from the register values cached in sysinfo.
 *
 * The address is handled as a 32-bit quantity in units of 256 bytes (the
 * 1MB offset is written as 1<<(20-8)); callers pass addr>>24 to set_FSBASE()
 * and use addr<<8 as the offset inside the FS segment.
 *
 * Steps: take the chip select base, combine it with the node base
 * (mem_base masked with 0xffff0000) and, when HW_MEM_HOLE_SIZEK != 0, shift
 * addresses that lie between the hole start and 4GB upwards by
 * (4GB - hole start). Finally 1MB is added to stay out of the
 * compatibility area.
 *
 * hole_startk is in KB; "<<2" converts KB to the 256-byte unit used here.
 */
86 static unsigned Get_MCTSysAddr(const struct mem_controller *ctrl, unsigned cs_idx, struct sys_info *sysinfo)
90 unsigned nodeid = ctrl->node_id;
92 #if HW_MEM_HOLE_SIZEK != 0
96 //get the local base addr of the chipselect
97 dword = sysinfo->cs_base[nodeid * 8 + cs_idx];
100 //sys addr= node base + local cs base
101 mem_base = sysinfo->mem_base[nodeid];
102 mem_base &= 0xffff0000;
105 #if HW_MEM_HOLE_SIZEK != 0
106 hole_reg = sysinfo->hole_reg[nodeid];
108 unsigned hole_startk;
109 hole_startk = (hole_reg & (0xff<<24)) >> 10;
110 if( (dword >= (hole_startk<<2)) && (dword < ((4*1024*1024)<<2))) {
111 dword += ((4*1024*1024 - hole_startk)<<2);
116 //add 1MB offset to avoid compat area
117 dword += (1<<(20-8));
119 //So final result is upper 32 bit addr
/*
 * System address used for receiver-enable training of chip select 'cs_idx'.
 * Forwards to Get_MCTSysAddr(); the 'channel' argument is not passed on.
 */
125 static unsigned Get_RcvrSysAddr(const struct mem_controller * ctrl, unsigned channel, unsigned cs_idx, struct sys_info *sysinfo)
127 return Get_MCTSysAddr(ctrl, cs_idx, sysinfo);
/* Read the CR4 control register with a 32-bit move into a general register. */
131 static inline unsigned long read_cr4(void)
134 asm volatile ("movl %%cr4, %0" : "=r" (cr4));
/* Load the CR4 control register with 'cr4' (32-bit move from a general register). */
138 static inline void write_cr4(unsigned long cr4)
140 asm volatile ("movl %0, %%cr4" : : "r" (cr4));
/*
 * NOTE(review): presumably turns on SSE/SSE2 instruction support, which the
 * movdqa/movntdq based pattern writes rely on - confirm against the body.
 */
144 static inline void enable_sse2()
/*
 * NOTE(review): presumably the counterpart of enable_sse2(), turning
 * SSE/SSE2 instruction support off again - confirm against the body.
 */
152 static inline void disable_sse2()
/*
 * Read-modify-write of MSR 0xc0010015.
 * NOTE(review): going by the name this presumably sets the Wrap32Dis bit so
 * that FS-relative accesses can reach above 4GB - confirm which bit is
 * changed between the read and the write.
 */
161 static void set_wrap32dis(void) {
164 msr = rdmsr(0xc0010015);
167 wrmsr(0xc0010015, msr);
/*
 * Read-modify-write of MSR 0xc0010015.
 * NOTE(review): going by the name this presumably clears the bit that
 * set_wrap32dis() sets - confirm which bit is changed between the read and
 * the write.
 */
171 static void clear_wrap32dis(void) {
174 msr = rdmsr(0xc0010015);
177 wrmsr(0xc0010015, msr);
/*
 * Program the FS segment base (MSR 0xc0000100) so that memory can be
 * reached with an FS prefix.
 *
 * addr_hi - callers pass (addr >> 24) of a 256-byte-unit address, i.e. the
 *           part of the byte address above bit 31.
 *           NOTE(review): presumably stored in the high dword of the MSR
 *           with the low dword zero - confirm.
 */
181 static void set_FSBASE(uint32_t addr_hi)
185 //set fs and use fs prefix to access the mem
188 wrmsr(0xc0000100, msr); //FS_BASE
/*
 * Report whether chip select 'cs_idx' of the node of 'ctrl' is populated,
 * based on the cached chip select base register
 * sysinfo->cs_base[nodeid * 8 + cs_idx].
 * NOTE(review): presumably only the enable bit of that register is
 * returned - confirm the mask applied to 'enabled'.
 */
192 static unsigned ChipSelPresent(const struct mem_controller *ctrl, unsigned cs_idx, struct sys_info *sysinfo)
195 unsigned nodeid = ctrl->node_id;
198 enabled = sysinfo->cs_base[nodeid * 8 + cs_idx];
/*
 * Tell whether the rank behind chip select 'cs_idx' should be trained.
 * Forwards to ChipSelPresent(); 'channel' and 'is_Width128' are not passed
 * on.
 */
205 static unsigned RcvrRankEnabled(const struct mem_controller *ctrl, int channel, int cs_idx, unsigned is_Width128, struct sys_info *sysinfo)
207 return ChipSelPresent(ctrl, cs_idx, sysinfo);
/*
 * Write test pattern data from 'buf_a' to memory through the FS segment.
 *
 * addr_lo  - destination offset inside the FS segment (eax); the FS base
 *            has to be set up by the caller
 * buf_a    - source data (ebx); it is loaded with movdqa, which requires a
 *            16-byte aligned address
 * line_num - amount of data in 64-byte lines: ecx is loaded with
 *            line_num * 4 and edx with 16, i.e. four 16-byte transfers per
 *            line
 *
 * Each transfer loads 16 bytes into xmm0 and stores them with the
 * non-temporal movntdq instruction.
 */
210 static void WriteLNTestPattern(unsigned addr_lo, uint8_t *buf_a, unsigned line_num)
214 "movdqa (%3), %%xmm0\n\t"
215 "movntdq %%xmm0, %%fs:(%0)\n\t" /* xmm0 is 128 bit */
220 :: "a" (addr_lo), "d" (16), "c" (line_num * 4), "b"(buf_a)
/*
 * Write one line of test pattern to the 256-byte-unit address 'addr'.
 *
 * p selects the source buffer: buf_b when p == 1, buf_a otherwise.
 * The FS base is set from addr>>24 and the data is written at FS offset
 * addr<<8 via WriteLNTestPattern() with a line count of 1.
 */
226 static void Write1LTestPattern(unsigned addr, unsigned p, uint8_t *buf_a, uint8_t *buf_b)
229 if(p==1) { buf = buf_b; }
230 else { buf = buf_a; }
232 set_FSBASE (addr>>24);
234 WriteLNTestPattern(addr<<8, buf, 1);
/*
 * Read one dword from the 256-byte-unit address 'addr' through the FS
 * segment (FS base = addr>>24, offset = addr<<8). The loaded value itself is
 * only a by-product; the access is made for the read fill it causes.
 */
237 static void Read1LTestPattern(unsigned addr)
241 set_FSBASE(addr>>24);
243 /* 1st move causes read fill (to exclusive or shared)*/
245 "movl %%fs:(%1), %0\n\t"
246 :"=b"(value): "a" (addr<<8)
/* Training pass selectors, compared against the 'Pass' argument. */
254 #define DQS_FIRST_PASS 1
255 #define DQS_SECOND_PASS 2
/*
 * Error codes OR-ed into the 'Errors' accumulators of the training
 * routines, plus two tuning constants.
 * NOTE(review): the SB_* values are OR-ed in as plain numbers
 * (Errors |= SB_xxx), not as bit positions (1 << SB_xxx), so different
 * errors share bits - confirm this is intended.
 */
257 #define SB_NORCVREN 11 /* no passing receiver enable delay was found */
258 #define RCVREN_MARGIN 6 /* appears only in commented-out margin terms in TrainRcvrEn() */
259 #define SB_SmallRCVR 13 /* receiver enable delay ended up above 0xae */
260 #define SB_CHA2BRCVREN 12 /* flagged when comparing channel A and B receiver enable delays */
261 #define SB_NODQSPOS 14 /* no passing DQS delay window */
262 #define MIN_DQS_WNDW 3 /* smallest accepted DQS window (max - min delay) */
263 #define SB_SMALLDQS 15 /* DQS window narrower than MIN_DQS_WNDW */
/*
 * Compare the first quadword read back from memory with the expected test
 * pattern and return DQS_PASS or DQS_FAIL.
 *
 * channel      - DCT channel; in 128-bit mode channel 1 data is found 8
 *                bytes further on
 * addr         - test address in 256-byte units (FS base = addr>>24)
 * pattern      - selects between TestPattern0 and TestPattern1 on the
 *                first pass
 * TestPattern0/1/2 - reference data; TestPattern2 is used when Pass is not
 *                DQS_FIRST_PASS
 * Pass         - DQS_FIRST_PASS or DQS_SECOND_PASS
 * is_Width128  - non-zero when the controller runs in 128-bit mode
 *
 * The low dword is read through FS and compared first; only if it matches
 * is the high dword read and compared. On the second pass a DQS_PASS result
 * is handled specially (see the "inverted" comment below).
 *
 * The (unsigned) casts of test_buf in the debug calls assume 32-bit
 * pointers.
 */
266 static unsigned CompareTestPatternQW0(unsigned channel, unsigned addr, unsigned pattern, const uint32_t *TestPattern0, const uint32_t *TestPattern1, const uint32_t *TestPattern2, unsigned Pass, unsigned is_Width128)
272 unsigned result = DQS_FAIL;
274 if(Pass == DQS_FIRST_PASS) {
276 test_buf = (uint32_t *)TestPattern1;
279 test_buf = (uint32_t *)TestPattern0;
283 test_buf = (uint32_t *)TestPattern2;
286 set_FSBASE(addr>>24);
290 if(is_Width128 && (channel == 1)) {
291 addr_lo += 8; //second channel
296 "movl %%fs:(%1), %0\n\t"
297 :"=b"(value): "a" (addr_lo)
300 value_test = *test_buf;
303 print_debug_dqs_pair("\t\t\t\t\t\tQW0.lo : test_buf= ", (unsigned)test_buf, " value = ", value_test, 4);
304 print_debug_dqs_pair("\t\t\t\t\t\tQW0.lo : addr_lo = ", addr_lo, " value = ", value, 4);
306 if(value == value_test) {
310 "movl %%fs:(%1), %0\n\t"
311 :"=b"(value): "a" (addr_lo)
313 value_test = *test_buf;
314 print_debug_dqs_pair("\t\t\t\t\t\tQW0.hi : test_buf= ", (unsigned)test_buf, " value = ", value_test, 4);
315 print_debug_dqs_pair("\t\t\t\t\t\tQW0.hi : addr_lo = ", addr_lo, " value = ", value, 4);
317 if(value == value_test){
322 if(Pass == DQS_SECOND_PASS) { // second pass needs to be inverted
323 if(result==DQS_PASS) {
/*
 * Program the MaxAsyncLat field of DRAM_CONFIG_HIGH from a receiver enable
 * delay.
 *
 * dly - delay in the units used by the receiver enable registers; it is
 *       rounded up and divided by 20 to convert it to ns (see the inline
 *       comments), then stored relative to DCH_MaxAsyncLat_BASE.
 *
 * NOTE(review): the value is shifted into the register without being
 * masked with DCH_MaxAsyncLat_MASK here - confirm that dly is range limited
 * before this point.
 */
335 static void SetMaxAL_RcvrDly(const struct mem_controller *ctrl, unsigned dly)
339 dly += (20-1); // round it
340 dly /= 20; // convert from unit 50ps to 1ns
345 reg = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH);
346 reg &= ~(DCH_MaxAsyncLat_MASK <<DCH_MaxAsyncLat_SHIFT);
347 reg |= ((dly - DCH_MaxAsyncLat_BASE) << DCH_MaxAsyncLat_SHIFT);
348 pci_write_config32(ctrl->f2, DRAM_CONFIG_HIGH, reg);
353 Set the Target range to WT IO (using an IORR overlapping the already existing
354 WB dram type). Use IORR0
/*
 * Program IORR0 for the region containing 'addr': MSR 0xc0010016 is the
 * IORR0 base and MSR 0xc0010017 the IORR0 mask. The low dword of the mask
 * is 0xfc000800 (a 64MB mask, per the inline comment).
 */
356 static void SetTargetWTIO(unsigned addr)
361 wrmsr(0xc0010016, msr); //IORR0 BASE
364 msr.lo = 0xfc000800; // 64MB Mask
365 wrmsr(0xc0010017, msr); // IORR0 Mask
/*
 * Rewrite the IORR0 mask register (MSR 0xc0010017).
 * NOTE(review): presumably writes a cleared mask to undo SetTargetWTIO() -
 * confirm the value placed in msr.
 */
368 static void ResetTargetWTIO(void)
374 wrmsr(0xc0010017, msr); // IORR0 Mask
/*
 * Flush the cache line at the 256-byte-unit address 'addr': the FS base is
 * set from addr>>24 and a clflush with FS prefix is issued.
 */
377 static void proc_CLFLUSH(unsigned addr)
380 set_FSBASE(addr>>24);
382 /* 1st move causes read fill (to exclusive or shared)*/
384 /* clflush fs:[eax] */
385 "clflush %%fs:(%0)\n\t"
/*
 * Flush the test line at 'addr' after a compare (called from TrainRcvrEn()).
 * NOTE(review): presumably wraps proc_CLFLUSH() between SetTargetWTIO() and
 * ResetTargetWTIO() - confirm against the body.
 */
390 static void proc_IOCLFLUSH(unsigned addr)
/*
 * Read an indexed function 2 register through port 0x98 and write the same
 * value straight back, first for index 0x10 and then once more.
 * NOTE(review): 'index' is presumably changed to the second channel's
 * register between the two read/write pairs - confirm.
 */
397 static void ResetDCTWrPtr(const struct mem_controller *ctrl)
400 unsigned index = 0x10;
402 dword = pci_read_config32_index_wait(ctrl->f2, 0x98, index);
403 pci_write_config32_index_wait(ctrl->f2, 0x98, index, dword);
406 dword = pci_read_config32_index_wait(ctrl->f2, 0x98, index);
407 pci_write_config32_index_wait(ctrl->f2, 0x98, index, dword);
/*
 * Return the DRAM cycle time in ns * 1000 for memory clock selector 'i'
 * (the column index of the tables below; four columns are provided).
 *
 * T1000_a holds one value per memory clock selector. TT_a holds four
 * values per row; the row index is derived from the CPU frequency ID: the
 * current FID (MSR 0xc0010042, low 6 bits) when CPUID reports FID control
 * (EDX bit 1), else the startup FID (MSR 0xc0010015, bits 29:24, shifted
 * right by 25). Large indexes fall back to T1000_a.
 *
 * NOTE(review): cpuid(0x8000007) has only seven hex digits; the AMD
 * advanced power management leaf is 0x80000007, so this looks like a typo.
 * NOTE(review): twelve rows (labelled 4..15) are listed for TT_a, but only
 * index > 12 falls back to T1000_a, so index == 12 would read past those
 * rows - confirm the table size against the check.
 */
412 static uint16_t get_exact_T1000(unsigned i)
415 static const uint16_t T1000_a[]= { 5000, 3759, 3003, 2500 };
417 static const uint16_t TT_a[] = {
419 /*4 */ 6250, 6250, 6250, 6250,
420 /*5 */ 5000, 5000, 5000, 2500,
421 /*6 */ 5000, 4166, 4166, 2500,
422 /*7 */ 5000, 4285, 3571, 2500,
424 /*8 */ 5000, 3750, 3125, 2500,
425 /*9 */ 5000, 3888, 3333, 2500,
426 /*10*/ 5000, 4000, 3000, 2500,
427 /*11*/ 5000, 4090, 3181, 2500,
429 /*12*/ 5000, 3750, 3333, 2500,
430 /*13*/ 5000, 3846, 3076, 2500,
431 /*14*/ 5000, 3928, 3214, 2500,
432 /*15*/ 5000, 4000, 3000, 2500,
438 /* Check for FID control support */
439 struct cpuid_result cpuid1;
440 cpuid1 = cpuid(0x8000007);
441 if( cpuid1.edx & 0x02 ) {
442 /* Use current FID */
444 msr = rdmsr(0xc0010042);
445 fid_cur = msr.lo & 0x3f;
449 /* Use startup FID */
451 msr = rdmsr(0xc0010015);
452 fid_start = (msr.lo & (0x3f << 24));
454 index = fid_start>>25;
457 if(index>12) return T1000_a[i];
459 return TT_a[index * 4+i];
/*
 * Preset the DQS timing registers before receiver enable training.
 *
 * Two groups of indexed function 2 registers are written through port 0x98,
 * each for both channels (index and index + 0x20):
 *   indexes 1..3 / 0x21..0x23  - 0x00 for all bytes per the comment below
 *   indexes 5..7 / 0x25..0x27  - 0x2f for all bytes per the comment below
 *
 * NOTE(review): both inline comments name the "DQS Write Timing Control
 * Registers"; the second group (0x05-0x07) is a different register set, so
 * the second comment looks copy-pasted - confirm the register name.
 */
463 static void InitDQSPos4RcvrEn(const struct mem_controller *ctrl)
469 for(i=1; i<=3; i++) {
470 /* Program the DQS Write Timing Control Registers (Function 2:Offset 0x9c, index 0x01-0x03, 0x21-0x23) to 0x00 for all bytes */
471 pci_write_config32_index_wait(ctrl->f2, 0x98, i, dword);
472 pci_write_config32_index_wait(ctrl->f2, 0x98, i+0x20, dword);
476 for(i=5; i<=7; i++) {
477 /* Program the DQS Write Timing Control Registers (Function 2:Offset 0x9c, index 0x05-0x07, 0x25-0x27) to 0x2f for all bytes */
478 pci_write_config32_index_wait(ctrl->f2, 0x98, i, dword);
479 pci_write_config32_index_wait(ctrl->f2, 0x98, i+0x20, dword);
/*
 * Default the rev F0/F1 workaround switch to 1 unless the build already
 * defined it; TrainRcvrEn() tests it with "== 1".
 */
484 #ifndef K8_REV_F_SUPPORT_F0_F1_WORKAROUND
485 #define K8_REV_F_SUPPORT_F0_F1_WORKAROUND 1
/*
 * DQS receiver enable training for one memory controller.
 *
 * ctrl    - controller to train; its function 2 registers (ctrl->f2) are
 *           programmed
 * Pass    - DQS_FIRST_PASS or DQS_SECOND_PASS
 * sysinfo - supplies is_Width128 and dimm_mask for the node and holds the
 *           per-node result table dqs_rcvr_dly_a[]
 *
 * Outline:
 *  - on the first pass, preset the DQS position registers
 *    (InitDQSPos4RcvrEn) and set DC_DqsRcvEnTrain in DRAM_CTRL;
 *  - clear DCL_DimmEccEn in DRAM_CONFIG_LOW while training, remembering the
 *    previous setting in ecc_bit;
 *  - fill two 16-byte aligned pattern buffers (TestPattern0 and
 *    TestPattern1 on the first pass, TestPattern2 in both otherwise);
 *  - for each channel and each receiver pair (receiver = 0, 2, 4, 6) sweep
 *    RcvrEnDly up to 0xaf, write, read back and compare the patterns, and
 *    record the first passing delay that follows a failure (RcvrEnDlyRmin);
 *  - store the chosen delay in dqs_rcvr_dly_a[] and in the receiver enable
 *    register, track the largest delay (CTLRMaxDelay) and program it with
 *    SetMaxAL_RcvrDly().
 *
 * Returns 1 when CTLRMaxDelay ends up equal to 0xae, 0 otherwise.
 *
 * NOTE(review): the SB_* values are OR-ed into Errors as plain numbers, not
 * as (1 << SB_*) - confirm this is intended.
 * NOTE(review): buf_a is derived through a (uint32_t) cast of a pointer,
 * which assumes 32-bit pointers.
 */
488 static unsigned TrainRcvrEn(const struct mem_controller *ctrl, unsigned Pass, struct sys_info *sysinfo)
491 static const uint32_t TestPattern0[] = {
492 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
493 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
494 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
495 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
497 static const uint32_t TestPattern1[] = {
498 0x55555555, 0x55555555, 0x55555555, 0x55555555,
499 0x55555555, 0x55555555, 0x55555555, 0x55555555,
500 0x55555555, 0x55555555, 0x55555555, 0x55555555,
501 0x55555555, 0x55555555, 0x55555555, 0x55555555,
503 static const uint32_t TestPattern2[] = {
504 0x12345678, 0x87654321, 0x23456789, 0x98765432,
505 0x59385824, 0x30496724, 0x24490795, 0x99938733,
506 0x40385642, 0x38465245, 0x29432163, 0x05067894,
507 0x12349045, 0x98723467, 0x12387634, 0x34587623,
510 uint8_t pattern_buf_x[64 * 4 + 16]; // room for two 128-byte pattern buffers plus 16 spare bytes so buf_a can be aligned to 16 bytes
511 uint8_t *buf_a, *buf_b;
514 uint8_t *dqs_rcvr_dly_a = &sysinfo->dqs_rcvr_dly_a[ctrl->node_id * 2* 8] ; //8 node, channel 2, receiver 8
518 unsigned channel, receiver;
521 unsigned CTLRMaxDelay;
526 unsigned Test0, Test1;
528 unsigned RcvrEnDlyRmin;
536 unsigned TestAddr0, TestAddr0B, TestAddr1, TestAddr1B = 0;
538 unsigned CurrRcvrCHADelay = 0;
542 unsigned is_Width128 = sysinfo->meminfo[ctrl->node_id].is_Width128;
546 if(Pass == DQS_FIRST_PASS) {
547 InitDQSPos4RcvrEn(ctrl);
557 dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
558 ecc_bit = dword & DCL_DimmEccEn;
559 dword &= ~(DCL_DimmEccEn);
560 pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dword);
563 if(Pass == DQS_FIRST_PASS) {
564 #if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
565 cpu_f0_f1 = is_cpu_pre_f2_in_bsp(ctrl->node_id);
570 /* Set the DqsRcvEnTrain bit */
571 dword = pci_read_config32(ctrl->f2, DRAM_CTRL);
572 dword |= DC_DqsRcvEnTrain;
573 pci_write_config32(ctrl->f2, DRAM_CTRL, dword);
578 //get T1000 figures (cycle time (ns)) * 1K
579 dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH);
580 dword &= DCH_MemClkFreq_MASK;
582 T1000 = get_exact_T1000(dword);
585 buf_a = (uint8_t *)(((uint32_t)(&pattern_buf_x[0]) + 0x10) & (0xfffffff0));
586 buf_b = buf_a + 128; //??
587 if(Pass==DQS_FIRST_PASS) {
589 *((uint32_t *)(buf_a + i*4)) = TestPattern0[i];
590 *((uint32_t *)(buf_b + i*4)) = TestPattern1[i];
595 *((uint32_t *)(buf_a + i*4)) = TestPattern2[i];
596 *((uint32_t *)(buf_b + i*4)) = TestPattern2[i];
600 print_debug_dqs("\r\nTrainRcvEn: 0 ctrl", ctrl->node_id, 0);
602 print_debug_addr("TrainRcvEn: buf_a:", buf_a);
605 /* for each channel */
609 if (!(sysinfo->meminfo[ctrl->node_id].dimm_mask & 0x0F) &&
610 (sysinfo->meminfo[ctrl->node_id].dimm_mask & 0xF0)) { /* channelB only? */
614 for ( ; (channel < 2) && (!Errors); channel++)
616 print_debug_dqs("\tTrainRcvEn51: channel ",channel, 1);
619 /* there are four receiver pairs, loosely associated with CS */
620 for( receiver = 0; (receiver < 8) && (!Errors); receiver+=2)
623 unsigned index=(receiver>>1) * 3 + 0x10;
625 print_debug_dqs("\t\tTrainRcvEn52: index ", index, 2);
629 dword = pci_read_config32_index_wait(ctrl->f2, 0x98, index);
630 CurrRcvrCHADelay= dword & 0xff;
640 RcvrEnDlyRmin = 0xaf;
642 if(!RcvrRankEnabled(ctrl, channel, receiver, is_Width128, sysinfo)) continue;
644 /* for each DQS receiver enable setting */
646 TestAddr0 = Get_RcvrSysAddr(ctrl, channel, receiver, sysinfo);
648 TestAddr0B = TestAddr0 + (1<<(20+2-8)); // 4MB
650 if(RcvrRankEnabled(ctrl, channel, receiver+1, is_Width128, sysinfo)) {
651 TestAddr1 = Get_RcvrSysAddr(ctrl, channel, receiver+1, sysinfo);
652 TestAddr1B = TestAddr1 + (1<<(20+2-8)); //4MB
659 print_debug_dqs("\t\tTrainRcvEn53: TestAddr0B ", TestAddr0B, 2);
661 Write1LTestPattern(TestAddr0, 0, buf_a, buf_b); // rank0 of dimm, test p0
662 Write1LTestPattern(TestAddr0B, 1, buf_a, buf_b); //rank0 of dimm, test p1
665 Write1LTestPattern(TestAddr1, 0, buf_a, buf_b); //rank 1 of dimm
666 Write1LTestPattern(TestAddr1B, 1, buf_a, buf_b);//rank 1 of dimm
669 if(Pass == DQS_FIRST_PASS) {
672 RcvrEnDly = dqs_rcvr_dly_a[channel * 8 + receiver];
675 while ( RcvrEnDly < 0xaf) { // Sweep Delay value here
676 print_debug_dqs("\t\t\tTrainRcvEn541: RcvrEnDly ", RcvrEnDly, 3);
679 /* Odd steps get another pattern such that even
680 and odd steps alternate.
681 The pointers to the patterns will be swapped
682 at the end of the loop so that they correspond
693 /* Program current Receiver enable delay */
694 pci_write_config32_index_wait(ctrl->f2, 0x98, index, RcvrEnDly);
695 /* FIXME: 64bit MUX */
698 /* Program current Receiver enable delay channel b */
699 pci_write_config32_index_wait(ctrl->f2, 0x98, index+ 0x20, RcvrEnDly);
702 /* Program the MaxAsyncLat field with the
703 current DQS receiver enable setting plus 6ns
705 /*Program MaxAsyncLat to correspond with current delay */
706 SetMaxAL_RcvrDly(ctrl, RcvrEnDly);
710 Read1LTestPattern(TestAddr0); //Cache Fill
711 /* ROM vs cache compare */
712 Test0 = CompareTestPatternQW0(channel, TestAddr0, PatternA, TestPattern0, TestPattern1, TestPattern2, Pass, is_Width128);
713 proc_IOCLFLUSH(TestAddr0);
717 print_debug_dqs("\t\t\tTrainRcvEn542: Test0 ", Test0, 3);
719 if(Test0 == DQS_PASS) {
721 Read1LTestPattern(TestAddr0B);
722 Test1 = CompareTestPatternQW0(channel, TestAddr0B, PatternB, TestPattern0, TestPattern1, TestPattern2, Pass, is_Width128);
723 proc_IOCLFLUSH(TestAddr0B);
727 print_debug_dqs("\t\t\tTrainRcvEn543: Test1 ", Test1, 3);
729 if(Test1 == DQS_PASS) {
731 Read1LTestPattern(TestAddr1);
732 Test0 = CompareTestPatternQW0(channel, TestAddr1, PatternA, TestPattern0, TestPattern1, TestPattern2, Pass, is_Width128);
733 proc_IOCLFLUSH(TestAddr1);
736 if(Test0 == DQS_PASS) {
737 Read1LTestPattern(TestAddr1B);
738 Test1 = CompareTestPatternQW0(channel, TestAddr1B, PatternB, TestPattern0, TestPattern1, TestPattern2, Pass, is_Width128);
739 proc_IOCLFLUSH(TestAddr1B);
742 if(Test1 == DQS_PASS) {
746 print_debug_dqs("\t\t\tTrainRcvEn544: Test0 ", Test0, 3);
754 print_debug_dqs("\t\t\tTrainRcvEn55: RcvrEnDly ", RcvrEnDly, 3);
756 if(CurrTest == DQS_PASS) {
757 if(LastTest == DQS_FAIL) {
758 RcvrEnDlyRmin = RcvrEnDly;
765 /* swap the rank 0 pointers */
767 TestAddr0 = TestAddr0B;
770 /* swap the rank 1 pointers */
772 TestAddr1 = TestAddr1B;
775 print_debug_dqs("\t\t\tTrainRcvEn56: RcvrEnDly ", RcvrEnDly, 3);
781 print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDly ", RcvrEnDly, 2);
783 if(RcvrEnDlyRmin == 0xaf) {
785 Errors |= SB_NORCVREN;
788 if(Pass == DQS_FIRST_PASS) {
789 // We need a better value for DQSPos training
790 RcvrEnDly = RcvrEnDlyRmin /* + RCVREN_MARGIN * T1000/64/50 */;
792 RcvrEnDly = RcvrEnDlyRmin;
795 if(RcvrEnDly > 0xae) {
796 //passing window too narrow, too far delayed
797 Errors |= SB_SmallRCVR;
801 if(Pass == DQS_SECOND_PASS) { //second pass must average values
802 RcvrEnDly += dqs_rcvr_dly_a[channel * 8 + receiver] /* - (RCVREN_MARGIN * T1000/64/50)*/;
806 dqs_rcvr_dly_a[channel * 8 + receiver] = RcvrEnDly;
808 //Set final RcvrEnDly for this DIMM and Channel
809 pci_write_config32_index_wait(ctrl->f2, 0x98, index, RcvrEnDly);
812 pci_write_config32_index_wait(ctrl->f2, 0x98, index+0x20, RcvrEnDly); // channel B
814 pci_write_config32_index_wait(ctrl->f2, 0x98, index, CurrRcvrCHADelay);
815 if(RcvrEnDly > CurrRcvrCHADelay) {
816 dword = RcvrEnDly - CurrRcvrCHADelay;
819 dword = CurrRcvrCHADelay - RcvrEnDly;
823 Errors |= SB_CHA2BRCVREN;
828 print_debug_dqs("\t\tTrainRcvEn63: RcvrEnDly ", RcvrEnDly, 2);
830 if(RcvrEnDly > CTLRMaxDelay) {
831 CTLRMaxDelay = RcvrEnDly;
834 print_debug_dqs("\t\tTrainRcvEn64: CTLRMaxDelay ", CTLRMaxDelay, 2);
839 print_debug_dqs("\tTrainRcvEn65: CTLRMaxDelay ", CTLRMaxDelay, 1);
841 /* Program the MaxAsyncLat field with the largest DQS Receiver Enable setting */
842 SetMaxAL_RcvrDly(ctrl, CTLRMaxDelay);
846 dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
847 dword &= ~(DCL_DimmEccEn);
849 pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dword);
851 if(Pass == DQS_FIRST_PASS) {
852 #if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
856 dword = pci_read_config32(ctrl->f2, DRAM_CTRL);
857 dword &= ~DC_DqsRcvEnTrain;
858 pci_write_config32(ctrl->f2, DRAM_CTRL, dword);
866 //restore SSE2 setting
869 #if MEM_TRAIN_SEQ != 1
870 /* We need tidy output for type 1 */
871 #if CONFIG_USE_PRINTK_IN_CAR
872 printk_debug(" CTLRMaxDelay=%02x", CTLRMaxDelay);
874 print_debug(" CTLRMaxDelay="); print_debug_hex8(CTLRMaxDelay);
878 return (CTLRMaxDelay==0xae)?1:0;
/*
 * Direction selectors for the DQS delay helpers. The value also enters the
 * register index (direction * 4) and the saved-delay table index
 * (direction * 9).
 */
882 #define DQS_READDIR 1
883 #define DQS_WRITEDIR 0
/*
 * Program the DQS delay of a single byte lane.
 *
 * channel   - DCT channel (adds 0x20 to the register index)
 * bytelane  - 0..8, the last one being the ECC lane; four lanes share one
 *             register, one byte each
 * direction - DQS_READDIR or DQS_WRITEDIR (adds direction * 4 to the index)
 * dqs_delay - delay value placed in the 6-bit field of that lane
 *
 * The register is accessed read-modify-write through function 2 port 0x98.
 * NOTE(review): dqs_delay is OR-ed in without being masked to 6 bits -
 * callers are expected to pass values below 0x40.
 */
886 static void SetDQSDelayCSR(const struct mem_controller *ctrl, unsigned channel, unsigned bytelane, unsigned direction, unsigned dqs_delay)
887 { //ByteLane could be 0-8, last is for ECC
894 index = (bytelane>>2) + 1 + channel * 0x20 + (direction << 2);
899 shift <<= 3; // 8 bit
901 dword = pci_read_config32_index_wait(ctrl->f2, 0x98, index);
902 dword &= ~(0x3f<<shift);
903 dword |= (dqs_delay<<shift);
904 pci_write_config32_index_wait(ctrl->f2, 0x98, index, dword);
/*
 * Program the same DQS delay on the byte lanes of one channel and
 * direction: 'dqs_delay' is replicated into each byte of a dword
 * (dqs_delay << (i*8)) and that dword is written to consecutive indexed
 * registers starting at 1 + channel * 0x20 + direction * 4.
 */
908 static void SetDQSDelayAllCSR(const struct mem_controller *ctrl, unsigned channel, unsigned direction, unsigned dqs_delay)
917 dword |= dqs_delay<<(i*8);
920 index = 1 + channel * 0x20 + direction * 4;
923 pci_write_config32_index_wait(ctrl->f2, 0x98, index + i, dword);
/*
 * Return the midpoint of the delay window [min_d, max_d]:
 * min_d + (max_d - min_d) / 2, with a rounding adjustment applied when the
 * window size is odd.
 */
928 static unsigned MiddleDQS(unsigned min_d, unsigned max_d)
931 size_d = max_d-min_d;
932 if(size_d & 1) { //need round up
935 return ( min_d + (size_d>>1));
/*
 * Record the trained delay of one byte lane in 'dqs_delay_a'.
 * Table layout: 2 * 9 entries per channel, 9 entries per direction, one
 * entry per byte lane.
 */
938 static inline void save_dqs_delay(unsigned channel, unsigned bytelane, unsigned direction, uint8_t *dqs_delay_a, uint8_t dqs_delay)
940 dqs_delay_a[channel * 2*9 + direction * 9 + bytelane] = dqs_delay;
/*
 * Write the DQS test pattern from 'buf_a' to FS offset 'addr_lo'.
 * The length is (pattern + 1) * 9 lines: 9 lines for pattern 0 and 18 lines
 * for pattern 1.
 */
943 static void WriteDQSTestPattern(unsigned addr_lo, unsigned pattern , uint8_t *buf_a)
945 WriteLNTestPattern(addr_lo, buf_a, (pattern+1) * 9);
/*
 * Load one dword from each of 18 consecutive 64-byte lines starting at FS
 * offset 'addr_lo'. The loaded values are discarded (eax is overwritten
 * every time); only the memory accesses matter.
 *
 * Five base registers are preloaded at addr_lo + 128 + n * 256 so that each
 * access uses a displacement of -128, -64, 0 or 64.
 */
948 static void ReadL18TestPattern(unsigned addr_lo)
950 //set fs and use fs prefix to access the mem
952 "movl %%fs:-128(%%esi), %%eax\n\t" //TestAddr cache line
953 "movl %%fs:-64(%%esi), %%eax\n\t" //+1
954 "movl %%fs:(%%esi), %%eax\n\t" //+2
955 "movl %%fs:64(%%esi), %%eax\n\t" //+3
957 "movl %%fs:-128(%%edi), %%eax\n\t" //+4
958 "movl %%fs:-64(%%edi), %%eax\n\t" //+5
959 "movl %%fs:(%%edi), %%eax\n\t" //+6
960 "movl %%fs:64(%%edi), %%eax\n\t" //+7
962 "movl %%fs:-128(%%ebx), %%eax\n\t" //+8
963 "movl %%fs:-64(%%ebx), %%eax\n\t" //+9
964 "movl %%fs:(%%ebx), %%eax\n\t" //+10
965 "movl %%fs:64(%%ebx), %%eax\n\t" //+11
967 "movl %%fs:-128(%%ecx), %%eax\n\t" //+12
968 "movl %%fs:-64(%%ecx), %%eax\n\t" //+13
969 "movl %%fs:(%%ecx), %%eax\n\t" //+14
970 "movl %%fs:64(%%ecx), %%eax\n\t" //+15
972 "movl %%fs:-128(%%edx), %%eax\n\t" //+16
973 "movl %%fs:-64(%%edx), %%eax\n\t" //+17
975 :: "a"(0), "b" (addr_lo+128+8*64), "c" (addr_lo+128+12*64), "d" (addr_lo +128+16*64), "S"(addr_lo+128), "D"(addr_lo+128+4*64)
/*
 * Load one dword from each of 9 consecutive 64-byte lines starting at FS
 * offset 'addr_lo'. The loaded values are discarded; only the memory
 * accesses matter.
 *
 * Three base registers are preloaded at addr_lo + 128 + n * 256 so that
 * each access uses a displacement of -128, -64, 0 or 64.
 */
980 static void ReadL9TestPattern(unsigned addr_lo)
983 //set fs and use fs prefix to access the mem
986 "movl %%fs:-128(%%ecx), %%eax\n\t" //TestAddr cache line
987 "movl %%fs:-64(%%ecx), %%eax\n\t" //+1
988 "movl %%fs:(%%ecx), %%eax\n\t" //+2
989 "movl %%fs:64(%%ecx), %%eax\n\t" //+3
991 "movl %%fs:-128(%%edx), %%eax\n\t" //+4
992 "movl %%fs:-64(%%edx), %%eax\n\t" //+5
993 "movl %%fs:(%%edx), %%eax\n\t" //+6
994 "movl %%fs:64(%%edx), %%eax\n\t" //+7
996 "movl %%fs:-128(%%ebx), %%eax\n\t" //+8
998 :: "a"(0), "b" (addr_lo+128+8*64), "c"(addr_lo+128), "d"(addr_lo+128+4*64)
/*
 * Read back the DQS test pattern at FS offset 'addr_lo', using either the
 * 9-line or the 18-line reader depending on 'pattern'.
 * NOTE(review): presumably pattern 0 selects 9 lines and any other value 18
 * lines, matching the (pattern + 1) * 9 lines written by
 * WriteDQSTestPattern() - confirm the condition.
 */
1004 static void ReadDQSTestPattern(unsigned addr_lo, unsigned pattern)
1007 ReadL9TestPattern(addr_lo);
1010 ReadL18TestPattern(addr_lo);
/*
 * Flush (clflush) 9 consecutive 64-byte lines starting at FS offset
 * 'addr_lo'. Base registers are preloaded at addr_lo + 128 + n * 256 so
 * that each flush uses a displacement of -128, -64, 0 or 64.
 */
1014 static void FlushDQSTestPattern_L9(unsigned addr_lo)
1017 "clflush %%fs:-128(%%ecx)\n\t"
1018 "clflush %%fs:-64(%%ecx)\n\t"
1019 "clflush %%fs:(%%ecx)\n\t"
1020 "clflush %%fs:64(%%ecx)\n\t"
1022 "clflush %%fs:-128(%%eax)\n\t"
1023 "clflush %%fs:-64(%%eax)\n\t"
1024 "clflush %%fs:(%%eax)\n\t"
1025 "clflush %%fs:64(%%eax)\n\t"
1027 "clflush %%fs:-128(%%ebx)\n\t"
1029 :: "b" (addr_lo+128+8*64), "c"(addr_lo+128), "a"(addr_lo+128+4*64)
/*
 * Flush (clflush) 18 consecutive 64-byte lines starting at FS offset
 * 'addr_lo'. Base registers are preloaded at addr_lo + 128 + n * 256 so
 * that each flush uses a displacement of -128, -64, 0 or 64.
 * The function is marked noinline.
 */
1033 static __attribute__((noinline)) void FlushDQSTestPattern_L18(unsigned addr_lo)
1036 "clflush %%fs:-128(%%eax)\n\t"
1037 "clflush %%fs:-64(%%eax)\n\t"
1038 "clflush %%fs:(%%eax)\n\t"
1039 "clflush %%fs:64(%%eax)\n\t"
1041 "clflush %%fs:-128(%%edi)\n\t"
1042 "clflush %%fs:-64(%%edi)\n\t"
1043 "clflush %%fs:(%%edi)\n\t"
1044 "clflush %%fs:64(%%edi)\n\t"
1046 "clflush %%fs:-128(%%ebx)\n\t"
1047 "clflush %%fs:-64(%%ebx)\n\t"
1048 "clflush %%fs:(%%ebx)\n\t"
1049 "clflush %%fs:64(%%ebx)\n\t"
1051 "clflush %%fs:-128(%%ecx)\n\t"
1052 "clflush %%fs:-64(%%ecx)\n\t"
1053 "clflush %%fs:(%%ecx)\n\t"
1054 "clflush %%fs:64(%%ecx)\n\t"
1056 "clflush %%fs:-128(%%edx)\n\t"
1057 "clflush %%fs:-64(%%edx)\n\t"
1059 :: "b" (addr_lo+128+8*64), "c" (addr_lo+128+12*64), "d" (addr_lo +128+16*64), "a"(addr_lo+128), "D"(addr_lo+128+4*64)
/*
 * Flush the DQS test pattern lines at FS offset 'addr_lo' from the cache,
 * using either the 9-line or the 18-line flush depending on 'pattern'.
 * NOTE(review): presumably pattern 0 selects 9 lines and any other value 18
 * lines, matching WriteDQSTestPattern() - confirm the condition.
 */
1063 static void FlushDQSTestPattern(unsigned addr_lo, unsigned pattern )
1067 FlushDQSTestPattern_L9(addr_lo);
1070 FlushDQSTestPattern_L18(addr_lo);
/*
 * Compare data read back through the FS segment with the reference pattern
 * in 'buf_a', byte by byte, and build a per-byte-lane pass bitmap.
 *
 * channel - DCT channel; with a non-zero 'pattern' the data of a non-zero
 *           channel starts 8 bytes further on
 * addr_lo - FS offset of the test data
 * pattern - pattern selector; for pattern 1 (dual channel) the other
 *           channel's 8 bytes are skipped while walking through memory
 * buf_a   - expected data
 *
 * The bitmap starts as 0xff (all lanes passing) and the bit of a lane is
 * cleared as soon as one of its bytes differs. 9 * 64 / 4 dwords are
 * checked.
 */
1074 static unsigned CompareDQSTestPattern(unsigned channel, unsigned addr_lo, unsigned pattern, uint8_t *buf_a)
1077 unsigned bitmap = 0xff;
1082 uint32_t value_test;
1084 test_buf = (uint32_t *)buf_a;
1087 if(pattern && channel) {
1088 addr_lo += 8; //second channel
1093 for(i=0;i<9*64/4;i++) {
1095 "movl %%fs:(%1), %0\n\t"
1096 :"=b"(value): "a" (addr_lo)
1098 value_test = *test_buf;
1100 print_debug_dqs_pair("\t\t\t\t\t\ttest_buf= ", (unsigned)test_buf, " value = ", value_test, 7);
1101 print_debug_dqs_pair("\t\t\t\t\t\ttaddr_lo = ",addr_lo, " value = ", value, 7);
1103 for(j=0;j<4*8;j+=8) {
1104 if(((value>>j)&0xff) != ((value_test>>j)& 0xff)) {
1105 bitmap &= ~(1<<bytelane);
1111 print_debug_dqs("\t\t\t\t\t\tbitmap = ", bitmap, 7);
1114 if(pattern == 1) { //dual channel
1115 addr_lo += 8; //skip over other channel's data
/*
 * Find and program the DQS delay for each byte lane of one channel.
 *
 * ctrl        - memory controller being trained
 * channel     - DCT channel
 * Direction   - DQS_READDIR or DQS_WRITEDIR
 * Pattern     - pattern selector handed to the write/read/compare/flush
 *               helpers
 * buf_a       - reference pattern data
 * dqs_delay_a - table receiving the chosen delay per lane (save_dqs_delay)
 * sysinfo     - cached chip select and memory map data
 *
 * Phase 1: for every present chip select and every delay 0..47 the delay
 * is programmed on all lanes, the pattern is written (read training: once
 * per chip select; write training: at every delay step), read back, and the
 * resulting per-lane pass bitmap is AND-ed into MutualCSPassW[delay].
 * Delays that already failed on all lanes are skipped.
 *
 * Phase 2: for each byte lane 0..7 the longest run of consecutive passing
 * delays is determined and its midpoint (MiddleDQS) is programmed and
 * saved. SB_NODQSPOS is flagged when RnkDlySeqPassMax stays 0 and
 * SB_SMALLDQS when the window is narrower than MIN_DQS_WNDW.
 *
 * NOTE(review): the SB_* values are OR-ed into Errors as plain numbers, not
 * as (1 << SB_*) - confirm this is intended.
 */
1129 static unsigned TrainDQSPos(const struct mem_controller *ctrl, unsigned channel, unsigned Direction, unsigned Pattern, uint8_t *buf_a, uint8_t *dqs_delay_a, struct sys_info *sysinfo)
1133 unsigned BanksPresent;
1135 unsigned MutualCSPassW[48];
1143 unsigned RnkDlyFilterMax, RnkDlyFilterMin = 0;
1144 unsigned RnkDlySeqPassMax, RnkDlySeqPassMin = 0;
1149 print_debug_dqs("\t\t\tTrainDQSPos begin ", 0, 3);
1151 print_debug_addr("TrainDQSPos: MutualCSPassW[48] :", MutualCSPassW);
1153 for(DQSDelay=0; DQSDelay<48; DQSDelay++) {
1154 MutualCSPassW[DQSDelay] = 0xff; // Bitmapped status per delay setting, 0xff=All positions passing (1= PASS)
1157 for(ChipSel = 0; ChipSel < 8; ChipSel++) { //logical register chipselects 0..7
1158 print_debug_dqs("\t\t\t\tTrainDQSPos: 11 ChipSel ", ChipSel, 4);
1159 //FIXME: process 64MUXedMode
1160 if(!ChipSelPresent(ctrl, ChipSel, sysinfo)) continue;
1163 TestAddr = Get_MCTSysAddr(ctrl, ChipSel, sysinfo);
1165 print_debug_dqs("\t\t\t\tTrainDQSPos: 12 TestAddr ", TestAddr, 4);
1167 //set fs and use fs prefix to access the mem
1168 set_FSBASE(TestAddr>>24);
1170 if(Direction == DQS_READDIR) {
1171 print_debug_dqs("\t\t\t\tTrainDQSPos: 13 for read so write at first", 0, 4);
1172 WriteDQSTestPattern(TestAddr<<8, Pattern, buf_a);
1175 for(DQSDelay = 0; DQSDelay < 48; DQSDelay++ ){
1176 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 141 DQSDelay ", DQSDelay, 5);
1177 if(MutualCSPassW[DQSDelay] == 0) continue; //skip current delay value if other chipselects have failed all 8 bytelanes
1178 SetDQSDelayAllCSR(ctrl, channel, Direction, DQSDelay);
1179 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 142 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);
1180 if(Direction == DQS_WRITEDIR) {
1181 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 143 for write", 0, 5);
1182 WriteDQSTestPattern(TestAddr<<8, Pattern, buf_a);
1184 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 144 Pattern ", Pattern, 5);
1185 ReadDQSTestPattern(TestAddr<<8, Pattern);
1186 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 145 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);
1187 MutualCSPassW[DQSDelay] &= CompareDQSTestPattern(channel, TestAddr<<8, Pattern, buf_a); //0: fail, 1=pass
1188 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 146 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);
1189 SetTargetWTIO(TestAddr);
1190 FlushDQSTestPattern(TestAddr<<8, Pattern);
1196 for(ByteLane = 0; ByteLane < 8; ByteLane++) {
1197 print_debug_dqs("\t\t\t\tTrainDQSPos: 31 ByteLane ",ByteLane, 4);
1199 LastTest = DQS_FAIL;
1200 RnkDlySeqPassMax = 0;
1201 RnkDlyFilterMax = 0;
1202 RnkDlyFilterMin = 0;
1203 for(DQSDelay=0; DQSDelay<48; DQSDelay++) {
1204 if(MutualCSPassW[DQSDelay] & (1<<ByteLane)) {
1206 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 321 DQSDelay ", DQSDelay, 5);
1207 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 322 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);
1209 RnkDlySeqPassMax = DQSDelay;
1210 if(LastTest == DQS_FAIL) {
1211 RnkDlySeqPassMin = DQSDelay; //start sequential run
1213 if((RnkDlySeqPassMax - RnkDlySeqPassMin)>(RnkDlyFilterMax-RnkDlyFilterMin)){
1214 RnkDlyFilterMin = RnkDlySeqPassMin;
1215 RnkDlyFilterMax = RnkDlySeqPassMax;
1217 LastTest = DQS_PASS;
1220 LastTest = DQS_FAIL;
1223 print_debug_dqs("\t\t\t\tTrainDQSPos: 33 RnkDlySeqPassMax ", RnkDlySeqPassMax, 4);
1225 if(RnkDlySeqPassMax == 0) {
1226 Errors |= SB_NODQSPOS; // no passing window
1229 print_debug_dqs("\t\t\t\tTrainDQSPos: 34 RnkDlyFilterMax ", RnkDlyFilterMax, 4);
1230 print_debug_dqs("\t\t\t\tTrainDQSPos: 34 RnkDlyFilterMin ", RnkDlyFilterMin, 4);
1231 if((RnkDlyFilterMax - RnkDlyFilterMin)< MIN_DQS_WNDW){
1232 Errors |= SB_SMALLDQS;
1235 unsigned middle_dqs;
1236 middle_dqs = MiddleDQS(RnkDlyFilterMin, RnkDlyFilterMax);
1237 print_debug_dqs("\t\t\t\tTrainDQSPos: 35 middle_dqs ",middle_dqs, 4);
1238 SetDQSDelayCSR(ctrl, channel, ByteLane, Direction, middle_dqs);
1239 save_dqs_delay(channel, ByteLane, Direction, dqs_delay_a, middle_dqs);
1245 print_debug_dqs("\t\t\tTrainDQSPos: end", 0xff, 3);
/*
 * Train the read DQS position of one channel: emits a debug message and
 * returns the result of TrainDQSPos() called with DQS_READDIR.
 */
1252 static unsigned TrainReadDQS(const struct mem_controller *ctrl, unsigned channel, unsigned pattern, uint8_t *buf_a, uint8_t *dqs_delay_a, struct sys_info *sysinfo)
1254 print_debug_dqs("\t\tTrainReadPos", 0, 2);
1255 return TrainDQSPos(ctrl, channel, DQS_READDIR, pattern, buf_a, dqs_delay_a, sysinfo);
/*
 * Train the write DQS position of one channel: emits a debug message and
 * returns the result of TrainDQSPos() called with DQS_WRITEDIR.
 */
1258 static unsigned TrainWriteDQS(const struct mem_controller *ctrl, unsigned channel, unsigned pattern, uint8_t *buf_a, uint8_t *dqs_delay_a, struct sys_info *sysinfo)
1260 print_debug_dqs("\t\tTrainWritePos", 0, 2);
1261 return TrainDQSPos(ctrl, channel, DQS_WRITEDIR, pattern, buf_a, dqs_delay_a, sysinfo);
1266 static unsigned TrainDQSRdWrPos(const struct mem_controller *ctrl, struct sys_info *sysinfo)
1268 static const uint32_t TestPatternJD1a[] = {
1269 0x00000000,0x00000000,0xFFFFFFFF,0xFFFFFFFF, // QW0-1, ALL-EVEN
1270 0x00000000,0x00000000,0x00000000,0x00000000, // QW2-3, ALL-EVEN
1271 0x00000000,0x00000000,0xFFFFFFFF,0xFFFFFFFF, // QW4-5, ALL-EVEN
1272 0x00000000,0x00000000,0x00000000,0x00000000, // QW6-7, ALL-EVEN
1273 0xFeFeFeFe,0xFeFeFeFe,0x01010101,0x01010101, // QW0-1, DQ0-ODD
1274 0xFeFeFeFe,0xFeFeFeFe,0x01010101,0x01010101, // QW2-3, DQ0-ODD
1275 0x01010101,0x01010101,0xFeFeFeFe,0xFeFeFeFe, // QW4-5, DQ0-ODD
1276 0xFeFeFeFe,0xFeFeFeFe,0x01010101,0x01010101, // QW6-7, DQ0-ODD
1277 0x02020202,0x02020202,0x02020202,0x02020202, // QW0-1, DQ1-ODD
1278 0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, // QW2-3, DQ1-ODD
1279 0xFdFdFdFd,0xFdFdFdFd,0x02020202,0x02020202, // QW4-5, DQ1-ODD
1280 0x02020202,0x02020202,0x02020202,0x02020202, // QW6-7, DQ1-ODD
1281 0x04040404,0x04040404,0xfBfBfBfB,0xfBfBfBfB, // QW0-1, DQ2-ODD
1282 0x04040404,0x04040404,0x04040404,0x04040404, // QW2-3, DQ2-ODD
1283 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW4-5, DQ2-ODD
1284 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW6-7, DQ2-ODD
1285 0x08080808,0x08080808,0xF7F7F7F7,0xF7F7F7F7, // QW0-1, DQ3-ODD
1286 0x08080808,0x08080808,0x08080808,0x08080808, // QW2-3, DQ3-ODD
1287 0xF7F7F7F7,0xF7F7F7F7,0x08080808,0x08080808, // QW4-5, DQ3-ODD
1288 0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW6-7, DQ3-ODD
1289 0x10101010,0x10101010,0x10101010,0x10101010, // QW0-1, DQ4-ODD
1290 0xeFeFeFeF,0xeFeFeFeF,0x10101010,0x10101010, // QW2-3, DQ4-ODD
1291 0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW4-5, DQ4-ODD
1292 0xeFeFeFeF,0xeFeFeFeF,0x10101010,0x10101010, // QW6-7, DQ4-ODD
1293 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW0-1, DQ5-ODD
1294 0xdFdFdFdF,0xdFdFdFdF,0x20202020,0x20202020, // QW2-3, DQ5-ODD
1295 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW4-5, DQ5-ODD
1296 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW6-7, DQ5-ODD
1297 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW0-1, DQ6-ODD
1298 0x40404040,0x40404040,0xBfBfBfBf,0xBfBfBfBf, // QW2-3, DQ6-ODD
1299 0x40404040,0x40404040,0xBfBfBfBf,0xBfBfBfBf, // QW4-5, DQ6-ODD
1300 0x40404040,0x40404040,0xBfBfBfBf,0xBfBfBfBf, // QW6-7, DQ6-ODD
1301 0x80808080,0x80808080,0x7F7F7F7F,0x7F7F7F7F, // QW0-1, DQ7-ODD
1302 0x80808080,0x80808080,0x7F7F7F7F,0x7F7F7F7F, // QW2-3, DQ7-ODD
1303 0x80808080,0x80808080,0x7F7F7F7F,0x7F7F7F7F, // QW4-5, DQ7-ODD
1304 0x80808080,0x80808080,0x80808080,0x80808080 // QW6-7, DQ7-ODD
1306 static const uint32_t TestPatternJD1b[] = {
1307 0x00000000,0x00000000,0x00000000,0x00000000, // QW0,CHA-B, ALL-EVEN
1308 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, // QW1,CHA-B, ALL-EVEN
1309 0x00000000,0x00000000,0x00000000,0x00000000, // QW2,CHA-B, ALL-EVEN
1310 0x00000000,0x00000000,0x00000000,0x00000000, // QW3,CHA-B, ALL-EVEN
1311 0x00000000,0x00000000,0x00000000,0x00000000, // QW4,CHA-B, ALL-EVEN
1312 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, // QW5,CHA-B, ALL-EVEN
1313 0x00000000,0x00000000,0x00000000,0x00000000, // QW6,CHA-B, ALL-EVEN
1314 0x00000000,0x00000000,0x00000000,0x00000000, // QW7,CHA-B, ALL-EVEN
1315 0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, // QW0,CHA-B, DQ0-ODD
1316 0x01010101,0x01010101,0x01010101,0x01010101, // QW1,CHA-B, DQ0-ODD
1317 0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, // QW2,CHA-B, DQ0-ODD
1318 0x01010101,0x01010101,0x01010101,0x01010101, // QW3,CHA-B, DQ0-ODD
1319 0x01010101,0x01010101,0x01010101,0x01010101, // QW4,CHA-B, DQ0-ODD
1320 0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, // QW5,CHA-B, DQ0-ODD
1321 0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, // QW6,CHA-B, DQ0-ODD
1322 0x01010101,0x01010101,0x01010101,0x01010101, // QW7,CHA-B, DQ0-ODD
1323 0x02020202,0x02020202,0x02020202,0x02020202, // QW0,CHA-B, DQ1-ODD
1324 0x02020202,0x02020202,0x02020202,0x02020202, // QW1,CHA-B, DQ1-ODD
1325 0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, // QW2,CHA-B, DQ1-ODD
1326 0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, // QW3,CHA-B, DQ1-ODD
1327 0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, // QW4,CHA-B, DQ1-ODD
1328 0x02020202,0x02020202,0x02020202,0x02020202, // QW5,CHA-B, DQ1-ODD
1329 0x02020202,0x02020202,0x02020202,0x02020202, // QW6,CHA-B, DQ1-ODD
1330 0x02020202,0x02020202,0x02020202,0x02020202, // QW7,CHA-B, DQ1-ODD
1331 0x04040404,0x04040404,0x04040404,0x04040404, // QW0,CHA-B, DQ2-ODD
1332 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW1,CHA-B, DQ2-ODD
1333 0x04040404,0x04040404,0x04040404,0x04040404, // QW2,CHA-B, DQ2-ODD
1334 0x04040404,0x04040404,0x04040404,0x04040404, // QW3,CHA-B, DQ2-ODD
1335 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW4,CHA-B, DQ2-ODD
1336 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW5,CHA-B, DQ2-ODD
1337 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW6,CHA-B, DQ2-ODD
1338 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW7,CHA-B, DQ2-ODD
1339 0x08080808,0x08080808,0x08080808,0x08080808, // QW0,CHA-B, DQ3-ODD
1340 0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW1,CHA-B, DQ3-ODD
1341 0x08080808,0x08080808,0x08080808,0x08080808, // QW2,CHA-B, DQ3-ODD
1342 0x08080808,0x08080808,0x08080808,0x08080808, // QW3,CHA-B, DQ3-ODD
1343 0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW4,CHA-B, DQ3-ODD
1344 0x08080808,0x08080808,0x08080808,0x08080808, // QW5,CHA-B, DQ3-ODD
1345 0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW6,CHA-B, DQ3-ODD
1346 0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW7,CHA-B, DQ3-ODD
1347 0x10101010,0x10101010,0x10101010,0x10101010, // QW0,CHA-B, DQ4-ODD
1348 0x10101010,0x10101010,0x10101010,0x10101010, // QW1,CHA-B, DQ4-ODD
1349 0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW2,CHA-B, DQ4-ODD
1350 0x10101010,0x10101010,0x10101010,0x10101010, // QW3,CHA-B, DQ4-ODD
1351 0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW4,CHA-B, DQ4-ODD
1352 0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW5,CHA-B, DQ4-ODD
1353 0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW6,CHA-B, DQ4-ODD
1354 0x10101010,0x10101010,0x10101010,0x10101010, // QW7,CHA-B, DQ4-ODD
1355 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW0,CHA-B, DQ5-ODD
1356 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW1,CHA-B, DQ5-ODD
1357 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW2,CHA-B, DQ5-ODD
1358 0x20202020,0x20202020,0x20202020,0x20202020, // QW3,CHA-B, DQ5-ODD
1359 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW4,CHA-B, DQ5-ODD
1360 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW5,CHA-B, DQ5-ODD
1361 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW6,CHA-B, DQ5-ODD
1362 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW7,CHA-B, DQ5-ODD
1363 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW0,CHA-B, DQ6-ODD
1364 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW1,CHA-B, DQ6-ODD
1365 0x40404040,0x40404040,0x40404040,0x40404040, // QW2,CHA-B, DQ6-ODD
1366 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW3,CHA-B, DQ6-ODD
1367 0x40404040,0x40404040,0x40404040,0x40404040, // QW4,CHA-B, DQ6-ODD
1368 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW5,CHA-B, DQ6-ODD
1369 0x40404040,0x40404040,0x40404040,0x40404040, // QW6,CHA-B, DQ6-ODD
1370 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW7,CHA-B, DQ6-ODD
1371 0x80808080,0x80808080,0x80808080,0x80808080, // QW0,CHA-B, DQ7-ODD
1372 0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F, // QW1,CHA-B, DQ7-ODD
1373 0x80808080,0x80808080,0x80808080,0x80808080, // QW2,CHA-B, DQ7-ODD
1374 0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F, // QW3,CHA-B, DQ7-ODD
1375 0x80808080,0x80808080,0x80808080,0x80808080, // QW4,CHA-B, DQ7-ODD
1376 0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F, // QW5,CHA-B, DQ7-ODD
1377 0x80808080,0x80808080,0x80808080,0x80808080, // QW6,CHA-B, DQ7-ODD
1378 0x80808080,0x80808080,0x80808080,0x80808080 // QW7,CHA-B, DQ7-ODD
1380 uint8_t pattern_buf_x[64 * 18 + 16]; // We need to two cache line So have more 16 bytes to keep 16 byte alignment */
1389 unsigned DQSWrDelay;
1390 unsigned is_Width128 = sysinfo->meminfo[ctrl->node_id].is_Width128;
1391 uint8_t *dqs_delay_a = &sysinfo->dqs_delay_a[ctrl->node_id * 2*2*9]; //channel 2, direction 2 , bytelane *9
1400 dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
1401 ecc_bit = dword & DCL_DimmEccEn;
1402 dword &= ~(DCL_DimmEccEn);
1403 pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dword);
1406 buf_a = (uint8_t *)(((uint32_t)(&pattern_buf_x[0]) + 0x10) & (~0xf));
1410 for(i=0;i<16*18;i++) {
1411 *((uint32_t *)(buf_a + i*4)) = TestPatternJD1b[i];
1416 for(i=0; i<16*9;i++) {
1417 *((uint32_t *)(buf_a + i*4)) = TestPatternJD1a[i];
1422 print_debug_dqs("\r\nTrainDQSRdWrPos: 0 ctrl ", ctrl->node_id, 0);
1424 print_debug_addr("TrainDQSRdWrPos: buf_a:", buf_a);
1429 if (!(sysinfo->meminfo[ctrl->node_id].dimm_mask & 0x0F) &&
1430 (sysinfo->meminfo[ctrl->node_id].dimm_mask & 0xF0)) { /* channelB only? */
1434 while( (channel<2) && (!Errors)) {
1435 print_debug_dqs("\tTrainDQSRdWrPos: 1 channel ",channel, 1);
1436 for(DQSWrDelay = 0; DQSWrDelay < 48; DQSWrDelay++) {
1438 SetDQSDelayAllCSR(ctrl, channel, DQS_WRITEDIR, DQSWrDelay);
1439 print_debug_dqs("\t\tTrainDQSRdWrPos: 21 DQSWrDelay ", DQSWrDelay, 2);
1440 err= TrainReadDQS(ctrl, channel, pattern, buf_a, dqs_delay_a, sysinfo);
1441 print_debug_dqs("\t\tTrainDQSRdWrPos: 22 err ",err, 2);
1446 print_debug_dqs("\tTrainDQSRdWrPos: 3 DQSWrDelay ", DQSWrDelay, 1);
1448 if(DQSWrDelay < 48) {
1449 Errors = TrainWriteDQS(ctrl, channel, pattern, buf_a, dqs_delay_a, sysinfo);
1450 print_debug_dqs("\tTrainDQSRdWrPos: 4 Errors ", Errors, 1);
1455 //FIXME: 64MuxMode??
1456 channel++; // skip channel if 64-bit mode
1461 dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
1462 dword &= ~(DCL_DimmEccEn);
1464 pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dword);
1470 //restore SSE2 setting
1473 print_debug_dqs("TrainDQSRdWrPos: ", 5, 0);
/*
 * Fetch one trained DQS delay from a node's delay table.
 *
 * The table is a flat uint8_t array indexed as [channel][direction][bytelane],
 * with 2 directions and 9 byte lanes per channel.
 *
 * channel:     DRAM channel index
 * bytelane:    byte lane index (0..8)
 * direction:   direction index (the DQS_READDIR / DQS_WRITEDIR value)
 * dqs_delay_a: start of the node's delay table
 */
static inline uint8_t get_dqs_delay(unsigned channel, unsigned bytelane, unsigned direction, uint8_t *dqs_delay_a)
{
	return dqs_delay_a[channel * 2*9 + direction * 9 + bytelane];
}

/*
 * Interpolate a DQS delay between two byte lanes of one channel/direction.
 *
 * InterFactor selects the position between the two lanes in 1/256 steps:
 *	0:    100% ByteLane0
 *	0x80: 50% between ByteLane0 and ByteLane1
 *	0xff: 99.6% ByteLane1 and 0.4% ByteLane0
 *
 * The delays are unsigned, so the difference is always formed as
 * (larger - smaller) and the weight is mirrored when ByteLane0 holds the
 * larger value.  Because the scale is a shift by 8 (divide by 256), the
 * mirrored weight has to be 0x100 - InterFactor; using 0xff - InterFactor
 * would make InterFactor 0 return ByteLane0's delay minus one whenever
 * ByteLane0 is the larger of the two.
 *
 * Returns the interpolated delay; the fractional part is truncated, so the
 * result is rounded towards the smaller of the two lane delays.
 */
static unsigned CalcEccDQSPos(unsigned channel,unsigned ByteLane0, unsigned ByteLane1, unsigned InterFactor, unsigned Direction, uint8_t *dqs_delay_a)
{
	unsigned DQSDelay0, DQSDelay1;
	unsigned DQSDelay;

	DQSDelay0 = get_dqs_delay(channel, ByteLane0, Direction, dqs_delay_a);
	DQSDelay1 = get_dqs_delay(channel, ByteLane1, Direction, dqs_delay_a);

	if (DQSDelay0 > DQSDelay1) {
		DQSDelay = DQSDelay0 - DQSDelay1;
		/* Weight is now measured from ByteLane1 instead of ByteLane0. */
		InterFactor = 0x100 - InterFactor;
	} else {
		DQSDelay = DQSDelay1 - DQSDelay0;
	}

	DQSDelay *= InterFactor;
	DQSDelay >>= 8;	/* divide by 256 */

	/* Add the scaled difference back onto the smaller of the two delays. */
	if (DQSDelay0 > DQSDelay1) {
		DQSDelay += DQSDelay1;
	} else {
		DQSDelay += DQSDelay0;
	}

	return DQSDelay;
}
/*
 * Derive a DQS delay from byte lanes 4 and 5 with CalcEccDQSPos() and apply
 * it to byte lane `ByteLane`: the value is written with SetDQSDelayCSR() and
 * recorded in the node's delay table with save_dqs_delay().  This is done per
 * channel (0 and 1) for a direction taken from direction[].
 *
 * ctrl:    memory controller being programmed (its node_id selects the table)
 * sysinfo: holds dqs_delay_a, the per-node delay tables
 *
 * NOTE(review): the declarations of channel, ByteLane, Direction, dqs_delay
 * and i, the value assigned to ByteLane, and the loop that drives i are not
 * visible in this listing -- confirm against the full source.
 */
1518 static void SetEccDQSRdWrPos(const struct mem_controller *ctrl, struct sys_info *sysinfo)
1523 unsigned lane0, lane1, ratio;
/* Directions to program, in the order they are visited via index i. */
1526 unsigned direction[] = { DQS_READDIR, DQS_WRITEDIR };
/* Each node owns 2 channels * 2 directions * 9 byte lanes of delay entries. */
1528 uint8_t *dqs_delay_a = &sysinfo->dqs_delay_a[ctrl->node_id * 2*2*9]; //channel 2, direction 2 , bytelane *9
1532 for(channel = 0; channel < 2; channel++) {
1534 Direction = direction[i];
/* ratio is passed as CalcEccDQSPos()'s InterFactor; 0 is documented there as "100% ByteLane 0". */
1535 lane0 = 4; lane1 = 5; ratio = 0;
1536 dqs_delay = CalcEccDQSPos(channel, lane0, lane1, ratio, Direction, dqs_delay_a);
1537 print_debug_dqs_pair("\t\tSetEccDQSRdWrPos: channel ", channel, Direction==DQS_READDIR? " R dqs_delay":" W dqs_delay", dqs_delay, 2);
/* Program the hardware, then keep the table in sync with what was programmed. */
1538 SetDQSDelayCSR(ctrl, channel, ByteLane, Direction, dqs_delay);
1539 save_dqs_delay(channel, ByteLane, Direction, dqs_delay_a, dqs_delay);
/*
 * Run receiver-enable training for one memory controller by calling
 * TrainRcvrEn() with the requested pass number, bracketed by level-0 debug
 * traces that print the controller's node id.
 *
 * ctrl:    memory controller to train
 * Pass:    pass number handed through to TrainRcvrEn() (callers in this file use 1 and 2)
 * sysinfo: system information block handed through to TrainRcvrEn()
 *
 * NOTE(review): the return statements are not visible in this listing.
 * Callers in this file stop training when the result is non-zero -- confirm
 * the exact return values against the full source.
 */
1544 static unsigned train_DqsRcvrEn(const struct mem_controller *ctrl, unsigned Pass, struct sys_info *sysinfo)
1546 print_debug_dqs("\r\ntrain_DqsRcvrEn: begin ctrl ", ctrl->node_id, 0);
/* A non-zero result from TrainRcvrEn() takes this branch (body not visible here). */
1547 if(TrainRcvrEn(ctrl, Pass, sysinfo)) {
1550 print_debug_dqs("\r\ntrain_DqsRcvrEn: end ctrl ", ctrl->node_id, 0);
/*
 * Run DQS read/write position training for one memory controller.
 * Calls TrainDQSRdWrPos(); when that returns non-zero an error naming the
 * node id is printed.  SetEccDQSRdWrPos() is called to program the remaining
 * lane from the trained ones, with level-0 begin/end debug traces around it.
 *
 * ctrl:    memory controller to train
 * sysinfo: system information block shared with the training helpers
 *
 * NOTE(review): the statement(s) following the error print and the return
 * values are not visible in this listing.  Callers in this file stop training
 * when the result is non-zero -- confirm against the full source.
 */
1554 static unsigned train_DqsPos(const struct mem_controller *ctrl, struct sys_info *sysinfo)
1556 print_debug_dqs("\r\ntrain_DqsPos: begin ctrl ", ctrl->node_id, 0);
1557 if(TrainDQSRdWrPos(ctrl, sysinfo) != 0) {
1558 print_err("\r\nDQS Training Rd Wr failed ctrl"); print_err_hex8(ctrl->node_id); print_err("\r\n");
1562 SetEccDQSRdWrPos(ctrl, sysinfo);
1564 print_debug_dqs("\r\ntrain_DqsPos: end ctrl ", ctrl->node_id, 0);
1569 #if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
/*
 * Workaround applied between training stages (built only when
 * K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1).
 *
 * First loop, per controller that has memory and for which
 * is_cpu_pre_f2_in_bsp() is non-zero: clear DC_DqsRcvEnTrain in DRAM_CTRL,
 * pulse DI_EnDramInit in DRAM_INIT (set, write, clear, write), then add
 * tsc0[i] to tsc1[i] as a 64-bit value split into lo/hi halves.
 * Second loop, for the same controllers: spin until the sampled time stamp
 * `tsc` is no longer below tsc1[i].
 *
 * controllers: number of entries in ctrl[] to visit
 * ctrl:        array of memory controllers
 * tsc0:        per-controller time stamp values added to tsc1[]
 * sysinfo:     supplies ctrl_present[] and meminfo[].dimm_mask
 *
 * NOTE(review): the declarations of i, dword, tsc and tsc1, the code that
 * fills tsc1[i] and tsc, and several short statements (e.g. the bodies of the
 * ctrl_present checks and of the carry branch) are not visible in this
 * listing -- confirm against the full source.
 */
1570 static void f0_svm_workaround(int controllers, const struct mem_controller *ctrl, tsc_t *tsc0, struct sys_info *sysinfo)
/* Per-controller flag: result of is_cpu_pre_f2_in_bsp(); only written for controllers that pass the skip checks. */
1573 unsigned cpu_f0_f1[8];
1576 print_debug_addr("dqs_timing: tsc1[8] :", tsc1);
1578 for(i = 0; i < controllers; i++) {
1579 if (!sysinfo->ctrl_present[i])
1582 /* Skip everything if I don't have any memory on this controller */
1583 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1587 cpu_f0_f1[i] = is_cpu_pre_f2_in_bsp(i);
1589 if(!cpu_f0_f1[i]) continue;
/* Turn off the DQS receiver-enable training bit. */
1591 dword = pci_read_config32(ctrl[i].f2, DRAM_CTRL);
1592 dword &= ~DC_DqsRcvEnTrain;
1593 pci_write_config32(ctrl[i].f2, DRAM_CTRL, dword);
/* Pulse DI_EnDramInit: write it as 1, then immediately as 0. */
1595 dword = pci_read_config32(ctrl[i].f2, DRAM_INIT);
1596 dword |= DI_EnDramInit;
1597 pci_write_config32(ctrl[i].f2, DRAM_INIT, dword);
1598 dword &= ~DI_EnDramInit;
1599 pci_write_config32(ctrl[i].f2, DRAM_INIT, dword);
1602 print_debug_dqs_tsc("begin: tsc1", i, tsc1[i].hi, tsc1[i].lo, 2);
/* 64-bit add done in halves: a sum smaller than either operand means the low word wrapped. */
1604 dword = tsc1[i].lo + tsc0[i].lo;
1605 if((dword<tsc1[i].lo) || (dword<tsc0[i].lo)) {
1609 tsc1[i].hi+= tsc0[i].hi;
1611 print_debug_dqs_tsc("end : tsc1", i, tsc1[i].hi, tsc1[i].lo, 2);
/* Second pass: same skip conditions as above, so cpu_f0_f1[i] is only read where it was written. */
1615 for(i = 0; i < controllers; i++) {
1616 if (!sysinfo->ctrl_present[i])
1619 /* Skip everything if I don't have any memory on this controller */
1620 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1622 if(!cpu_f0_f1[i]) continue;
/* Loop while tsc1[i] > tsc, compared as (hi, lo) pairs. */
1628 } while ((tsc1[i].hi>tsc.hi) || ((tsc1[i].hi==tsc.hi) && (tsc1[i].lo>tsc.lo)));
1630 print_debug_dqs_tsc("end : tsc ", i, tsc.hi, tsc.lo, 2);
1638 /* setting variable mtrr, comes from linux kernel source */
/*
 * Program (or disable) one variable MTRR base/mask pair.
 *
 * reg:          index of the variable MTRR pair (used with MTRRphysBase_MSR / MTRRphysMask_MSR)
 * basek:        start of the range in KB
 * sizek:        size of the range in KB
 * type:         memory type for the range
 * address_bits: physical address width; bits above 32 form the high mask
 *
 * NOTE(review): the declarations of base, mask and zero, the low-mask value
 * for ranges of 4 GB and more, the condition under which the range is
 * disabled instead of programmed, and the code that merges `type` and the
 * valid bit into base/mask are not visible in this listing -- confirm against
 * the full source.
 */
1639 static void set_var_mtrr_dqs(
1640 unsigned int reg, unsigned long basek, unsigned long sizek,
1641 unsigned char type, unsigned address_bits)
1644 unsigned address_mask_high;
/* Ones for every implemented physical address bit at or above bit 32. */
1646 address_mask_high = ((1u << (address_bits - 32u)) - 1u);
/* Convert the KB base to a byte address split into high and low 32-bit halves. */
1648 base.hi = basek >> 22;
1649 base.lo = basek << 10;
/* 4*1024*1024 KB = 4 GB: below that the size fits entirely in the low mask word. */
1651 if (sizek < 4*1024*1024) {
1652 mask.hi = address_mask_high;
1653 mask.lo = ~((sizek << 10) -1);
1656 mask.hi = address_mask_high & (~((sizek >> 22) -1));
1665 zero.lo = zero.hi = 0;
1666 /* The invalid bit is kept in the mask, so we simply clear the
1667 relevant mask register to disable a range. */
1668 wrmsr (MTRRphysMask_MSR(reg), zero);
1670 /* Bit 32-35 of MTRRphysMask should be set to 1 */
/* Base is written before the mask. */
1673 wrmsr (MTRRphysBase_MSR(reg), base);
1674 wrmsr (MTRRphysMask_MSR(reg), mask);
1679 /* fms: find most significant bit set, stolen from Linux Kernel Source. */
/*
 * Uses the x86 BSR (bit scan reverse) instruction on x, with the result in r.
 * NOTE(review): the declaration and return of r, and the asm lines that
 * precede the local label "1:" (presumably the x == 0 handling), are not
 * visible in this listing -- confirm against the full source.
 */
1680 static inline unsigned int fms(unsigned int x)
1684 __asm__("bsrl %1,%0\n\t"
1687 "1:" : "=r" (r) : "g" (x));
1691 /* fls: find least significant bit set */
/*
 * Uses the x86 BSF (bit scan forward) instruction on x, with the result in r.
 * Note that in this file fls() scans for the LOWEST set bit (the counterpart
 * of fms() above it).
 * NOTE(review): the declaration and return of r, and the asm lines that
 * precede the local label "1:" (presumably the x == 0 handling), are not
 * visible in this listing -- confirm against the full source.
 */
1692 static inline unsigned int fls(unsigned int x)
1696 __asm__("bsfl %1,%0\n\t"
1699 "1:" : "=r" (r) : "g" (x));
/*
 * Cover [range_startk, range_startk + range_sizek) with variable MTRRs,
 * starting at register `reg` and consuming one register per chunk via
 * set_var_mtrr_dqs().
 *
 * reg:               first variable MTRR index to use
 * range_startk:      start of the range in KB
 * range_sizek:       size of the range in KB
 * next_range_startk: not referenced in the lines visible here
 * type:              memory type handed to set_var_mtrr_dqs()
 * address_bits:      physical address width handed to set_var_mtrr_dqs()
 *
 * NOTE(review): the early-exit body, the computation of sizek from
 * align/max_align, any in-loop check on reg, and the return value are not
 * visible in this listing -- confirm against the full source.
 */
1703 static unsigned int range_to_mtrr(unsigned int reg,
1704 unsigned long range_startk, unsigned long range_sizek,
1705 unsigned long next_range_startk, unsigned char type, unsigned address_bits)
/* Nothing to map, or register index already at/above 8. */
1707 if (!range_sizek || (reg >= 8)) {
1710 while(range_sizek) {
1711 unsigned long max_align, align;
1712 unsigned long sizek;
1713 /* Compute the maximum size I can make a range */
/* max_align: lowest set bit of the start (alignment limit); align: highest set bit of the remaining size. */
1714 max_align = fls(range_startk);
1715 align = fms(range_sizek);
1716 if (align > max_align) {
/* NOTE(review): the printk below formats unsigned / unsigned long values with %d -- confirm the format specifiers. */
1720 #if MEM_TRAIN_SEQ != 1
1721 #if CONFIG_USE_PRINTK_IN_CAR
1722 printk_debug("Setting variable MTRR %d, base: %4dMB, range: %4dMB, type %s\r\n",
1723 reg, range_startk >>10, sizek >> 10,
1724 (type==MTRR_TYPE_UNCACHEABLE)?"UC":
1725 ((type==MTRR_TYPE_WRBACK)?"WB":"Other")
1728 print_debug("Setting variable MTRR "); print_debug_hex8(reg); print_debug(", base: "); print_debug_hex16(range_startk>>10);
1729 print_debug("MB, range: "); print_debug_hex16(sizek >> 10); print_debug("MB, type ");
1730 print_debug( (type==MTRR_TYPE_UNCACHEABLE)?"UC\r\n":
1731 ((type==MTRR_TYPE_WRBACK)?"WB\r\n":"Other\r\n")
/* Program this chunk, then advance past it. */
1735 set_var_mtrr_dqs(reg++, range_startk, sizek, type, address_bits);
1736 range_startk += sizek;
1737 range_sizek -= sizek;
/*
 * Write the TOP_MEM2 and TOP_MEM MSRs from values given in KB.
 *
 * tom_k:  value for TOP_MEM, in KB
 * tom2_k: value for TOP_MEM2, in KB
 *
 * Each KB value is turned into a byte address split over the MSR halves:
 * the low 22 bits shifted left by 10 go into .lo, the upper 10 bits shifted
 * right by 22 go into .hi.
 * NOTE(review): the declaration of msr is not visible in this listing.
 */
1744 static void set_top_mem_ap(unsigned tom_k, unsigned tom2_k)
1748 /* Now set top of memory */
1749 msr.lo = (tom2_k & 0x003fffff) << 10;
1750 msr.hi = (tom2_k & 0xffc00000) >> 22;
1751 wrmsr(TOP_MEM2, msr);
1753 msr.lo = (tom_k & 0x003fffff) << 10;
1754 msr.hi = (tom_k & 0xffc00000) >> 22;
1755 wrmsr(TOP_MEM, msr);
/*
 * Set up MTRRs for DQS training: set SYSCFG_MSR_MtrrFixDramModEn, map
 * [0, tom_k) as write-back with variable MTRRs starting at register 2
 * (40 address bits), then set bits 21 and 22 of SYSCFG (MtrrTom2En and
 * Tom2ForceMemTypeWB per the inline comment).
 *
 * tom_k:  top of memory in KB, used as the size of the write-back range
 * tom2_k: not referenced in the lines visible here
 *
 * NOTE(review): the declarations of msr and reg, the fixed-MTRR writes that
 * use the 0x1e1e1e1e pattern, and any handling of the remaining variable
 * MTRRs are not visible in this listing -- confirm against the full source.
 */
1758 static void setup_mtrr_dqs(unsigned tom_k, unsigned tom2_k){
1763 //still enable from cache_as_ram.inc
1764 msr = rdmsr(SYSCFG_MSR);
1765 msr.lo |= SYSCFG_MSR_MtrrFixDramModEn;
1766 wrmsr(SYSCFG_MSR,msr);
1769 //[0,512k), [512k, 640k)
1770 msr.hi = 0x1e1e1e1e;
/* Variable MTRRs from index 2 upward cover 0..tom_k as write-back. */
1776 reg = range_to_mtrr(2, 0, tom_k,4*1024*1024, MTRR_TYPE_WRBACK, 40);
1780 //enable tom2 and type
1781 msr = rdmsr(SYSCFG_MSR);
1782 msr.lo |= (1<<21) | (1<<22); //MtrrTom2En and Tom2ForceMemTypeWB
1783 wrmsr(SYSCFG_MSR, msr);
/*
 * Undo setup_mtrr_dqs(): set SYSCFG_MSR_MtrrFixDramModEn, walk MSRs
 * 0x204..0x20f, then clear bits 21 and 22 of SYSCFG (MtrrTom2En and
 * Tom2ForceMemTypeWB per the inline comment).
 *
 * tom2_k: not referenced in the lines visible here
 *
 * NOTE(review): the declarations of msr and i, the fixed-MTRR writes, and the
 * body of the 0x204..0x20f loop are not visible in this listing.  The loop
 * presumably clears the variable MTRRs from pair 2 upward, matching the first
 * register setup_mtrr_dqs() uses -- confirm against the full source.
 */
1788 static void clear_mtrr_dqs(unsigned tom2_k){
1792 //still enable from cache_as_ram.inc
1793 msr = rdmsr(SYSCFG_MSR);
1794 msr.lo |= SYSCFG_MSR_MtrrFixDramModEn;
1795 wrmsr(SYSCFG_MSR,msr);
1797 //[0,512k), [512k, 640k)
1804 for(i=0x204;i<0x210;i++) {
1810 //disable tom2 and type
1811 msr = rdmsr(SYSCFG_MSR);
1812 msr.lo &= ~((1<<21) | (1<<22)); //MtrrTom2En and Tom2ForceMemTypeWB
1813 wrmsr(SYSCFG_MSR, msr);
/*
 * Read-modify-write one bit of the HT_INIT_CONTROL register of PCI device
 * 0:(0x18+i).0.
 *
 * i:   offset added to device number 0x18
 * val: only bit 0 is used
 * bit: bit position inside HT_INIT_CONTROL
 *
 * NOTE(review): the declaration of dword and one statement between the read
 * and the OR are not visible in this listing.  With only the OR shown the bit
 * could be set but never cleared -- confirm that the missing line clears it.
 */
1817 static void set_htic_bit(unsigned i, unsigned val, unsigned bit)
1820 dword = pci_read_config32(PCI_DEV(0, 0x18+i, 0), HT_INIT_CONTROL);
1822 dword |= ((val & 1) <<bit);
1823 pci_write_config32(PCI_DEV(0, 0x18+i, 0), HT_INIT_CONTROL, dword);
/*
 * Read the HT_INIT_CONTROL register of PCI device 0:(0x18+i).0.
 *
 * i:   offset added to device number 0x18
 * bit: bit position of interest
 *
 * NOTE(review): the declaration of dword and the return expression are not
 * visible in this listing; presumably the selected bit is returned -- confirm.
 */
1827 static unsigned get_htic_bit(unsigned i, unsigned bit)
1830 dword = pci_read_config32(PCI_DEV(0, 0x18+i, 0), HT_INIT_CONTROL);
/*
 * Returns once get_htic_bit(0, 9) is non-zero, i.e. once bit 9 of
 * HT_INIT_CONTROL on device 0x18 reads as set (the bit written by
 * set_sysinfo_in_ram()).
 * NOTE(review): the enclosing loop is not visible in this listing --
 * presumably this polls until the bit is set; confirm.
 */
1835 static void wait_till_sysinfo_in_ram(void)
1838 if(get_htic_bit(0, 9)) return;
/*
 * Publish the "sysinfo in RAM" flag: passes val to set_htic_bit() for bit 9
 * of HT_INIT_CONTROL on device 0x18, the bit wait_till_sysinfo_in_ram() tests.
 *
 * val: flag value; set_htic_bit() uses only its bit 0
 */
1842 static void set_sysinfo_in_ram(unsigned val)
1844 set_htic_bit(0, val, 9);
1848 #if MEM_TRAIN_SEQ == 0
/*
 * DQS training driver for the MEM_TRAIN_SEQ == 0 build: trains every
 * controller that is present and has memory, stage by stage.
 *
 *   1. setup_mtrr_dqs() so the training code can access the test addresses
 *   2. fill_mem_cs_sysinfo() for each controller
 *   3. receiver-enable training, pass 1
 *   4. f0_svm_workaround() (only with K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1)
 *   5. DQS position training
 *   6. receiver-enable training, pass 2; mem_trained[i] is set to 1 on success
 *   7. clear_mtrr_dqs()
 *
 * Any training stage that returns non-zero jumps to the `out` label.
 * The signature gains a tsc0 parameter when the F0/F1 workaround is built in.
 *
 * NOTE(review): the #else/#endif lines, the local declarations (i, tsc), the
 * bodies of the ctrl_present checks, the time-stamp sampling and the position
 * of the `out` label are not visible in this listing -- confirm against the
 * full source.
 */
1851 #if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
1852 static void dqs_timing(int controllers, const struct mem_controller *ctrl, tsc_t *tsc0, struct sys_info *sysinfo)
1854 static void dqs_timing(int controllers, const struct mem_controller *ctrl, struct sys_info *sysinfo)
1861 //need to enable mtrr, so dqs training could access the test address
1862 setup_mtrr_dqs(sysinfo->tom_k, sysinfo->tom2_k);
/* Stage: collect chip-select information for each populated controller. */
1864 for(i = 0; i < controllers; i++) {
1865 if (!sysinfo->ctrl_present[ i ])
1868 /* Skip everything if I don't have any memory on this controller */
1869 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1871 fill_mem_cs_sysinfo(i, ctrl+i, sysinfo);
/* Stage: receiver-enable training, pass 1. */
1875 for(i = 0; i < controllers; i++) {
1876 if (!sysinfo->ctrl_present[ i ])
1879 /* Skip everything if I don't have any memory on this controller */
1880 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1882 print_debug("DQS Training:RcvrEn:Pass1: ");
1883 print_debug_hex8(i);
1884 if(train_DqsRcvrEn(ctrl+i, 1, sysinfo)) goto out;
1885 print_debug(" done\r\n");
1889 #if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
1890 f0_svm_workaround(controllers, ctrl, tsc0, sysinfo);
/* Stage: DQS read/write position training. */
1894 for(i = 0; i < controllers; i++) {
1895 if (!sysinfo->ctrl_present[i])
1898 /* Skip everything if I don't have any memory on this controller */
1899 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1901 print_debug("DQS Training:DQSPos: ");
1902 print_debug_hex8(i);
1903 if(train_DqsPos(ctrl+i, sysinfo)) goto out;
1904 print_debug(" done\r\n");
/* Stage: receiver-enable training, pass 2; success marks the controller as trained. */
1908 for(i = 0; i < controllers; i++) {
1909 if (!sysinfo->ctrl_present[i])
1912 /* Skip everything if I don't have any memory on this controller */
1913 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1915 print_debug("DQS Training:RcvrEn:Pass2: ");
1916 print_debug_hex8(i);
1917 if(train_DqsRcvrEn(ctrl+i, 2, sysinfo)) goto out;
1918 print_debug(" done\r\n");
1919 sysinfo->mem_trained[i]=1;
1924 clear_mtrr_dqs(sysinfo->tom2_k);
1928 print_debug_dqs_tsc_x("DQS Training:tsc", i, tsc[i].hi, tsc[i].lo);
1938 #if MEM_TRAIN_SEQ > 0
/*
 * DQS training driver for the MEM_TRAIN_SEQ > 0 build: trains a single
 * controller.
 *
 * i:       index into sysinfo->mem_trained[] for this controller
 * ctrl:    the memory controller to train
 * sysinfo: system information block; mem_trained[i] carries the state
 * v:       not referenced in the lines visible here
 *
 * Does nothing unless mem_trained[i] is 0x80 on entry.  A failing stage
 * records 0x81 (RcvrEn pass 1), 0x82 (DQS position) or 0x83 (RcvrEn pass 2)
 * in mem_trained[i]; if the value is still 0x80 at the end it becomes 1.
 * With MEM_TRAIN_SEQ == 1 the MTRRs are set up before and cleared after the
 * training.
 *
 * NOTE(review): the local declarations (ii, tsc), the #endif lines, the
 * time-stamp sampling and the statements that follow each failure assignment
 * are not visible in this listing -- confirm against the full source.
 */
1940 static void dqs_timing(int i, const struct mem_controller *ctrl, struct sys_info *sysinfo, unsigned v)
1947 if(sysinfo->mem_trained[i] != 0x80) return;
1949 #if MEM_TRAIN_SEQ == 1
1950 //need to enable mtrr, so dqs training could access the test address
1951 setup_mtrr_dqs(sysinfo->tom_k, sysinfo->tom2_k);
1954 fill_mem_cs_sysinfo(i, ctrl, sysinfo);
1959 print_debug("set DQS timing:RcvrEn:Pass1: ");
1960 print_debug_hex8(i);
1962 if(train_DqsRcvrEn(ctrl, 1, sysinfo)) {
1963 sysinfo->mem_trained[i]=0x81; // RcvrEn pass 1 returned non-zero
1968 print_debug(" done\r\n");
1970 print_debug("set DQS timing:DQSPos: ");
1971 print_debug_hex8(i);
1974 if(train_DqsPos(ctrl, sysinfo)) {
1975 sysinfo->mem_trained[i]=0x82; // DQS position training returned non-zero
1980 print_debug(" done\r\n");
1983 print_debug("set DQS timing:RcvrEn:Pass2: ");
1984 print_debug_hex8(i);
1986 if(train_DqsRcvrEn(ctrl, 2, sysinfo)){
1987 sysinfo->mem_trained[i]=0x83; // RcvrEn pass 2 returned non-zero
1992 print_debug(" done\r\n");
1998 #if MEM_TRAIN_SEQ == 1
1999 clear_mtrr_dqs(sysinfo->tom2_k);
2003 for(ii=0;ii<4;ii++) {
2004 print_debug_dqs_tsc_x("Total DQS Training : tsc ", ii, tsc[ii].hi, tsc[ii].lo);
/* Still 0x80 means no stage recorded a failure code: mark the controller as trained. */
2008 if(sysinfo->mem_trained[i] == 0x80) {
2009 sysinfo->mem_trained[i]=1;
2015 #if MEM_TRAIN_SEQ == 1
/*
 * Train one node's memory using the local sysinfo copy, then copy the
 * resulting mem_trained[nodeid] status into sysinfox.
 *
 * nodeid:   node whose controller (sysinfo->ctrl[nodeid]) is trained
 * sysinfo:  working copy used during training
 * sysinfox: second sys_info block that receives the training status
 *
 * The trained delay tables themselves are not copied back: the two memcpy
 * calls below are commented out.
 */
2016 static void train_ram(unsigned nodeid, struct sys_info *sysinfo, struct sys_info *sysinfox)
2018 dqs_timing(nodeid, &sysinfo->ctrl[nodeid], sysinfo, 0); // keep the output tidy
2019 // memcpy(&sysinfox->dqs_rcvr_dly_a[nodeid * 2 * 8],&sysinfo->dqs_rcvr_dly_a[nodeid * 2 * 8], 2*8);
2020 // memcpy(&sysinfox->dqs_delay_a[nodeid * 2 * 2 * 9], &sysinfo->dqs_delay_a[nodeid * 2 * 2 * 9], 2 * 2 * 9);
2021 sysinfox->mem_trained[nodeid] = sysinfo->mem_trained[nodeid];
2024 static void copy_and_run_ap_code_in_car(unsigned ret_addr);
/*
 * Entry point for training a node's memory; does nothing on any core other
 * than core 0.
 *
 * nodeid:  node to train
 * coreid:  calling core; non-zero returns immediately
 * sysinfo: local sys_info block that is filled from the copy in RAM
 * retcall: handed to copy_and_run_ap_code_in_car() when the code is run from
 *          cache-as-RAM
 *
 * Waits until the sysinfo-in-RAM flag is set, and acts only when the RAM
 * copy reports mem_trained[nodeid] == 0x80.  It then takes data over from
 * that copy, sets the local TOP_MEM/TOP_MEM2 from it, and either trains
 * directly (CONFIG_AP_CODE_IN_CAR == 0) or calls
 * copy_and_run_ap_code_in_car().
 *
 * NOTE(review): preprocessor lines that apparently select between the
 * field-by-field copy and the whole-block memcpy, plus the #else/#endif of
 * the CONFIG_AP_CODE_IN_CAR switch, are not visible in this listing.  The
 * sysinfox initialiser also assigns an integer expression to a pointer
 * without a cast -- confirm both against the full source.
 */
2025 static inline void train_ram_on_node(unsigned nodeid, unsigned coreid, struct sys_info *sysinfo, unsigned retcall)
2027 if(coreid) return; // only do it on core0
/* Location of the sys_info copy in RAM: DCACHE_RAM_GLOBAL_VAR_SIZE bytes below CONFIG_LB_MEM_TOPK KB. */
2028 struct sys_info *sysinfox = ((CONFIG_LB_MEM_TOPK<<10) - DCACHE_RAM_GLOBAL_VAR_SIZE);
2029 wait_till_sysinfo_in_ram(); // use pci to get it
2031 if(sysinfox->mem_trained[nodeid] == 0x80) {
2033 sysinfo->tom_k = sysinfox->tom_k;
2034 sysinfo->tom2_k = sysinfox->tom2_k;
2035 sysinfo->meminfo[nodeid].is_Width128 = sysinfox->meminfo[nodeid].is_Width128;
2036 sysinfo->mem_trained[nodeid] = sysinfox->mem_trained[nodeid];
2037 memcpy(&sysinfo->ctrl[nodeid], &sysinfox->ctrl[nodeid], sizeof(struct mem_controller));
2039 memcpy(sysinfo, sysinfox, DCACHE_RAM_GLOBAL_VAR_SIZE);
2041 set_top_mem_ap(sysinfo->tom_k, sysinfo->tom2_k); // keep the ap's tom consistent with bsp's
2042 #if CONFIG_AP_CODE_IN_CAR == 0
2043 print_debug("CODE IN ROM AND RUN ON NODE:"); print_debug_hex8(nodeid); print_debug("\r\n");
2044 train_ram(nodeid, sysinfo, sysinfox);
2046 /* Can copy dqs_timing to ap cache and run from cache?
2047 * we need coreboot_ap_car.rom? and treat it as coreboot_ram.rom for ap ?
2049 copy_and_run_ap_code_in_car(retcall);
2050 // will go back by jump