printk_foo -> printk(BIOS_FOO, ...)
[coreboot.git] / src / northbridge / amd / amdk8 / raminit_f_dqs.c
1 /*
2  * This file is part of the coreboot project.
3  *
4  * Copyright (C) 2005 YingHai Lu
5  * Copyright (C) 2008 Advanced Micro Devices, Inc.
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; version 2 of the License.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
19 */
20
/*
 * Verbosity of the DQS training trace output.
 * 0 disables it entirely; a message tagged with level N is printed only
 * when DQS_TRAIN_DEBUG is greater than N.
 */
#define DQS_TRAIN_DEBUG 0
23
/*
 * Print a label followed by one hex value, if the compile-time debug
 * level DQS_TRAIN_DEBUG is greater than 'level'.
 * Compiles to nothing when DQS_TRAIN_DEBUG is 0.
 */
static inline void print_debug_dqs(const char *str, unsigned val, unsigned level)
{
#if DQS_TRAIN_DEBUG > 0
	if (level >= DQS_TRAIN_DEBUG)
		return;

	printk(BIOS_DEBUG, "%s%x\r\n", str, val);
#endif
}
32
/*
 * Print two label/value pairs on one line (values as 8 hex digits), if
 * the compile-time debug level DQS_TRAIN_DEBUG is greater than 'level'.
 * Compiles to nothing when DQS_TRAIN_DEBUG is 0.
 */
static inline void print_debug_dqs_pair(const char *str, unsigned val, const char *str2, unsigned val2, unsigned level)
{
#if DQS_TRAIN_DEBUG > 0
	if (level >= DQS_TRAIN_DEBUG)
		return;

	printk(BIOS_DEBUG, "%s%08x%s%08x\r\n", str, val, str2, val2);
#endif
}
41
/*
 * Print "str[i]=<val><val2>" (index as 2 hex digits, the two values as
 * 8 hex digits each), if DQS_TRAIN_DEBUG is greater than 'level'.
 * Compiles to nothing when DQS_TRAIN_DEBUG is 0.
 */
static inline void print_debug_dqs_tsc(const char *str, unsigned i, unsigned val, unsigned val2, unsigned level)
{
#if DQS_TRAIN_DEBUG > 0
	if (level >= DQS_TRAIN_DEBUG)
		return;

	printk(BIOS_DEBUG, "%s[%02x]=%08x%08x\r\n", str, i, val, val2);
#endif
}
50
51 static inline void print_debug_dqs_tsc_x(const char *str, unsigned i, unsigned val, unsigned val2)
52 {
53         printk(BIOS_DEBUG, "%s[%02x]=%08x%08x\r\n", str, i, val, val2);
54
55 }
56
57 static void fill_mem_cs_sysinfo(unsigned nodeid, const struct mem_controller *ctrl, struct sys_info *sysinfo)
58 {
59
60         int i;
61         sysinfo->mem_base[nodeid] = pci_read_config32(ctrl->f1, 0x40 + (nodeid<<3));
62
63         for(i=0;i<8; i++) {
64                 sysinfo->cs_base[nodeid*8+i] = pci_read_config32(ctrl->f2, 0x40 + (i<<2));
65         }
66
67         sysinfo->hole_reg[nodeid] = pci_read_config32(ctrl->f1, 0xf0);
68
69 }
70 static unsigned Get_MCTSysAddr(const struct mem_controller *ctrl,  unsigned cs_idx, struct sys_info *sysinfo)
71 {
72         uint32_t dword;
73         uint32_t mem_base;
74         unsigned nodeid = ctrl->node_id;
75
76 #if CONFIG_HW_MEM_HOLE_SIZEK != 0
77         uint32_t hole_reg;
78 #endif
79
80         //get the local base addr of the chipselect
81         dword = sysinfo->cs_base[nodeid * 8 + cs_idx];
82         dword &= 0xfffffff0;
83
84         //sys addr= node base + local cs base
85         mem_base = sysinfo->mem_base[nodeid];
86         mem_base &= 0xffff0000;
87
88         dword += mem_base;
89 #if CONFIG_HW_MEM_HOLE_SIZEK != 0
90         hole_reg = sysinfo->hole_reg[nodeid];
91         if(hole_reg & 1) {
92                 unsigned hole_startk;
93                 hole_startk = (hole_reg & (0xff<<24)) >> 10;
94                 if( (dword >= (hole_startk<<2)) && (dword < ((4*1024*1024)<<2))) {
95                         dword += ((4*1024*1024 - hole_startk)<<2);
96                 }
97         }
98 #endif
99
100         //add 1MB offset to avoid compat area
101         dword += (1<<(20-8));
102
103         //So final result is upper 32 bit addr
104
105         return dword;
106
107 }
108
/*
 * Test address used for receiver-enable training of a chip select.
 * Simply forwards to Get_MCTSysAddr(); 'channel' is not used.
 */
static unsigned Get_RcvrSysAddr(const struct mem_controller * ctrl, unsigned channel, unsigned cs_idx, struct sys_info *sysinfo)
{
	const unsigned sys_addr = Get_MCTSysAddr(ctrl, cs_idx, sysinfo);

	return sys_addr;
}
114
115 static inline unsigned long read_cr4(void)
116 {
117         unsigned long cr4;
118         asm volatile ("movl %%cr4, %0" : "=r" (cr4));
119         return cr4;
120 }
121
122 static inline void write_cr4(unsigned long cr4)
123 {
124         asm volatile ("movl %0, %%cr4" : : "r" (cr4));
125 }
126
127
/*
 * Set bit 9 of CR4 (OSFXSR in the AMD64 architecture manuals) so that
 * the SSE instructions used by the test-pattern writer may execute.
 */
static inline void enable_sse2()
{
	write_cr4(read_cr4() | (1<<9));
}
135
/* Clear bit 9 of CR4 again, undoing enable_sse2(). */
static inline void disable_sse2()
{
	write_cr4(read_cr4() & ~(1<<9));
}
143
144
145 static void set_wrap32dis(void) {
146         msr_t msr;
147
148         msr = rdmsr(0xc0010015);
149         msr.lo |= (1<<17);
150
151         wrmsr(0xc0010015, msr);
152
153 }
154
155 static void clear_wrap32dis(void) {
156         msr_t msr;
157
158         msr = rdmsr(0xc0010015);
159         msr.lo &= ~(1<<17);
160
161         wrmsr(0xc0010015, msr);
162
163 }
164
165 static void set_FSBASE(uint32_t addr_hi)
166 {
167         msr_t msr;
168
169         //set fs and use fs prefix to access the mem
170         msr.hi = addr_hi;
171         msr.lo = 0;
172         wrmsr(0xc0000100, msr); //FS_BASE
173
174 }
175
176 static unsigned ChipSelPresent(const struct mem_controller *ctrl, unsigned cs_idx, struct sys_info *sysinfo)
177 {
178         unsigned enabled;
179         unsigned nodeid = ctrl->node_id;
180
181
182         enabled = sysinfo->cs_base[nodeid * 8 + cs_idx];
183         enabled &= 1;
184
185         return enabled;
186
187 }
188
/*
 * Tell whether the rank behind chip select 'cs_idx' is populated.
 * Forwards to ChipSelPresent(); 'channel' and 'is_Width128' are unused.
 */
static unsigned RcvrRankEnabled(const struct mem_controller *ctrl, int channel, int cs_idx, unsigned is_Width128, struct sys_info *sysinfo)
{
	const unsigned present = ChipSelPresent(ctrl, cs_idx, sysinfo);

	return present;
}
193
/*
 * Copy 'line_num' 64-byte lines from buf_a to FS:addr_lo using
 * non-temporal 16-byte SSE2 stores (four stores per line).
 *
 * @param addr_lo   32-bit offset relative to the FS base (see set_FSBASE)
 * @param buf_a     source pattern; read with movdqa, so it must be
 *                  16-byte aligned
 * @param line_num  number of 64-byte lines to write; 0 writes nothing
 *
 * The asm advances the destination, source and counter registers, so
 * they are declared as read-write operands; declaring them as plain
 * inputs (as before) lets the compiler assume they are unchanged.
 * Memory, flags and xmm0 are listed as clobbered for the same reason.
 */
static void WriteLNTestPattern(unsigned addr_lo, uint8_t *buf_a, unsigned line_num)
{
	unsigned chunks = line_num * 4; /* 16-byte stores still to do */

	/* "loop" with a zero count would wrap and run 2^32 times */
	if (chunks == 0)
		return;

	__asm__ volatile (
		"1:\n\t"
		"movdqa (%[src]), %%xmm0\n\t"
		"movntdq %%xmm0, %%fs:(%[dst])\n\t" /* xmm0 is 128 bit */
		"addl %[step], %[dst]\n\t"
		"addl %[step], %[src]\n\t"
		"loop 1b\n\t" /* decrements ecx, i.e. [cnt] */

		: [dst] "+a" (addr_lo), [cnt] "+c" (chunks), [src] "+b" (buf_a)
		: [step] "d" (16)
		: "memory", "cc", "xmm0"
	);
}
209
/*
 * Write one line of test pattern to the address 'addr' (given in units
 * of 256 bytes).  Pattern buffer buf_b is used when p == 1, buf_a for
 * any other value of p.
 */
static void Write1LTestPattern(unsigned addr, unsigned p, uint8_t *buf_a, uint8_t *buf_b)
{
	uint8_t *src = (p == 1) ? buf_b : buf_a;

	/* upper address bits go into FS base, the rest is the offset */
	set_FSBASE(addr >> 24);
	WriteLNTestPattern(addr << 8, src, 1);
}
220
/*
 * Perform one 32-bit read at 'addr' (given in units of 256 bytes)
 * through the FS segment.  The loaded value is discarded; the read is
 * done for its effect of filling the cache line.
 */
static void Read1LTestPattern(unsigned addr)
{
	unsigned discarded;

	set_FSBASE(addr >> 24);

	/* 1st move causes read fill (to exclusive or shared) */
	__asm__ volatile (
		"movl %%fs:(%1), %0\n\t"
		: "=b" (discarded)
		: "a" (addr << 8)
	);

	(void)discarded;
}
234
/* Result of a single pattern comparison / training step */
#define DQS_PASS		0
#define DQS_FAIL		1

/* Values of the 'Pass' argument taken by the training routines */
#define DQS_FIRST_PASS		1
#define DQS_SECOND_PASS		2

/*
 * Error identifiers and limits used by the training routines.
 * NOTE(review): the SB_ values look like status-bit positions; confirm
 * how the users of these constants combine them.
 */
#define SB_NORCVREN		11
#define RCVREN_MARGIN		6
#define SB_SmallRCVR		13
#define SB_CHA2BRCVREN		12
#define SB_NODQSPOS		14
#define MIN_DQS_WNDW		3
#define SB_SMALLDQS		15
248
249
250 static unsigned CompareTestPatternQW0(unsigned channel, unsigned addr, unsigned pattern, const uint32_t *TestPattern0, const uint32_t *TestPattern1, const uint32_t *TestPattern2, unsigned Pass, unsigned is_Width128)
251 {
252         uint32_t addr_lo;
253         uint32_t *test_buf;
254         uint32_t value;
255         uint32_t value_test;
256         unsigned result = DQS_FAIL;
257
258         if(Pass == DQS_FIRST_PASS) {
259                 if(pattern==1) {
260                         test_buf = (uint32_t *)TestPattern1;
261                 }
262                 else {
263                         test_buf = (uint32_t *)TestPattern0;
264                 }
265         }
266         else {
267                 test_buf = (uint32_t *)TestPattern2;
268         }
269
270         set_FSBASE(addr>>24);
271
272         addr_lo = addr<<8;
273
274         if(is_Width128 && (channel == 1)) {
275                 addr_lo += 8; //second channel
276                 test_buf += 2;
277         }
278
279         __asm__ volatile (
280                 "movl %%fs:(%1), %0\n\t"
281                 :"=b"(value): "a" (addr_lo)
282         );
283
284         value_test = *test_buf;
285
286
287         print_debug_dqs_pair("\t\t\t\t\t\tQW0.lo : test_buf= ", (unsigned)test_buf, " value = ", value_test, 4);
288         print_debug_dqs_pair("\t\t\t\t\t\tQW0.lo : addr_lo = ", addr_lo, " value = ", value, 4);
289
290         if(value == value_test) {
291                 addr_lo += 4;
292                 test_buf++;
293                 __asm__ volatile (
294                         "movl %%fs:(%1), %0\n\t"
295                         :"=b"(value): "a" (addr_lo)
296                 );
297                 value_test = *test_buf;
298                 print_debug_dqs_pair("\t\t\t\t\t\tQW0.hi : test_buf= ", (unsigned)test_buf, " value = ", value_test, 4);
299                 print_debug_dqs_pair("\t\t\t\t\t\tQW0.hi : addr_lo = ", addr_lo, " value = ", value, 4);
300
301                 if(value == value_test){
302                         result =  DQS_PASS;
303                 }
304         }
305
306         if(Pass == DQS_SECOND_PASS) { // second pass need to be inverted
307                 if(result==DQS_PASS) {
308                         result = DQS_FAIL;
309                 }
310                 else {
311                         result = DQS_PASS;
312                 }
313         }
314
315         return result;
316
317 }
318
319 static void SetMaxAL_RcvrDly(const struct mem_controller *ctrl, unsigned dly)
320 {
321         uint32_t reg;
322
323         dly += (20-1); // round it
324         dly /= 20; // convert from unit 50ps to 1ns
325
326         dly += 6;
327
328
329         reg = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH);
330         reg &= ~(DCH_MaxAsyncLat_MASK <<DCH_MaxAsyncLat_SHIFT);
331         reg |= ((dly - DCH_MaxAsyncLat_BASE) << DCH_MaxAsyncLat_SHIFT);
332         pci_write_config32(ctrl->f2, DRAM_CONFIG_HIGH, reg);
333
334 }
335
336 /*
337         Set the Target range to WT IO (using an IORR overlapping the already existing
338         WB dram type). Use IORR0
339 */
340 static void SetTargetWTIO(unsigned addr)
341 {
342         msr_t msr;
343         msr.hi = addr>>24;
344         msr.lo = addr<<8;
345         wrmsr(0xc0010016, msr); //IORR0 BASE
346
347         msr.hi = 0xff;
348         msr.lo = 0xfc000800;  // 64MB Mask
349         wrmsr(0xc0010017, msr); // IORR0 Mask
350 }
351
352 static void ResetTargetWTIO(void)
353 {
354         msr_t msr;
355
356         msr.hi = 0;
357         msr.lo = 0;
358         wrmsr(0xc0010017, msr); // IORR0 Mask
359 }
360
/*
 * Flush the cache line containing 'addr' (given in units of 256 bytes).
 * The upper address bits are placed in FS base and the line is flushed
 * with an FS-relative clflush.
 */
static void proc_CLFLUSH(unsigned addr)
{
	const unsigned offset = addr << 8;

	set_FSBASE(addr >> 24);

	__asm__ volatile (
		/* clflush fs:[eax] */
		"clflush %%fs:(%0)\n\t"
		: /* no outputs */
		: "a" (offset)
	);
}
/*
 * Flush the cache line at 'addr' while the range is temporarily marked
 * as WT IO through IORR0; the IORR0 mask is cleared again afterwards.
 */
static void proc_IOCLFLUSH(unsigned addr)
{
	SetTargetWTIO(addr);	/* cover the target with IORR0 */
	proc_CLFLUSH(addr);	/* flush the line */
	ResetTargetWTIO();	/* drop the IORR0 range again */
}
380
381 static void ResetDCTWrPtr(const struct mem_controller *ctrl)
382 {
383         uint32_t dword;
384         unsigned index = 0x10;
385
386         dword = pci_read_config32_index_wait(ctrl->f2, 0x98, index);
387         pci_write_config32_index_wait(ctrl->f2, 0x98, index, dword);
388
389         index += 0x20;
390         dword = pci_read_config32_index_wait(ctrl->f2, 0x98, index);
391         pci_write_config32_index_wait(ctrl->f2, 0x98, index, dword);
392
393 }
394
395
396 static uint16_t get_exact_T1000(unsigned i)
397 {
398         //                                 200   266,   333,  400
399         static const uint16_t T1000_a[]= { 5000, 3759, 3003, 2500 };
400
401         static const uint16_t TT_a[] = {
402                  /*200   266   333   400 */
403          /*4 */   6250, 6250, 6250, 6250,
404          /*5 */   5000, 5000, 5000, 2500,
405          /*6 */   5000, 4166, 4166, 2500,
406          /*7 */   5000, 4285, 3571, 2500,
407
408          /*8 */   5000, 3750, 3125, 2500,
409          /*9 */   5000, 3888, 3333, 2500,
410          /*10*/   5000, 4000, 3000, 2500,
411          /*11*/   5000, 4090, 3181, 2500,
412
413          /*12*/   5000, 3750, 3333, 2500,
414          /*13*/   5000, 3846, 3076, 2500,
415          /*14*/   5000, 3928, 3214, 2500,
416          /*15*/   5000, 4000, 3000, 2500,
417         };
418
419         int index;
420         msr_t msr;
421
422         /* Check for FID control support */
423         struct cpuid_result cpuid1;
424         cpuid1 = cpuid(0x80000007);
425         if( cpuid1.edx & 0x02 ) {
426                 /* Use current FID */
427                 unsigned fid_cur;
428                 msr = rdmsr(0xc0010042);
429                 fid_cur = msr.lo & 0x3f;
430
431                 index = fid_cur>>1;
432         } else {
433                 /* Use startup FID */
434                 unsigned fid_start;
435                 msr = rdmsr(0xc0010015);
436                 fid_start = (msr.lo & (0x3f << 24));
437                 
438                 index = fid_start>>25;
439         }
440
441         if(index>12) return T1000_a[i];
442
443         return TT_a[index * 4+i];
444
445 }
446
447 static void InitDQSPos4RcvrEn(const struct mem_controller *ctrl)
448 {
449         int i;
450         uint32_t dword;
451
452         dword = 0x00000000;
453         for(i=1; i<=3; i++) {
454                 /* Program the DQS Write Timing Control Registers (Function 2:Offset 0x9c, index 0x01-0x03, 0x21-0x23) to 0x00 for all bytes */
455                 pci_write_config32_index_wait(ctrl->f2, 0x98, i, dword);
456                 pci_write_config32_index_wait(ctrl->f2, 0x98, i+0x20, dword);
457         }
458
459         dword = 0x2f2f2f2f;
460         for(i=5; i<=7; i++) {
461                 /* Program the DQS Write Timing Control Registers (Function 2:Offset 0x9c, index 0x05-0x07, 0x25-0x27) to 0x2f for all bytes */
462                 pci_write_config32_index_wait(ctrl->f2, 0x98, i, dword);
463                 pci_write_config32_index_wait(ctrl->f2, 0x98, i+0x20, dword);
464         }
465
466
467 }
/* Build the rev F0/F1 workaround by default unless the build overrides it. */
#if !defined(K8_REV_F_SUPPORT_F0_F1_WORKAROUND)
#define K8_REV_F_SUPPORT_F0_F1_WORKAROUND 1
#endif
471
472 static unsigned TrainRcvrEn(const struct mem_controller *ctrl, unsigned Pass, struct sys_info *sysinfo)
473 {
474
475         static const uint32_t TestPattern0[] = {
476                         0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
477                         0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
478                         0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
479                         0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
480                 };
481         static const uint32_t TestPattern1[] = {
482                         0x55555555, 0x55555555, 0x55555555, 0x55555555,
483                         0x55555555, 0x55555555, 0x55555555, 0x55555555,
484                         0x55555555, 0x55555555, 0x55555555, 0x55555555,
485                         0x55555555, 0x55555555, 0x55555555, 0x55555555,
486                 };
487         static const uint32_t TestPattern2[] = {
488                         0x12345678, 0x87654321, 0x23456789, 0x98765432,
489                         0x59385824, 0x30496724, 0x24490795, 0x99938733,
490                         0x40385642, 0x38465245, 0x29432163, 0x05067894,
491                         0x12349045, 0x98723467, 0x12387634, 0x34587623,
492                 };
493
494         uint8_t pattern_buf_x[64 * 4 + 16]; // We need to two cache line So have more 16 bytes to keep 16 byte alignment */
495         uint8_t *buf_a, *buf_b;
496         uint32_t ecc_bit;
497         uint32_t dword;
498         uint8_t *dqs_rcvr_dly_a = &sysinfo->dqs_rcvr_dly_a[ctrl->node_id * 2* 8] ; //8 node, channel 2, receiver 8
499
500         int i;
501
502         unsigned channel, receiver;
503
504         unsigned Errors;
505         unsigned CTLRMaxDelay;
506         unsigned T1000;
507
508         unsigned LastTest;
509         unsigned CurrTest;
510         unsigned Test0, Test1;
511
512         unsigned RcvrEnDlyRmin;
513
514         unsigned two_ranks;
515         unsigned RcvrEnDly;
516
517         unsigned PatternA;
518         unsigned PatternB;
519
520         unsigned TestAddr0, TestAddr0B, TestAddr1 = 0, TestAddr1B = 0;
521
522         unsigned CurrRcvrCHADelay = 0;
523
524         unsigned tmp;
525
526         unsigned is_Width128 = sysinfo->meminfo[ctrl->node_id].is_Width128;
527
528 #if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
529         unsigned cpu_f0_f1;
530 #endif
531
532         if(Pass == DQS_FIRST_PASS) {
533                 InitDQSPos4RcvrEn(ctrl);
534         }
535
536         //enable SSE2
537         enable_sse2();
538
539         //wrap32dis
540         set_wrap32dis();
541
542         //disable ECC temp
543         dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
544         ecc_bit = dword & DCL_DimmEccEn;
545         dword &= ~(DCL_DimmEccEn);
546         pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dword);
547
548
549         if(Pass == DQS_FIRST_PASS) {
550 #if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
551         cpu_f0_f1 = is_cpu_pre_f2_in_bsp(ctrl->node_id);
552         if(!cpu_f0_f1)
553 #endif
554         {
555 #if 1
556                 /* Set the DqsRcvEnTrain bit */
557                 dword = pci_read_config32(ctrl->f2, DRAM_CTRL);
558                 dword |= DC_DqsRcvEnTrain;
559                 pci_write_config32(ctrl->f2, DRAM_CTRL, dword);
560 #endif
561         }
562         }
563
564         //get T1000 figures (cycle time (ns)) * 1K
565         dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH);
566         dword &= DCH_MemClkFreq_MASK;
567
568         T1000 = get_exact_T1000(dword);
569
570         // SetupRcvrPattern
571         buf_a = (uint8_t *)(((uint32_t)(&pattern_buf_x[0]) + 0x10) & (0xfffffff0));
572         buf_b = buf_a + 128; //??
573         if(Pass==DQS_FIRST_PASS) {
574                 for(i=0;i<16;i++) {
575                         *((uint32_t *)(buf_a + i*4)) = TestPattern0[i];
576                         *((uint32_t *)(buf_b + i*4)) = TestPattern1[i];
577                 }
578         }
579         else {
580                 for(i=0;i<16;i++) {
581                         *((uint32_t *)(buf_a + i*4)) = TestPattern2[i];
582                         *((uint32_t *)(buf_b + i*4)) = TestPattern2[i];
583                 }
584         }
585
586         print_debug_dqs("\r\nTrainRcvEn: 0 ctrl", ctrl->node_id, 0);
587
588         print_debug_addr("TrainRcvEn: buf_a:", buf_a);
589
590         Errors = 0;
591         /* for each channel */
592         CTLRMaxDelay = 0;
593         channel = 0;
594
595         if (!(sysinfo->meminfo[ctrl->node_id].dimm_mask & 0x0F) &&
596              (sysinfo->meminfo[ctrl->node_id].dimm_mask & 0xF0)) { /* channelB only? */
597                 channel = 1;
598         }
599
600         for ( ; (channel < 2) && (!Errors); channel++)
601         { 
602                 print_debug_dqs("\tTrainRcvEn51: channel ",channel, 1); 
603                 
604                 /* for each rank */ 
605                 /* there are four recriver pairs, loosely associated with CS */ 
606                 for( receiver = 0; (receiver < 8) && (!Errors); receiver+=2) 
607                 {
608
609                         unsigned index=(receiver>>1) * 3 + 0x10;
610
611                         print_debug_dqs("\t\tTrainRcvEn52: index ", index, 2);
612
613                         if(is_Width128) {
614                                 if(channel) {
615                                         dword = pci_read_config32_index_wait(ctrl->f2, 0x98, index);
616                                         CurrRcvrCHADelay= dword & 0xff;
617                                 }
618                         }
619                         else {
620                                 if(channel) {
621                                         index += 0x20;
622                                 }
623                         }
624
625                         LastTest = DQS_FAIL;
626                         RcvrEnDlyRmin = 0xaf;
627
628                         if(!RcvrRankEnabled(ctrl, channel, receiver, is_Width128, sysinfo)) continue;
629
630                         /* for each DQS receiver enable setting */
631
632                         TestAddr0 = Get_RcvrSysAddr(ctrl, channel, receiver, sysinfo);
633
634                         TestAddr0B = TestAddr0 + (1<<(20+2-8)); // 4MB
635
636                         if(RcvrRankEnabled(ctrl, channel, receiver+1, is_Width128, sysinfo)) {
637                                 TestAddr1 = Get_RcvrSysAddr(ctrl, channel, receiver+1, sysinfo);
638                                 TestAddr1B = TestAddr1 + (1<<(20+2-8)); //4MB
639                                 two_ranks = 1;
640                         }
641                         else {
642                                 two_ranks = 0;
643                         }
644
645                         print_debug_dqs("\t\tTrainRcvEn53: TestAddr0B ", TestAddr0B, 2);
646
647                         Write1LTestPattern(TestAddr0, 0, buf_a, buf_b); // rank0 of dimm, test p0
648                         Write1LTestPattern(TestAddr0B, 1, buf_a, buf_b); //rank0 of dimm, test p1
649
650                         if(two_ranks == 1) {
651                                 Write1LTestPattern(TestAddr1, 0, buf_a, buf_b); //rank 1 of dimm
652                                 Write1LTestPattern(TestAddr1B, 1, buf_a, buf_b);//rank 1 of dimm
653                         }
654
655                         if(Pass == DQS_FIRST_PASS) {
656                                 RcvrEnDly = 0;
657                         } else {
658                                 RcvrEnDly = dqs_rcvr_dly_a[channel * 8 + receiver];
659                         }
660
661                         while ( RcvrEnDly < 0xaf) { // Sweep Delay value here
662                                 print_debug_dqs("\t\t\tTrainRcvEn541: RcvrEnDly ", RcvrEnDly, 3);
663
664                                 if(RcvrEnDly & 1) {
665                                         /* Odd steps get another pattern such that even
666                                            and odd steps alternate.
667                                            The pointers to the patterns will be swapped
668                                            at the end of the loop so they are correspond
669                                         */
670                                         PatternA = 1;
671                                         PatternB = 0;
672                                 }
673                                 else {
674                                         /* Even step */
675                                         PatternA = 0;
676                                         PatternB = 1;
677                                 }
678
679                                 /* Program current Receiver enable delay */
680                                 pci_write_config32_index_wait(ctrl->f2, 0x98, index, RcvrEnDly);
681                                 /* FIXME: 64bit MUX */
682
683                                 if(is_Width128) {
684                                         /* Program current Receiver enable delay chaannel b */
685                                         pci_write_config32_index_wait(ctrl->f2, 0x98, index+ 0x20, RcvrEnDly);
686                                 }
687
688                                 /* Program the MaxAsyncLat filed with the
689                                    current DQS receiver enable setting plus 6ns
690                                 */
691                                 /*Porgram MaxAsyncLat to correspond with current delay */
692                                 SetMaxAL_RcvrDly(ctrl, RcvrEnDly);
693
694                                 CurrTest = DQS_FAIL;
695
696                                 Read1LTestPattern(TestAddr0);  //Cache Fill
697                                 /* ROM vs cache compare */
698                                 Test0 = CompareTestPatternQW0(channel, TestAddr0, PatternA, TestPattern0, TestPattern1, TestPattern2, Pass, is_Width128);
699                                 proc_IOCLFLUSH(TestAddr0);
700
701                                 ResetDCTWrPtr(ctrl);
702
703                                 print_debug_dqs("\t\t\tTrainRcvEn542: Test0 ", Test0, 3);
704
705                                 if(Test0 == DQS_PASS) {
706
707                                         Read1LTestPattern(TestAddr0B);
708                                         Test1 = CompareTestPatternQW0(channel, TestAddr0B, PatternB, TestPattern0, TestPattern1, TestPattern2, Pass, is_Width128);
709                                         proc_IOCLFLUSH(TestAddr0B);
710
711                                         ResetDCTWrPtr(ctrl);
712
713                                         print_debug_dqs("\t\t\tTrainRcvEn543: Test1 ", Test1, 3);
714
715                                         if(Test1 == DQS_PASS) {
716                                                 if(two_ranks) {
717                                                         Read1LTestPattern(TestAddr1);
718                                                         Test0 = CompareTestPatternQW0(channel, TestAddr1, PatternA, TestPattern0, TestPattern1, TestPattern2, Pass, is_Width128);
719                                                         proc_IOCLFLUSH(TestAddr1);
720                                                         ResetDCTWrPtr(ctrl);
721
722                                                         if(Test0 == DQS_PASS) {
723                                                                 Read1LTestPattern(TestAddr1B);
724                                                                 Test1 = CompareTestPatternQW0(channel, TestAddr1B, PatternB, TestPattern0, TestPattern1, TestPattern2, Pass, is_Width128);
725                                                                 proc_IOCLFLUSH(TestAddr1B);
726                                                                 ResetDCTWrPtr(ctrl);
727
728                                                                 if(Test1 == DQS_PASS) {
729                                                                         CurrTest = DQS_PASS;
730                                                                 }
731                                                         }
732                                                         print_debug_dqs("\t\t\tTrainRcvEn544: Test0 ", Test0, 3);
733                                                 }
734                                                 else {
735                                                         CurrTest = DQS_PASS;
736                                                 }
737                                         }
738                                 }
739
740                                 print_debug_dqs("\t\t\tTrainRcvEn55: RcvrEnDly ", RcvrEnDly, 3);
741
742                                 if(CurrTest == DQS_PASS) {
743                                         if(LastTest == DQS_FAIL) {
744                                                 RcvrEnDlyRmin = RcvrEnDly;
745                                                 break;
746                                         }
747                                 }
748
749                                 LastTest = CurrTest;
750
751                                 /* swap the rank 0 pointers */
752                                 tmp = TestAddr0;
753                                 TestAddr0 = TestAddr0B;
754                                 TestAddr0B = tmp;
755
756                                 /* swap the rank 1 pointers */
757                                 tmp = TestAddr1;
758                                 TestAddr1 = TestAddr1B;
759                                 TestAddr1B = tmp;
760
761                                 print_debug_dqs("\t\t\tTrainRcvEn56: RcvrEnDly ", RcvrEnDly, 3);
762
763                                 RcvrEnDly++;
764
765                         } // while RcvrEnDly
766
767                         print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDly ", RcvrEnDly, 2);
768
769                         if(RcvrEnDlyRmin == 0xaf) {
770                                 //no passing window
771                                 Errors |= SB_NORCVREN;
772                         }
773
774                         if(Pass == DQS_FIRST_PASS) {
775                                 // We need a better value for DQSPos trainning
776                                 RcvrEnDly = RcvrEnDlyRmin /* + RCVREN_MARGIN * T1000/64/50 */;
777                         } else {
778                                 RcvrEnDly = RcvrEnDlyRmin;
779                         }
780
781                         if(RcvrEnDly > 0xae) {
782                                 //passing window too narrow, too far delayed
783                                 Errors |= SB_SmallRCVR;
784                                 RcvrEnDly = 0xae;
785                         }
786
787                         if(Pass == DQS_SECOND_PASS) { //second pass must average vales
788                                 RcvrEnDly += dqs_rcvr_dly_a[channel * 8 + receiver] /* - (RCVREN_MARGIN * T1000/64/50)*/;
789                                 RcvrEnDly >>= 1;
790                         }
791
792                         dqs_rcvr_dly_a[channel * 8 + receiver] = RcvrEnDly;
793
794                         //Set final RcvrEnDly for this DIMM and Channel
795                         pci_write_config32_index_wait(ctrl->f2, 0x98, index, RcvrEnDly);
796
797                         if(is_Width128) {
798                                 pci_write_config32_index_wait(ctrl->f2, 0x98, index+0x20, RcvrEnDly); // channel B
799                                 if(channel) {
800                                         pci_write_config32_index_wait(ctrl->f2, 0x98, index, CurrRcvrCHADelay);
801                                         if(RcvrEnDly > CurrRcvrCHADelay) {
802                                                 dword = RcvrEnDly - CurrRcvrCHADelay;
803                                         }
804                                         else {
805                                                 dword = CurrRcvrCHADelay - RcvrEnDly;
806                                         }
807                                         dword *= 50;
808                                         if(dword > T1000) {
809                                                 Errors |= SB_CHA2BRCVREN;
810                                         }
811                                 }
812                         }
813
814                         print_debug_dqs("\t\tTrainRcvEn63: RcvrEnDly ", RcvrEnDly, 2);
815
816                         if(RcvrEnDly > CTLRMaxDelay) {
817                                 CTLRMaxDelay = RcvrEnDly;
818                         }
819
820                         print_debug_dqs("\t\tTrainRcvEn64: CTLRMaxDelay ", CTLRMaxDelay, 2);
821
822                 } /* receiver */
823         } /* channel */
824
825         print_debug_dqs("\tTrainRcvEn65: CTLRMaxDelay ", CTLRMaxDelay, 1);
826
827         /* Program the MaxAsysncLat field with the largest DQS Receiver Enable setting */
828         SetMaxAL_RcvrDly(ctrl, CTLRMaxDelay);
829         ResetDCTWrPtr(ctrl);
830
831         //Enable ECC again
832         dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
833         dword &= ~(DCL_DimmEccEn);
834         dword |= ecc_bit;
835         pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dword);
836
837         if(Pass == DQS_FIRST_PASS) {
838 #if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
839         if(!cpu_f0_f1)
840 #endif
841         {
842                 dword = pci_read_config32(ctrl->f2, DRAM_CTRL);
843                 dword &= ~DC_DqsRcvEnTrain;
844                 pci_write_config32(ctrl->f2, DRAM_CTRL, dword);
845         }
846         }
847
848         //Clear wrap32dis
849
850         clear_wrap32dis();
851
852         //restore SSE2 setting
853         disable_sse2();
854
855 #if CONFIG_MEM_TRAIN_SEQ != 1
856         /* We need tidy output for type 1 */
857         printk(BIOS_DEBUG, " CTLRMaxDelay=%02x\n", CTLRMaxDelay);
858 #endif
859
860         return (CTLRMaxDelay==0xae)?1:0;
861
862 }
863
/*
 * Direction selectors taken by the DQS delay helpers in this file.
 * The numeric values matter: they are used arithmetically to pick the
 * delay register index (direction << 2, direction * 4) and the slot in
 * the saved-delay array (direction * 9).
 */
#define DQS_READDIR 1
#define DQS_WRITEDIR 0
866
867
868 static void SetDQSDelayCSR(const struct mem_controller *ctrl, unsigned channel, unsigned bytelane, unsigned direction, unsigned dqs_delay)
869 { //ByteLane could be 0-8, last is for ECC
870         unsigned index;
871         uint32_t dword;
872         unsigned shift;
873
874         dqs_delay &= 0xff;
875
876         index = (bytelane>>2) + 1 + channel * 0x20 + (direction << 2);
877         shift = bytelane;
878         while(shift>3) {
879                 shift-=4;
880         }
881         shift <<= 3; // 8 bit
882
883         dword = pci_read_config32_index_wait(ctrl->f2, 0x98, index);
884         dword &= ~(0x3f<<shift);
885         dword |= (dqs_delay<<shift);
886         pci_write_config32_index_wait(ctrl->f2, 0x98, index, dword);
887
888 }
889
890 static void SetDQSDelayAllCSR(const struct mem_controller *ctrl, unsigned channel, unsigned direction, unsigned dqs_delay)
891 {
892         unsigned index;
893         uint32_t dword;
894         int i;
895
896         dword = 0;
897         dqs_delay &= 0xff;
898         for(i=0;i<4;i++) {
899                 dword |= dqs_delay<<(i*8);
900         }
901
902         index = 1 + channel * 0x20 + direction * 4;
903
904         for(i=0; i<2; i++) {
905                 pci_write_config32_index_wait(ctrl->f2, 0x98, index + i, dword);
906         }
907
908 }
909
/*
 * Return the midpoint of the window [min_d, max_d].
 * When the window width is odd the result is rounded up.
 */
static unsigned MiddleDQS(unsigned min_d, unsigned max_d)
{
        const unsigned span = max_d - min_d;

        /* half of the span, plus one extra step when the span is odd */
        return min_d + (span >> 1) + (span & 1);
}
919
/*
 * Record a trained delay in the per-node delay table.
 *
 * The table is laid out as [channel][direction][bytelane] with 2 directions
 * per channel and 9 byte lanes per direction.
 */
static inline void save_dqs_delay(unsigned channel, unsigned bytelane, unsigned direction, uint8_t *dqs_delay_a, uint8_t dqs_delay)
{
        const unsigned slot = (channel * 2 + direction) * 9 + bytelane;

        dqs_delay_a[slot] = dqs_delay;
}
924
/*
 * Write the DQS test pattern held in buf_a to the test address.
 *
 * pattern 0 requests 9 units from WriteLNTestPattern(), pattern 1 requests 18
 * (presumably cache lines, matching ReadL9TestPattern/ReadL18TestPattern -
 * confirm against WriteLNTestPattern).
 */
static void WriteDQSTestPattern(unsigned addr_lo, unsigned pattern , uint8_t *buf_a)
{
        const unsigned count = 9 * (pattern + 1);

        WriteLNTestPattern(addr_lo, buf_a, count);
}
929
/*
 * Issue one 32-bit load from each of 18 consecutive 64-byte lines, starting
 * at %fs:addr_lo. The loaded values are discarded; only the memory accesses
 * matter.
 *
 * Each base register points 128 bytes past the first line of its group of
 * four, so the group is reached with displacements -128, -64, 0 and +64.
 *
 * %eax is used purely as a scratch destination. It is listed as a clobber
 * rather than passed in as an input operand, because an asm statement must
 * not modify an input-only operand: the compiler would otherwise be free to
 * assume %eax still holds the input value afterwards.
 */
static void ReadL18TestPattern(unsigned addr_lo)
{
        //set fs and use fs prefix to access the mem
        __asm__ volatile (
                "movl %%fs:-128(%%esi), %%eax\n\t"  //TestAddr cache line
                "movl %%fs:-64(%%esi), %%eax\n\t"   //+1
                "movl %%fs:(%%esi), %%eax\n\t"  //+2
                "movl %%fs:64(%%esi), %%eax\n\t"   //+3

                "movl %%fs:-128(%%edi), %%eax\n\t"      //+4
                "movl %%fs:-64(%%edi), %%eax\n\t"       //+5
                "movl %%fs:(%%edi), %%eax\n\t"  //+6
                "movl %%fs:64(%%edi), %%eax\n\t"        //+7

                "movl %%fs:-128(%%ebx), %%eax\n\t"  //+8
                "movl %%fs:-64(%%ebx), %%eax\n\t"       //+9
                "movl %%fs:(%%ebx), %%eax\n\t"  //+10
                "movl %%fs:64(%%ebx), %%eax\n\t"        //+11

                "movl %%fs:-128(%%ecx), %%eax\n\t"      //+12
                "movl %%fs:-64(%%ecx), %%eax\n\t"       //+13
                "movl %%fs:(%%ecx), %%eax\n\t"  //+14
                "movl %%fs:64(%%ecx), %%eax\n\t"        //+15

                "movl %%fs:-128(%%edx), %%eax\n\t"      //+16
                "movl %%fs:-64(%%edx), %%eax\n\t"       //+17

                : /* no outputs */
                : "b" (addr_lo+128+8*64), "c" (addr_lo+128+12*64), "d" (addr_lo +128+16*64), "S"(addr_lo+128), "D"(addr_lo+128+4*64)
                : "eax"
        );

}
961
/*
 * Issue one 32-bit load from each of 9 consecutive 64-byte lines, starting
 * at %fs:addr_lo. The loaded values are discarded; only the memory accesses
 * matter.
 *
 * Each base register points 128 bytes past the first line of its group, so
 * a group of four is reached with displacements -128, -64, 0 and +64.
 *
 * %eax is used purely as a scratch destination. It is listed as a clobber
 * rather than passed in as an input operand, because an asm statement must
 * not modify an input-only operand: the compiler would otherwise be free to
 * assume %eax still holds the input value afterwards.
 */
static void ReadL9TestPattern(unsigned addr_lo)
{

        //set fs and use fs prefix to access the mem
        __asm__ volatile (

                "movl %%fs:-128(%%ecx), %%eax\n\t"  //TestAddr cache line
                "movl %%fs:-64(%%ecx), %%eax\n\t"   //+1
                "movl %%fs:(%%ecx), %%eax\n\t"      //+2
                "movl %%fs:64(%%ecx), %%eax\n\t"   //+3

                "movl %%fs:-128(%%edx), %%eax\n\t"  //+4
                "movl %%fs:-64(%%edx), %%eax\n\t"   //+5
                "movl %%fs:(%%edx), %%eax\n\t"      //+6
                "movl %%fs:64(%%edx), %%eax\n\t"   //+7

                "movl %%fs:-128(%%ebx), %%eax\n\t"      //+8

                : /* no outputs */
                : "b" (addr_lo+128+8*64), "c"(addr_lo+128), "d"(addr_lo+128+4*64)
                : "eax"
        );

}
984
985
/*
 * Read back the test pattern area at %fs:addr_lo.
 * pattern 0 touches 9 lines, any other pattern value touches 18 lines.
 */
static void ReadDQSTestPattern(unsigned addr_lo, unsigned pattern)
{
        if (pattern != 0) {
                ReadL18TestPattern(addr_lo);
                return;
        }

        ReadL9TestPattern(addr_lo);
}
995
/*
 * Flush (clflush) 9 consecutive 64-byte lines starting at %fs:addr_lo.
 *
 * The three base registers each point 128 bytes past the first line of
 * their group, so lines are addressed with displacements -128, -64, 0, +64:
 *   ecx -> lines 0-3, eax -> lines 4-7, ebx -> line 8.
 * None of the registers is modified by the asm.
 */
static void FlushDQSTestPattern_L9(unsigned addr_lo)
{
        __asm__ volatile (
                "clflush %%fs:-128(%%ecx)\n\t"
                "clflush %%fs:-64(%%ecx)\n\t"
                "clflush %%fs:(%%ecx)\n\t"
                "clflush %%fs:64(%%ecx)\n\t"

                "clflush %%fs:-128(%%eax)\n\t"
                "clflush %%fs:-64(%%eax)\n\t"
                "clflush %%fs:(%%eax)\n\t"
                "clflush %%fs:64(%%eax)\n\t"

                "clflush %%fs:-128(%%ebx)\n\t"

                ::  "b" (addr_lo+128+8*64), "c"(addr_lo+128), "a"(addr_lo+128+4*64)
        );

}
/*
 * Flush (clflush) 18 consecutive 64-byte lines starting at %fs:addr_lo.
 *
 * The five base registers each point 128 bytes past the first line of
 * their group, so lines are addressed with displacements -128, -64, 0, +64:
 *   eax -> lines 0-3, edi -> lines 4-7, ebx -> lines 8-11,
 *   ecx -> lines 12-15, edx -> lines 16-17.
 * None of the registers is modified by the asm.
 *
 * NOTE(review): noinline is presumably there so the five fixed-register
 * operands do not have to be allocated inside a larger caller - confirm.
 */
static __attribute__((noinline)) void FlushDQSTestPattern_L18(unsigned addr_lo)
{
       __asm__ volatile (
                "clflush %%fs:-128(%%eax)\n\t"
                "clflush %%fs:-64(%%eax)\n\t"
                "clflush %%fs:(%%eax)\n\t"
                "clflush %%fs:64(%%eax)\n\t"

                "clflush %%fs:-128(%%edi)\n\t"
                "clflush %%fs:-64(%%edi)\n\t"
                "clflush %%fs:(%%edi)\n\t"
                "clflush %%fs:64(%%edi)\n\t"

                "clflush %%fs:-128(%%ebx)\n\t"
                "clflush %%fs:-64(%%ebx)\n\t"
                "clflush %%fs:(%%ebx)\n\t"
                "clflush %%fs:64(%%ebx)\n\t"

                "clflush %%fs:-128(%%ecx)\n\t"
                "clflush %%fs:-64(%%ecx)\n\t"
                "clflush %%fs:(%%ecx)\n\t"
                "clflush %%fs:64(%%ecx)\n\t"

                "clflush %%fs:-128(%%edx)\n\t"
                "clflush %%fs:-64(%%edx)\n\t"

                :: "b" (addr_lo+128+8*64), "c" (addr_lo+128+12*64), "d" (addr_lo +128+16*64), "a"(addr_lo+128), "D"(addr_lo+128+4*64)
        );
}
1044
/*
 * Flush the test pattern area at %fs:addr_lo from the cache.
 * pattern 0 flushes 9 lines, any other pattern value flushes 18 lines.
 */
static void FlushDQSTestPattern(unsigned addr_lo, unsigned pattern )
{
        if (pattern != 0) {
                FlushDQSTestPattern_L18(addr_lo);
                return;
        }

        FlushDQSTestPattern_L9(addr_lo);
}
1055
/*
 * Compare memory at %fs:addr_lo against the reference pattern in buf_a and
 * report which byte lanes matched.
 *
 * 9*64/4 dwords are compared. Byte N of every 8-byte group belongs to byte
 * lane N. When pattern is 1 the data of the two channels alternates in
 * 8-byte groups: channel 1 starts 8 bytes in, and after every 8 compared
 * bytes the next 8 (the other channel's) are skipped in both memory and
 * buf_a.
 *
 * Returns a bitmap of lanes 0-7: bit N is 1 when lane N matched in every
 * compared dword, 0 when it mismatched at least once.
 */
static unsigned CompareDQSTestPattern(unsigned channel, unsigned addr_lo, unsigned pattern, uint8_t *buf_a)
{
        uint32_t *expected = (uint32_t *)buf_a;
        unsigned bitmap = 0xff;
        unsigned bytelane = 0;
        unsigned remaining;

        if (pattern && channel) {
                //second channel
                addr_lo += 8;
                expected += 2;
        }

        for (remaining = 9*64/4; remaining != 0; remaining--) {
                uint32_t readback;
                uint32_t reference;
                unsigned bit;

                __asm__ volatile (
                        "movl %%fs:(%1), %0\n\t"
                        :"=b"(readback): "a" (addr_lo)
                );
                reference = *expected;

                print_debug_dqs_pair("\t\t\t\t\t\ttest_buf= ", (unsigned)expected, " value = ", reference, 7);
                print_debug_dqs_pair("\t\t\t\t\t\ttaddr_lo = ",addr_lo, " value = ", readback, 7);

                /* check the four bytes of this dword, one lane each */
                for (bit = 0; bit < 4*8; bit += 8) {
                        const unsigned got = (readback >> bit) & 0xff;
                        const unsigned want = (reference >> bit) & 0xff;

                        if (got != want) {
                                bitmap &= ~(1<<bytelane);
                        }
                        bytelane = (bytelane + 1) & 0x7;
                }
                print_debug_dqs("\t\t\t\t\t\tbitmap = ", bitmap, 7);

                /* a full 8-byte group is done: in dual channel skip the other channel's data */
                if (bytelane == 0 && pattern == 1) {
                        addr_lo += 8;
                        expected += 2;
                }

                addr_lo += 4;
                expected++;
        }

        return bitmap;
}
1110
/*
 * Train the DQS position of byte lanes 0-7 of one channel in one direction.
 *
 * Phase 1: for every present chip select, sweep the DQS delay over 0..47
 * (applied to all lanes at once), run the write/read/compare test and AND
 * the per-lane pass bitmap into MutualCSPassW[delay]. A delay therefore
 * only counts as passing for a lane if it passed on every chip select.
 * For DQS_READDIR the pattern is written once per chip select before the
 * sweep; for DQS_WRITEDIR it is rewritten at every delay setting.
 *
 * Phase 2 (only if at least one chip select was present): for each lane,
 * find the widest run of consecutive passing delays, program its midpoint
 * with SetDQSDelayCSR() and record it with save_dqs_delay().
 *
 * The ECC lane (8) is not handled here.
 *
 * Returns 0 on success, or a combination of SB_NODQSPOS (a lane had no
 * passing window) and SB_SMALLDQS (widest window narrower than
 * MIN_DQS_WNDW). Lanes that report an error are not programmed.
 */
static unsigned TrainDQSPos(const struct mem_controller *ctrl, unsigned channel, unsigned Direction, unsigned Pattern, uint8_t *buf_a, uint8_t *dqs_delay_a, struct sys_info *sysinfo)
{
        unsigned ByteLane;
        unsigned Errors;
        unsigned BanksPresent;

        unsigned MutualCSPassW[48];

        unsigned ChipSel;
        unsigned DQSDelay;

        unsigned TestAddr;

        unsigned LastTest;
        unsigned RnkDlyFilterMax, RnkDlyFilterMin = 0;
        unsigned RnkDlySeqPassMax, RnkDlySeqPassMin = 0;

        Errors = 0;
        BanksPresent = 0;

        print_debug_dqs("\t\t\tTrainDQSPos begin ", 0, 3);

        printk(BIOS_DEBUG, "TrainDQSPos: MutualCSPassW[48] :%p\n", MutualCSPassW);

        for(DQSDelay=0; DQSDelay<48; DQSDelay++) {
                MutualCSPassW[DQSDelay] = 0xff; // Bitmapped status per delay setting, 0xff=All positions passing (1= PASS)
        }

        for(ChipSel = 0; ChipSel < 8; ChipSel++) { //logical register chipselects 0..7
                print_debug_dqs("\t\t\t\tTrainDQSPos: 11 ChipSel ", ChipSel, 4);
                //FIXME: process 64MUXedMode
                if(!ChipSelPresent(ctrl, ChipSel, sysinfo)) continue;
                BanksPresent  = 1;

                TestAddr = Get_MCTSysAddr(ctrl, ChipSel, sysinfo);

                print_debug_dqs("\t\t\t\tTrainDQSPos: 12 TestAddr ", TestAddr, 4);

                //set fs and use fs prefix to access the mem
                // The upper part of TestAddr goes into the FS base, the rest (TestAddr<<8) is used as the offset below
                set_FSBASE(TestAddr>>24);

                if(Direction == DQS_READDIR) {
                        print_debug_dqs("\t\t\t\tTrainDQSPos: 13 for read so write at first", 0, 4);
                        WriteDQSTestPattern(TestAddr<<8, Pattern, buf_a);
                }

                for(DQSDelay = 0; DQSDelay < 48; DQSDelay++ ){
                        print_debug_dqs("\t\t\t\t\tTrainDQSPos: 141 DQSDelay ", DQSDelay, 5);
                        if(MutualCSPassW[DQSDelay] == 0) continue; //skip current delay value if other chipselects have failed all 8 bytelanes
                        SetDQSDelayAllCSR(ctrl, channel, Direction, DQSDelay);
                        print_debug_dqs("\t\t\t\t\tTrainDQSPos: 142 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);
                        if(Direction == DQS_WRITEDIR) {
                                print_debug_dqs("\t\t\t\t\tTrainDQSPos: 143 for write", 0, 5);
                                WriteDQSTestPattern(TestAddr<<8, Pattern, buf_a);
                        }
                        print_debug_dqs("\t\t\t\t\tTrainDQSPos: 144 Pattern ", Pattern, 5);
                        ReadDQSTestPattern(TestAddr<<8, Pattern);
                        print_debug_dqs("\t\t\t\t\tTrainDQSPos: 145 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);
                        MutualCSPassW[DQSDelay] &= CompareDQSTestPattern(channel, TestAddr<<8, Pattern, buf_a); //0: fail, 1=pass
                        print_debug_dqs("\t\t\t\t\tTrainDQSPos: 146 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);
                        // Flush the test lines so the next delay setting is tested against memory again
                        SetTargetWTIO(TestAddr);
                        FlushDQSTestPattern(TestAddr<<8, Pattern);
                        ResetTargetWTIO();
                }
        }

        // The whole per-lane loop below is the body of this if (no braces)
        if(BanksPresent)
        for(ByteLane = 0; ByteLane < 8; ByteLane++) {
                print_debug_dqs("\t\t\t\tTrainDQSPos: 31 ByteLane ",ByteLane, 4);

                // SeqPass* track the current run of passing delays, Filter* the widest run seen so far
                LastTest = DQS_FAIL;
                RnkDlySeqPassMax = 0;
                RnkDlyFilterMax = 0;
                RnkDlyFilterMin = 0;
                for(DQSDelay=0; DQSDelay<48; DQSDelay++) {
                        if(MutualCSPassW[DQSDelay] & (1<<ByteLane)) {

                                print_debug_dqs("\t\t\t\t\tTrainDQSPos: 321 DQSDelay ", DQSDelay, 5);
                                print_debug_dqs("\t\t\t\t\tTrainDQSPos: 322 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);

                                RnkDlySeqPassMax = DQSDelay;
                                if(LastTest == DQS_FAIL) {
                                        RnkDlySeqPassMin = DQSDelay; //start sequential run
                                }
                                // strictly greater: on equal width the earlier run is kept
                                if((RnkDlySeqPassMax - RnkDlySeqPassMin)>(RnkDlyFilterMax-RnkDlyFilterMin)){
                                        RnkDlyFilterMin = RnkDlySeqPassMin;
                                        RnkDlyFilterMax = RnkDlySeqPassMax;
                                }
                                LastTest = DQS_PASS;
                        }
                        else {
                                LastTest = DQS_FAIL;
                        }
                }
                print_debug_dqs("\t\t\t\tTrainDQSPos: 33 RnkDlySeqPassMax ", RnkDlySeqPassMax, 4);

                // RnkDlySeqPassMax is still 0 when no delay passed, and also when only delay 0 passed
                if(RnkDlySeqPassMax == 0) {
                        Errors |= SB_NODQSPOS; // no passing window
                }
                else {
                        print_debug_dqs("\t\t\t\tTrainDQSPos: 34 RnkDlyFilterMax ", RnkDlyFilterMax, 4);
                        print_debug_dqs("\t\t\t\tTrainDQSPos: 34 RnkDlyFilterMin ", RnkDlyFilterMin, 4);
                        if((RnkDlyFilterMax - RnkDlyFilterMin)< MIN_DQS_WNDW){
                                Errors |= SB_SMALLDQS;
                        }
                        else {
                                // program and remember the centre of the widest passing window
                                unsigned middle_dqs;
                                middle_dqs = MiddleDQS(RnkDlyFilterMin, RnkDlyFilterMax);
                                print_debug_dqs("\t\t\t\tTrainDQSPos: 35 middle_dqs ",middle_dqs, 4);
                                SetDQSDelayCSR(ctrl, channel, ByteLane, Direction, middle_dqs);
                                save_dqs_delay(channel, ByteLane, Direction, dqs_delay_a, middle_dqs);
                        }
                }

        }

        print_debug_dqs("\t\t\tTrainDQSPos: end", 0xff, 3);

        return Errors;


}
1233
1234 static unsigned TrainReadDQS(const struct mem_controller *ctrl, unsigned channel, unsigned pattern, uint8_t *buf_a, uint8_t *dqs_delay_a, struct sys_info *sysinfo)
1235 {
1236         print_debug_dqs("\t\tTrainReadPos", 0, 2);
1237         return TrainDQSPos(ctrl, channel, DQS_READDIR, pattern, buf_a, dqs_delay_a, sysinfo);
1238 }
1239
1240 static unsigned TrainWriteDQS(const struct mem_controller *ctrl, unsigned channel, unsigned pattern, uint8_t *buf_a, uint8_t *dqs_delay_a, struct sys_info *sysinfo)
1241 {
1242         print_debug_dqs("\t\tTrainWritePos", 0, 2);
1243         return TrainDQSPos(ctrl, channel, DQS_WRITEDIR, pattern, buf_a, dqs_delay_a, sysinfo);
1244 }
1245
1246
1247
/*
 * Run read and write DQS position training for one memory controller.
 *
 * Sequence:
 *  - enable SSE2 and wrap32dis, and temporarily clear DCL_DimmEccEn
 *    (the previous ECC setting is restored at the end);
 *  - copy the test pattern into a 16-byte aligned stack buffer:
 *    TestPatternJD1b (16*18 dwords) in 128-bit mode, TestPatternJD1a
 *    (16*9 dwords) otherwise;
 *  - for each channel to train: sweep the write DQS delay of all lanes over
 *    0..47 until read training succeeds, then run write training;
 *  - restore ECC, clear wrap32dis, disable SSE2.
 *
 * Training starts at channel 1 when only the upper nibble of dimm_mask is
 * populated. In 64-bit mode only the starting channel is trained. The loop
 * stops as soon as a channel leaves Errors non-zero.
 *
 * Returns the accumulated error bits (0 on success).
 */
static unsigned TrainDQSRdWrPos(const struct mem_controller *ctrl, struct sys_info *sysinfo)
{
        // Pattern used in 64-bit mode: 36 rows of 4 dwords = 9 lines of 64 bytes
        static const uint32_t TestPatternJD1a[] = {
                                        0x00000000,0x00000000,0xFFFFFFFF,0xFFFFFFFF, // QW0-1, ALL-EVEN
                                        0x00000000,0x00000000,0x00000000,0x00000000, // QW2-3, ALL-EVEN
                                        0x00000000,0x00000000,0xFFFFFFFF,0xFFFFFFFF, // QW4-5, ALL-EVEN
                                        0x00000000,0x00000000,0x00000000,0x00000000, // QW6-7, ALL-EVEN
                                        0xFeFeFeFe,0xFeFeFeFe,0x01010101,0x01010101, // QW0-1, DQ0-ODD
                                        0xFeFeFeFe,0xFeFeFeFe,0x01010101,0x01010101, // QW2-3, DQ0-ODD
                                        0x01010101,0x01010101,0xFeFeFeFe,0xFeFeFeFe, // QW4-5, DQ0-ODD
                                        0xFeFeFeFe,0xFeFeFeFe,0x01010101,0x01010101, // QW6-7, DQ0-ODD
                                        0x02020202,0x02020202,0x02020202,0x02020202, // QW0-1, DQ1-ODD
                                        0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, // QW2-3, DQ1-ODD
                                        0xFdFdFdFd,0xFdFdFdFd,0x02020202,0x02020202, // QW4-5, DQ1-ODD
                                        0x02020202,0x02020202,0x02020202,0x02020202, // QW6-7, DQ1-ODD
                                        0x04040404,0x04040404,0xfBfBfBfB,0xfBfBfBfB, // QW0-1, DQ2-ODD
                                        0x04040404,0x04040404,0x04040404,0x04040404, // QW2-3, DQ2-ODD
                                        0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW4-5, DQ2-ODD
                                        0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW6-7, DQ2-ODD
                                        0x08080808,0x08080808,0xF7F7F7F7,0xF7F7F7F7, // QW0-1, DQ3-ODD
                                        0x08080808,0x08080808,0x08080808,0x08080808, // QW2-3, DQ3-ODD
                                        0xF7F7F7F7,0xF7F7F7F7,0x08080808,0x08080808, // QW4-5, DQ3-ODD
                                        0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW6-7, DQ3-ODD
                                        0x10101010,0x10101010,0x10101010,0x10101010, // QW0-1, DQ4-ODD
                                        0xeFeFeFeF,0xeFeFeFeF,0x10101010,0x10101010, // QW2-3, DQ4-ODD
                                        0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW4-5, DQ4-ODD
                                        0xeFeFeFeF,0xeFeFeFeF,0x10101010,0x10101010, // QW6-7, DQ4-ODD
                                        0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW0-1, DQ5-ODD
                                        0xdFdFdFdF,0xdFdFdFdF,0x20202020,0x20202020, // QW2-3, DQ5-ODD
                                        0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW4-5, DQ5-ODD
                                        0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW6-7, DQ5-ODD
                                        0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW0-1, DQ6-ODD
                                        0x40404040,0x40404040,0xBfBfBfBf,0xBfBfBfBf, // QW2-3, DQ6-ODD
                                        0x40404040,0x40404040,0xBfBfBfBf,0xBfBfBfBf, // QW4-5, DQ6-ODD
                                        0x40404040,0x40404040,0xBfBfBfBf,0xBfBfBfBf, // QW6-7, DQ6-ODD
                                        0x80808080,0x80808080,0x7F7F7F7F,0x7F7F7F7F, // QW0-1, DQ7-ODD
                                        0x80808080,0x80808080,0x7F7F7F7F,0x7F7F7F7F, // QW2-3, DQ7-ODD
                                        0x80808080,0x80808080,0x7F7F7F7F,0x7F7F7F7F, // QW4-5, DQ7-ODD
                                        0x80808080,0x80808080,0x80808080,0x80808080  // QW6-7, DQ7-ODD
                };
        // Pattern used in 128-bit mode: 72 rows of 4 dwords = 18 lines of 64 bytes
        static const uint32_t TestPatternJD1b[] = {
                                        0x00000000,0x00000000,0x00000000,0x00000000, // QW0,CHA-B, ALL-EVEN
                                        0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, // QW1,CHA-B, ALL-EVEN
                                        0x00000000,0x00000000,0x00000000,0x00000000, // QW2,CHA-B, ALL-EVEN
                                        0x00000000,0x00000000,0x00000000,0x00000000, // QW3,CHA-B, ALL-EVEN
                                        0x00000000,0x00000000,0x00000000,0x00000000, // QW4,CHA-B, ALL-EVEN
                                        0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, // QW5,CHA-B, ALL-EVEN
                                        0x00000000,0x00000000,0x00000000,0x00000000, // QW6,CHA-B, ALL-EVEN
                                        0x00000000,0x00000000,0x00000000,0x00000000, // QW7,CHA-B, ALL-EVEN
                                        0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, // QW0,CHA-B, DQ0-ODD
                                        0x01010101,0x01010101,0x01010101,0x01010101, // QW1,CHA-B, DQ0-ODD
                                        0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, // QW2,CHA-B, DQ0-ODD
                                        0x01010101,0x01010101,0x01010101,0x01010101, // QW3,CHA-B, DQ0-ODD
                                        0x01010101,0x01010101,0x01010101,0x01010101, // QW4,CHA-B, DQ0-ODD
                                        0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, // QW5,CHA-B, DQ0-ODD
                                        0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, // QW6,CHA-B, DQ0-ODD
                                        0x01010101,0x01010101,0x01010101,0x01010101, // QW7,CHA-B, DQ0-ODD
                                        0x02020202,0x02020202,0x02020202,0x02020202, // QW0,CHA-B, DQ1-ODD
                                        0x02020202,0x02020202,0x02020202,0x02020202, // QW1,CHA-B, DQ1-ODD
                                        0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, // QW2,CHA-B, DQ1-ODD
                                        0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, // QW3,CHA-B, DQ1-ODD
                                        0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, // QW4,CHA-B, DQ1-ODD
                                        0x02020202,0x02020202,0x02020202,0x02020202, // QW5,CHA-B, DQ1-ODD
                                        0x02020202,0x02020202,0x02020202,0x02020202, // QW6,CHA-B, DQ1-ODD
                                        0x02020202,0x02020202,0x02020202,0x02020202, // QW7,CHA-B, DQ1-ODD
                                        0x04040404,0x04040404,0x04040404,0x04040404, // QW0,CHA-B, DQ2-ODD
                                        0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW1,CHA-B, DQ2-ODD
                                        0x04040404,0x04040404,0x04040404,0x04040404, // QW2,CHA-B, DQ2-ODD
                                        0x04040404,0x04040404,0x04040404,0x04040404, // QW3,CHA-B, DQ2-ODD
                                        0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW4,CHA-B, DQ2-ODD
                                        0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW5,CHA-B, DQ2-ODD
                                        0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW6,CHA-B, DQ2-ODD
                                        0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW7,CHA-B, DQ2-ODD
                                        0x08080808,0x08080808,0x08080808,0x08080808, // QW0,CHA-B, DQ3-ODD
                                        0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW1,CHA-B, DQ3-ODD
                                        0x08080808,0x08080808,0x08080808,0x08080808, // QW2,CHA-B, DQ3-ODD
                                        0x08080808,0x08080808,0x08080808,0x08080808, // QW3,CHA-B, DQ3-ODD
                                        0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW4,CHA-B, DQ3-ODD
                                        0x08080808,0x08080808,0x08080808,0x08080808, // QW5,CHA-B, DQ3-ODD
                                        0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW6,CHA-B, DQ3-ODD
                                        0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW7,CHA-B, DQ3-ODD
                                        0x10101010,0x10101010,0x10101010,0x10101010, // QW0,CHA-B, DQ4-ODD
                                        0x10101010,0x10101010,0x10101010,0x10101010, // QW1,CHA-B, DQ4-ODD
                                        0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW2,CHA-B, DQ4-ODD
                                        0x10101010,0x10101010,0x10101010,0x10101010, // QW3,CHA-B, DQ4-ODD
                                        0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW4,CHA-B, DQ4-ODD
                                        0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW5,CHA-B, DQ4-ODD
                                        0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW6,CHA-B, DQ4-ODD
                                        0x10101010,0x10101010,0x10101010,0x10101010, // QW7,CHA-B, DQ4-ODD
                                        0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW0,CHA-B, DQ5-ODD
                                        0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW1,CHA-B, DQ5-ODD
                                        0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW2,CHA-B, DQ5-ODD
                                        0x20202020,0x20202020,0x20202020,0x20202020, // QW3,CHA-B, DQ5-ODD
                                        0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW4,CHA-B, DQ5-ODD
                                        0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW5,CHA-B, DQ5-ODD
                                        0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW6,CHA-B, DQ5-ODD
                                        0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW7,CHA-B, DQ5-ODD
                                        0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW0,CHA-B, DQ6-ODD
                                        0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW1,CHA-B, DQ6-ODD
                                        0x40404040,0x40404040,0x40404040,0x40404040, // QW2,CHA-B, DQ6-ODD
                                        0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW3,CHA-B, DQ6-ODD
                                        0x40404040,0x40404040,0x40404040,0x40404040, // QW4,CHA-B, DQ6-ODD
                                        0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW5,CHA-B, DQ6-ODD
                                        0x40404040,0x40404040,0x40404040,0x40404040, // QW6,CHA-B, DQ6-ODD
                                        0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW7,CHA-B, DQ6-ODD
                                        0x80808080,0x80808080,0x80808080,0x80808080, // QW0,CHA-B, DQ7-ODD
                                        0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F, // QW1,CHA-B, DQ7-ODD
                                        0x80808080,0x80808080,0x80808080,0x80808080, // QW2,CHA-B, DQ7-ODD
                                        0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F, // QW3,CHA-B, DQ7-ODD
                                        0x80808080,0x80808080,0x80808080,0x80808080, // QW4,CHA-B, DQ7-ODD
                                        0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F, // QW5,CHA-B, DQ7-ODD
                                        0x80808080,0x80808080,0x80808080,0x80808080, // QW6,CHA-B, DQ7-ODD
                                        0x80808080,0x80808080,0x80808080,0x80808080  // QW7,CHA-B, DQ7-ODD
                };
        uint8_t pattern_buf_x[64 * 18 + 16]; // Room for 18 lines of 64 bytes, plus 16 spare bytes so buf_a can be moved forward to a 16-byte boundary
        uint8_t *buf_a;

        unsigned pattern;
        uint32_t dword;
        uint32_t ecc_bit;
        unsigned Errors;
        unsigned channel;
        int i;
        unsigned DQSWrDelay;
        unsigned is_Width128 = sysinfo->meminfo[ctrl->node_id].is_Width128;
        uint8_t *dqs_delay_a = &sysinfo->dqs_delay_a[ctrl->node_id * 2*2*9]; //channel 2, direction 2 , bytelane *9

        //enable SSE2
        enable_sse2();

        //wrap32dis
        set_wrap32dis();

        //disable ECC temp
        // ecc_bit remembers the original DCL_DimmEccEn state so it can be restored after training
        dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
        ecc_bit = dword & DCL_DimmEccEn;
        dword &= ~(DCL_DimmEccEn);
        pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dword);

        //SetupDqsPattern
        // Advance to the next 16-byte boundary inside pattern_buf_x (the cast assumes a 32-bit address)
        buf_a = (uint8_t *)(((uint32_t)(&pattern_buf_x[0]) + 0x10) & (~0xf));

        if(is_Width128){
                // 128-bit mode: 18-line pattern
                pattern = 1;
                for(i=0;i<16*18;i++) {
                        *((uint32_t *)(buf_a + i*4)) = TestPatternJD1b[i];
                 }
        }
        else {
                // 64-bit mode: 9-line pattern
                pattern = 0;
                for(i=0; i<16*9;i++) {
                        *((uint32_t *)(buf_a + i*4)) = TestPatternJD1a[i];
                }

        }

        print_debug_dqs("\r\nTrainDQSRdWrPos: 0 ctrl ", ctrl->node_id, 0);

        printk(BIOS_DEBUG, "TrainDQSRdWrPos: buf_a:%p\n", buf_a);

        Errors = 0;
        channel = 0;

        if (!(sysinfo->meminfo[ctrl->node_id].dimm_mask & 0x0F) &&
             (sysinfo->meminfo[ctrl->node_id].dimm_mask & 0xF0)) { /* channelB only? */
                channel = 1;
        }

        while( (channel<2) && (!Errors)) {
                print_debug_dqs("\tTrainDQSRdWrPos: 1 channel ",channel, 1);
                // Sweep the write delay of all lanes until read training succeeds at one setting
                for(DQSWrDelay = 0; DQSWrDelay < 48; DQSWrDelay++) {
                        unsigned err;
                        SetDQSDelayAllCSR(ctrl, channel, DQS_WRITEDIR, DQSWrDelay);
                        print_debug_dqs("\t\tTrainDQSRdWrPos: 21 DQSWrDelay ", DQSWrDelay, 2);
                        err= TrainReadDQS(ctrl, channel, pattern, buf_a, dqs_delay_a, sysinfo);
                        print_debug_dqs("\t\tTrainDQSRdWrPos: 22 err ",err, 2);
                        if(err == 0) break;
                        Errors |= err;
                }

                print_debug_dqs("\tTrainDQSRdWrPos: 3 DQSWrDelay ", DQSWrDelay, 1);

                // Read training succeeded: the write training result replaces any errors
                // accumulated during the sweep. If the sweep ran to 48, Errors keeps the
                // OR of all read training failures.
                if(DQSWrDelay < 48) {
                        Errors = TrainWriteDQS(ctrl, channel, pattern, buf_a, dqs_delay_a, sysinfo);
                        print_debug_dqs("\tTrainDQSRdWrPos: 4 Errors ", Errors, 1);

                }
                channel++;
                if(!is_Width128){
                        //FIXME: 64MuxMode??
                        channel++; // skip channel if 64-bit mode
                }
        }

        //Enable ECC again
        dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
        dword &= ~(DCL_DimmEccEn);
        dword |= ecc_bit;
        pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dword);

        //Clear wrap32dis

        clear_wrap32dis();

        //restore SSE2 setting
        disable_sse2();

        print_debug_dqs("TrainDQSRdWrPos: ", 5, 0);

        return Errors;

}
/*
 * Look up one stored DQS delay value.
 *
 * dqs_delay_a is laid out as [channel][direction][bytelane] with
 * 2 directions per channel and 9 byte lanes per direction.
 */
static inline uint8_t get_dqs_delay(unsigned channel, unsigned bytelane, unsigned direction, uint8_t *dqs_delay_a)
{
	const unsigned lanes_per_dir = 9;
	const unsigned slot = (channel * 2 + direction) * lanes_per_dir + bytelane;

	return dqs_delay_a[slot];
}
1464
/*
 * Interpolate a DQS delay between the stored delays of two byte lanes.
 *
 * InterFactor: 0:    100% ByteLane 0
 *              0x80: 50% between ByteLane 0 and 1
 *              0xff: 99.6% ByteLane 1 and 0.4% like 0
 *
 * The weighted difference is scaled with a right shift by 8, i.e. a
 * division by 256 (not 255), so the result is truncated toward the
 * smaller of the two delays.
 */
static unsigned CalcEccDQSPos(unsigned channel,unsigned ByteLane0, unsigned ByteLane1, unsigned InterFactor, unsigned Direction, uint8_t *dqs_delay_a)
{
	const unsigned delay0 = get_dqs_delay(channel, ByteLane0, Direction, dqs_delay_a);
	const unsigned delay1 = get_dqs_delay(channel, ByteLane1, Direction, dqs_delay_a);
	unsigned floor_delay, span, weight;

	if (delay0 > delay1) {
		/* Lane 1 is the smaller one: measure from it, with the weight mirrored. */
		floor_delay = delay1;
		span = delay0 - delay1;
		weight = 0xff - InterFactor;
	} else {
		floor_delay = delay0;
		span = delay1 - delay0;
		weight = InterFactor;
	}

	return ((span * weight) >> 8) + floor_delay;
}
1499
1500 static void SetEccDQSRdWrPos(const struct mem_controller *ctrl, struct sys_info *sysinfo)
1501 {
1502         unsigned channel;
1503         unsigned ByteLane;
1504         unsigned Direction;
1505         unsigned lane0, lane1, ratio;
1506         unsigned dqs_delay;
1507
1508         unsigned direction[] = { DQS_READDIR, DQS_WRITEDIR };
1509         int i;
1510         uint8_t *dqs_delay_a = &sysinfo->dqs_delay_a[ctrl->node_id * 2*2*9]; //channel 2, direction 2 , bytelane *9
1511
1512         ByteLane = 8;
1513
1514         for(channel = 0; channel < 2; channel++) {
1515                 for(i=0;i<2;i++) {
1516                         Direction = direction[i];
1517                         lane0 = 4; lane1 = 5; ratio = 0;
1518                         dqs_delay = CalcEccDQSPos(channel, lane0, lane1, ratio, Direction, dqs_delay_a);
1519                         print_debug_dqs_pair("\t\tSetEccDQSRdWrPos: channel ", channel, Direction==DQS_READDIR? " R dqs_delay":" W dqs_delay",  dqs_delay, 2);
1520                         SetDQSDelayCSR(ctrl, channel, ByteLane, Direction, dqs_delay);
1521                         save_dqs_delay(channel, ByteLane, Direction, dqs_delay_a, dqs_delay);
1522                 }
1523         }
1524 }
1525
1526 static unsigned train_DqsRcvrEn(const struct mem_controller *ctrl, unsigned Pass, struct sys_info *sysinfo)
1527 {
1528         print_debug_dqs("\r\ntrain_DqsRcvrEn: begin ctrl ", ctrl->node_id, 0);
1529         if(TrainRcvrEn(ctrl, Pass, sysinfo)) {
1530                 return 1;
1531         }
1532         print_debug_dqs("\r\ntrain_DqsRcvrEn: end ctrl ", ctrl->node_id, 0);
1533         return 0;
1534
1535 }
1536 static unsigned train_DqsPos(const struct mem_controller *ctrl, struct sys_info *sysinfo)
1537 {
1538         print_debug_dqs("\r\ntrain_DqsPos: begin ctrl ", ctrl->node_id, 0);
1539         if(TrainDQSRdWrPos(ctrl, sysinfo) != 0) {
1540                 printk(BIOS_ERR, "\r\nDQS Training Rd Wr failed ctrl%02x\r\n", ctrl->node_id);
1541                 return 1;
1542         }
1543         else {
1544                 SetEccDQSRdWrPos(ctrl, sysinfo);
1545         }
1546         print_debug_dqs("\r\ntrain_DqsPos: end ctrl ", ctrl->node_id, 0);
1547         return 0;
1548
1549 }
1550
1551 #if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
1552 static void f0_svm_workaround(int controllers, const struct mem_controller *ctrl, tsc_t *tsc0, struct sys_info *sysinfo)
1553 {
1554         tsc_t tsc1[8];
1555         unsigned cpu_f0_f1[8];
1556         int i;
1557
1558         print_debug_addr("dqs_timing: tsc1[8] :", tsc1);
1559
1560         for(i = 0; i < controllers; i++) {
1561                 if (!sysinfo->ctrl_present[i])
1562                         continue;
1563
1564                 /* Skip everything if I don't have any memory on this controller */
1565                 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1566
1567                 uint32_t dword;
1568
1569                 cpu_f0_f1[i] = is_cpu_pre_f2_in_bsp(i);
1570
1571                 if(!cpu_f0_f1[i]) continue;
1572
1573                 dword = pci_read_config32(ctrl[i].f2, DRAM_CTRL);
1574                 dword &= ~DC_DqsRcvEnTrain;
1575                 pci_write_config32(ctrl[i].f2, DRAM_CTRL, dword);
1576
1577                 dword = pci_read_config32(ctrl[i].f2, DRAM_INIT);
1578                 dword |= DI_EnDramInit;
1579                 pci_write_config32(ctrl[i].f2, DRAM_INIT, dword);
1580                 dword &= ~DI_EnDramInit;
1581                 pci_write_config32(ctrl[i].f2, DRAM_INIT, dword);
1582
1583                 tsc1[i] = rdtsc();
1584                 print_debug_dqs_tsc("begin: tsc1", i, tsc1[i].hi, tsc1[i].lo, 2);
1585
1586                 dword = tsc1[i].lo + tsc0[i].lo;
1587                 if((dword<tsc1[i].lo) || (dword<tsc0[i].lo)) {
1588                         tsc1[i].hi++;
1589                 }
1590                 tsc1[i].lo = dword;
1591                 tsc1[i].hi+= tsc0[i].hi;
1592
1593                 print_debug_dqs_tsc("end  : tsc1", i, tsc1[i].hi, tsc1[i].lo, 2);
1594
1595         }
1596
1597         for(i = 0; i < controllers; i++) {
1598                 if (!sysinfo->ctrl_present[i])
1599                         continue;
1600
1601                 /* Skip everything if I don't have any memory on this controller */
1602                 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1603
1604                 if(!cpu_f0_f1[i]) continue;
1605
1606                 tsc_t tsc;
1607
1608                 do {
1609                         tsc = rdtsc();
1610                 } while ((tsc1[i].hi>tsc.hi) || ((tsc1[i].hi==tsc.hi) && (tsc1[i].lo>tsc.lo)));
1611
1612                 print_debug_dqs_tsc("end  : tsc ", i, tsc.hi, tsc.lo, 2);
1613         }
1614
1615 }
1616
1617 #endif
1618
1619
1620 /* setting variable mtrr, comes from linux kernel source */
1621 static void set_var_mtrr_dqs(
1622         unsigned int reg, unsigned long basek, unsigned long sizek,
1623         unsigned char type, unsigned address_bits)
1624 {
1625         msr_t base, mask;
1626         unsigned address_mask_high;
1627
1628         address_mask_high = ((1u << (address_bits - 32u)) - 1u);
1629
1630         base.hi = basek >> 22;
1631         base.lo  = basek << 10;
1632
1633         if (sizek < 4*1024*1024) {
1634                 mask.hi = address_mask_high;
1635                 mask.lo = ~((sizek << 10) -1);
1636         }
1637         else {
1638                 mask.hi = address_mask_high & (~((sizek >> 22) -1));
1639                 mask.lo = 0;
1640         }
1641
1642         if (reg >= 8)
1643                 return;
1644
1645         if (sizek == 0) {
1646                 msr_t zero;
1647                 zero.lo = zero.hi = 0;
1648                 /* The invalid bit is kept in the mask, so we simply clear the
1649                    relevant mask register to disable a range. */
1650                 wrmsr (MTRRphysMask_MSR(reg), zero);
1651         } else {
1652                 /* Bit 32-35 of MTRRphysMask should be set to 1 */
1653                 base.lo |= type;
1654                 mask.lo |= 0x800;
1655                 wrmsr (MTRRphysBase_MSR(reg), base);
1656                 wrmsr (MTRRphysMask_MSR(reg), mask);
1657         }
1658 }
1659
1660
/*
 * fms: find most significant bit set.
 * Returns the index (0..31) of the highest set bit of x, or 0 when x is 0.
 */
static inline unsigned int fms(unsigned int x)
{
	if (x == 0)
		return 0;

	/* 32-bit operand: highest set bit index = 31 - number of leading zeros */
	return (unsigned int)(31 - __builtin_clz(x));
}
1672
/*
 * fls: find least significant bit set.
 * Returns the index (0..31) of the lowest set bit of x, or 32 when x is 0.
 */
static inline unsigned int fls(unsigned int x)
{
	if (x == 0)
		return 32;

	return (unsigned int)__builtin_ctz(x);
}
1684
1685 static unsigned int range_to_mtrr(unsigned int reg,
1686         unsigned long range_startk, unsigned long range_sizek,
1687         unsigned long next_range_startk, unsigned char type, unsigned address_bits)
1688 {
1689         if (!range_sizek || (reg >= 8)) {
1690                 return reg;
1691         }
1692         while(range_sizek) {
1693                 unsigned long max_align, align;
1694                 unsigned long sizek;
1695                 /* Compute the maximum size I can make a range */
1696                 max_align = fls(range_startk);
1697                 align = fms(range_sizek);
1698                 if (align > max_align) {
1699                         align = max_align;
1700                 }
1701                 sizek = 1 << align;
1702 #if CONFIG_MEM_TRAIN_SEQ != 1
1703                 printk(BIOS_DEBUG, "Setting variable MTRR %d, base: %4dMB, range: %4dMB, type %s\r\n",
1704                         reg, range_startk >>10, sizek >> 10,
1705                         (type==MTRR_TYPE_UNCACHEABLE)?"UC":
1706                             ((type==MTRR_TYPE_WRBACK)?"WB":"Other")
1707                         );
1708 #endif
1709                 set_var_mtrr_dqs(reg++, range_startk, sizek, type, address_bits);
1710                 range_startk += sizek;
1711                 range_sizek -= sizek;
1712                 if (reg >= 8)
1713                         break;
1714         }
1715         return reg;
1716 }
1717
1718 static void set_top_mem_ap(unsigned tom_k, unsigned tom2_k)
1719 {
1720         msr_t msr;
1721
1722         /* Now set top of memory */
1723         msr.lo = (tom2_k & 0x003fffff) << 10;
1724         msr.hi = (tom2_k & 0xffc00000) >> 22;
1725         wrmsr(TOP_MEM2, msr);
1726
1727         msr.lo = (tom_k & 0x003fffff) << 10;
1728         msr.hi = (tom_k & 0xffc00000) >> 22;
1729         wrmsr(TOP_MEM, msr);
1730 }
1731
1732 static void setup_mtrr_dqs(unsigned tom_k, unsigned tom2_k)
1733 {
1734         unsigned reg;
1735         msr_t msr;
1736
1737 #if 0
1738         //still enable from cache_as_ram.inc
1739         msr = rdmsr(SYSCFG_MSR);
1740         msr.lo |= SYSCFG_MSR_MtrrFixDramModEn;
1741         wrmsr(SYSCFG_MSR,msr);
1742 #endif
1743
1744         //[0,512k), [512k, 640k)
1745         msr.hi = 0x1e1e1e1e;
1746         msr.lo = msr.hi;
1747         wrmsr(0x250, msr);
1748         wrmsr(0x258, msr);
1749
1750         //[1M, TOM)
1751         reg = range_to_mtrr(2, 0, tom_k,4*1024*1024, MTRR_TYPE_WRBACK, 40);
1752
1753         //[4G, TOM2)
1754         if(tom2_k) {
1755                 //enable tom2 and type
1756                 msr = rdmsr(SYSCFG_MSR);
1757                 msr.lo |= (1<<21) | (1<<22); //MtrrTom2En and Tom2ForceMemTypeWB
1758                 wrmsr(SYSCFG_MSR, msr);
1759         }
1760
1761 }
1762
1763 static void clear_mtrr_dqs(unsigned tom2_k)
1764 {
1765         msr_t msr;
1766         unsigned i;
1767
1768         //still enable from cache_as_ram.inc
1769         msr = rdmsr(SYSCFG_MSR);
1770         msr.lo |= SYSCFG_MSR_MtrrFixDramModEn;
1771         wrmsr(SYSCFG_MSR,msr);
1772
1773         //[0,512k), [512k, 640k)
1774         msr.hi = 0;
1775         msr.lo = msr.hi;
1776         wrmsr(0x250, msr);
1777         wrmsr(0x258, msr);
1778
1779         //[1M, TOM)
1780         for(i=0x204;i<0x210;i++) {
1781                 wrmsr(i, msr);
1782         }
1783
1784         //[4G, TOM2)
1785         if(tom2_k) {
1786                 //enable tom2 and type
1787                 msr = rdmsr(SYSCFG_MSR);
1788                 msr.lo &= ~((1<<21) | (1<<22)); //MtrrTom2En and Tom2ForceMemTypeWB
1789                 wrmsr(SYSCFG_MSR, msr);
1790         }
1791 }
1792
1793 static void set_htic_bit(unsigned i, unsigned val, unsigned bit)
1794 {
1795         uint32_t dword;
1796         dword = pci_read_config32(PCI_DEV(0, 0x18+i, 0), HT_INIT_CONTROL);
1797         dword &= ~(1<<bit);
1798         dword |= ((val & 1) <<bit);
1799         pci_write_config32(PCI_DEV(0, 0x18+i, 0), HT_INIT_CONTROL, dword);
1800 }
1801
1802
1803 static unsigned get_htic_bit(unsigned i, unsigned bit)
1804 {
1805         uint32_t dword;
1806         dword = pci_read_config32(PCI_DEV(0, 0x18+i, 0), HT_INIT_CONTROL);
1807         dword &= (1<<bit);
1808         return dword;
1809 }
1810
/*
 * Busy-wait until HT_INIT_CONTROL bit 9 of node 0 reads as set; that bit is
 * the flag written by set_sysinfo_in_ram(). There is no timeout.
 */
static void wait_till_sysinfo_in_ram(void)
{
	while (!get_htic_bit(0, 9)) {
		/* keep polling */
	}
}
1817
/*
 * Publish the "sysinfo in RAM" flag: store the low bit of val in
 * HT_INIT_CONTROL bit 9 of node 0, where wait_till_sysinfo_in_ram() polls it.
 */
static void set_sysinfo_in_ram(unsigned val)
{
	const unsigned node = 0;
	const unsigned flag_bit = 9;

	set_htic_bit(node, val, flag_bit);
}
1822
1823 #ifdef S3_NVRAM_EARLY
1824 int s3_save_nvram_early(u32 dword, int size, int  nvram_pos);
1825 int s3_load_nvram_early(int size, u32 *old_dword, int nvram_pos);
1826 #else
1827 int s3_save_nvram_early(u32 dword, int size, int  nvram_pos)
1828 {
1829         return nvram_pos;
1830 }
1831
1832 int s3_load_nvram_early(int size, u32 *old_dword, int nvram_pos)
1833 {
1834         die("No memory NVRAM loader for DQS data! Unable to restore memory state\n");
1835
1836         return nvram_pos; /* Make GCC happy */
1837 }
1838 #endif
1839
/*
 * Read the indexed register `index` through config offset 0x98 of dev and
 * pass it to s3_save_nvram_early() with the given size and position.
 * Returns the position reported by s3_save_nvram_early().
 */
static int save_index_to_pos(unsigned int dev, int size, int index, int nvram_pos)
{
	return s3_save_nvram_early(pci_read_config32_index_wait(dev, 0x98, index),
				   size, nvram_pos);
}
1846
1847 static int load_index_to_pos(unsigned int dev, int size, int index, int nvram_pos)
1848 {
1849
1850         u32 old_dword = pci_read_config32_index_wait(dev, 0x98, index);
1851         nvram_pos = s3_load_nvram_early(size, &old_dword, nvram_pos);
1852         pci_write_config32_index_wait(dev, 0x98, index, old_dword);
1853         return nvram_pos;
1854 }
1855
/*
 * Restore the per-channel indexed DQS registers of dev from NVRAM.
 *
 * Channel ch uses register indexes offset by ch * 0x20. The register list
 * adds up to 30 bytes per channel. Returns the NVRAM position after the
 * last register.
 */
static int dqs_load_MC_NVRAM_ch(unsigned int dev, int ch, int pos)
{
	/* { register index, size in bytes } in load order */
	static const unsigned char layout[][2] = {
		{ 0x00, 4 }, { 0x01, 4 }, { 0x02, 4 }, { 0x03, 1 },
		{ 0x04, 4 }, { 0x05, 4 }, { 0x06, 4 }, { 0x07, 1 },
		{ 0x10, 1 }, { 0x13, 1 }, { 0x16, 1 }, { 0x19, 1 },
	};
	const int index_base = ch * 0x20;
	unsigned n;

	for (n = 0; n < sizeof(layout) / sizeof(layout[0]); n++) {
		pos = load_index_to_pos(dev, layout[n][1], layout[n][0] + index_base, pos);
	}

	return pos;
}
1874
/*
 * Save the per-channel indexed DQS registers of dev to NVRAM.
 *
 * Channel ch uses register indexes offset by ch * 0x20. The register list
 * adds up to 30 bytes per channel. Returns the NVRAM position after the
 * last register.
 */
static int dqs_save_MC_NVRAM_ch(unsigned int dev, int ch, int pos)
{
	/* { register index, size in bytes } in save order */
	static const unsigned char layout[][2] = {
		{ 0x00, 4 }, { 0x01, 4 }, { 0x02, 4 }, { 0x03, 1 },
		{ 0x04, 4 }, { 0x05, 4 }, { 0x06, 4 }, { 0x07, 1 },
		{ 0x10, 1 }, { 0x13, 1 }, { 0x16, 1 }, { 0x19, 1 },
	};
	const int index_base = ch * 0x20;
	unsigned n;

	for (n = 0; n < sizeof(layout) / sizeof(layout[0]); n++) {
		pos = save_index_to_pos(dev, layout[n][1], layout[n][0] + index_base, pos);
	}

	return pos;
}
1893
1894 static void dqs_save_MC_NVRAM(unsigned int dev)
1895 {
1896         int pos = 0;
1897         u32 reg;
1898         printk(BIOS_DEBUG, "DQS SAVE NVRAM: %x\n", dev);
1899         pos = dqs_save_MC_NVRAM_ch(dev, 0, pos);
1900         pos = dqs_save_MC_NVRAM_ch(dev, 1, pos);
1901         /* save the maxasync lat here */
1902         reg = pci_read_config32(dev, DRAM_CONFIG_HIGH);
1903         pos = s3_save_nvram_early(reg, 4, pos);
1904 }
1905
1906 static void dqs_restore_MC_NVRAM(unsigned int dev)
1907 {
1908         int pos = 0;
1909         u32 reg;
1910
1911         printk(BIOS_DEBUG, "DQS RESTORE FROM NVRAM: %x\n", dev);
1912         pos = dqs_load_MC_NVRAM_ch(dev, 0, pos);
1913         pos = dqs_load_MC_NVRAM_ch(dev, 1, pos);
1914         /* load the maxasync lat here */
1915         pos = s3_load_nvram_early(4, &reg, pos);
1916         reg &= (DCH_MaxAsyncLat_MASK <<DCH_MaxAsyncLat_SHIFT);
1917         reg |= pci_read_config32(dev, DRAM_CONFIG_HIGH);
1918         pci_write_config32(dev, DRAM_CONFIG_HIGH, reg);
1919 }
1920
1921 #if CONFIG_MEM_TRAIN_SEQ == 0
1922 #if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
1923 static void dqs_timing(int controllers, const struct mem_controller *ctrl, tsc_t *tsc0, struct sys_info *sysinfo)
1924 #else
1925 static void dqs_timing(int controllers, const struct mem_controller *ctrl, struct sys_info *sysinfo)
1926 #endif
1927 {
1928         int  i;
1929
1930         tsc_t tsc[5];
1931
1932         //need to enable mtrr, so dqs training could access the test address
1933         setup_mtrr_dqs(sysinfo->tom_k, sysinfo->tom2_k);
1934
1935         for(i = 0; i < controllers; i++) {
1936                 if (!sysinfo->ctrl_present[ i ])
1937                         continue;
1938
1939                 /* Skip everything if I don't have any memory on this controller */
1940                 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1941
1942                 fill_mem_cs_sysinfo(i, ctrl+i, sysinfo);
1943         }
1944
1945         tsc[0] = rdtsc();
1946         for(i = 0; i < controllers; i++) {
1947                 if (!sysinfo->ctrl_present[ i ])
1948                         continue;
1949
1950                 /* Skip everything if I don't have any memory on this controller */
1951                 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1952
1953                 printk(BIOS_DEBUG, "DQS Training:RcvrEn:Pass1: %02x\n", i);
1954                 if(train_DqsRcvrEn(ctrl+i, 1, sysinfo)) goto out;
1955                 printk(BIOS_DEBUG, " done\r\n");
1956         }
1957
1958         tsc[1] = rdtsc();
1959 #if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
1960         f0_svm_workaround(controllers, ctrl, tsc0, sysinfo);
1961 #endif
1962
1963         tsc[2] = rdtsc();
1964         for(i = 0; i < controllers; i++) {
1965                 if (!sysinfo->ctrl_present[i])
1966                         continue;
1967
1968                 /* Skip everything if I don't have any memory on this controller */
1969                 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1970
1971                 printk(BIOS_DEBUG, "DQS Training:DQSPos: %02x\n", i);
1972                 if(train_DqsPos(ctrl+i, sysinfo)) goto out;
1973                 printk(BIOS_DEBUG, " done\r\n");
1974         }
1975
1976         tsc[3] = rdtsc();
1977         for(i = 0; i < controllers; i++) {
1978                 if (!sysinfo->ctrl_present[i])
1979                         continue;
1980
1981                 /* Skip everything if I don't have any memory on this controller */
1982                 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1983
1984                 printk(BIOS_DEBUG, "DQS Training:RcvrEn:Pass2: %02x\n", i);
1985                 if(train_DqsRcvrEn(ctrl+i, 2, sysinfo)) goto out;
1986                 printk(BIOS_DEBUG, " done\r\n");
1987                 sysinfo->mem_trained[i]=1;
1988                 dqs_save_MC_NVRAM((ctrl+i)->f2);
1989         }
1990
1991 out:
1992         tsc[4] = rdtsc();
1993         clear_mtrr_dqs(sysinfo->tom2_k);
1994
1995
1996         for(i=0;i<5;i++) {
1997                 print_debug_dqs_tsc_x("DQS Training:tsc", i,  tsc[i].hi, tsc[i].lo);
1998         }
1999
2000
2001
2002 }
2003
2004 #endif
2005
2006
2007 #if CONFIG_MEM_TRAIN_SEQ > 0
2008
2009 static void dqs_timing(int i, const struct mem_controller *ctrl, struct sys_info *sysinfo, unsigned v)
2010 {
2011
2012         int ii;
2013
2014          tsc_t tsc[4];
2015
2016         if(sysinfo->mem_trained[i] != 0x80) return;
2017
2018 #if CONFIG_MEM_TRAIN_SEQ == 1
2019         //need to enable mtrr, so dqs training could access the test address
2020         setup_mtrr_dqs(sysinfo->tom_k, sysinfo->tom2_k);
2021 #endif
2022
2023         fill_mem_cs_sysinfo(i, ctrl, sysinfo);
2024
2025         if(v) {
2026                 tsc[0] = rdtsc();
2027
2028                 printk(BIOS_DEBUG, "set DQS timing:RcvrEn:Pass1: %02x\n", i);
2029         }
2030         if(train_DqsRcvrEn(ctrl, 1,  sysinfo)) {
2031                 sysinfo->mem_trained[i]=0x81; //
2032                 goto out;
2033         }
2034
2035         if(v) {
2036                 printk(BIOS_DEBUG, " done\r\n");
2037                 tsc[1] = rdtsc();
2038                 printk(BIOS_DEBUG, "set DQS timing:DQSPos: %02x\n", i);
2039         }
2040
2041         if(train_DqsPos(ctrl, sysinfo)) {
2042                 sysinfo->mem_trained[i]=0x82; //
2043                 goto out;
2044         }
2045
2046         if(v) {
2047                 printk(BIOS_DEBUG, " done\r\n");
2048                 tsc[2] = rdtsc();
2049
2050                 printk(BIOS_DEBUG, "set DQS timing:RcvrEn:Pass2: %02x\n", i);
2051         }
2052         if(train_DqsRcvrEn(ctrl, 2,  sysinfo)){
2053                 sysinfo->mem_trained[i]=0x83; //
2054                 goto out;
2055         }
2056
2057         if(v) {
2058                 printk(BIOS_DEBUG, " done\r\n");
2059
2060                 tsc[3] = rdtsc();
2061         }
2062
2063 out:
2064 #if CONFIG_MEM_TRAIN_SEQ == 1
2065         clear_mtrr_dqs(sysinfo->tom2_k);
2066 #endif
2067
2068         if(v) {
2069                 for(ii=0;ii<4;ii++) {
2070                       print_debug_dqs_tsc_x("Total DQS Training : tsc ", ii,  tsc[ii].hi, tsc[ii].lo);
2071                 }
2072         }
2073
2074         if(sysinfo->mem_trained[i] == 0x80) {
2075                 sysinfo->mem_trained[i]=1;
2076         }
2077
2078 }
2079 #endif
2080
2081 #if CONFIG_MEM_TRAIN_SEQ == 1
2082 static void train_ram(unsigned nodeid, struct sys_info *sysinfo, struct sys_info *sysinfox)
2083 {
2084         dqs_timing(nodeid, &sysinfo->ctrl[nodeid], sysinfo, 0); // keep the output tidy
2085 //      memcpy(&sysinfox->dqs_rcvr_dly_a[nodeid * 2 * 8],&sysinfo->dqs_rcvr_dly_a[nodeid * 2 * 8], 2*8);
2086 //      memcpy(&sysinfox->dqs_delay_a[nodeid * 2 * 2 * 9], &sysinfo->dqs_delay_a[nodeid * 2 * 2 * 9], 2 * 2 * 9);
2087         sysinfox->mem_trained[nodeid] = sysinfo->mem_trained[nodeid];
2088
2089 }
2090 static void copy_and_run_ap_code_in_car(unsigned ret_addr);
2091 static inline void train_ram_on_node(unsigned nodeid, unsigned coreid, struct sys_info *sysinfo, unsigned retcall)
2092 {
2093         if(coreid) return; // only do it on core0
2094         struct sys_info *sysinfox = (void*)((CONFIG_RAMTOP) - CONFIG_DCACHE_RAM_GLOBAL_VAR_SIZE);
2095         wait_till_sysinfo_in_ram(); // use pci to get it
2096
2097         if(sysinfox->mem_trained[nodeid] == 0x80) {
2098         #if 0
2099                 sysinfo->tom_k = sysinfox->tom_k;
2100                 sysinfo->tom2_k = sysinfox->tom2_k;
2101                 sysinfo->meminfo[nodeid].is_Width128 = sysinfox->meminfo[nodeid].is_Width128;
2102                 sysinfo->mem_trained[nodeid] = sysinfox->mem_trained[nodeid];
2103                 memcpy(&sysinfo->ctrl[nodeid], &sysinfox->ctrl[nodeid], sizeof(struct mem_controller));
2104         #else
2105                 memcpy(sysinfo, sysinfox, CONFIG_DCACHE_RAM_GLOBAL_VAR_SIZE);
2106         #endif
2107                 set_top_mem_ap(sysinfo->tom_k, sysinfo->tom2_k); // keep the ap's tom consistent with bsp's
2108         #if CONFIG_AP_CODE_IN_CAR == 0
2109                 printk(BIOS_DEBUG, "CODE IN ROM AND RUN ON NODE: %02x\n", nodeid);
2110                 train_ram(nodeid, sysinfo, sysinfox);
2111         #else
2112                 /* Can copy dqs_timing to ap cache and run from cache?
2113                 * we need coreboot_ap_car.rom? and treat it as coreboot_ram.rom for ap ?
2114                 */
2115                 copy_and_run_ap_code_in_car(retcall);
2116                 // will go back by jump
2117         #endif
2118         }
2119 }
2120 #endif