16faf489cff4f5256f74a2384ff8def7be924bd1
[coreboot.git] / src / northbridge / amd / amdk8 / raminit_f_dqs.c
1 /*
2  * This file is part of the coreboot project.
3  *
4  * Copyright (C) 2005 YingHai Lu
5  * Copyright (C) 2008 Advanced Micro Devices, Inc.
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; version 2 of the License.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
19 */
20
/*
 * Verbosity of the DQS training trace.
 * 0 means no debug info.  A message tagged with level N is printed only
 * when DQS_TRAIN_DEBUG > N (see print_debug_dqs() and friends below).
 */
#define DQS_TRAIN_DEBUG 0

/* Refuse to build unless CONFIG_USE_PRINTK_IN_CAR is set to a non-zero value. */
#if CONFIG_USE_PRINTK_IN_CAR
#else
#error This file needs CONFIG_USE_PRINTK_IN_CAR
#endif
28
/*
 * Print "<str><val>" (val in hex) on the debug console.
 *
 * The message is emitted only when its level is below the compile-time
 * DQS_TRAIN_DEBUG threshold; with DQS_TRAIN_DEBUG at 0 the body is empty.
 */
static inline void print_debug_dqs(const char *str, unsigned val, unsigned level)
{
#if DQS_TRAIN_DEBUG > 0
        if (level >= DQS_TRAIN_DEBUG)
                return;

        printk_debug("%s%x\r\n", str, val);
#endif
}
37
/*
 * Print two labelled 32-bit hex values on one line:
 * "<str><val><str2><val2>".
 *
 * The message is emitted only when its level is below the compile-time
 * DQS_TRAIN_DEBUG threshold; with DQS_TRAIN_DEBUG at 0 the body is empty.
 */
static inline void print_debug_dqs_pair(const char *str, unsigned val, const char *str2, unsigned val2, unsigned level)
{
#if DQS_TRAIN_DEBUG > 0
        if (level >= DQS_TRAIN_DEBUG)
                return;

        printk_debug("%s%08x%s%08x\r\n", str, val, str2, val2);
#endif
}
46
/*
 * Print an indexed pair of 32-bit values as one 64-bit looking hex number:
 * "<str>[<i>]=<val><val2>".
 *
 * The message is emitted only when its level is below the compile-time
 * DQS_TRAIN_DEBUG threshold; with DQS_TRAIN_DEBUG at 0 the body is empty.
 */
static inline void print_debug_dqs_tsc(const char *str, unsigned i, unsigned val, unsigned val2, unsigned level)
{
#if DQS_TRAIN_DEBUG > 0
        if (level >= DQS_TRAIN_DEBUG)
                return;

        printk_debug("%s[%02x]=%08x%08x\r\n", str, i, val, val2);
#endif
}
55
/*
 * Same output format as print_debug_dqs_tsc(), "<str>[<i>]=<val><val2>",
 * but printed unconditionally: it has no level argument and is not gated
 * by DQS_TRAIN_DEBUG.
 */
static inline void print_debug_dqs_tsc_x(const char *str, unsigned i, unsigned val, unsigned val2)
{
        printk_debug("%s[%02x]=%08x%08x\r\n", str, i, val, val2);

}
61
62 static void fill_mem_cs_sysinfo(unsigned nodeid, const struct mem_controller *ctrl, struct sys_info *sysinfo)
63 {
64
65         int i;
66         sysinfo->mem_base[nodeid] = pci_read_config32(ctrl->f1, 0x40 + (nodeid<<3));
67
68         for(i=0;i<8; i++) {
69                 sysinfo->cs_base[nodeid*8+i] = pci_read_config32(ctrl->f2, 0x40 + (i<<2));
70         }
71
72         sysinfo->hole_reg[nodeid] = pci_read_config32(ctrl->f1, 0xf0);
73
74 }
75 static unsigned Get_MCTSysAddr(const struct mem_controller *ctrl,  unsigned cs_idx, struct sys_info *sysinfo)
76 {
77         uint32_t dword;
78         uint32_t mem_base;
79         unsigned nodeid = ctrl->node_id;
80
81 #if CONFIG_HW_MEM_HOLE_SIZEK != 0
82         uint32_t hole_reg;
83 #endif
84
85         //get the local base addr of the chipselect
86         dword = sysinfo->cs_base[nodeid * 8 + cs_idx];
87         dword &= 0xfffffff0;
88
89         //sys addr= node base + local cs base
90         mem_base = sysinfo->mem_base[nodeid];
91         mem_base &= 0xffff0000;
92
93         dword += mem_base;
94 #if CONFIG_HW_MEM_HOLE_SIZEK != 0
95         hole_reg = sysinfo->hole_reg[nodeid];
96         if(hole_reg & 1) {
97                 unsigned hole_startk;
98                 hole_startk = (hole_reg & (0xff<<24)) >> 10;
99                 if( (dword >= (hole_startk<<2)) && (dword < ((4*1024*1024)<<2))) {
100                         dword += ((4*1024*1024 - hole_startk)<<2);
101                 }
102         }
103 #endif
104
105         //add 1MB offset to avoid compat area
106         dword += (1<<(20-8));
107
108         //So final result is upper 32 bit addr
109
110         return dword;
111
112 }
113
/*
 * Test address for receiver-enable training of chip select cs_idx.
 * Simply forwards to Get_MCTSysAddr(); the channel argument is not used.
 */
static unsigned Get_RcvrSysAddr(const struct mem_controller * ctrl, unsigned channel, unsigned cs_idx, struct sys_info *sysinfo)
{
        return Get_MCTSysAddr(ctrl, cs_idx, sysinfo);

}
119
/* Return the current value of control register CR4. */
static inline unsigned long read_cr4(void)
{
        unsigned long cr4;
        /* movl from %cr4 into any general purpose register */
        asm volatile ("movl %%cr4, %0" : "=r" (cr4));
        return cr4;
}
126
/* Load control register CR4 with the given value. */
static inline void write_cr4(unsigned long cr4)
{
        /* movl from any general purpose register into %cr4 */
        asm volatile ("movl %0, %%cr4" : : "r" (cr4));
}
131
132
/*
 * Set bit 9 of CR4 (OSFXSR in the x86 architecture manuals) so that the
 * SSE2 instructions used by the test-pattern writer can execute.
 *
 * Declared with a (void) parameter list so that it is a real prototype
 * rather than an old-style unspecified-argument declaration.
 */
static inline void enable_sse2(void)
{
        write_cr4(read_cr4() | (1UL << 9));
}
140
/*
 * Clear bit 9 of CR4 again (the bit set by enable_sse2()).  The bit is
 * cleared unconditionally; its previous state is not remembered.
 *
 * Declared with a (void) parameter list so that it is a real prototype
 * rather than an old-style unspecified-argument declaration.
 */
static inline void disable_sse2(void)
{
        write_cr4(read_cr4() & ~(1UL << 9));
}
148
149
150 static void set_wrap32dis(void) {
151         msr_t msr;
152
153         msr = rdmsr(0xc0010015);
154         msr.lo |= (1<<17);
155
156         wrmsr(0xc0010015, msr);
157
158 }
159
160 static void clear_wrap32dis(void) {
161         msr_t msr;
162
163         msr = rdmsr(0xc0010015);
164         msr.lo &= ~(1<<17);
165
166         wrmsr(0xc0010015, msr);
167
168 }
169
170 static void set_FSBASE(uint32_t addr_hi)
171 {
172         msr_t msr;
173
174         //set fs and use fs prefix to access the mem
175         msr.hi = addr_hi;
176         msr.lo = 0;
177         wrmsr(0xc0000100, msr); //FS_BASE
178
179 }
180
181 static unsigned ChipSelPresent(const struct mem_controller *ctrl, unsigned cs_idx, struct sys_info *sysinfo)
182 {
183         unsigned enabled;
184         unsigned nodeid = ctrl->node_id;
185
186
187         enabled = sysinfo->cs_base[nodeid * 8 + cs_idx];
188         enabled &= 1;
189
190         return enabled;
191
192 }
193
/*
 * Tell whether the rank behind chip select cs_idx takes part in receiver
 * enable training.  Forwards to ChipSelPresent(); the channel and
 * is_Width128 arguments are not used.
 */
static unsigned RcvrRankEnabled(const struct mem_controller *ctrl, int channel, int cs_idx, unsigned is_Width128, struct sys_info *sysinfo)
{
        return ChipSelPresent(ctrl, cs_idx, sysinfo);
}
198
/*
 * Copy line_num cache lines (64 bytes each) from buf_a to fs:addr_lo using
 * non-temporal 16-byte stores.
 *
 * addr_lo:  32-bit offset inside the FS segment (see set_FSBASE())
 * buf_a:    source buffer; must be 16-byte aligned because movdqa is used
 * line_num: number of 64-byte lines to write; 0 writes nothing
 *
 * The asm advances the destination, the source and the loop counter, so
 * all three are declared as read-write operands on local copies.  xmm0,
 * the flags and memory are listed as clobbers so the compiler neither
 * caches buffer contents across the asm nor assumes those registers are
 * preserved.
 */
static void WriteLNTestPattern(unsigned addr_lo, uint8_t *buf_a, unsigned line_num)
{
        unsigned dst = addr_lo;
        uint8_t *src = buf_a;
        unsigned count = line_num * 4; /* four 16-byte stores per line */

        /* "loop" with ecx == 0 would wrap around and iterate 2^32 times */
        if (count == 0)
                return;

        __asm__ volatile (
                "1:\n\t"
                "movdqa (%1), %%xmm0\n\t"
                "movntdq %%xmm0, %%fs:(%0)\n\t" /* xmm0 is 128 bit */
                "addl %3, %0\n\t"
                "addl %3, %1\n\t"
                "loop 1b\n\t"

                : "+a" (dst), "+b" (src), "+c" (count)
                : "d" (16)
                : "xmm0", "memory", "cc"
        );
}
214
/*
 * Write a single 64-byte test line to the given test address.
 *
 * addr:  system address in 256-byte units (address >> 8)
 * p:     pattern selector; 1 picks buf_b, anything else picks buf_a
 */
static void Write1LTestPattern(unsigned addr, unsigned p, uint8_t *buf_a, uint8_t *buf_b)
{
        uint8_t *src = (p == 1) ? buf_b : buf_a;

        /* upper address bits go into FS_BASE, the rest is the offset */
        set_FSBASE(addr >> 24);
        WriteLNTestPattern(addr << 8, src, 1);
}
225
/*
 * Read one dword from the test address through the FS segment.
 *
 * addr is the system address in 256-byte units: addr>>24 is loaded into
 * FS_BASE and addr<<8 is used as the 32-bit offset.  The value read is
 * discarded; the access itself is the purpose of the call.
 */
static void Read1LTestPattern(unsigned addr)
{
        unsigned value; /* receives the loaded dword; not used afterwards */

        set_FSBASE(addr>>24);

        /* 1st move causes read fill (to exclusive or shared)*/
        __asm__ volatile (
                "movl %%fs:(%1), %0\n\t"
                :"=b"(value): "a" (addr<<8)
        );

}
239
/* Result codes returned by CompareTestPatternQW0() */
#define DQS_PASS 0
#define DQS_FAIL 1

/* Values of the "Pass" argument of the training routines */
#define DQS_FIRST_PASS 1
#define DQS_SECOND_PASS 2

/*
 * Status codes and limits for the training routines.  TrainRcvrEn() ORs
 * SB_NORCVREN, SB_SmallRCVR and SB_CHA2BRCVREN into its local error word;
 * the remaining names are not referenced by TrainRcvrEn().
 */
#define SB_NORCVREN 11 /* no passing receiver enable window found */
#define RCVREN_MARGIN 6 /* only appears in commented-out expressions of TrainRcvrEn() */
#define SB_SmallRCVR 13 /* passing window too narrow, too far delayed */
#define SB_CHA2BRCVREN 12 /* channel A/B receiver delays differ by more than T1000 */
#define SB_NODQSPOS  14
#define MIN_DQS_WNDW 3
#define SB_SMALLDQS 15
253
254
255 static unsigned CompareTestPatternQW0(unsigned channel, unsigned addr, unsigned pattern, const uint32_t *TestPattern0, const uint32_t *TestPattern1, const uint32_t *TestPattern2, unsigned Pass, unsigned is_Width128)
256 {
257         uint32_t addr_lo;
258         uint32_t *test_buf;
259         uint32_t value;
260         uint32_t value_test;
261         unsigned result = DQS_FAIL;
262
263         if(Pass == DQS_FIRST_PASS) {
264                 if(pattern==1) {
265                         test_buf = (uint32_t *)TestPattern1;
266                 }
267                 else {
268                         test_buf = (uint32_t *)TestPattern0;
269                 }
270         }
271         else {
272                 test_buf = (uint32_t *)TestPattern2;
273         }
274
275         set_FSBASE(addr>>24);
276
277         addr_lo = addr<<8;
278
279         if(is_Width128 && (channel == 1)) {
280                 addr_lo += 8; //second channel
281                 test_buf += 2;
282         }
283
284         __asm__ volatile (
285                 "movl %%fs:(%1), %0\n\t"
286                 :"=b"(value): "a" (addr_lo)
287         );
288
289         value_test = *test_buf;
290
291
292         print_debug_dqs_pair("\t\t\t\t\t\tQW0.lo : test_buf= ", (unsigned)test_buf, " value = ", value_test, 4);
293         print_debug_dqs_pair("\t\t\t\t\t\tQW0.lo : addr_lo = ", addr_lo, " value = ", value, 4);
294
295         if(value == value_test) {
296                 addr_lo += 4;
297                 test_buf++;
298                 __asm__ volatile (
299                         "movl %%fs:(%1), %0\n\t"
300                         :"=b"(value): "a" (addr_lo)
301                 );
302                 value_test = *test_buf;
303                 print_debug_dqs_pair("\t\t\t\t\t\tQW0.hi : test_buf= ", (unsigned)test_buf, " value = ", value_test, 4);
304                 print_debug_dqs_pair("\t\t\t\t\t\tQW0.hi : addr_lo = ", addr_lo, " value = ", value, 4);
305
306                 if(value == value_test){
307                         result =  DQS_PASS;
308                 }
309         }
310
311         if(Pass == DQS_SECOND_PASS) { // second pass need to be inverted
312                 if(result==DQS_PASS) {
313                         result = DQS_FAIL;
314                 }
315                 else {
316                         result = DQS_PASS;
317                 }
318         }
319
320         return result;
321
322 }
323
324 static void SetMaxAL_RcvrDly(const struct mem_controller *ctrl, unsigned dly)
325 {
326         uint32_t reg;
327
328         dly += (20-1); // round it
329         dly /= 20; // convert from unit 50ps to 1ns
330
331         dly += 6;
332
333
334         reg = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH);
335         reg &= ~(DCH_MaxAsyncLat_MASK <<DCH_MaxAsyncLat_SHIFT);
336         reg |= ((dly - DCH_MaxAsyncLat_BASE) << DCH_MaxAsyncLat_SHIFT);
337         pci_write_config32(ctrl->f2, DRAM_CONFIG_HIGH, reg);
338
339 }
340
341 /*
342         Set the Target range to WT IO (using an IORR overlapping the already existing
343         WB dram type). Use IORR0
344 */
345 static void SetTargetWTIO(unsigned addr)
346 {
347         msr_t msr;
348         msr.hi = addr>>24;
349         msr.lo = addr<<8;
350         wrmsr(0xc0010016, msr); //IORR0 BASE
351
352         msr.hi = 0xff;
353         msr.lo = 0xfc000800;  // 64MB Mask
354         wrmsr(0xc0010017, msr); // IORR0 Mask
355 }
356
357 static void ResetTargetWTIO(void)
358 {
359         msr_t msr;
360
361         msr.hi = 0;
362         msr.lo = 0;
363         wrmsr(0xc0010017, msr); // IORR0 Mask
364 }
365
/*
 * Flush the cache line that holds the given test address.
 *
 * addr is the system address in 256-byte units: addr>>24 is loaded into
 * FS_BASE and addr<<8 is used as the 32-bit offset for clflush.
 */
static void proc_CLFLUSH(unsigned addr)
{

        set_FSBASE(addr>>24);

        /* flush the line addressed through the FS segment */
        __asm__ volatile (
                        /* clflush fs:[eax] */
                "clflush %%fs:(%0)\n\t"
                ::"a" (addr<<8)
        );

}
/*
 * Flush the cache line of a test address while the range is temporarily
 * covered by IORR0: set up IORR0 for addr, clflush the line, then clear
 * the IORR0 mask again.
 */
static void proc_IOCLFLUSH(unsigned addr)
{
        SetTargetWTIO(addr);
        proc_CLFLUSH(addr);
        ResetTargetWTIO();
}
385
386 static void ResetDCTWrPtr(const struct mem_controller *ctrl)
387 {
388         uint32_t dword;
389         unsigned index = 0x10;
390
391         dword = pci_read_config32_index_wait(ctrl->f2, 0x98, index);
392         pci_write_config32_index_wait(ctrl->f2, 0x98, index, dword);
393
394         index += 0x20;
395         dword = pci_read_config32_index_wait(ctrl->f2, 0x98, index);
396         pci_write_config32_index_wait(ctrl->f2, 0x98, index, dword);
397
398 }
399
400
401 static uint16_t get_exact_T1000(unsigned i)
402 {
403         //                                 200   266,   333,  400
404         static const uint16_t T1000_a[]= { 5000, 3759, 3003, 2500 };
405
406         static const uint16_t TT_a[] = {
407                  /*200   266   333   400 */
408          /*4 */   6250, 6250, 6250, 6250,
409          /*5 */   5000, 5000, 5000, 2500,
410          /*6 */   5000, 4166, 4166, 2500,
411          /*7 */   5000, 4285, 3571, 2500,
412
413          /*8 */   5000, 3750, 3125, 2500,
414          /*9 */   5000, 3888, 3333, 2500,
415          /*10*/   5000, 4000, 3000, 2500,
416          /*11*/   5000, 4090, 3181, 2500,
417
418          /*12*/   5000, 3750, 3333, 2500,
419          /*13*/   5000, 3846, 3076, 2500,
420          /*14*/   5000, 3928, 3214, 2500,
421          /*15*/   5000, 4000, 3000, 2500,
422         };
423
424         int index;
425         msr_t msr;
426
427         /* Check for FID control support */
428         struct cpuid_result cpuid1;
429         cpuid1 = cpuid(0x80000007);
430         if( cpuid1.edx & 0x02 ) {
431                 /* Use current FID */
432                 unsigned fid_cur;
433                 msr = rdmsr(0xc0010042);
434                 fid_cur = msr.lo & 0x3f;
435
436                 index = fid_cur>>1;
437         } else {
438                 /* Use startup FID */
439                 unsigned fid_start;
440                 msr = rdmsr(0xc0010015);
441                 fid_start = (msr.lo & (0x3f << 24));
442                 
443                 index = fid_start>>25;
444         }
445
446         if(index>12) return T1000_a[i];
447
448         return TT_a[index * 4+i];
449
450 }
451
452 static void InitDQSPos4RcvrEn(const struct mem_controller *ctrl)
453 {
454         int i;
455         uint32_t dword;
456
457         dword = 0x00000000;
458         for(i=1; i<=3; i++) {
459                 /* Program the DQS Write Timing Control Registers (Function 2:Offset 0x9c, index 0x01-0x03, 0x21-0x23) to 0x00 for all bytes */
460                 pci_write_config32_index_wait(ctrl->f2, 0x98, i, dword);
461                 pci_write_config32_index_wait(ctrl->f2, 0x98, i+0x20, dword);
462         }
463
464         dword = 0x2f2f2f2f;
465         for(i=5; i<=7; i++) {
466                 /* Program the DQS Write Timing Control Registers (Function 2:Offset 0x9c, index 0x05-0x07, 0x25-0x27) to 0x2f for all bytes */
467                 pci_write_config32_index_wait(ctrl->f2, 0x98, i, dword);
468                 pci_write_config32_index_wait(ctrl->f2, 0x98, i+0x20, dword);
469         }
470
471
472 }
/* The F0/F1 workaround is compiled in unless the build overrides this macro. */
#ifndef K8_REV_F_SUPPORT_F0_F1_WORKAROUND
#define K8_REV_F_SUPPORT_F0_F1_WORKAROUND 1
#endif

/*
 * Train the DQS receiver enable delay of one memory controller.
 *
 * ctrl:    controller whose function-2 registers are programmed
 * Pass:    DQS_FIRST_PASS or DQS_SECOND_PASS
 * sysinfo: supplies the cached chip-select/base registers and meminfo, and
 *          holds dqs_rcvr_dly_a[], where the delay found for every
 *          channel/receiver is stored
 *
 * For each channel and each enabled receiver pair (chip selects 0, 2, 4, 6,
 * plus the odd chip select when it is enabled too) test lines are written
 * to memory and the delay is swept upwards - from 0 on the first pass, from
 * the stored value on the second pass - until CompareTestPatternQW0()
 * reports DQS_PASS for every tested address.  CompareTestPatternQW0()
 * inverts its result on the second pass, and the second-pass delay is
 * averaged with the stored one.
 *
 * While the routine runs, CR4 bit 9 and wrap32dis are set and DIMM ECC is
 * switched off; on exit ECC is put back to its previous setting and the
 * other two bits are cleared.  On the first pass (and, with the F0/F1
 * workaround compiled in, only when is_cpu_pre_f2_in_bsp() returns 0)
 * DC_DqsRcvEnTrain is set in DRAM_CTRL for the duration as well.
 *
 * Returns 1 if the largest delay found equals 0xae, otherwise 0.
 *
 * NOTE(review): the SB_* codes are ORed into Errors as plain values, and
 * Errors is only ever tested for being non-zero in this function.
 */
static unsigned TrainRcvrEn(const struct mem_controller *ctrl, unsigned Pass, struct sys_info *sysinfo)
{

        static const uint32_t TestPattern0[] = {
                        0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
                        0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
                        0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
                        0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
                };
        static const uint32_t TestPattern1[] = {
                        0x55555555, 0x55555555, 0x55555555, 0x55555555,
                        0x55555555, 0x55555555, 0x55555555, 0x55555555,
                        0x55555555, 0x55555555, 0x55555555, 0x55555555,
                        0x55555555, 0x55555555, 0x55555555, 0x55555555,
                };
        static const uint32_t TestPattern2[] = {
                        0x12345678, 0x87654321, 0x23456789, 0x98765432,
                        0x59385824, 0x30496724, 0x24490795, 0x99938733,
                        0x40385642, 0x38465245, 0x29432163, 0x05067894,
                        0x12349045, 0x98723467, 0x12387634, 0x34587623,
                };

        uint8_t pattern_buf_x[64 * 4 + 16]; // Room for the pattern lines plus 16 extra bytes so that a 16-byte aligned start can be carved out
        uint8_t *buf_a, *buf_b;
        uint32_t ecc_bit;
        uint32_t dword;
        uint8_t *dqs_rcvr_dly_a = &sysinfo->dqs_rcvr_dly_a[ctrl->node_id * 2* 8] ; //8 node, channel 2, receiver 8

        int i;

        unsigned channel, receiver;

        unsigned Errors;
        unsigned CTLRMaxDelay;
        unsigned T1000;

        unsigned LastTest;
        unsigned CurrTest;
        unsigned Test0, Test1;

        unsigned RcvrEnDlyRmin;

        unsigned two_ranks;
        unsigned RcvrEnDly;

        unsigned PatternA;
        unsigned PatternB;

        unsigned TestAddr0, TestAddr0B, TestAddr1 = 0, TestAddr1B = 0;

        unsigned CurrRcvrCHADelay = 0;

        unsigned tmp;

        unsigned is_Width128 = sysinfo->meminfo[ctrl->node_id].is_Width128;

#if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
        unsigned cpu_f0_f1;
#endif

        if(Pass == DQS_FIRST_PASS) {
                InitDQSPos4RcvrEn(ctrl);
        }

        //enable SSE2
        enable_sse2();

        //wrap32dis
        set_wrap32dis();

        //disable ECC temporarily; the previous setting is kept in ecc_bit
        dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
        ecc_bit = dword & DCL_DimmEccEn;
        dword &= ~(DCL_DimmEccEn);
        pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dword);


        if(Pass == DQS_FIRST_PASS) {
#if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
        /* cpu_f0_f1 is only assigned and only read on the first pass */
        cpu_f0_f1 = is_cpu_pre_f2_in_bsp(ctrl->node_id);
        if(!cpu_f0_f1)
#endif
        {
#if 1
                /* Set the DqsRcvEnTrain bit */
                dword = pci_read_config32(ctrl->f2, DRAM_CTRL);
                dword |= DC_DqsRcvEnTrain;
                pci_write_config32(ctrl->f2, DRAM_CTRL, dword);
#endif
        }
        }

        //get T1000 figures (cycle time (ns)) * 1K
        dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH);
        dword &= DCH_MemClkFreq_MASK;

        T1000 = get_exact_T1000(dword);

        // SetupRcvrPattern
        /* buf_a is the next 16-byte boundary inside pattern_buf_x, buf_b lies 128 bytes after it */
        buf_a = (uint8_t *)(((uint32_t)(&pattern_buf_x[0]) + 0x10) & (0xfffffff0));
        buf_b = buf_a + 128; //??
        if(Pass==DQS_FIRST_PASS) {
                for(i=0;i<16;i++) {
                        *((uint32_t *)(buf_a + i*4)) = TestPattern0[i];
                        *((uint32_t *)(buf_b + i*4)) = TestPattern1[i];
                }
        }
        else {
                for(i=0;i<16;i++) {
                        *((uint32_t *)(buf_a + i*4)) = TestPattern2[i];
                        *((uint32_t *)(buf_b + i*4)) = TestPattern2[i];
                }
        }

        print_debug_dqs("\r\nTrainRcvEn: 0 ctrl", ctrl->node_id, 0);

        print_debug_addr("TrainRcvEn: buf_a:", buf_a);

        Errors = 0;
        /* for each channel */
        CTLRMaxDelay = 0;
        channel = 0;

        if (!(sysinfo->meminfo[ctrl->node_id].dimm_mask & 0x0F) &&
             (sysinfo->meminfo[ctrl->node_id].dimm_mask & 0xF0)) { /* channelB only? */
                channel = 1;
        }

        /* both loops stop as soon as an error has been recorded */
        for ( ; (channel < 2) && (!Errors); channel++)
        {
                print_debug_dqs("\tTrainRcvEn51: channel ",channel, 1);

                /* for each rank */
                /* there are four receiver pairs, loosely associated with CS */
                for( receiver = 0; (receiver < 8) && (!Errors); receiver+=2)
                {

                        /* receiver enable delay register of this pair: 0x10, 0x13, 0x16, 0x19 */
                        unsigned index=(receiver>>1) * 3 + 0x10;

                        print_debug_dqs("\t\tTrainRcvEn52: index ", index, 2);

                        if(is_Width128) {
                                if(channel) {
                                        /* remember the channel A delay; it is written back after channel B is trained */
                                        dword = pci_read_config32_index_wait(ctrl->f2, 0x98, index);
                                        CurrRcvrCHADelay= dword & 0xff;
                                }
                        }
                        else {
                                if(channel) {
                                        index += 0x20;
                                }
                        }

                        LastTest = DQS_FAIL;
                        RcvrEnDlyRmin = 0xaf;

                        if(!RcvrRankEnabled(ctrl, channel, receiver, is_Width128, sysinfo)) continue;

                        /* for each DQS receiver enable setting */

                        TestAddr0 = Get_RcvrSysAddr(ctrl, channel, receiver, sysinfo);

                        TestAddr0B = TestAddr0 + (1<<(20+2-8)); // 4MB

                        if(RcvrRankEnabled(ctrl, channel, receiver+1, is_Width128, sysinfo)) {
                                TestAddr1 = Get_RcvrSysAddr(ctrl, channel, receiver+1, sysinfo);
                                TestAddr1B = TestAddr1 + (1<<(20+2-8)); //4MB
                                two_ranks = 1;
                        }
                        else {
                                two_ranks = 0;
                        }

                        print_debug_dqs("\t\tTrainRcvEn53: TestAddr0B ", TestAddr0B, 2);

                        Write1LTestPattern(TestAddr0, 0, buf_a, buf_b); // rank0 of dimm, test p0
                        Write1LTestPattern(TestAddr0B, 1, buf_a, buf_b); //rank0 of dimm, test p1

                        if(two_ranks == 1) {
                                Write1LTestPattern(TestAddr1, 0, buf_a, buf_b); //rank 1 of dimm
                                Write1LTestPattern(TestAddr1B, 1, buf_a, buf_b);//rank 1 of dimm
                        }

                        /* first pass sweeps from 0, second pass from the stored delay */
                        if(Pass == DQS_FIRST_PASS) {
                                RcvrEnDly = 0;
                        } else {
                                RcvrEnDly = dqs_rcvr_dly_a[channel * 8 + receiver];
                        }

                        while ( RcvrEnDly < 0xaf) { // Sweep Delay value here
                                print_debug_dqs("\t\t\tTrainRcvEn541: RcvrEnDly ", RcvrEnDly, 3);

                                if(RcvrEnDly & 1) {
                                        /* Odd steps get another pattern such that even
                                           and odd steps alternate.
                                           The pointers to the patterns will be swapped
                                           at the end of the loop so that they correspond
                                        */
                                        PatternA = 1;
                                        PatternB = 0;
                                }
                                else {
                                        /* Even step */
                                        PatternA = 0;
                                        PatternB = 1;
                                }

                                /* Program current Receiver enable delay */
                                pci_write_config32_index_wait(ctrl->f2, 0x98, index, RcvrEnDly);
                                /* FIXME: 64bit MUX */

                                if(is_Width128) {
                                        /* Program current Receiver enable delay channel b */
                                        pci_write_config32_index_wait(ctrl->f2, 0x98, index+ 0x20, RcvrEnDly);
                                }

                                /* Program the MaxAsyncLat field with the
                                   current DQS receiver enable setting plus 6ns
                                */
                                /* Program MaxAsyncLat to correspond with current delay */
                                SetMaxAL_RcvrDly(ctrl, RcvrEnDly);

                                CurrTest = DQS_FAIL;

                                Read1LTestPattern(TestAddr0);  //Cache Fill
                                /* ROM vs cache compare */
                                Test0 = CompareTestPatternQW0(channel, TestAddr0, PatternA, TestPattern0, TestPattern1, TestPattern2, Pass, is_Width128);
                                proc_IOCLFLUSH(TestAddr0);

                                ResetDCTWrPtr(ctrl);

                                print_debug_dqs("\t\t\tTrainRcvEn542: Test0 ", Test0, 3);

                                /* each further address is only tested when the previous one passed */
                                if(Test0 == DQS_PASS) {

                                        Read1LTestPattern(TestAddr0B);
                                        Test1 = CompareTestPatternQW0(channel, TestAddr0B, PatternB, TestPattern0, TestPattern1, TestPattern2, Pass, is_Width128);
                                        proc_IOCLFLUSH(TestAddr0B);

                                        ResetDCTWrPtr(ctrl);

                                        print_debug_dqs("\t\t\tTrainRcvEn543: Test1 ", Test1, 3);

                                        if(Test1 == DQS_PASS) {
                                                if(two_ranks) {
                                                        Read1LTestPattern(TestAddr1);
                                                        Test0 = CompareTestPatternQW0(channel, TestAddr1, PatternA, TestPattern0, TestPattern1, TestPattern2, Pass, is_Width128);
                                                        proc_IOCLFLUSH(TestAddr1);
                                                        ResetDCTWrPtr(ctrl);

                                                        if(Test0 == DQS_PASS) {
                                                                Read1LTestPattern(TestAddr1B);
                                                                Test1 = CompareTestPatternQW0(channel, TestAddr1B, PatternB, TestPattern0, TestPattern1, TestPattern2, Pass, is_Width128);
                                                                proc_IOCLFLUSH(TestAddr1B);
                                                                ResetDCTWrPtr(ctrl);

                                                                if(Test1 == DQS_PASS) {
                                                                        CurrTest = DQS_PASS;
                                                                }
                                                        }
                                                        print_debug_dqs("\t\t\tTrainRcvEn544: Test0 ", Test0, 3);
                                                }
                                                else {
                                                        CurrTest = DQS_PASS;
                                                }
                                        }
                                }

                                print_debug_dqs("\t\t\tTrainRcvEn55: RcvrEnDly ", RcvrEnDly, 3);

                                /* stop at the first delay that passes after a failing (or no) previous step */
                                if(CurrTest == DQS_PASS) {
                                        if(LastTest == DQS_FAIL) {
                                                RcvrEnDlyRmin = RcvrEnDly;
                                                break;
                                        }
                                }

                                LastTest = CurrTest;

                                /* swap the rank 0 pointers */
                                tmp = TestAddr0;
                                TestAddr0 = TestAddr0B;
                                TestAddr0B = tmp;

                                /* swap the rank 1 pointers */
                                tmp = TestAddr1;
                                TestAddr1 = TestAddr1B;
                                TestAddr1B = tmp;

                                print_debug_dqs("\t\t\tTrainRcvEn56: RcvrEnDly ", RcvrEnDly, 3);

                                RcvrEnDly++;

                        } // while RcvrEnDly

                        print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDly ", RcvrEnDly, 2);

                        if(RcvrEnDlyRmin == 0xaf) {
                                //no passing window
                                Errors |= SB_NORCVREN;
                        }

                        if(Pass == DQS_FIRST_PASS) {
                                // We need a better value for DQSPos training
                                RcvrEnDly = RcvrEnDlyRmin /* + RCVREN_MARGIN * T1000/64/50 */;
                        } else {
                                RcvrEnDly = RcvrEnDlyRmin;
                        }

                        if(RcvrEnDly > 0xae) {
                                //passing window too narrow, too far delayed
                                Errors |= SB_SmallRCVR;
                                RcvrEnDly = 0xae;
                        }

                        if(Pass == DQS_SECOND_PASS) { //second pass must average values
                                RcvrEnDly += dqs_rcvr_dly_a[channel * 8 + receiver] /* - (RCVREN_MARGIN * T1000/64/50)*/;
                                RcvrEnDly >>= 1;
                        }

                        dqs_rcvr_dly_a[channel * 8 + receiver] = RcvrEnDly;

                        //Set final RcvrEnDly for this DIMM and Channel
                        pci_write_config32_index_wait(ctrl->f2, 0x98, index, RcvrEnDly);

                        if(is_Width128) {
                                pci_write_config32_index_wait(ctrl->f2, 0x98, index+0x20, RcvrEnDly); // channel B
                                if(channel) {
                                        /* restore the channel A delay and check how far A and B are apart */
                                        pci_write_config32_index_wait(ctrl->f2, 0x98, index, CurrRcvrCHADelay);
                                        if(RcvrEnDly > CurrRcvrCHADelay) {
                                                dword = RcvrEnDly - CurrRcvrCHADelay;
                                        }
                                        else {
                                                dword = CurrRcvrCHADelay - RcvrEnDly;
                                        }
                                        /* delay steps are 50ps each, T1000 is the cycle time in ns * 1000 */
                                        dword *= 50;
                                        if(dword > T1000) {
                                                Errors |= SB_CHA2BRCVREN;
                                        }
                                }
                        }

                        print_debug_dqs("\t\tTrainRcvEn63: RcvrEnDly ", RcvrEnDly, 2);

                        if(RcvrEnDly > CTLRMaxDelay) {
                                CTLRMaxDelay = RcvrEnDly;
                        }

                        print_debug_dqs("\t\tTrainRcvEn64: CTLRMaxDelay ", CTLRMaxDelay, 2);

                } /* receiver */
        } /* channel */

        print_debug_dqs("\tTrainRcvEn65: CTLRMaxDelay ", CTLRMaxDelay, 1);

        /* Program the MaxAsyncLat field with the largest DQS Receiver Enable setting */
        SetMaxAL_RcvrDly(ctrl, CTLRMaxDelay);
        ResetDCTWrPtr(ctrl);

        //Enable ECC again (only if it was enabled on entry)
        dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
        dword &= ~(DCL_DimmEccEn);
        dword |= ecc_bit;
        pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dword);

        if(Pass == DQS_FIRST_PASS) {
#if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
        if(!cpu_f0_f1)
#endif
        {
                /* Clear the DqsRcvEnTrain bit that was set above */
                dword = pci_read_config32(ctrl->f2, DRAM_CTRL);
                dword &= ~DC_DqsRcvEnTrain;
                pci_write_config32(ctrl->f2, DRAM_CTRL, dword);
        }
        }

        //Clear wrap32dis

        clear_wrap32dis();

        //clear the SSE2 enable bit again
        disable_sse2();

#if CONFIG_MEM_TRAIN_SEQ != 1
        /* We need tidy output for type 1 */
        printk_debug(" CTLRMaxDelay=%02x\n", CTLRMaxDelay);
#endif

        return (CTLRMaxDelay==0xae)?1:0;

}
868
/*
 * Direction selectors passed to the DQS delay helpers in this file.
 * The value is also used arithmetically: SetDQSDelayCSR() and
 * SetDQSDelayAllCSR() add direction*4 to the 0x98 index, and
 * save_dqs_delay()/get_dqs_delay() use direction*9 as a table offset.
 */
#define DQS_READDIR 1
#define DQS_WRITEDIR 0
871
872
873 static void SetDQSDelayCSR(const struct mem_controller *ctrl, unsigned channel, unsigned bytelane, unsigned direction, unsigned dqs_delay)
874 { //ByteLane could be 0-8, last is for ECC
875         unsigned index;
876         uint32_t dword;
877         unsigned shift;
878
879         dqs_delay &= 0xff;
880
881         index = (bytelane>>2) + 1 + channel * 0x20 + (direction << 2);
882         shift = bytelane;
883         while(shift>3) {
884                 shift-=4;
885         }
886         shift <<= 3; // 8 bit
887
888         dword = pci_read_config32_index_wait(ctrl->f2, 0x98, index);
889         dword &= ~(0x3f<<shift);
890         dword |= (dqs_delay<<shift);
891         pci_write_config32_index_wait(ctrl->f2, 0x98, index, dword);
892
893 }
894
895 static void SetDQSDelayAllCSR(const struct mem_controller *ctrl, unsigned channel, unsigned direction, unsigned dqs_delay)
896 {
897         unsigned index;
898         uint32_t dword;
899         int i;
900
901         dword = 0;
902         dqs_delay &= 0xff;
903         for(i=0;i<4;i++) {
904                 dword |= dqs_delay<<(i*8);
905         }
906
907         index = 1 + channel * 0x20 + direction * 4;
908
909         for(i=0; i<2; i++) {
910                 pci_write_config32_index_wait(ctrl->f2, 0x98, index + i, dword);
911         }
912
913 }
914
/*
 * Return the midpoint between min_d and max_d, rounded up when the
 * distance between them is odd.
 */
static unsigned MiddleDQS(unsigned min_d, unsigned max_d)
{
        const unsigned width = max_d - min_d;

        /* half the width, plus one extra step for an odd width */
        return min_d + (width >> 1) + (width & 1);
}
924
/*
 * Store a delay in the table dqs_delay_a, which is indexed as
 * channel*18 + direction*9 + bytelane (2 directions of 9 byte lanes
 * per channel).
 */
static inline void save_dqs_delay(unsigned channel, unsigned bytelane, unsigned direction, uint8_t *dqs_delay_a, uint8_t dqs_delay)
{
        const unsigned slot = channel * 2*9 + direction * 9 + bytelane;

        dqs_delay_a[slot] = dqs_delay;
}
929
/*
 * Write the DQS test pattern held in buf_a to addr_lo.
 * The count handed to WriteLNTestPattern() is (pattern+1)*9, i.e. 9 for
 * pattern 0 and 18 for pattern 1 (presumably a number of cache lines -
 * confirm against WriteLNTestPattern).
 */
static void WriteDQSTestPattern(unsigned addr_lo, unsigned pattern , uint8_t *buf_a)
{
        const unsigned count = (pattern + 1) * 9;

        WriteLNTestPattern(addr_lo, buf_a, count);
}
934
/*
 * Load one dword from each of 18 consecutive 64-byte lines starting at
 * FS:addr_lo. The loaded values are discarded.
 *
 * The accesses use the FS segment, so the caller must have set the FS base
 * first. Five base registers are used, each pointing 128 bytes into a group
 * of four lines, so every access uses a displacement of -128, -64, 0 or +64.
 */
static void ReadL18TestPattern(unsigned addr_lo)
{
        /*
         * Every movl below overwrites %eax, so %eax must be declared as an
         * output: GCC does not allow an asm to modify an input-only operand.
         * It is early-clobber because it is written before the last input
         * register is consumed.
         */
        unsigned discard;

        //set fs and use fs prefix to access the mem
        __asm__ volatile (
                "movl %%fs:-128(%%esi), %%eax\n\t"  //TestAddr cache line
                "movl %%fs:-64(%%esi), %%eax\n\t"   //+1
                "movl %%fs:(%%esi), %%eax\n\t"  //+2
                "movl %%fs:64(%%esi), %%eax\n\t"   //+3

                "movl %%fs:-128(%%edi), %%eax\n\t"      //+4
                "movl %%fs:-64(%%edi), %%eax\n\t"       //+5
                "movl %%fs:(%%edi), %%eax\n\t"  //+6
                "movl %%fs:64(%%edi), %%eax\n\t"        //+7

                "movl %%fs:-128(%%ebx), %%eax\n\t"  //+8
                "movl %%fs:-64(%%ebx), %%eax\n\t"       //+9
                "movl %%fs:(%%ebx), %%eax\n\t"  //+10
                "movl %%fs:64(%%ebx), %%eax\n\t"        //+11

                "movl %%fs:-128(%%ecx), %%eax\n\t"      //+12
                "movl %%fs:-64(%%ecx), %%eax\n\t"       //+13
                "movl %%fs:(%%ecx), %%eax\n\t"  //+14
                "movl %%fs:64(%%ecx), %%eax\n\t"        //+15

                "movl %%fs:-128(%%edx), %%eax\n\t"      //+16
                "movl %%fs:-64(%%edx), %%eax\n\t"       //+17

                : "=&a" (discard)
                : "b" (addr_lo+128+8*64), "c" (addr_lo+128+12*64), "d" (addr_lo +128+16*64), "S"(addr_lo+128), "D"(addr_lo+128+4*64)
        );

        (void)discard;
}
966
/*
 * Load one dword from each of 9 consecutive 64-byte lines starting at
 * FS:addr_lo. The loaded values are discarded.
 *
 * The accesses use the FS segment, so the caller must have set the FS base
 * first. Each base register points 128 bytes into a group of four lines, so
 * every access uses a displacement of -128, -64, 0 or +64.
 */
static void ReadL9TestPattern(unsigned addr_lo)
{
        /*
         * Every movl below overwrites %eax, so %eax must be declared as an
         * output: GCC does not allow an asm to modify an input-only operand.
         * It is early-clobber because it is written before the last input
         * register is consumed.
         */
        unsigned discard;

        //set fs and use fs prefix to access the mem
        __asm__ volatile (

                "movl %%fs:-128(%%ecx), %%eax\n\t"  //TestAddr cache line
                "movl %%fs:-64(%%ecx), %%eax\n\t"   //+1
                "movl %%fs:(%%ecx), %%eax\n\t"      //+2
                "movl %%fs:64(%%ecx), %%eax\n\t"   //+3

                "movl %%fs:-128(%%edx), %%eax\n\t"  //+4
                "movl %%fs:-64(%%edx), %%eax\n\t"   //+5
                "movl %%fs:(%%edx), %%eax\n\t"      //+6
                "movl %%fs:64(%%edx), %%eax\n\t"   //+7

                "movl %%fs:-128(%%ebx), %%eax\n\t"      //+8

                : "=&a" (discard)
                : "b" (addr_lo+128+8*64), "c"(addr_lo+128), "d"(addr_lo+128+4*64)
        );

        (void)discard;
}
989
990
/*
 * Read back the test pattern at addr_lo: the 9-line reader is used for
 * pattern 0, the 18-line reader for any other pattern value.
 */
static void ReadDQSTestPattern(unsigned addr_lo, unsigned pattern)
{
        if (pattern != 0) {
                ReadL18TestPattern(addr_lo);
                return;
        }

        ReadL9TestPattern(addr_lo);
}
1000
/*
 * clflush 9 consecutive 64-byte lines starting at FS:addr_lo.
 * The accesses are FS-relative, so the FS base must already be set up.
 */
static void FlushDQSTestPattern_L9(unsigned addr_lo)
{
        /* each base points 128 bytes into a group of four lines */
        const unsigned lines_0_3 = addr_lo+128;
        const unsigned lines_4_7 = addr_lo+128+4*64;
        const unsigned line_8 = addr_lo+128+8*64;

        __asm__ volatile (
                "clflush %%fs:-128(%%ecx)\n\t"
                "clflush %%fs:-64(%%ecx)\n\t"
                "clflush %%fs:(%%ecx)\n\t"
                "clflush %%fs:64(%%ecx)\n\t"

                "clflush %%fs:-128(%%eax)\n\t"
                "clflush %%fs:-64(%%eax)\n\t"
                "clflush %%fs:(%%eax)\n\t"
                "clflush %%fs:64(%%eax)\n\t"

                "clflush %%fs:-128(%%ebx)\n\t"

                ::  "b" (line_8), "c"(lines_0_3), "a"(lines_4_7)
        );
}
/*
 * clflush 18 consecutive 64-byte lines starting at FS:addr_lo.
 * The accesses are FS-relative, so the FS base must already be set up.
 * The function is kept out of line (noinline), as in the original.
 */
static __attribute__((noinline)) void FlushDQSTestPattern_L18(unsigned addr_lo)
{
        /* each base points 128 bytes into a group of four lines */
        const unsigned lines_0_3 = addr_lo+128;
        const unsigned lines_4_7 = addr_lo+128+4*64;
        const unsigned lines_8_11 = addr_lo+128+8*64;
        const unsigned lines_12_15 = addr_lo+128+12*64;
        const unsigned lines_16_17 = addr_lo +128+16*64;

        __asm__ volatile (
                "clflush %%fs:-128(%%eax)\n\t"
                "clflush %%fs:-64(%%eax)\n\t"
                "clflush %%fs:(%%eax)\n\t"
                "clflush %%fs:64(%%eax)\n\t"

                "clflush %%fs:-128(%%edi)\n\t"
                "clflush %%fs:-64(%%edi)\n\t"
                "clflush %%fs:(%%edi)\n\t"
                "clflush %%fs:64(%%edi)\n\t"

                "clflush %%fs:-128(%%ebx)\n\t"
                "clflush %%fs:-64(%%ebx)\n\t"
                "clflush %%fs:(%%ebx)\n\t"
                "clflush %%fs:64(%%ebx)\n\t"

                "clflush %%fs:-128(%%ecx)\n\t"
                "clflush %%fs:-64(%%ecx)\n\t"
                "clflush %%fs:(%%ecx)\n\t"
                "clflush %%fs:64(%%ecx)\n\t"

                "clflush %%fs:-128(%%edx)\n\t"
                "clflush %%fs:-64(%%edx)\n\t"

                :: "b" (lines_8_11), "c" (lines_12_15), "d" (lines_16_17), "a"(lines_0_3), "D"(lines_4_7)
        );
}
1049
/*
 * Flush the test pattern lines at addr_lo from the cache: the 9-line
 * variant is used for pattern 0, the 18-line variant for any other value.
 */
static void FlushDQSTestPattern(unsigned addr_lo, unsigned pattern )
{
        if (pattern != 0) {
                FlushDQSTestPattern_L18(addr_lo);
                return;
        }

        FlushDQSTestPattern_L9(addr_lo);
}
1060
/*
 * Compare memory at FS:addr_lo against the expected pattern in buf_a.
 *
 * 9*64/4 dwords are compared byte by byte. Successive bytes are attributed
 * to byte lanes 0..7 in turn. When pattern is 1 the two channels' data are
 * interleaved in 8-byte units, so after every 8 bytes the other channel's
 * 8 bytes are skipped, and channel 1 starts 8 bytes in.
 *
 * Returns a bitmap with bit n set if every compared byte of lane n matched.
 */
static unsigned CompareDQSTestPattern(unsigned channel, unsigned addr_lo, unsigned pattern, uint8_t *buf_a)
{
        uint32_t *expect_p = (uint32_t *)buf_a;
        unsigned pass_map = 0xff;
        unsigned lane = 0;
        uint32_t got;
        uint32_t want;
        int dw;
        int bit;

        if (pattern && channel) {
                addr_lo += 8; //second channel
                expect_p += 2;
        }

        for (dw = 0; dw < 9*64/4; dw++) {
                /* fetch one dword through the FS segment */
                __asm__ volatile (
                        "movl %%fs:(%1), %0\n\t"
                        :"=b"(got): "a" (addr_lo)
                );
                want = *expect_p;

                print_debug_dqs_pair("\t\t\t\t\t\ttest_buf= ", (unsigned)expect_p, " value = ", want, 7);
                print_debug_dqs_pair("\t\t\t\t\t\ttaddr_lo = ",addr_lo, " value = ", got, 7);

                /* walk the four bytes of the dword; each belongs to the next lane */
                for (bit = 0; bit < 4*8; bit += 8) {
                        if (((got ^ want) >> bit) & 0xff) {
                                pass_map &= ~(1<<lane);
                        }
                        lane = (lane + 1) & 0x7;
                }
                print_debug_dqs("\t\t\t\t\t\tbitmap = ", pass_map, 7);

                /* lane wrapped to 0: a full 8-byte unit of this channel is done */
                if (lane == 0 && pattern == 1) { //dual channel
                        addr_lo += 8; //skip over other channel's data
                        expect_p += 2;
                }

                addr_lo += 4;
                expect_p += 1;
        }

        return pass_map;
}
1115
1116 static unsigned TrainDQSPos(const struct mem_controller *ctrl, unsigned channel, unsigned Direction, unsigned Pattern, uint8_t *buf_a, uint8_t *dqs_delay_a, struct sys_info *sysinfo)
1117 {
1118         unsigned ByteLane;
1119         unsigned Errors;
1120         unsigned BanksPresent;
1121
1122         unsigned MutualCSPassW[48];
1123
1124         unsigned ChipSel;
1125         unsigned DQSDelay;
1126
1127         unsigned TestAddr;
1128
1129         unsigned LastTest;
1130         unsigned RnkDlyFilterMax, RnkDlyFilterMin = 0;
1131         unsigned RnkDlySeqPassMax, RnkDlySeqPassMin = 0;
1132
1133         Errors = 0;
1134         BanksPresent = 0;
1135
1136         print_debug_dqs("\t\t\tTrainDQSPos begin ", 0, 3);
1137
1138         printk_debug("TrainDQSPos: MutualCSPassW[48] :%p\n", MutualCSPassW);
1139
1140         for(DQSDelay=0; DQSDelay<48; DQSDelay++) {
1141                 MutualCSPassW[DQSDelay] = 0xff; // Bitmapped status per delay setting, 0xff=All positions passing (1= PASS)
1142         }
1143
1144         for(ChipSel = 0; ChipSel < 8; ChipSel++) { //logical register chipselects 0..7
1145                 print_debug_dqs("\t\t\t\tTrainDQSPos: 11 ChipSel ", ChipSel, 4);
1146                 //FIXME: process 64MUXedMode
1147                 if(!ChipSelPresent(ctrl, ChipSel, sysinfo)) continue;
1148                 BanksPresent  = 1;
1149
1150                 TestAddr = Get_MCTSysAddr(ctrl, ChipSel, sysinfo);
1151
1152                 print_debug_dqs("\t\t\t\tTrainDQSPos: 12 TestAddr ", TestAddr, 4);
1153
1154                 //set fs and use fs prefix to access the mem
1155                 set_FSBASE(TestAddr>>24);
1156
1157                 if(Direction == DQS_READDIR) {
1158                         print_debug_dqs("\t\t\t\tTrainDQSPos: 13 for read so write at first", 0, 4);
1159                         WriteDQSTestPattern(TestAddr<<8, Pattern, buf_a);
1160                 }
1161
1162                 for(DQSDelay = 0; DQSDelay < 48; DQSDelay++ ){
1163                         print_debug_dqs("\t\t\t\t\tTrainDQSPos: 141 DQSDelay ", DQSDelay, 5);
1164                         if(MutualCSPassW[DQSDelay] == 0) continue; //skip current delay value if other chipselects have failed all 8 bytelanes
1165                         SetDQSDelayAllCSR(ctrl, channel, Direction, DQSDelay);
1166                         print_debug_dqs("\t\t\t\t\tTrainDQSPos: 142 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);
1167                         if(Direction == DQS_WRITEDIR) {
1168                                 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 143 for write", 0, 5);
1169                                 WriteDQSTestPattern(TestAddr<<8, Pattern, buf_a);
1170                         }
1171                         print_debug_dqs("\t\t\t\t\tTrainDQSPos: 144 Pattern ", Pattern, 5);
1172                         ReadDQSTestPattern(TestAddr<<8, Pattern);
1173                         print_debug_dqs("\t\t\t\t\tTrainDQSPos: 145 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);
1174                         MutualCSPassW[DQSDelay] &= CompareDQSTestPattern(channel, TestAddr<<8, Pattern, buf_a); //0: fail, 1=pass
1175                         print_debug_dqs("\t\t\t\t\tTrainDQSPos: 146 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);
1176                         SetTargetWTIO(TestAddr);
1177                         FlushDQSTestPattern(TestAddr<<8, Pattern);
1178                         ResetTargetWTIO();
1179                 }
1180         }
1181
1182         if(BanksPresent)
1183         for(ByteLane = 0; ByteLane < 8; ByteLane++) {
1184                 print_debug_dqs("\t\t\t\tTrainDQSPos: 31 ByteLane ",ByteLane, 4);
1185
1186                 LastTest = DQS_FAIL;
1187                 RnkDlySeqPassMax = 0;
1188                 RnkDlyFilterMax = 0;
1189                 RnkDlyFilterMin = 0;
1190                 for(DQSDelay=0; DQSDelay<48; DQSDelay++) {
1191                         if(MutualCSPassW[DQSDelay] & (1<<ByteLane)) {
1192
1193                                 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 321 DQSDelay ", DQSDelay, 5);
1194                                 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 322 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);
1195
1196                                 RnkDlySeqPassMax = DQSDelay;
1197                                 if(LastTest == DQS_FAIL) {
1198                                         RnkDlySeqPassMin = DQSDelay; //start sequential run
1199                                 }
1200                                 if((RnkDlySeqPassMax - RnkDlySeqPassMin)>(RnkDlyFilterMax-RnkDlyFilterMin)){
1201                                         RnkDlyFilterMin = RnkDlySeqPassMin;
1202                                         RnkDlyFilterMax = RnkDlySeqPassMax;
1203                                 }
1204                                 LastTest = DQS_PASS;
1205                         }
1206                         else {
1207                                 LastTest = DQS_FAIL;
1208                         }
1209                 }
1210                 print_debug_dqs("\t\t\t\tTrainDQSPos: 33 RnkDlySeqPassMax ", RnkDlySeqPassMax, 4);
1211
1212                 if(RnkDlySeqPassMax == 0) {
1213                         Errors |= SB_NODQSPOS; // no passing window
1214                 }
1215                 else {
1216                         print_debug_dqs("\t\t\t\tTrainDQSPos: 34 RnkDlyFilterMax ", RnkDlyFilterMax, 4);
1217                         print_debug_dqs("\t\t\t\tTrainDQSPos: 34 RnkDlyFilterMin ", RnkDlyFilterMin, 4);
1218                         if((RnkDlyFilterMax - RnkDlyFilterMin)< MIN_DQS_WNDW){
1219                                 Errors |= SB_SMALLDQS;
1220                         }
1221                         else {
1222                                 unsigned middle_dqs;
1223                                 middle_dqs = MiddleDQS(RnkDlyFilterMin, RnkDlyFilterMax);
1224                                 print_debug_dqs("\t\t\t\tTrainDQSPos: 35 middle_dqs ",middle_dqs, 4);
1225                                 SetDQSDelayCSR(ctrl, channel, ByteLane, Direction, middle_dqs);
1226                                 save_dqs_delay(channel, ByteLane, Direction, dqs_delay_a, middle_dqs);
1227                         }
1228                 }
1229
1230         }
1231
1232         print_debug_dqs("\t\t\tTrainDQSPos: end", 0xff, 3);
1233
1234         return Errors;
1235
1236
1237 }
1238
1239 static unsigned TrainReadDQS(const struct mem_controller *ctrl, unsigned channel, unsigned pattern, uint8_t *buf_a, uint8_t *dqs_delay_a, struct sys_info *sysinfo)
1240 {
1241         print_debug_dqs("\t\tTrainReadPos", 0, 2);
1242         return TrainDQSPos(ctrl, channel, DQS_READDIR, pattern, buf_a, dqs_delay_a, sysinfo);
1243 }
1244
1245 static unsigned TrainWriteDQS(const struct mem_controller *ctrl, unsigned channel, unsigned pattern, uint8_t *buf_a, uint8_t *dqs_delay_a, struct sys_info *sysinfo)
1246 {
1247         print_debug_dqs("\t\tTrainWritePos", 0, 2);
1248         return TrainDQSPos(ctrl, channel, DQS_WRITEDIR, pattern, buf_a, dqs_delay_a, sysinfo);
1249 }
1250
1251
1252
1253 static unsigned TrainDQSRdWrPos(const struct mem_controller *ctrl, struct sys_info *sysinfo)
1254 {
1255         static const uint32_t TestPatternJD1a[] = {
1256                                         0x00000000,0x00000000,0xFFFFFFFF,0xFFFFFFFF, // QW0-1, ALL-EVEN
1257                                         0x00000000,0x00000000,0x00000000,0x00000000, // QW2-3, ALL-EVEN
1258                                         0x00000000,0x00000000,0xFFFFFFFF,0xFFFFFFFF, // QW4-5, ALL-EVEN
1259                                         0x00000000,0x00000000,0x00000000,0x00000000, // QW6-7, ALL-EVEN
1260                                         0xFeFeFeFe,0xFeFeFeFe,0x01010101,0x01010101, // QW0-1, DQ0-ODD
1261                                         0xFeFeFeFe,0xFeFeFeFe,0x01010101,0x01010101, // QW2-3, DQ0-ODD
1262                                         0x01010101,0x01010101,0xFeFeFeFe,0xFeFeFeFe, // QW4-5, DQ0-ODD
1263                                         0xFeFeFeFe,0xFeFeFeFe,0x01010101,0x01010101, // QW6-7, DQ0-ODD
1264                                         0x02020202,0x02020202,0x02020202,0x02020202, // QW0-1, DQ1-ODD
1265                                         0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, // QW2-3, DQ1-ODD
1266                                         0xFdFdFdFd,0xFdFdFdFd,0x02020202,0x02020202, // QW4-5, DQ1-ODD
1267                                         0x02020202,0x02020202,0x02020202,0x02020202, // QW6-7, DQ1-ODD
1268                                         0x04040404,0x04040404,0xfBfBfBfB,0xfBfBfBfB, // QW0-1, DQ2-ODD
1269                                         0x04040404,0x04040404,0x04040404,0x04040404, // QW2-3, DQ2-ODD
1270                                         0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW4-5, DQ2-ODD
1271                                         0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW6-7, DQ2-ODD
1272                                         0x08080808,0x08080808,0xF7F7F7F7,0xF7F7F7F7, // QW0-1, DQ3-ODD
1273                                         0x08080808,0x08080808,0x08080808,0x08080808, // QW2-3, DQ3-ODD
1274                                         0xF7F7F7F7,0xF7F7F7F7,0x08080808,0x08080808, // QW4-5, DQ3-ODD
1275                                         0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW6-7, DQ3-ODD
1276                                         0x10101010,0x10101010,0x10101010,0x10101010, // QW0-1, DQ4-ODD
1277                                         0xeFeFeFeF,0xeFeFeFeF,0x10101010,0x10101010, // QW2-3, DQ4-ODD
1278                                         0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW4-5, DQ4-ODD
1279                                         0xeFeFeFeF,0xeFeFeFeF,0x10101010,0x10101010, // QW6-7, DQ4-ODD
1280                                         0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW0-1, DQ5-ODD
1281                                         0xdFdFdFdF,0xdFdFdFdF,0x20202020,0x20202020, // QW2-3, DQ5-ODD
1282                                         0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW4-5, DQ5-ODD
1283                                         0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW6-7, DQ5-ODD
1284                                         0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW0-1, DQ6-ODD
1285                                         0x40404040,0x40404040,0xBfBfBfBf,0xBfBfBfBf, // QW2-3, DQ6-ODD
1286                                         0x40404040,0x40404040,0xBfBfBfBf,0xBfBfBfBf, // QW4-5, DQ6-ODD
1287                                         0x40404040,0x40404040,0xBfBfBfBf,0xBfBfBfBf, // QW6-7, DQ6-ODD
1288                                         0x80808080,0x80808080,0x7F7F7F7F,0x7F7F7F7F, // QW0-1, DQ7-ODD
1289                                         0x80808080,0x80808080,0x7F7F7F7F,0x7F7F7F7F, // QW2-3, DQ7-ODD
1290                                         0x80808080,0x80808080,0x7F7F7F7F,0x7F7F7F7F, // QW4-5, DQ7-ODD
1291                                         0x80808080,0x80808080,0x80808080,0x80808080  // QW6-7, DQ7-ODD
1292                 };
1293         static const uint32_t TestPatternJD1b[] = {
1294                                         0x00000000,0x00000000,0x00000000,0x00000000, // QW0,CHA-B, ALL-EVEN
1295                                         0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, // QW1,CHA-B, ALL-EVEN
1296                                         0x00000000,0x00000000,0x00000000,0x00000000, // QW2,CHA-B, ALL-EVEN
1297                                         0x00000000,0x00000000,0x00000000,0x00000000, // QW3,CHA-B, ALL-EVEN
1298                                         0x00000000,0x00000000,0x00000000,0x00000000, // QW4,CHA-B, ALL-EVEN
1299                                         0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, // QW5,CHA-B, ALL-EVEN
1300                                         0x00000000,0x00000000,0x00000000,0x00000000, // QW6,CHA-B, ALL-EVEN
1301                                         0x00000000,0x00000000,0x00000000,0x00000000, // QW7,CHA-B, ALL-EVEN
1302                                         0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, // QW0,CHA-B, DQ0-ODD
1303                                         0x01010101,0x01010101,0x01010101,0x01010101, // QW1,CHA-B, DQ0-ODD
1304                                         0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, // QW2,CHA-B, DQ0-ODD
1305                                         0x01010101,0x01010101,0x01010101,0x01010101, // QW3,CHA-B, DQ0-ODD
1306                                         0x01010101,0x01010101,0x01010101,0x01010101, // QW4,CHA-B, DQ0-ODD
1307                                         0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, // QW5,CHA-B, DQ0-ODD
1308                                         0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, // QW6,CHA-B, DQ0-ODD
1309                                         0x01010101,0x01010101,0x01010101,0x01010101, // QW7,CHA-B, DQ0-ODD
1310                                         0x02020202,0x02020202,0x02020202,0x02020202, // QW0,CHA-B, DQ1-ODD
1311                                         0x02020202,0x02020202,0x02020202,0x02020202, // QW1,CHA-B, DQ1-ODD
1312                                         0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, // QW2,CHA-B, DQ1-ODD
1313                                         0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, // QW3,CHA-B, DQ1-ODD
1314                                         0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, // QW4,CHA-B, DQ1-ODD
1315                                         0x02020202,0x02020202,0x02020202,0x02020202, // QW5,CHA-B, DQ1-ODD
1316                                         0x02020202,0x02020202,0x02020202,0x02020202, // QW6,CHA-B, DQ1-ODD
1317                                         0x02020202,0x02020202,0x02020202,0x02020202, // QW7,CHA-B, DQ1-ODD
1318                                         0x04040404,0x04040404,0x04040404,0x04040404, // QW0,CHA-B, DQ2-ODD
1319                                         0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW1,CHA-B, DQ2-ODD
1320                                         0x04040404,0x04040404,0x04040404,0x04040404, // QW2,CHA-B, DQ2-ODD
1321                                         0x04040404,0x04040404,0x04040404,0x04040404, // QW3,CHA-B, DQ2-ODD
1322                                         0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW4,CHA-B, DQ2-ODD
1323                                         0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW5,CHA-B, DQ2-ODD
1324                                         0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW6,CHA-B, DQ2-ODD
1325                                         0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW7,CHA-B, DQ2-ODD
1326                                         0x08080808,0x08080808,0x08080808,0x08080808, // QW0,CHA-B, DQ3-ODD
1327                                         0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW1,CHA-B, DQ3-ODD
1328                                         0x08080808,0x08080808,0x08080808,0x08080808, // QW2,CHA-B, DQ3-ODD
1329                                         0x08080808,0x08080808,0x08080808,0x08080808, // QW3,CHA-B, DQ3-ODD
1330                                         0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW4,CHA-B, DQ3-ODD
1331                                         0x08080808,0x08080808,0x08080808,0x08080808, // QW5,CHA-B, DQ3-ODD
1332                                         0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW6,CHA-B, DQ3-ODD
1333                                         0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW7,CHA-B, DQ3-ODD
1334                                         0x10101010,0x10101010,0x10101010,0x10101010, // QW0,CHA-B, DQ4-ODD
1335                                         0x10101010,0x10101010,0x10101010,0x10101010, // QW1,CHA-B, DQ4-ODD
1336                                         0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW2,CHA-B, DQ4-ODD
1337                                         0x10101010,0x10101010,0x10101010,0x10101010, // QW3,CHA-B, DQ4-ODD
1338                                         0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW4,CHA-B, DQ4-ODD
1339                                         0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW5,CHA-B, DQ4-ODD
1340                                         0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW6,CHA-B, DQ4-ODD
1341                                         0x10101010,0x10101010,0x10101010,0x10101010, // QW7,CHA-B, DQ4-ODD
1342                                         0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW0,CHA-B, DQ5-ODD
1343                                         0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW1,CHA-B, DQ5-ODD
1344                                         0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW2,CHA-B, DQ5-ODD
1345                                         0x20202020,0x20202020,0x20202020,0x20202020, // QW3,CHA-B, DQ5-ODD
1346                                         0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW4,CHA-B, DQ5-ODD
1347                                         0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW5,CHA-B, DQ5-ODD
1348                                         0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW6,CHA-B, DQ5-ODD
1349                                         0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW7,CHA-B, DQ5-ODD
1350                                         0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW0,CHA-B, DQ6-ODD
1351                                         0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW1,CHA-B, DQ6-ODD
1352                                         0x40404040,0x40404040,0x40404040,0x40404040, // QW2,CHA-B, DQ6-ODD
1353                                         0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW3,CHA-B, DQ6-ODD
1354                                         0x40404040,0x40404040,0x40404040,0x40404040, // QW4,CHA-B, DQ6-ODD
1355                                         0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW5,CHA-B, DQ6-ODD
1356                                         0x40404040,0x40404040,0x40404040,0x40404040, // QW6,CHA-B, DQ6-ODD
1357                                         0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW7,CHA-B, DQ6-ODD
1358                                         0x80808080,0x80808080,0x80808080,0x80808080, // QW0,CHA-B, DQ7-ODD
1359                                         0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F, // QW1,CHA-B, DQ7-ODD
1360                                         0x80808080,0x80808080,0x80808080,0x80808080, // QW2,CHA-B, DQ7-ODD
1361                                         0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F, // QW3,CHA-B, DQ7-ODD
1362                                         0x80808080,0x80808080,0x80808080,0x80808080, // QW4,CHA-B, DQ7-ODD
1363                                         0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F, // QW5,CHA-B, DQ7-ODD
1364                                         0x80808080,0x80808080,0x80808080,0x80808080, // QW6,CHA-B, DQ7-ODD
1365                                         0x80808080,0x80808080,0x80808080,0x80808080  // QW7,CHA-B, DQ7-ODD
1366                 };
1367         uint8_t pattern_buf_x[64 * 18 + 16]; // We need to two cache line So have more 16 bytes to keep 16 byte alignment */
1368         uint8_t *buf_a;
1369
1370         unsigned pattern;
1371         uint32_t dword;
1372         uint32_t ecc_bit;
1373         unsigned Errors;
1374         unsigned channel;
1375         int i;
1376         unsigned DQSWrDelay;
1377         unsigned is_Width128 = sysinfo->meminfo[ctrl->node_id].is_Width128;
1378         uint8_t *dqs_delay_a = &sysinfo->dqs_delay_a[ctrl->node_id * 2*2*9]; //channel 2, direction 2 , bytelane *9
1379
1380         //enable SSE2
1381         enable_sse2();
1382
1383         //wrap32dis
1384         set_wrap32dis();
1385
1386         //disable ECC temp
1387         dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
1388         ecc_bit = dword & DCL_DimmEccEn;
1389         dword &= ~(DCL_DimmEccEn);
1390         pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dword);
1391
1392         //SetupDqsPattern
1393         buf_a = (uint8_t *)(((uint32_t)(&pattern_buf_x[0]) + 0x10) & (~0xf));
1394
1395         if(is_Width128){
1396                 pattern = 1;
1397                 for(i=0;i<16*18;i++) {
1398                         *((uint32_t *)(buf_a + i*4)) = TestPatternJD1b[i];
1399                  }
1400         }
1401         else {
1402                 pattern = 0;
1403                 for(i=0; i<16*9;i++) {
1404                         *((uint32_t *)(buf_a + i*4)) = TestPatternJD1a[i];
1405                 }
1406
1407         }
1408
1409         print_debug_dqs("\r\nTrainDQSRdWrPos: 0 ctrl ", ctrl->node_id, 0);
1410
1411         printk_debug("TrainDQSRdWrPos: buf_a:%p\n", buf_a);
1412
1413         Errors = 0;
1414         channel = 0;
1415
1416         if (!(sysinfo->meminfo[ctrl->node_id].dimm_mask & 0x0F) &&
1417              (sysinfo->meminfo[ctrl->node_id].dimm_mask & 0xF0)) { /* channelB only? */
1418                 channel = 1;
1419         }
1420
1421         while( (channel<2) && (!Errors)) {
1422                 print_debug_dqs("\tTrainDQSRdWrPos: 1 channel ",channel, 1);
1423                 for(DQSWrDelay = 0; DQSWrDelay < 48; DQSWrDelay++) {
1424                         unsigned err;
1425                         SetDQSDelayAllCSR(ctrl, channel, DQS_WRITEDIR, DQSWrDelay);
1426                         print_debug_dqs("\t\tTrainDQSRdWrPos: 21 DQSWrDelay ", DQSWrDelay, 2);
1427                         err= TrainReadDQS(ctrl, channel, pattern, buf_a, dqs_delay_a, sysinfo);
1428                         print_debug_dqs("\t\tTrainDQSRdWrPos: 22 err ",err, 2);
1429                         if(err == 0) break;
1430                         Errors |= err;
1431                 }
1432
1433                 print_debug_dqs("\tTrainDQSRdWrPos: 3 DQSWrDelay ", DQSWrDelay, 1);
1434
1435                 if(DQSWrDelay < 48) {
1436                         Errors = TrainWriteDQS(ctrl, channel, pattern, buf_a, dqs_delay_a, sysinfo);
1437                         print_debug_dqs("\tTrainDQSRdWrPos: 4 Errors ", Errors, 1);
1438
1439                 }
1440                 channel++;
1441                 if(!is_Width128){
1442                         //FIXME: 64MuxMode??
1443                         channel++; // skip channel if 64-bit mode
1444                 }
1445         }
1446
1447         //Enable ECC again
1448         dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
1449         dword &= ~(DCL_DimmEccEn);
1450         dword |= ecc_bit;
1451         pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dword);
1452
1453         //Clear wrap32dis
1454
1455         clear_wrap32dis();
1456
1457         //restore SSE2 setting
1458         disable_sse2();
1459
1460         print_debug_dqs("TrainDQSRdWrPos: ", 5, 0);
1461
1462         return Errors;
1463
1464 }
/*
 * Look up one stored DQS delay.
 *
 * dqs_delay_a is laid out as [channel][direction][bytelane] with
 * 2 directions and 9 byte lanes per channel.
 */
static inline uint8_t get_dqs_delay(unsigned channel, unsigned bytelane, unsigned direction, uint8_t *dqs_delay_a)
{
	return dqs_delay_a[channel * 2*9 + direction * 9 + bytelane];
}

/*
 * Interpolate a DQS delay between the stored delays of two byte lanes.
 *
 * InterFactor is a weight on a 0..256 scale (only 0..0xff can be passed):
 *	0:    100% ByteLane 0
 *	0x80: 50% between ByteLane 0 and 1
 *	0xff: 99.6% ByteLane 1 and 0.4% like 0
 *
 * Returns the interpolated delay, rounded down towards the smaller of the
 * two lane delays.
 */
static unsigned CalcEccDQSPos(unsigned channel,unsigned ByteLane0, unsigned ByteLane1, unsigned InterFactor, unsigned Direction, uint8_t *dqs_delay_a)
{
	unsigned DQSDelay0, DQSDelay1;
	unsigned low, span, weight;

	DQSDelay0 = get_dqs_delay(channel, ByteLane0, Direction, dqs_delay_a);
	DQSDelay1 = get_dqs_delay(channel, ByteLane1, Direction, dqs_delay_a);

	if (DQSDelay0 > DQSDelay1) {
		/*
		 * Measure from lane 1 upwards, so the weight has to be
		 * complemented.  The divisor below is 256, hence the
		 * complement is taken against 0x100 (not 0xff); otherwise
		 * InterFactor 0 would yield lane 0 minus one step.
		 */
		low = DQSDelay1;
		span = DQSDelay0 - DQSDelay1;
		weight = 0x100 - InterFactor;
	} else {
		low = DQSDelay0;
		span = DQSDelay1 - DQSDelay0;
		weight = InterFactor;
	}

	/* scale the distance by weight/256 and add it to the lower delay */
	return low + ((span * weight) >> 8);
}
1504
1505 static void SetEccDQSRdWrPos(const struct mem_controller *ctrl, struct sys_info *sysinfo)
1506 {
1507         unsigned channel;
1508         unsigned ByteLane;
1509         unsigned Direction;
1510         unsigned lane0, lane1, ratio;
1511         unsigned dqs_delay;
1512
1513         unsigned direction[] = { DQS_READDIR, DQS_WRITEDIR };
1514         int i;
1515         uint8_t *dqs_delay_a = &sysinfo->dqs_delay_a[ctrl->node_id * 2*2*9]; //channel 2, direction 2 , bytelane *9
1516
1517         ByteLane = 8;
1518
1519         for(channel = 0; channel < 2; channel++) {
1520                 for(i=0;i<2;i++) {
1521                         Direction = direction[i];
1522                         lane0 = 4; lane1 = 5; ratio = 0;
1523                         dqs_delay = CalcEccDQSPos(channel, lane0, lane1, ratio, Direction, dqs_delay_a);
1524                         print_debug_dqs_pair("\t\tSetEccDQSRdWrPos: channel ", channel, Direction==DQS_READDIR? " R dqs_delay":" W dqs_delay",  dqs_delay, 2);
1525                         SetDQSDelayCSR(ctrl, channel, ByteLane, Direction, dqs_delay);
1526                         save_dqs_delay(channel, ByteLane, Direction, dqs_delay_a, dqs_delay);
1527                 }
1528         }
1529 }
1530
1531 static unsigned train_DqsRcvrEn(const struct mem_controller *ctrl, unsigned Pass, struct sys_info *sysinfo)
1532 {
1533         print_debug_dqs("\r\ntrain_DqsRcvrEn: begin ctrl ", ctrl->node_id, 0);
1534         if(TrainRcvrEn(ctrl, Pass, sysinfo)) {
1535                 return 1;
1536         }
1537         print_debug_dqs("\r\ntrain_DqsRcvrEn: end ctrl ", ctrl->node_id, 0);
1538         return 0;
1539
1540 }
1541 static unsigned train_DqsPos(const struct mem_controller *ctrl, struct sys_info *sysinfo)
1542 {
1543         print_debug_dqs("\r\ntrain_DqsPos: begin ctrl ", ctrl->node_id, 0);
1544         if(TrainDQSRdWrPos(ctrl, sysinfo) != 0) {
1545                 printk_err("\r\nDQS Training Rd Wr failed ctrl%02x\r\n", ctrl->node_id);
1546                 return 1;
1547         }
1548         else {
1549                 SetEccDQSRdWrPos(ctrl, sysinfo);
1550         }
1551         print_debug_dqs("\r\ntrain_DqsPos: end ctrl ", ctrl->node_id, 0);
1552         return 0;
1553
1554 }
1555
1556 #if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
1557 static void f0_svm_workaround(int controllers, const struct mem_controller *ctrl, tsc_t *tsc0, struct sys_info *sysinfo)
1558 {
1559         tsc_t tsc1[8];
1560         unsigned cpu_f0_f1[8];
1561         int i;
1562
1563         print_debug_addr("dqs_timing: tsc1[8] :", tsc1);
1564
1565         for(i = 0; i < controllers; i++) {
1566                 if (!sysinfo->ctrl_present[i])
1567                         continue;
1568
1569                 /* Skip everything if I don't have any memory on this controller */
1570                 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1571
1572                 uint32_t dword;
1573
1574                 cpu_f0_f1[i] = is_cpu_pre_f2_in_bsp(i);
1575
1576                 if(!cpu_f0_f1[i]) continue;
1577
1578                 dword = pci_read_config32(ctrl[i].f2, DRAM_CTRL);
1579                 dword &= ~DC_DqsRcvEnTrain;
1580                 pci_write_config32(ctrl[i].f2, DRAM_CTRL, dword);
1581
1582                 dword = pci_read_config32(ctrl[i].f2, DRAM_INIT);
1583                 dword |= DI_EnDramInit;
1584                 pci_write_config32(ctrl[i].f2, DRAM_INIT, dword);
1585                 dword &= ~DI_EnDramInit;
1586                 pci_write_config32(ctrl[i].f2, DRAM_INIT, dword);
1587
1588                 tsc1[i] = rdtsc();
1589                 print_debug_dqs_tsc("begin: tsc1", i, tsc1[i].hi, tsc1[i].lo, 2);
1590
1591                 dword = tsc1[i].lo + tsc0[i].lo;
1592                 if((dword<tsc1[i].lo) || (dword<tsc0[i].lo)) {
1593                         tsc1[i].hi++;
1594                 }
1595                 tsc1[i].lo = dword;
1596                 tsc1[i].hi+= tsc0[i].hi;
1597
1598                 print_debug_dqs_tsc("end  : tsc1", i, tsc1[i].hi, tsc1[i].lo, 2);
1599
1600         }
1601
1602         for(i = 0; i < controllers; i++) {
1603                 if (!sysinfo->ctrl_present[i])
1604                         continue;
1605
1606                 /* Skip everything if I don't have any memory on this controller */
1607                 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1608
1609                 if(!cpu_f0_f1[i]) continue;
1610
1611                 tsc_t tsc;
1612
1613                 do {
1614                         tsc = rdtsc();
1615                 } while ((tsc1[i].hi>tsc.hi) || ((tsc1[i].hi==tsc.hi) && (tsc1[i].lo>tsc.lo)));
1616
1617                 print_debug_dqs_tsc("end  : tsc ", i, tsc.hi, tsc.lo, 2);
1618         }
1619
1620 }
1621
1622 #endif
1623
1624
1625 /* setting variable mtrr, comes from linux kernel source */
1626 static void set_var_mtrr_dqs(
1627         unsigned int reg, unsigned long basek, unsigned long sizek,
1628         unsigned char type, unsigned address_bits)
1629 {
1630         msr_t base, mask;
1631         unsigned address_mask_high;
1632
1633         address_mask_high = ((1u << (address_bits - 32u)) - 1u);
1634
1635         base.hi = basek >> 22;
1636         base.lo  = basek << 10;
1637
1638         if (sizek < 4*1024*1024) {
1639                 mask.hi = address_mask_high;
1640                 mask.lo = ~((sizek << 10) -1);
1641         }
1642         else {
1643                 mask.hi = address_mask_high & (~((sizek >> 22) -1));
1644                 mask.lo = 0;
1645         }
1646
1647         if (reg >= 8)
1648                 return;
1649
1650         if (sizek == 0) {
1651                 msr_t zero;
1652                 zero.lo = zero.hi = 0;
1653                 /* The invalid bit is kept in the mask, so we simply clear the
1654                    relevant mask register to disable a range. */
1655                 wrmsr (MTRRphysMask_MSR(reg), zero);
1656         } else {
1657                 /* Bit 32-35 of MTRRphysMask should be set to 1 */
1658                 base.lo |= type;
1659                 mask.lo |= 0x800;
1660                 wrmsr (MTRRphysBase_MSR(reg), base);
1661                 wrmsr (MTRRphysMask_MSR(reg), mask);
1662         }
1663 }
1664
1665
/*
 * fms: find most significant bit set.
 *
 * Returns the index (0..31) of the highest set bit of x, or 0 when x is 0.
 *
 * Uses the compiler builtin rather than hand-written "bsrl" inline asm:
 * the asm used a "g" input constraint, which also admits immediate
 * operands that the instruction cannot encode.
 */
static inline unsigned int fms(unsigned int x)
{
	if (x == 0)
		return 0;

	/* __builtin_clz() is undefined for 0, hence the guard above */
	return 31u - (unsigned int)__builtin_clz(x);
}
1677
/*
 * fls: find least significant bit set.
 *
 * Returns the index (0..31) of the lowest set bit of x, or 32 when x is 0.
 * (Note: despite the name, this is not the Linux "find last set".)
 *
 * Uses the compiler builtin rather than hand-written "bsfl" inline asm:
 * the asm used a "g" input constraint, which also admits immediate
 * operands that the instruction cannot encode.
 */
static inline unsigned int fls(unsigned int x)
{
	if (x == 0)
		return 32;

	/* __builtin_ctz() is undefined for 0, hence the guard above */
	return (unsigned int)__builtin_ctz(x);
}
1689
1690 static unsigned int range_to_mtrr(unsigned int reg,
1691         unsigned long range_startk, unsigned long range_sizek,
1692         unsigned long next_range_startk, unsigned char type, unsigned address_bits)
1693 {
1694         if (!range_sizek || (reg >= 8)) {
1695                 return reg;
1696         }
1697         while(range_sizek) {
1698                 unsigned long max_align, align;
1699                 unsigned long sizek;
1700                 /* Compute the maximum size I can make a range */
1701                 max_align = fls(range_startk);
1702                 align = fms(range_sizek);
1703                 if (align > max_align) {
1704                         align = max_align;
1705                 }
1706                 sizek = 1 << align;
1707 #if CONFIG_MEM_TRAIN_SEQ != 1
1708                 printk_debug("Setting variable MTRR %d, base: %4dMB, range: %4dMB, type %s\r\n",
1709                         reg, range_startk >>10, sizek >> 10,
1710                         (type==MTRR_TYPE_UNCACHEABLE)?"UC":
1711                             ((type==MTRR_TYPE_WRBACK)?"WB":"Other")
1712                         );
1713 #endif
1714                 set_var_mtrr_dqs(reg++, range_startk, sizek, type, address_bits);
1715                 range_startk += sizek;
1716                 range_sizek -= sizek;
1717                 if (reg >= 8)
1718                         break;
1719         }
1720         return reg;
1721 }
1722
1723 static void set_top_mem_ap(unsigned tom_k, unsigned tom2_k)
1724 {
1725         msr_t msr;
1726
1727         /* Now set top of memory */
1728         msr.lo = (tom2_k & 0x003fffff) << 10;
1729         msr.hi = (tom2_k & 0xffc00000) >> 22;
1730         wrmsr(TOP_MEM2, msr);
1731
1732         msr.lo = (tom_k & 0x003fffff) << 10;
1733         msr.hi = (tom_k & 0xffc00000) >> 22;
1734         wrmsr(TOP_MEM, msr);
1735 }
1736
1737 static void setup_mtrr_dqs(unsigned tom_k, unsigned tom2_k)
1738 {
1739         unsigned reg;
1740         msr_t msr;
1741
1742 #if 0
1743         //still enable from cache_as_ram.inc
1744         msr = rdmsr(SYSCFG_MSR);
1745         msr.lo |= SYSCFG_MSR_MtrrFixDramModEn;
1746         wrmsr(SYSCFG_MSR,msr);
1747 #endif
1748
1749         //[0,512k), [512k, 640k)
1750         msr.hi = 0x1e1e1e1e;
1751         msr.lo = msr.hi;
1752         wrmsr(0x250, msr);
1753         wrmsr(0x258, msr);
1754
1755         //[1M, TOM)
1756         reg = range_to_mtrr(2, 0, tom_k,4*1024*1024, MTRR_TYPE_WRBACK, 40);
1757
1758         //[4G, TOM2)
1759         if(tom2_k) {
1760                 //enable tom2 and type
1761                 msr = rdmsr(SYSCFG_MSR);
1762                 msr.lo |= (1<<21) | (1<<22); //MtrrTom2En and Tom2ForceMemTypeWB
1763                 wrmsr(SYSCFG_MSR, msr);
1764         }
1765
1766 }
1767
1768 static void clear_mtrr_dqs(unsigned tom2_k)
1769 {
1770         msr_t msr;
1771         unsigned i;
1772
1773         //still enable from cache_as_ram.inc
1774         msr = rdmsr(SYSCFG_MSR);
1775         msr.lo |= SYSCFG_MSR_MtrrFixDramModEn;
1776         wrmsr(SYSCFG_MSR,msr);
1777
1778         //[0,512k), [512k, 640k)
1779         msr.hi = 0;
1780         msr.lo = msr.hi;
1781         wrmsr(0x250, msr);
1782         wrmsr(0x258, msr);
1783
1784         //[1M, TOM)
1785         for(i=0x204;i<0x210;i++) {
1786                 wrmsr(i, msr);
1787         }
1788
1789         //[4G, TOM2)
1790         if(tom2_k) {
1791                 //enable tom2 and type
1792                 msr = rdmsr(SYSCFG_MSR);
1793                 msr.lo &= ~((1<<21) | (1<<22)); //MtrrTom2En and Tom2ForceMemTypeWB
1794                 wrmsr(SYSCFG_MSR, msr);
1795         }
1796 }
1797
1798 static void set_htic_bit(unsigned i, unsigned val, unsigned bit)
1799 {
1800         uint32_t dword;
1801         dword = pci_read_config32(PCI_DEV(0, 0x18+i, 0), HT_INIT_CONTROL);
1802         dword &= ~(1<<bit);
1803         dword |= ((val & 1) <<bit);
1804         pci_write_config32(PCI_DEV(0, 0x18+i, 0), HT_INIT_CONTROL, dword);
1805 }
1806
1807
1808 static unsigned get_htic_bit(unsigned i, unsigned bit)
1809 {
1810         uint32_t dword;
1811         dword = pci_read_config32(PCI_DEV(0, 0x18+i, 0), HT_INIT_CONTROL);
1812         dword &= (1<<bit);
1813         return dword;
1814 }
1815
/*
 * Spin until bit 9 of node 0's HT_INIT_CONTROL register reads as set
 * (the flag written by set_sysinfo_in_ram()).  There is no timeout.
 */
static void wait_till_sysinfo_in_ram(void)
{
	while (!get_htic_bit(0, 9)) {
		/* keep polling */
	}
}
1822
/*
 * Publish the "sysinfo in RAM" flag: store the lowest bit of val in
 * bit 9 of node 0's HT_INIT_CONTROL register.
 */
static void set_sysinfo_in_ram(unsigned val)
{
	const unsigned node = 0;
	const unsigned flag_bit = 9;

	set_htic_bit(node, val, flag_bit);
}
1827
1828 #ifdef S3_NVRAM_EARLY
1829 int s3_save_nvram_early(u32 dword, int size, int  nvram_pos);
1830 int s3_load_nvram_early(int size, u32 *old_dword, int nvram_pos);
1831 #else
1832 int s3_save_nvram_early(u32 dword, int size, int  nvram_pos)
1833 {
1834         return nvram_pos;
1835 }
1836
1837 int s3_load_nvram_early(int size, u32 *old_dword, int nvram_pos)
1838 {
1839         die("No memory NVRAM loader for DQS data! Unable to restore memory state\n");
1840
1841         return nvram_pos; /* Make GCC happy */
1842 }
1843 #endif
1844
1845 static int save_index_to_pos(unsigned int dev, int size, int index, int nvram_pos)
1846 {
1847         u32 dword = pci_read_config32_index_wait(dev, 0x98, index);
1848
1849         return s3_save_nvram_early(dword, size, nvram_pos);
1850 }
1851
1852 static int load_index_to_pos(unsigned int dev, int size, int index, int nvram_pos)
1853 {
1854
1855         u32 old_dword = pci_read_config32_index_wait(dev, 0x98, index);
1856         nvram_pos = s3_load_nvram_early(size, &old_dword, nvram_pos);
1857         pci_write_config32_index_wait(dev, 0x98, index, old_dword);
1858         return nvram_pos;
1859 }
1860
/*
 * Restore the indexed DQS registers of one channel from NVRAM.
 * Channel ch uses index offset ch * 0x20.  The registers are processed in
 * table order and add up to 30 bytes per channel.
 * Returns the NVRAM position after the last register.
 */
static int dqs_load_MC_NVRAM_ch(unsigned int dev, int ch, int pos)
{
	/* { register index, number of bytes kept in NVRAM } */
	static const struct {
		unsigned char index;
		unsigned char size;
	} regs[] = {
		{ 0x00, 4 }, { 0x01, 4 }, { 0x02, 4 }, { 0x03, 1 },
		{ 0x04, 4 }, { 0x05, 4 }, { 0x06, 4 }, { 0x07, 1 },
		{ 0x10, 1 }, { 0x13, 1 }, { 0x16, 1 }, { 0x19, 1 },
	};
	const int offset = ch * 0x20;
	unsigned k;

	for (k = 0; k < sizeof(regs) / sizeof(regs[0]); k++)
		pos = load_index_to_pos(dev, regs[k].size, regs[k].index + offset, pos);

	return pos;
}
1879
/*
 * Save the indexed DQS registers of one channel to NVRAM.
 * Channel ch uses index offset ch * 0x20.  The registers are processed in
 * table order and add up to 30 bytes per channel.
 * Returns the NVRAM position after the last register.
 */
static int dqs_save_MC_NVRAM_ch(unsigned int dev, int ch, int pos)
{
	/* { register index, number of bytes kept in NVRAM } */
	static const struct {
		unsigned char index;
		unsigned char size;
	} regs[] = {
		{ 0x00, 4 }, { 0x01, 4 }, { 0x02, 4 }, { 0x03, 1 },
		{ 0x04, 4 }, { 0x05, 4 }, { 0x06, 4 }, { 0x07, 1 },
		{ 0x10, 1 }, { 0x13, 1 }, { 0x16, 1 }, { 0x19, 1 },
	};
	const int offset = ch * 0x20;
	unsigned k;

	for (k = 0; k < sizeof(regs) / sizeof(regs[0]); k++)
		pos = save_index_to_pos(dev, regs[k].size, regs[k].index + offset, pos);

	return pos;
}
1898
1899 static void dqs_save_MC_NVRAM(unsigned int dev)
1900 {
1901         int pos = 0;
1902         u32 reg;
1903         printk_debug("DQS SAVE NVRAM: %x\n", dev);
1904         pos = dqs_save_MC_NVRAM_ch(dev, 0, pos);
1905         pos = dqs_save_MC_NVRAM_ch(dev, 1, pos);
1906         /* save the maxasync lat here */
1907         reg = pci_read_config32(dev, DRAM_CONFIG_HIGH);
1908         pos = s3_save_nvram_early(reg, 4, pos);
1909 }
1910
1911 static void dqs_restore_MC_NVRAM(unsigned int dev)
1912 {
1913         int pos = 0;
1914         u32 reg;
1915
1916         printk_debug("DQS RESTORE FROM NVRAM: %x\n", dev);
1917         pos = dqs_load_MC_NVRAM_ch(dev, 0, pos);
1918         pos = dqs_load_MC_NVRAM_ch(dev, 1, pos);
1919         /* load the maxasync lat here */
1920         pos = s3_load_nvram_early(4, &reg, pos);
1921         reg &= (DCH_MaxAsyncLat_MASK <<DCH_MaxAsyncLat_SHIFT);
1922         reg |= pci_read_config32(dev, DRAM_CONFIG_HIGH);
1923         pci_write_config32(dev, DRAM_CONFIG_HIGH, reg);
1924 }
1925
1926 #if CONFIG_MEM_TRAIN_SEQ == 0
1927 #if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
1928 static void dqs_timing(int controllers, const struct mem_controller *ctrl, tsc_t *tsc0, struct sys_info *sysinfo)
1929 #else
1930 static void dqs_timing(int controllers, const struct mem_controller *ctrl, struct sys_info *sysinfo)
1931 #endif
1932 {
1933         int  i;
1934
1935         tsc_t tsc[5];
1936
1937         //need to enable mtrr, so dqs training could access the test address
1938         setup_mtrr_dqs(sysinfo->tom_k, sysinfo->tom2_k);
1939
1940         for(i = 0; i < controllers; i++) {
1941                 if (!sysinfo->ctrl_present[ i ])
1942                         continue;
1943
1944                 /* Skip everything if I don't have any memory on this controller */
1945                 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1946
1947                 fill_mem_cs_sysinfo(i, ctrl+i, sysinfo);
1948         }
1949
1950         tsc[0] = rdtsc();
1951         for(i = 0; i < controllers; i++) {
1952                 if (!sysinfo->ctrl_present[ i ])
1953                         continue;
1954
1955                 /* Skip everything if I don't have any memory on this controller */
1956                 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1957
1958                 printk_debug("DQS Training:RcvrEn:Pass1: %02x\n", i);
1959                 if(train_DqsRcvrEn(ctrl+i, 1, sysinfo)) goto out;
1960                 printk_debug(" done\r\n");
1961         }
1962
1963         tsc[1] = rdtsc();
1964 #if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
1965         f0_svm_workaround(controllers, ctrl, tsc0, sysinfo);
1966 #endif
1967
1968         tsc[2] = rdtsc();
1969         for(i = 0; i < controllers; i++) {
1970                 if (!sysinfo->ctrl_present[i])
1971                         continue;
1972
1973                 /* Skip everything if I don't have any memory on this controller */
1974                 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1975
1976                 printk_debug("DQS Training:DQSPos: %02x\n", i);
1977                 if(train_DqsPos(ctrl+i, sysinfo)) goto out;
1978                 printk_debug(" done\r\n");
1979         }
1980
1981         tsc[3] = rdtsc();
1982         for(i = 0; i < controllers; i++) {
1983                 if (!sysinfo->ctrl_present[i])
1984                         continue;
1985
1986                 /* Skip everything if I don't have any memory on this controller */
1987                 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1988
1989                 printk_debug("DQS Training:RcvrEn:Pass2: %02x\n", i);
1990                 if(train_DqsRcvrEn(ctrl+i, 2, sysinfo)) goto out;
1991                 printk_debug(" done\r\n");
1992                 sysinfo->mem_trained[i]=1;
1993                 dqs_save_MC_NVRAM((ctrl+i)->f2);
1994         }
1995
1996 out:
1997         tsc[4] = rdtsc();
1998         clear_mtrr_dqs(sysinfo->tom2_k);
1999
2000
2001         for(i=0;i<5;i++) {
2002                 print_debug_dqs_tsc_x("DQS Training:tsc", i,  tsc[i].hi, tsc[i].lo);
2003         }
2004
2005
2006
2007 }
2008
2009 #endif
2010
2011
2012 #if CONFIG_MEM_TRAIN_SEQ > 0
2013
2014 static void dqs_timing(int i, const struct mem_controller *ctrl, struct sys_info *sysinfo, unsigned v)
2015 {
2016
2017         int ii;
2018
2019          tsc_t tsc[4];
2020
2021         if(sysinfo->mem_trained[i] != 0x80) return;
2022
2023 #if CONFIG_MEM_TRAIN_SEQ == 1
2024         //need to enable mtrr, so dqs training could access the test address
2025         setup_mtrr_dqs(sysinfo->tom_k, sysinfo->tom2_k);
2026 #endif
2027
2028         fill_mem_cs_sysinfo(i, ctrl, sysinfo);
2029
2030         if(v) {
2031                 tsc[0] = rdtsc();
2032
2033                 printk_debug("set DQS timing:RcvrEn:Pass1: %02x\n", i);
2034         }
2035         if(train_DqsRcvrEn(ctrl, 1,  sysinfo)) {
2036                 sysinfo->mem_trained[i]=0x81; //
2037                 goto out;
2038         }
2039
2040         if(v) {
2041                 printk_debug(" done\r\n");
2042                 tsc[1] = rdtsc();
2043                 printk_debug("set DQS timing:DQSPos: %02x\n", i);
2044         }
2045
2046         if(train_DqsPos(ctrl, sysinfo)) {
2047                 sysinfo->mem_trained[i]=0x82; //
2048                 goto out;
2049         }
2050
2051         if(v) {
2052                 printk_debug(" done\r\n");
2053                 tsc[2] = rdtsc();
2054
2055                 printk_debug("set DQS timing:RcvrEn:Pass2: %02x\n", i);
2056         }
2057         if(train_DqsRcvrEn(ctrl, 2,  sysinfo)){
2058                 sysinfo->mem_trained[i]=0x83; //
2059                 goto out;
2060         }
2061
2062         if(v) {
2063                 printk_debug(" done\r\n");
2064
2065                 tsc[3] = rdtsc();
2066         }
2067
2068 out:
2069 #if CONFIG_MEM_TRAIN_SEQ == 1
2070         clear_mtrr_dqs(sysinfo->tom2_k);
2071 #endif
2072
2073         if(v) {
2074                 for(ii=0;ii<4;ii++) {
2075                       print_debug_dqs_tsc_x("Total DQS Training : tsc ", ii,  tsc[ii].hi, tsc[ii].lo);
2076                 }
2077         }
2078
2079         if(sysinfo->mem_trained[i] == 0x80) {
2080                 sysinfo->mem_trained[i]=1;
2081         }
2082
2083 }
2084 #endif
2085
2086 #if CONFIG_MEM_TRAIN_SEQ == 1
2087 static void train_ram(unsigned nodeid, struct sys_info *sysinfo, struct sys_info *sysinfox)
2088 {
2089         dqs_timing(nodeid, &sysinfo->ctrl[nodeid], sysinfo, 0); // keep the output tidy
2090 //      memcpy(&sysinfox->dqs_rcvr_dly_a[nodeid * 2 * 8],&sysinfo->dqs_rcvr_dly_a[nodeid * 2 * 8], 2*8);
2091 //      memcpy(&sysinfox->dqs_delay_a[nodeid * 2 * 2 * 9], &sysinfo->dqs_delay_a[nodeid * 2 * 2 * 9], 2 * 2 * 9);
2092         sysinfox->mem_trained[nodeid] = sysinfo->mem_trained[nodeid];
2093
2094 }
2095 static void copy_and_run_ap_code_in_car(unsigned ret_addr);
2096 static inline void train_ram_on_node(unsigned nodeid, unsigned coreid, struct sys_info *sysinfo, unsigned retcall)
2097 {
2098         if(coreid) return; // only do it on core0
2099         struct sys_info *sysinfox = (void*)((CONFIG_RAMTOP) - CONFIG_DCACHE_RAM_GLOBAL_VAR_SIZE);
2100         wait_till_sysinfo_in_ram(); // use pci to get it
2101
2102         if(sysinfox->mem_trained[nodeid] == 0x80) {
2103         #if 0
2104                 sysinfo->tom_k = sysinfox->tom_k;
2105                 sysinfo->tom2_k = sysinfox->tom2_k;
2106                 sysinfo->meminfo[nodeid].is_Width128 = sysinfox->meminfo[nodeid].is_Width128;
2107                 sysinfo->mem_trained[nodeid] = sysinfox->mem_trained[nodeid];
2108                 memcpy(&sysinfo->ctrl[nodeid], &sysinfox->ctrl[nodeid], sizeof(struct mem_controller));
2109         #else
2110                 memcpy(sysinfo, sysinfox, CONFIG_DCACHE_RAM_GLOBAL_VAR_SIZE);
2111         #endif
2112                 set_top_mem_ap(sysinfo->tom_k, sysinfo->tom2_k); // keep the ap's tom consistent with bsp's
2113         #if CONFIG_AP_CODE_IN_CAR == 0
2114                 printk_debug("CODE IN ROM AND RUN ON NODE: %02x\n", nodeid);
2115                 train_ram(nodeid, sysinfo, sysinfox);
2116         #else
2117                 /* Can copy dqs_timing to ap cache and run from cache?
2118                 * we need coreboot_ap_car.rom? and treat it as coreboot_ram.rom for ap ?
2119                 */
2120                 copy_and_run_ap_code_in_car(retcall);
2121                 // will go back by jump
2122         #endif
2123         }
2124 }
2125 #endif