Since some people disapprove of white space cleanups mixed in regular commits
[coreboot.git] / src / northbridge / amd / amdk8 / raminit_f_dqs.c
1 /*
2  * This file is part of the coreboot project.
3  *
4  * Copyright (C) 2005 YingHai Lu
5  * Copyright (C) 2008 Advanced Micro Devices, Inc.
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; version 2 of the License.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
19 */
20
21 #include <arch/stages.h>
22
/*
 * Verbosity of the DQS training trace.
 * 0 disables all output of the level-filtered print_debug_dqs*() helpers;
 * with a value N > 0 a message is printed when its level argument is below N.
 */
#define DQS_TRAIN_DEBUG		0
25
/*
 * Print "<str><val in hex>" followed by a newline at BIOS_DEBUG level.
 *
 * The message is emitted only when DQS_TRAIN_DEBUG is greater than level;
 * with DQS_TRAIN_DEBUG == 0 the function body is compiled out entirely.
 */
static inline void print_debug_dqs(const char *str, unsigned val, unsigned level)
{
#if DQS_TRAIN_DEBUG > 0
	if (level >= DQS_TRAIN_DEBUG)
		return;

	printk(BIOS_DEBUG, "%s%x\n", str, val);
#endif
}
34
/*
 * Print two labelled values on one line: "<str><val><str2><val2>", each value
 * as 8 hex digits.
 *
 * The message is emitted only when DQS_TRAIN_DEBUG is greater than level;
 * with DQS_TRAIN_DEBUG == 0 the function body is compiled out entirely.
 */
static inline void print_debug_dqs_pair(const char *str, unsigned val, const char *str2, unsigned val2, unsigned level)
{
#if DQS_TRAIN_DEBUG > 0
	if (level >= DQS_TRAIN_DEBUG)
		return;

	printk(BIOS_DEBUG, "%s%08x%s%08x\n", str, val, str2, val2);
#endif
}
43
/*
 * Print an indexed pair of 32-bit values as "<str>[<i>]=<val><val2>", all in
 * hex (i as 2 digits, each value as 8 digits).
 *
 * The message is emitted only when DQS_TRAIN_DEBUG is greater than level;
 * with DQS_TRAIN_DEBUG == 0 the function body is compiled out entirely.
 */
static inline void print_debug_dqs_tsc(const char *str, unsigned i, unsigned val, unsigned val2, unsigned level)
{
#if DQS_TRAIN_DEBUG > 0
	if (level >= DQS_TRAIN_DEBUG)
		return;

	printk(BIOS_DEBUG, "%s[%02x]=%08x%08x\n", str, i, val, val2);
#endif
}
52
53 static inline void print_debug_dqs_tsc_x(const char *str, unsigned i, unsigned val, unsigned val2)
54 {
55         printk(BIOS_DEBUG, "%s[%02x]=%08x%08x\n", str, i, val, val2);
56
57 }
58
59 static void fill_mem_cs_sysinfo(unsigned nodeid, const struct mem_controller *ctrl, struct sys_info *sysinfo)
60 {
61
62         int i;
63         sysinfo->mem_base[nodeid] = pci_read_config32(ctrl->f1, 0x40 + (nodeid<<3));
64
65         for(i=0;i<8; i++) {
66                 sysinfo->cs_base[nodeid*8+i] = pci_read_config32(ctrl->f2, 0x40 + (i<<2));
67         }
68
69         sysinfo->hole_reg[nodeid] = pci_read_config32(ctrl->f1, 0xf0);
70
71 }
72 static unsigned Get_MCTSysAddr(const struct mem_controller *ctrl,  unsigned cs_idx, struct sys_info *sysinfo)
73 {
74         uint32_t dword;
75         uint32_t mem_base;
76         unsigned nodeid = ctrl->node_id;
77
78 #if CONFIG_HW_MEM_HOLE_SIZEK != 0
79         uint32_t hole_reg;
80 #endif
81
82         //get the local base addr of the chipselect
83         dword = sysinfo->cs_base[nodeid * 8 + cs_idx];
84         dword &= 0xfffffff0;
85
86         //sys addr= node base + local cs base
87         mem_base = sysinfo->mem_base[nodeid];
88         mem_base &= 0xffff0000;
89
90         dword += mem_base;
91 #if CONFIG_HW_MEM_HOLE_SIZEK != 0
92         hole_reg = sysinfo->hole_reg[nodeid];
93         if(hole_reg & 1) {
94                 unsigned hole_startk;
95                 hole_startk = (hole_reg & (0xff<<24)) >> 10;
96                 if( (dword >= (hole_startk<<2)) && (dword < ((4*1024*1024)<<2))) {
97                         dword += ((4*1024*1024 - hole_startk)<<2);
98                 }
99         }
100 #endif
101
102         //add 1MB offset to avoid compat area
103         dword += (1<<(20-8));
104
105         //So final result is upper 32 bit addr
106
107         return dword;
108
109 }
110
/*
 * Return the test address for receiver-enable training of chip select
 * cs_idx. This simply forwards to Get_MCTSysAddr(); the channel argument
 * is accepted for interface symmetry but is not used.
 */
static unsigned Get_RcvrSysAddr(const struct mem_controller * ctrl, unsigned channel, unsigned cs_idx, struct sys_info *sysinfo)
{
	unsigned sys_addr = Get_MCTSysAddr(ctrl, cs_idx, sysinfo);

	return sys_addr;
}
116
117 static inline unsigned long read_cr4(void)
118 {
119         unsigned long cr4;
120         asm volatile ("movl %%cr4, %0" : "=r" (cr4));
121         return cr4;
122 }
123
124 static inline void write_cr4(unsigned long cr4)
125 {
126         asm volatile ("movl %0, %%cr4" : : "r" (cr4));
127 }
128
129
/*
 * Set bit 9 of CR4 so that the SSE instructions used by the test-pattern
 * writer (movdqa/movntdq) can be executed.
 */
static inline void enable_sse2(void)
{
	write_cr4(read_cr4() | (1 << 9));
}
137
/*
 * Clear bit 9 of CR4 again, undoing enable_sse2().
 * Note that the bit is cleared unconditionally; the value it had before
 * enable_sse2() was called is not remembered.
 */
static inline void disable_sse2(void)
{
	write_cr4(read_cr4() & ~(1 << 9));
}
145
146
147 static void set_wrap32dis(void) {
148         msr_t msr;
149
150         msr = rdmsr(0xc0010015);
151         msr.lo |= (1<<17);
152
153         wrmsr(0xc0010015, msr);
154
155 }
156
157 static void clear_wrap32dis(void) {
158         msr_t msr;
159
160         msr = rdmsr(0xc0010015);
161         msr.lo &= ~(1<<17);
162
163         wrmsr(0xc0010015, msr);
164
165 }
166
167 static void set_FSBASE(uint32_t addr_hi)
168 {
169         msr_t msr;
170
171         //set fs and use fs prefix to access the mem
172         msr.hi = addr_hi;
173         msr.lo = 0;
174         wrmsr(0xc0000100, msr); //FS_BASE
175
176 }
177
178 static unsigned ChipSelPresent(const struct mem_controller *ctrl, unsigned cs_idx, struct sys_info *sysinfo)
179 {
180         unsigned enabled;
181         unsigned nodeid = ctrl->node_id;
182
183
184         enabled = sysinfo->cs_base[nodeid * 8 + cs_idx];
185         enabled &= 1;
186
187         return enabled;
188
189 }
190
/*
 * Tell whether the rank behind chip select cs_idx takes part in
 * receiver-enable training. This forwards to ChipSelPresent(); channel and
 * is_Width128 are accepted but not used.
 */
static unsigned RcvrRankEnabled(const struct mem_controller *ctrl, int channel, int cs_idx, unsigned is_Width128, struct sys_info *sysinfo)
{
	unsigned present = ChipSelPresent(ctrl, cs_idx, sysinfo);

	return present;
}
195
/*
 * Copy line_num cache lines (64 bytes each) from buf_a to %fs:addr_lo using
 * non-temporal 128-bit stores.
 *
 * addr_lo   offset relative to the FS base previously set with set_FSBASE()
 * buf_a     source pattern; movdqa requires it to be 16-byte aligned
 * line_num  number of 64-byte lines to write; 0 writes nothing
 *
 * SSE must have been enabled (see enable_sse2()) before this is called.
 */
static void WriteLNTestPattern(unsigned addr_lo, uint8_t *buf_a, unsigned line_num)
{
	/* Each iteration moves 16 bytes, so four iterations per line. */
	unsigned chunks = line_num * 4;

	/*
	 * "loop" decrements the counter before testing it: starting from 0
	 * would wrap around and iterate 2^32 times.
	 */
	if (chunks == 0)
		return;

	/*
	 * The destination offset, the source pointer and the counter are all
	 * modified by the asm, so they must be read-write operands; the
	 * statement also writes memory and uses xmm0 and the flags.
	 */
	__asm__ volatile (
		"1:\n\t"
		"movdqa (%1), %%xmm0\n\t"
		"movntdq %%xmm0, %%fs:(%0)\n\t" /* xmm0 is 128 bit */
		"addl %3, %0\n\t"
		"addl %3, %1\n\t"
		"loop 1b\n\t"
		: "+a" (addr_lo), "+b" (buf_a), "+c" (chunks)
		: "d" (16)
		: "memory", "cc", "xmm0"
	);
}
211
/*
 * Write one cache line of test pattern to system address addr.
 *
 * addr   upper 32 bits of the target address (address >> 8)
 * p      pattern selector: 1 writes buf_b, any other value writes buf_a
 */
static void Write1LTestPattern(unsigned addr, unsigned p, uint8_t *buf_a, uint8_t *buf_b)
{
	uint8_t *pattern = (p == 1) ? buf_b : buf_a;

	/* Upper address bits go into the FS base, the rest is the offset. */
	set_FSBASE(addr >> 24);
	WriteLNTestPattern(addr << 8, pattern, 1);
}
222
/*
 * Touch the line at system address addr (upper 32 bits of the address) with
 * a single 32-bit load. The loaded value is discarded; the access is made
 * only for its cache-fill side effect.
 */
static void Read1LTestPattern(unsigned addr)
{
	unsigned discarded;

	set_FSBASE(addr >> 24);

	/* 1st move causes read fill (to exclusive or shared) */
	__asm__ volatile (
		"movl %%fs:(%1), %0\n\t"
		: "=b" (discarded)
		: "a" (addr << 8)
	);
}
236
/* Result of a single pattern comparison. */
#define DQS_PASS		0
#define DQS_FAIL		1

/* Which of the two training passes is being run. */
#define DQS_FIRST_PASS		1
#define DQS_SECOND_PASS		2

/* Error status identifiers reported by the training routines. */
#define SB_NORCVREN		11	/* no passing receiver-enable window */
#define SB_CHA2BRCVREN		12	/* channel A/B receiver-enable delays too far apart */
#define SB_SmallRCVR		13	/* receiver-enable window too narrow / too far delayed */
#define SB_NODQSPOS		14
#define SB_SMALLDQS		15

/* Tuning constants. */
#define RCVREN_MARGIN		6
#define MIN_DQS_WNDW		3
250
251
252 static unsigned CompareTestPatternQW0(unsigned channel, unsigned addr, unsigned pattern, const uint32_t *TestPattern0, const uint32_t *TestPattern1, const uint32_t *TestPattern2, unsigned Pass, unsigned is_Width128)
253 {
254         uint32_t addr_lo;
255         uint32_t *test_buf;
256         uint32_t value;
257         uint32_t value_test;
258         unsigned result = DQS_FAIL;
259
260         if(Pass == DQS_FIRST_PASS) {
261                 if(pattern==1) {
262                         test_buf = (uint32_t *)TestPattern1;
263                 }
264                 else {
265                         test_buf = (uint32_t *)TestPattern0;
266                 }
267         }
268         else {
269                 test_buf = (uint32_t *)TestPattern2;
270         }
271
272         set_FSBASE(addr>>24);
273
274         addr_lo = addr<<8;
275
276         if(is_Width128 && (channel == 1)) {
277                 addr_lo += 8; //second channel
278                 test_buf += 2;
279         }
280
281         __asm__ volatile (
282                 "movl %%fs:(%1), %0\n\t"
283                 :"=b"(value): "a" (addr_lo)
284         );
285
286         value_test = *test_buf;
287
288
289         print_debug_dqs_pair("\t\t\t\t\t\tQW0.lo : test_buf= ", (unsigned)test_buf, " value = ", value_test, 4);
290         print_debug_dqs_pair("\t\t\t\t\t\tQW0.lo : addr_lo = ", addr_lo, " value = ", value, 4);
291
292         if(value == value_test) {
293                 addr_lo += 4;
294                 test_buf++;
295                 __asm__ volatile (
296                         "movl %%fs:(%1), %0\n\t"
297                         :"=b"(value): "a" (addr_lo)
298                 );
299                 value_test = *test_buf;
300                 print_debug_dqs_pair("\t\t\t\t\t\tQW0.hi : test_buf= ", (unsigned)test_buf, " value = ", value_test, 4);
301                 print_debug_dqs_pair("\t\t\t\t\t\tQW0.hi : addr_lo = ", addr_lo, " value = ", value, 4);
302
303                 if(value == value_test){
304                         result =  DQS_PASS;
305                 }
306         }
307
308         if(Pass == DQS_SECOND_PASS) { // second pass need to be inverted
309                 if(result==DQS_PASS) {
310                         result = DQS_FAIL;
311                 }
312                 else {
313                         result = DQS_PASS;
314                 }
315         }
316
317         return result;
318
319 }
320
321 static void SetMaxAL_RcvrDly(const struct mem_controller *ctrl, unsigned dly)
322 {
323         uint32_t reg;
324
325         dly += (20-1); // round it
326         dly /= 20; // convert from unit 50ps to 1ns
327
328         dly += 6;
329
330
331         reg = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH);
332         reg &= ~(DCH_MaxAsyncLat_MASK <<DCH_MaxAsyncLat_SHIFT);
333         reg |= ((dly - DCH_MaxAsyncLat_BASE) << DCH_MaxAsyncLat_SHIFT);
334         pci_write_config32(ctrl->f2, DRAM_CONFIG_HIGH, reg);
335
336 }
337
338 /*
339         Set the Target range to WT IO (using an IORR overlapping the already existing
340         WB dram type). Use IORR0
341 */
342 static void SetTargetWTIO(unsigned addr)
343 {
344         msr_t msr;
345         msr.hi = addr>>24;
346         msr.lo = addr<<8;
347         wrmsr(0xc0010016, msr); //IORR0 BASE
348
349         msr.hi = 0xff;
350         msr.lo = 0xfc000800;  // 64MB Mask
351         wrmsr(0xc0010017, msr); // IORR0 Mask
352 }
353
354 static void ResetTargetWTIO(void)
355 {
356         msr_t msr;
357
358         msr.hi = 0;
359         msr.lo = 0;
360         wrmsr(0xc0010017, msr); // IORR0 Mask
361 }
362
/*
 * Flush the cache line containing system address addr (upper 32 bits of the
 * address, i.e. address >> 8) with clflush, addressing it through FS.
 */
static void proc_CLFLUSH(unsigned addr)
{
	set_FSBASE(addr >> 24);

	__asm__ volatile (
		/* clflush fs:[eax] */
		"clflush %%fs:(%0)\n\t"
		: /* no outputs */
		: "a" (addr << 8)
	);
}
/*
 * Flush the cache line at system address addr while the surrounding range
 * is temporarily covered by IORR0 (see SetTargetWTIO()); the IORR0 mask is
 * cleared again afterwards.
 */
static void proc_IOCLFLUSH(unsigned addr)
{
	SetTargetWTIO(addr);	/* cover the target range with IORR0 */
	proc_CLFLUSH(addr);	/* flush the line */
	ResetTargetWTIO();	/* drop the IORR0 mapping again */
}
382
383 static void ResetDCTWrPtr(const struct mem_controller *ctrl)
384 {
385         uint32_t dword;
386         unsigned index = 0x10;
387
388         dword = pci_read_config32_index_wait(ctrl->f2, 0x98, index);
389         pci_write_config32_index_wait(ctrl->f2, 0x98, index, dword);
390
391         index += 0x20;
392         dword = pci_read_config32_index_wait(ctrl->f2, 0x98, index);
393         pci_write_config32_index_wait(ctrl->f2, 0x98, index, dword);
394
395 }
396
397
398 static uint16_t get_exact_T1000(unsigned i)
399 {
400         //                                 200   266,   333,  400
401         static const uint16_t T1000_a[]= { 5000, 3759, 3003, 2500 };
402
403         static const uint16_t TT_a[] = {
404                  /*200   266   333   400 */
405          /*4 */   6250, 6250, 6250, 6250,
406          /*5 */   5000, 5000, 5000, 2500,
407          /*6 */   5000, 4166, 4166, 2500,
408          /*7 */   5000, 4285, 3571, 2500,
409
410          /*8 */   5000, 3750, 3125, 2500,
411          /*9 */   5000, 3888, 3333, 2500,
412          /*10*/   5000, 4000, 3000, 2500,
413          /*11*/   5000, 4090, 3181, 2500,
414
415          /*12*/   5000, 3750, 3333, 2500,
416          /*13*/   5000, 3846, 3076, 2500,
417          /*14*/   5000, 3928, 3214, 2500,
418          /*15*/   5000, 4000, 3000, 2500,
419         };
420
421         int index;
422         msr_t msr;
423
424         /* Check for FID control support */
425         struct cpuid_result cpuid1;
426         cpuid1 = cpuid(0x80000007);
427         if( cpuid1.edx & 0x02 ) {
428                 /* Use current FID */
429                 unsigned fid_cur;
430                 msr = rdmsr(0xc0010042);
431                 fid_cur = msr.lo & 0x3f;
432
433                 index = fid_cur>>1;
434         } else {
435                 /* Use startup FID */
436                 unsigned fid_start;
437                 msr = rdmsr(0xc0010015);
438                 fid_start = (msr.lo & (0x3f << 24));
439
440                 index = fid_start>>25;
441         }
442
443         if(index>12) return T1000_a[i];
444
445         return TT_a[index * 4+i];
446
447 }
448
449 static void InitDQSPos4RcvrEn(const struct mem_controller *ctrl)
450 {
451         int i;
452         uint32_t dword;
453
454         dword = 0x00000000;
455         for(i=1; i<=3; i++) {
456                 /* Program the DQS Write Timing Control Registers (Function 2:Offset 0x9c, index 0x01-0x03, 0x21-0x23) to 0x00 for all bytes */
457                 pci_write_config32_index_wait(ctrl->f2, 0x98, i, dword);
458                 pci_write_config32_index_wait(ctrl->f2, 0x98, i+0x20, dword);
459         }
460
461         dword = 0x2f2f2f2f;
462         for(i=5; i<=7; i++) {
463                 /* Program the DQS Write Timing Control Registers (Function 2:Offset 0x9c, index 0x05-0x07, 0x25-0x27) to 0x2f for all bytes */
464                 pci_write_config32_index_wait(ctrl->f2, 0x98, i, dword);
465                 pci_write_config32_index_wait(ctrl->f2, 0x98, i+0x20, dword);
466         }
467
468
469 }
/*
 * Default to building the workaround for pre-F2 CPU revisions unless the
 * build has already chosen a value.
 */
#if !defined(K8_REV_F_SUPPORT_F0_F1_WORKAROUND)
# define K8_REV_F_SUPPORT_F0_F1_WORKAROUND 1
#endif
473
474 static unsigned TrainRcvrEn(const struct mem_controller *ctrl, unsigned Pass, struct sys_info *sysinfo)
475 {
476
477         static const uint32_t TestPattern0[] = {
478                         0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
479                         0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
480                         0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
481                         0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
482                 };
483         static const uint32_t TestPattern1[] = {
484                         0x55555555, 0x55555555, 0x55555555, 0x55555555,
485                         0x55555555, 0x55555555, 0x55555555, 0x55555555,
486                         0x55555555, 0x55555555, 0x55555555, 0x55555555,
487                         0x55555555, 0x55555555, 0x55555555, 0x55555555,
488                 };
489         static const uint32_t TestPattern2[] = {
490                         0x12345678, 0x87654321, 0x23456789, 0x98765432,
491                         0x59385824, 0x30496724, 0x24490795, 0x99938733,
492                         0x40385642, 0x38465245, 0x29432163, 0x05067894,
493                         0x12349045, 0x98723467, 0x12387634, 0x34587623,
494                 };
495
496         uint8_t pattern_buf_x[64 * 4 + 16]; // We need to two cache line So have more 16 bytes to keep 16 byte alignment */
497         uint8_t *buf_a, *buf_b;
498         uint32_t ecc_bit;
499         uint32_t dword;
500         uint8_t *dqs_rcvr_dly_a = &sysinfo->dqs_rcvr_dly_a[ctrl->node_id * 2* 8] ; //8 node, channel 2, receiver 8
501
502         int i;
503
504         unsigned channel, receiver;
505
506         unsigned Errors;
507         unsigned CTLRMaxDelay;
508         unsigned T1000;
509
510         unsigned LastTest;
511         unsigned CurrTest;
512         unsigned Test0, Test1;
513
514         unsigned RcvrEnDlyRmin;
515
516         unsigned two_ranks;
517         unsigned RcvrEnDly;
518
519         unsigned PatternA;
520         unsigned PatternB;
521
522         unsigned TestAddr0, TestAddr0B, TestAddr1 = 0, TestAddr1B = 0;
523
524         unsigned CurrRcvrCHADelay = 0;
525
526         unsigned tmp;
527
528         unsigned is_Width128 = sysinfo->meminfo[ctrl->node_id].is_Width128;
529
530 #if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
531         unsigned cpu_f0_f1;
532 #endif
533
534         if(Pass == DQS_FIRST_PASS) {
535                 InitDQSPos4RcvrEn(ctrl);
536         }
537
538         //enable SSE2
539         enable_sse2();
540
541         //wrap32dis
542         set_wrap32dis();
543
544         //disable ECC temp
545         dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
546         ecc_bit = dword & DCL_DimmEccEn;
547         dword &= ~(DCL_DimmEccEn);
548         pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dword);
549
550
551         if(Pass == DQS_FIRST_PASS) {
552 #if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
553         cpu_f0_f1 = is_cpu_pre_f2_in_bsp(ctrl->node_id);
554         if(!cpu_f0_f1)
555 #endif
556         {
557 #if 1
558                 /* Set the DqsRcvEnTrain bit */
559                 dword = pci_read_config32(ctrl->f2, DRAM_CTRL);
560                 dword |= DC_DqsRcvEnTrain;
561                 pci_write_config32(ctrl->f2, DRAM_CTRL, dword);
562 #endif
563         }
564         }
565
566         //get T1000 figures (cycle time (ns)) * 1K
567         dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH);
568         dword &= DCH_MemClkFreq_MASK;
569
570         T1000 = get_exact_T1000(dword);
571
572         // SetupRcvrPattern
573         buf_a = (uint8_t *)(((uint32_t)(&pattern_buf_x[0]) + 0x10) & (0xfffffff0));
574         buf_b = buf_a + 128; //??
575         if(Pass==DQS_FIRST_PASS) {
576                 for(i=0;i<16;i++) {
577                         *((uint32_t *)(buf_a + i*4)) = TestPattern0[i];
578                         *((uint32_t *)(buf_b + i*4)) = TestPattern1[i];
579                 }
580         }
581         else {
582                 for(i=0;i<16;i++) {
583                         *((uint32_t *)(buf_a + i*4)) = TestPattern2[i];
584                         *((uint32_t *)(buf_b + i*4)) = TestPattern2[i];
585                 }
586         }
587
588         print_debug_dqs("\nTrainRcvEn: 0 ctrl", ctrl->node_id, 0);
589
590         print_debug_addr("TrainRcvEn: buf_a:", buf_a);
591
592         Errors = 0;
593         /* for each channel */
594         CTLRMaxDelay = 0;
595         channel = 0;
596
597         if (!(sysinfo->meminfo[ctrl->node_id].dimm_mask & 0x0F) &&
598              (sysinfo->meminfo[ctrl->node_id].dimm_mask & 0xF0)) { /* channelB only? */
599                 channel = 1;
600         }
601
602         for ( ; (channel < 2) && (!Errors); channel++)
603         {
604                 print_debug_dqs("\tTrainRcvEn51: channel ",channel, 1);
605
606                 /* for each rank */
607                 /* there are four recriver pairs, loosely associated with CS */
608                 for( receiver = 0; (receiver < 8) && (!Errors); receiver+=2)
609                 {
610
611                         unsigned index=(receiver>>1) * 3 + 0x10;
612
613                         print_debug_dqs("\t\tTrainRcvEn52: index ", index, 2);
614
615                         if(is_Width128) {
616                                 if(channel) {
617                                         dword = pci_read_config32_index_wait(ctrl->f2, 0x98, index);
618                                         CurrRcvrCHADelay= dword & 0xff;
619                                 }
620                         }
621                         else {
622                                 if(channel) {
623                                         index += 0x20;
624                                 }
625                         }
626
627                         LastTest = DQS_FAIL;
628                         RcvrEnDlyRmin = 0xaf;
629
630                         if(!RcvrRankEnabled(ctrl, channel, receiver, is_Width128, sysinfo)) continue;
631
632                         /* for each DQS receiver enable setting */
633
634                         TestAddr0 = Get_RcvrSysAddr(ctrl, channel, receiver, sysinfo);
635
636                         TestAddr0B = TestAddr0 + (1<<(20+2-8)); // 4MB
637
638                         if(RcvrRankEnabled(ctrl, channel, receiver+1, is_Width128, sysinfo)) {
639                                 TestAddr1 = Get_RcvrSysAddr(ctrl, channel, receiver+1, sysinfo);
640                                 TestAddr1B = TestAddr1 + (1<<(20+2-8)); //4MB
641                                 two_ranks = 1;
642                         }
643                         else {
644                                 two_ranks = 0;
645                         }
646
647                         print_debug_dqs("\t\tTrainRcvEn53: TestAddr0B ", TestAddr0B, 2);
648
649                         Write1LTestPattern(TestAddr0, 0, buf_a, buf_b); // rank0 of dimm, test p0
650                         Write1LTestPattern(TestAddr0B, 1, buf_a, buf_b); //rank0 of dimm, test p1
651
652                         if(two_ranks == 1) {
653                                 Write1LTestPattern(TestAddr1, 0, buf_a, buf_b); //rank 1 of dimm
654                                 Write1LTestPattern(TestAddr1B, 1, buf_a, buf_b);//rank 1 of dimm
655                         }
656
657                         if(Pass == DQS_FIRST_PASS) {
658                                 RcvrEnDly = 0;
659                         } else {
660                                 RcvrEnDly = dqs_rcvr_dly_a[channel * 8 + receiver];
661                         }
662
663                         while ( RcvrEnDly < 0xaf) { // Sweep Delay value here
664                                 print_debug_dqs("\t\t\tTrainRcvEn541: RcvrEnDly ", RcvrEnDly, 3);
665
666                                 if(RcvrEnDly & 1) {
667                                         /* Odd steps get another pattern such that even
668                                            and odd steps alternate.
669                                            The pointers to the patterns will be swapped
670                                            at the end of the loop so they are correspond
671                                         */
672                                         PatternA = 1;
673                                         PatternB = 0;
674                                 }
675                                 else {
676                                         /* Even step */
677                                         PatternA = 0;
678                                         PatternB = 1;
679                                 }
680
681                                 /* Program current Receiver enable delay */
682                                 pci_write_config32_index_wait(ctrl->f2, 0x98, index, RcvrEnDly);
683                                 /* FIXME: 64bit MUX */
684
685                                 if(is_Width128) {
686                                         /* Program current Receiver enable delay chaannel b */
687                                         pci_write_config32_index_wait(ctrl->f2, 0x98, index+ 0x20, RcvrEnDly);
688                                 }
689
690                                 /* Program the MaxAsyncLat filed with the
691                                    current DQS receiver enable setting plus 6ns
692                                 */
693                                 /*Porgram MaxAsyncLat to correspond with current delay */
694                                 SetMaxAL_RcvrDly(ctrl, RcvrEnDly);
695
696                                 CurrTest = DQS_FAIL;
697
698                                 Read1LTestPattern(TestAddr0);  //Cache Fill
699                                 /* ROM vs cache compare */
700                                 Test0 = CompareTestPatternQW0(channel, TestAddr0, PatternA, TestPattern0, TestPattern1, TestPattern2, Pass, is_Width128);
701                                 proc_IOCLFLUSH(TestAddr0);
702
703                                 ResetDCTWrPtr(ctrl);
704
705                                 print_debug_dqs("\t\t\tTrainRcvEn542: Test0 ", Test0, 3);
706
707                                 if(Test0 == DQS_PASS) {
708
709                                         Read1LTestPattern(TestAddr0B);
710                                         Test1 = CompareTestPatternQW0(channel, TestAddr0B, PatternB, TestPattern0, TestPattern1, TestPattern2, Pass, is_Width128);
711                                         proc_IOCLFLUSH(TestAddr0B);
712
713                                         ResetDCTWrPtr(ctrl);
714
715                                         print_debug_dqs("\t\t\tTrainRcvEn543: Test1 ", Test1, 3);
716
717                                         if(Test1 == DQS_PASS) {
718                                                 if(two_ranks) {
719                                                         Read1LTestPattern(TestAddr1);
720                                                         Test0 = CompareTestPatternQW0(channel, TestAddr1, PatternA, TestPattern0, TestPattern1, TestPattern2, Pass, is_Width128);
721                                                         proc_IOCLFLUSH(TestAddr1);
722                                                         ResetDCTWrPtr(ctrl);
723
724                                                         if(Test0 == DQS_PASS) {
725                                                                 Read1LTestPattern(TestAddr1B);
726                                                                 Test1 = CompareTestPatternQW0(channel, TestAddr1B, PatternB, TestPattern0, TestPattern1, TestPattern2, Pass, is_Width128);
727                                                                 proc_IOCLFLUSH(TestAddr1B);
728                                                                 ResetDCTWrPtr(ctrl);
729
730                                                                 if(Test1 == DQS_PASS) {
731                                                                         CurrTest = DQS_PASS;
732                                                                 }
733                                                         }
734                                                         print_debug_dqs("\t\t\tTrainRcvEn544: Test0 ", Test0, 3);
735                                                 }
736                                                 else {
737                                                         CurrTest = DQS_PASS;
738                                                 }
739                                         }
740                                 }
741
742                                 print_debug_dqs("\t\t\tTrainRcvEn55: RcvrEnDly ", RcvrEnDly, 3);
743
744                                 if(CurrTest == DQS_PASS) {
745                                         if(LastTest == DQS_FAIL) {
746                                                 RcvrEnDlyRmin = RcvrEnDly;
747                                                 break;
748                                         }
749                                 }
750
751                                 LastTest = CurrTest;
752
753                                 /* swap the rank 0 pointers */
754                                 tmp = TestAddr0;
755                                 TestAddr0 = TestAddr0B;
756                                 TestAddr0B = tmp;
757
758                                 /* swap the rank 1 pointers */
759                                 tmp = TestAddr1;
760                                 TestAddr1 = TestAddr1B;
761                                 TestAddr1B = tmp;
762
763                                 print_debug_dqs("\t\t\tTrainRcvEn56: RcvrEnDly ", RcvrEnDly, 3);
764
765                                 RcvrEnDly++;
766
767                         } // while RcvrEnDly
768
769                         print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDly ", RcvrEnDly, 2);
770
771                         if(RcvrEnDlyRmin == 0xaf) {
772                                 //no passing window
773                                 Errors |= SB_NORCVREN;
774                         }
775
776                         if(Pass == DQS_FIRST_PASS) {
777                                 // We need a better value for DQSPos trainning
778                                 RcvrEnDly = RcvrEnDlyRmin /* + RCVREN_MARGIN * T1000/64/50 */;
779                         } else {
780                                 RcvrEnDly = RcvrEnDlyRmin;
781                         }
782
783                         if(RcvrEnDly > 0xae) {
784                                 //passing window too narrow, too far delayed
785                                 Errors |= SB_SmallRCVR;
786                                 RcvrEnDly = 0xae;
787                         }
788
789                         if(Pass == DQS_SECOND_PASS) { //second pass must average vales
790                                 RcvrEnDly += dqs_rcvr_dly_a[channel * 8 + receiver] /* - (RCVREN_MARGIN * T1000/64/50)*/;
791                                 RcvrEnDly >>= 1;
792                         }
793
794                         dqs_rcvr_dly_a[channel * 8 + receiver] = RcvrEnDly;
795
796                         //Set final RcvrEnDly for this DIMM and Channel
797                         pci_write_config32_index_wait(ctrl->f2, 0x98, index, RcvrEnDly);
798
799                         if(is_Width128) {
800                                 pci_write_config32_index_wait(ctrl->f2, 0x98, index+0x20, RcvrEnDly); // channel B
801                                 if(channel) {
802                                         pci_write_config32_index_wait(ctrl->f2, 0x98, index, CurrRcvrCHADelay);
803                                         if(RcvrEnDly > CurrRcvrCHADelay) {
804                                                 dword = RcvrEnDly - CurrRcvrCHADelay;
805                                         }
806                                         else {
807                                                 dword = CurrRcvrCHADelay - RcvrEnDly;
808                                         }
809                                         dword *= 50;
810                                         if(dword > T1000) {
811                                                 Errors |= SB_CHA2BRCVREN;
812                                         }
813                                 }
814                         }
815
816                         print_debug_dqs("\t\tTrainRcvEn63: RcvrEnDly ", RcvrEnDly, 2);
817
818                         if(RcvrEnDly > CTLRMaxDelay) {
819                                 CTLRMaxDelay = RcvrEnDly;
820                         }
821
822                         print_debug_dqs("\t\tTrainRcvEn64: CTLRMaxDelay ", CTLRMaxDelay, 2);
823
824                 } /* receiver */
825         } /* channel */
826
827         print_debug_dqs("\tTrainRcvEn65: CTLRMaxDelay ", CTLRMaxDelay, 1);
828
829         /* Program the MaxAsysncLat field with the largest DQS Receiver Enable setting */
830         SetMaxAL_RcvrDly(ctrl, CTLRMaxDelay);
831         ResetDCTWrPtr(ctrl);
832
833         //Enable ECC again
834         dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
835         dword &= ~(DCL_DimmEccEn);
836         dword |= ecc_bit;
837         pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dword);
838
839         if(Pass == DQS_FIRST_PASS) {
840 #if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
841         if(!cpu_f0_f1)
842 #endif
843         {
844                 dword = pci_read_config32(ctrl->f2, DRAM_CTRL);
845                 dword &= ~DC_DqsRcvEnTrain;
846                 pci_write_config32(ctrl->f2, DRAM_CTRL, dword);
847         }
848         }
849
850         //Clear wrap32dis
851
852         clear_wrap32dis();
853
854         //restore SSE2 setting
855         disable_sse2();
856
857 #if CONFIG_MEM_TRAIN_SEQ != 1
858         /* We need tidy output for type 1 */
859         printk(BIOS_DEBUG, " CTLRMaxDelay=%02x\n", CTLRMaxDelay);
860 #endif
861
862         return (CTLRMaxDelay==0xae)?1:0;
863
864 }
865
866 #define DQS_READDIR 1
867 #define DQS_WRITEDIR 0
868
869
870 static void SetDQSDelayCSR(const struct mem_controller *ctrl, unsigned channel, unsigned bytelane, unsigned direction, unsigned dqs_delay)
871 { //ByteLane could be 0-8, last is for ECC
872         unsigned index;
873         uint32_t dword;
874         unsigned shift;
875
876         dqs_delay &= 0xff;
877
878         index = (bytelane>>2) + 1 + channel * 0x20 + (direction << 2);
879         shift = bytelane;
880         while(shift>3) {
881                 shift-=4;
882         }
883         shift <<= 3; // 8 bit
884
885         dword = pci_read_config32_index_wait(ctrl->f2, 0x98, index);
886         dword &= ~(0x3f<<shift);
887         dword |= (dqs_delay<<shift);
888         pci_write_config32_index_wait(ctrl->f2, 0x98, index, dword);
889
890 }
891
892 static void SetDQSDelayAllCSR(const struct mem_controller *ctrl, unsigned channel, unsigned direction, unsigned dqs_delay)
893 {
894         unsigned index;
895         uint32_t dword;
896         int i;
897
898         dword = 0;
899         dqs_delay &= 0xff;
900         for(i=0;i<4;i++) {
901                 dword |= dqs_delay<<(i*8);
902         }
903
904         index = 1 + channel * 0x20 + direction * 4;
905
906         for(i=0; i<2; i++) {
907                 pci_write_config32_index_wait(ctrl->f2, 0x98, index + i, dword);
908         }
909
910 }
911
/*
 * Return the midpoint of the window [min_d, max_d], rounded up when the
 * window width (max_d - min_d) is odd.
 */
static unsigned MiddleDQS(unsigned min_d, unsigned max_d)
{
	const unsigned width = max_d - min_d;

	/* half the width, plus one more step if the width is odd */
	return min_d + (width >> 1) + (width & 1);
}
921
/*
 * Record a trained delay in the per-node delay table.
 *
 * The table is laid out as [channel][direction][bytelane] with 2 directions
 * per channel and 9 byte lanes per direction.
 */
static inline void save_dqs_delay(unsigned channel, unsigned bytelane, unsigned direction, uint8_t *dqs_delay_a, uint8_t dqs_delay)
{
	uint8_t *slot = dqs_delay_a + (channel * 2 + direction) * 9 + bytelane;

	*slot = dqs_delay;
}
926
/*
 * Write the DQS test pattern held in buf_a to addr_lo.
 *
 * The length handed to WriteLNTestPattern is 9 for pattern 0 and 18 for
 * pattern 1 (presumably a count of cache lines - confirm against
 * WriteLNTestPattern).
 */
static void WriteDQSTestPattern(unsigned addr_lo, unsigned pattern , uint8_t *buf_a)
{
	const unsigned line_count = 9 * (pattern + 1);

	WriteLNTestPattern(addr_lo, buf_a, line_count);
}
931
/*
 * Read one dword from each of 18 consecutive 64-byte-spaced locations,
 * starting at %fs:addr_lo. The loaded values are discarded; only the memory
 * reads themselves matter.
 *
 * The caller must have set the FS base beforehand; every access uses the fs
 * segment prefix.
 *
 * %eax receives every load, so it is declared as an (early-clobber) output.
 * Declaring it as an input-only operand, as was done before, is invalid: the
 * compiler is allowed to assume an input-only register still holds its
 * original value after the asm statement.
 */
static void ReadL18TestPattern(unsigned addr_lo)
{
	unsigned scratch; /* sink for the loaded dwords */

	//set fs and use fs prefix to access the mem
	__asm__ volatile (
		"movl %%fs:-128(%%esi), %%eax\n\t"	//TestAddr cache line
		"movl %%fs:-64(%%esi), %%eax\n\t"	//+1
		"movl %%fs:(%%esi), %%eax\n\t"		//+2
		"movl %%fs:64(%%esi), %%eax\n\t"	//+3

		"movl %%fs:-128(%%edi), %%eax\n\t"	//+4
		"movl %%fs:-64(%%edi), %%eax\n\t"	//+5
		"movl %%fs:(%%edi), %%eax\n\t"		//+6
		"movl %%fs:64(%%edi), %%eax\n\t"	//+7

		"movl %%fs:-128(%%ebx), %%eax\n\t"	//+8
		"movl %%fs:-64(%%ebx), %%eax\n\t"	//+9
		"movl %%fs:(%%ebx), %%eax\n\t"		//+10
		"movl %%fs:64(%%ebx), %%eax\n\t"	//+11

		"movl %%fs:-128(%%ecx), %%eax\n\t"	//+12
		"movl %%fs:-64(%%ecx), %%eax\n\t"	//+13
		"movl %%fs:(%%ecx), %%eax\n\t"		//+14
		"movl %%fs:64(%%ecx), %%eax\n\t"	//+15

		"movl %%fs:-128(%%edx), %%eax\n\t"	//+16
		"movl %%fs:-64(%%edx), %%eax\n\t"	//+17

		: "=&a" (scratch)
		: "b" (addr_lo+128+8*64), "c" (addr_lo+128+12*64), "d" (addr_lo +128+16*64), "S"(addr_lo+128), "D"(addr_lo+128+4*64)
	);

	(void)scratch;
}
963
/*
 * Read one dword from each of 9 consecutive 64-byte-spaced locations,
 * starting at %fs:addr_lo. The loaded values are discarded; only the memory
 * reads themselves matter.
 *
 * The caller must have set the FS base beforehand; every access uses the fs
 * segment prefix.
 *
 * %eax receives every load, so it is declared as an (early-clobber) output.
 * Declaring it as an input-only operand, as was done before, is invalid: the
 * compiler is allowed to assume an input-only register still holds its
 * original value after the asm statement.
 */
static void ReadL9TestPattern(unsigned addr_lo)
{
	unsigned scratch; /* sink for the loaded dwords */

	//set fs and use fs prefix to access the mem
	__asm__ volatile (
		"movl %%fs:-128(%%ecx), %%eax\n\t"	//TestAddr cache line
		"movl %%fs:-64(%%ecx), %%eax\n\t"	//+1
		"movl %%fs:(%%ecx), %%eax\n\t"		//+2
		"movl %%fs:64(%%ecx), %%eax\n\t"	//+3

		"movl %%fs:-128(%%edx), %%eax\n\t"	//+4
		"movl %%fs:-64(%%edx), %%eax\n\t"	//+5
		"movl %%fs:(%%edx), %%eax\n\t"		//+6
		"movl %%fs:64(%%edx), %%eax\n\t"	//+7

		"movl %%fs:-128(%%ebx), %%eax\n\t"	//+8

		: "=&a" (scratch)
		: "b" (addr_lo+128+8*64), "c"(addr_lo+128), "d"(addr_lo+128+4*64)
	);

	(void)scratch;
}
986
987
/*
 * Read back the test pattern at addr_lo: the 9-location variant for
 * pattern 0, the 18-location variant for any other pattern value.
 */
static void ReadDQSTestPattern(unsigned addr_lo, unsigned pattern)
{
	if (pattern != 0) {
		ReadL18TestPattern(addr_lo);
		return;
	}

	ReadL9TestPattern(addr_lo);
}
997
/*
 * clflush 9 consecutive 64-byte-spaced locations starting at %fs:addr_lo.
 * All accesses go through the fs segment, so the FS base must already be set.
 */
static void FlushDQSTestPattern_L9(unsigned addr_lo)
{
	/* Each base register covers four locations via offsets -128..+64. */
	const unsigned group0 = addr_lo + 128;		/* locations 0-3 */
	const unsigned group1 = group0 + 4*64;		/* locations 4-7 */
	const unsigned group2 = group0 + 8*64;		/* location 8 */

	__asm__ volatile (
		"clflush %%fs:-128(%%ecx)\n\t"
		"clflush %%fs:-64(%%ecx)\n\t"
		"clflush %%fs:(%%ecx)\n\t"
		"clflush %%fs:64(%%ecx)\n\t"

		"clflush %%fs:-128(%%eax)\n\t"
		"clflush %%fs:-64(%%eax)\n\t"
		"clflush %%fs:(%%eax)\n\t"
		"clflush %%fs:64(%%eax)\n\t"

		"clflush %%fs:-128(%%ebx)\n\t"

		:: "c" (group0), "a" (group1), "b" (group2)
	);
}
/*
 * clflush 18 consecutive 64-byte-spaced locations starting at %fs:addr_lo.
 * All accesses go through the fs segment, so the FS base must already be set.
 * The function is kept out of line (noinline), as in the original.
 */
static __attribute__((noinline)) void FlushDQSTestPattern_L18(unsigned addr_lo)
{
	/* Each base register covers up to four locations via offsets -128..+64. */
	const unsigned group0 = addr_lo + 128;		/* locations 0-3 */
	const unsigned group1 = group0 + 4*64;		/* locations 4-7 */
	const unsigned group2 = group0 + 8*64;		/* locations 8-11 */
	const unsigned group3 = group0 + 12*64;		/* locations 12-15 */
	const unsigned group4 = group0 + 16*64;		/* locations 16-17 */

	__asm__ volatile (
		"clflush %%fs:-128(%%eax)\n\t"
		"clflush %%fs:-64(%%eax)\n\t"
		"clflush %%fs:(%%eax)\n\t"
		"clflush %%fs:64(%%eax)\n\t"

		"clflush %%fs:-128(%%edi)\n\t"
		"clflush %%fs:-64(%%edi)\n\t"
		"clflush %%fs:(%%edi)\n\t"
		"clflush %%fs:64(%%edi)\n\t"

		"clflush %%fs:-128(%%ebx)\n\t"
		"clflush %%fs:-64(%%ebx)\n\t"
		"clflush %%fs:(%%ebx)\n\t"
		"clflush %%fs:64(%%ebx)\n\t"

		"clflush %%fs:-128(%%ecx)\n\t"
		"clflush %%fs:-64(%%ecx)\n\t"
		"clflush %%fs:(%%ecx)\n\t"
		"clflush %%fs:64(%%ecx)\n\t"

		"clflush %%fs:-128(%%edx)\n\t"
		"clflush %%fs:-64(%%edx)\n\t"

		:: "a" (group0), "D" (group1), "b" (group2), "c" (group3), "d" (group4)
	);
}
1046
/*
 * Flush the test pattern at addr_lo from the cache: the 9-location variant
 * for pattern 0, the 18-location variant for any other pattern value.
 */
static void FlushDQSTestPattern(unsigned addr_lo, unsigned pattern )
{
	if (pattern != 0) {
		FlushDQSTestPattern_L18(addr_lo);
		return;
	}

	FlushDQSTestPattern_L9(addr_lo);
}
1057
/*
 * Compare the data at %fs:addr_lo with the reference pattern in buf_a and
 * return a per-byte-lane pass bitmap.
 *
 * 9*64/4 dwords are compared. Byte k of every 8-byte group belongs to lane k;
 * bit k of the result stays set only if lane k matched in every group.
 *
 * When pattern is non-zero and channel is non-zero the comparison starts 8
 * bytes in ("second channel"). When pattern == 1, every 8 compared bytes are
 * followed by 8 skipped bytes (the other channel's data), in both the memory
 * under test and the reference buffer.
 *
 * Returns 0xff when all eight lanes matched everywhere.
 */
static unsigned CompareDQSTestPattern(unsigned channel, unsigned addr_lo, unsigned pattern, uint8_t *buf_a)
{
	uint32_t *expected = (uint32_t *)buf_a;
	unsigned pass_map = 0xff;
	unsigned lane = 0;
	unsigned remaining;

	if (channel && pattern) {
		addr_lo += 8; //second channel
		expected += 2;
	}

	for (remaining = 9*64/4; remaining != 0; remaining--) {
		uint32_t readback;
		uint32_t reference;
		uint32_t diff;
		unsigned bit;

		__asm__ volatile (
			"movl %%fs:(%1), %0\n\t"
			: "=b" (readback) : "a" (addr_lo)
		);
		reference = *expected;

		print_debug_dqs_pair("\t\t\t\t\t\ttest_buf= ", (unsigned)expected, " value = ", reference, 7);
		print_debug_dqs_pair("\t\t\t\t\t\ttaddr_lo = ",addr_lo, " value = ", readback, 7);

		/* a non-zero byte in the XOR marks a mismatching lane */
		diff = readback ^ reference;
		for (bit = 0; bit < 4*8; bit += 8) {
			if ((diff >> bit) & 0xff) {
				pass_map &= ~(1 << lane);
			}
			lane = (lane + 1) & 0x7;
		}
		print_debug_dqs("\t\t\t\t\t\tbitmap = ", pass_map, 7);

		if (lane == 0 && pattern == 1) {
			/* dual channel: skip over other channel's data */
			addr_lo += 8;
			expected += 2;
		}

		addr_lo += 4;
		expected++;
	}

	return pass_map;
}
1112
1113 static unsigned TrainDQSPos(const struct mem_controller *ctrl, unsigned channel, unsigned Direction, unsigned Pattern, uint8_t *buf_a, uint8_t *dqs_delay_a, struct sys_info *sysinfo)
1114 {
1115         unsigned ByteLane;
1116         unsigned Errors;
1117         unsigned BanksPresent;
1118
1119         unsigned MutualCSPassW[48];
1120
1121         unsigned ChipSel;
1122         unsigned DQSDelay;
1123
1124         unsigned TestAddr;
1125
1126         unsigned LastTest;
1127         unsigned RnkDlyFilterMax, RnkDlyFilterMin = 0;
1128         unsigned RnkDlySeqPassMax, RnkDlySeqPassMin = 0;
1129
1130         Errors = 0;
1131         BanksPresent = 0;
1132
1133         print_debug_dqs("\t\t\tTrainDQSPos begin ", 0, 3);
1134
1135         printk(BIOS_DEBUG, "TrainDQSPos: MutualCSPassW[48] :%p\n", MutualCSPassW);
1136
1137         for(DQSDelay=0; DQSDelay<48; DQSDelay++) {
1138                 MutualCSPassW[DQSDelay] = 0xff; // Bitmapped status per delay setting, 0xff=All positions passing (1= PASS)
1139         }
1140
1141         for(ChipSel = 0; ChipSel < 8; ChipSel++) { //logical register chipselects 0..7
1142                 print_debug_dqs("\t\t\t\tTrainDQSPos: 11 ChipSel ", ChipSel, 4);
1143                 //FIXME: process 64MUXedMode
1144                 if(!ChipSelPresent(ctrl, ChipSel, sysinfo)) continue;
1145                 BanksPresent  = 1;
1146
1147                 TestAddr = Get_MCTSysAddr(ctrl, ChipSel, sysinfo);
1148
1149                 print_debug_dqs("\t\t\t\tTrainDQSPos: 12 TestAddr ", TestAddr, 4);
1150
1151                 //set fs and use fs prefix to access the mem
1152                 set_FSBASE(TestAddr>>24);
1153
1154                 if(Direction == DQS_READDIR) {
1155                         print_debug_dqs("\t\t\t\tTrainDQSPos: 13 for read so write at first", 0, 4);
1156                         WriteDQSTestPattern(TestAddr<<8, Pattern, buf_a);
1157                 }
1158
1159                 for(DQSDelay = 0; DQSDelay < 48; DQSDelay++ ){
1160                         print_debug_dqs("\t\t\t\t\tTrainDQSPos: 141 DQSDelay ", DQSDelay, 5);
1161                         if(MutualCSPassW[DQSDelay] == 0) continue; //skip current delay value if other chipselects have failed all 8 bytelanes
1162                         SetDQSDelayAllCSR(ctrl, channel, Direction, DQSDelay);
1163                         print_debug_dqs("\t\t\t\t\tTrainDQSPos: 142 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);
1164                         if(Direction == DQS_WRITEDIR) {
1165                                 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 143 for write", 0, 5);
1166                                 WriteDQSTestPattern(TestAddr<<8, Pattern, buf_a);
1167                         }
1168                         print_debug_dqs("\t\t\t\t\tTrainDQSPos: 144 Pattern ", Pattern, 5);
1169                         ReadDQSTestPattern(TestAddr<<8, Pattern);
1170                         print_debug_dqs("\t\t\t\t\tTrainDQSPos: 145 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);
1171                         MutualCSPassW[DQSDelay] &= CompareDQSTestPattern(channel, TestAddr<<8, Pattern, buf_a); //0: fail, 1=pass
1172                         print_debug_dqs("\t\t\t\t\tTrainDQSPos: 146 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);
1173                         SetTargetWTIO(TestAddr);
1174                         FlushDQSTestPattern(TestAddr<<8, Pattern);
1175                         ResetTargetWTIO();
1176                 }
1177         }
1178
1179         if(BanksPresent)
1180         for(ByteLane = 0; ByteLane < 8; ByteLane++) {
1181                 print_debug_dqs("\t\t\t\tTrainDQSPos: 31 ByteLane ",ByteLane, 4);
1182
1183                 LastTest = DQS_FAIL;
1184                 RnkDlySeqPassMax = 0;
1185                 RnkDlyFilterMax = 0;
1186                 RnkDlyFilterMin = 0;
1187                 for(DQSDelay=0; DQSDelay<48; DQSDelay++) {
1188                         if(MutualCSPassW[DQSDelay] & (1<<ByteLane)) {
1189
1190                                 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 321 DQSDelay ", DQSDelay, 5);
1191                                 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 322 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);
1192
1193                                 RnkDlySeqPassMax = DQSDelay;
1194                                 if(LastTest == DQS_FAIL) {
1195                                         RnkDlySeqPassMin = DQSDelay; //start sequential run
1196                                 }
1197                                 if((RnkDlySeqPassMax - RnkDlySeqPassMin)>(RnkDlyFilterMax-RnkDlyFilterMin)){
1198                                         RnkDlyFilterMin = RnkDlySeqPassMin;
1199                                         RnkDlyFilterMax = RnkDlySeqPassMax;
1200                                 }
1201                                 LastTest = DQS_PASS;
1202                         }
1203                         else {
1204                                 LastTest = DQS_FAIL;
1205                         }
1206                 }
1207                 print_debug_dqs("\t\t\t\tTrainDQSPos: 33 RnkDlySeqPassMax ", RnkDlySeqPassMax, 4);
1208
1209                 if(RnkDlySeqPassMax == 0) {
1210                         Errors |= SB_NODQSPOS; // no passing window
1211                 }
1212                 else {
1213                         print_debug_dqs("\t\t\t\tTrainDQSPos: 34 RnkDlyFilterMax ", RnkDlyFilterMax, 4);
1214                         print_debug_dqs("\t\t\t\tTrainDQSPos: 34 RnkDlyFilterMin ", RnkDlyFilterMin, 4);
1215                         if((RnkDlyFilterMax - RnkDlyFilterMin)< MIN_DQS_WNDW){
1216                                 Errors |= SB_SMALLDQS;
1217                         }
1218                         else {
1219                                 unsigned middle_dqs;
1220                                 middle_dqs = MiddleDQS(RnkDlyFilterMin, RnkDlyFilterMax);
1221                                 print_debug_dqs("\t\t\t\tTrainDQSPos: 35 middle_dqs ",middle_dqs, 4);
1222                                 SetDQSDelayCSR(ctrl, channel, ByteLane, Direction, middle_dqs);
1223                                 save_dqs_delay(channel, ByteLane, Direction, dqs_delay_a, middle_dqs);
1224                         }
1225                 }
1226
1227         }
1228
1229         print_debug_dqs("\t\t\tTrainDQSPos: end", 0xff, 3);
1230
1231         return Errors;
1232
1233
1234 }
1235
1236 static unsigned TrainReadDQS(const struct mem_controller *ctrl, unsigned channel, unsigned pattern, uint8_t *buf_a, uint8_t *dqs_delay_a, struct sys_info *sysinfo)
1237 {
1238         print_debug_dqs("\t\tTrainReadPos", 0, 2);
1239         return TrainDQSPos(ctrl, channel, DQS_READDIR, pattern, buf_a, dqs_delay_a, sysinfo);
1240 }
1241
1242 static unsigned TrainWriteDQS(const struct mem_controller *ctrl, unsigned channel, unsigned pattern, uint8_t *buf_a, uint8_t *dqs_delay_a, struct sys_info *sysinfo)
1243 {
1244         print_debug_dqs("\t\tTrainWritePos", 0, 2);
1245         return TrainDQSPos(ctrl, channel, DQS_WRITEDIR, pattern, buf_a, dqs_delay_a, sysinfo);
1246 }
1247
1248
1249
1250 static unsigned TrainDQSRdWrPos(const struct mem_controller *ctrl, struct sys_info *sysinfo)
1251 {
1252         static const uint32_t TestPatternJD1a[] = {
1253                                         0x00000000,0x00000000,0xFFFFFFFF,0xFFFFFFFF, // QW0-1, ALL-EVEN
1254                                         0x00000000,0x00000000,0x00000000,0x00000000, // QW2-3, ALL-EVEN
1255                                         0x00000000,0x00000000,0xFFFFFFFF,0xFFFFFFFF, // QW4-5, ALL-EVEN
1256                                         0x00000000,0x00000000,0x00000000,0x00000000, // QW6-7, ALL-EVEN
1257                                         0xFeFeFeFe,0xFeFeFeFe,0x01010101,0x01010101, // QW0-1, DQ0-ODD
1258                                         0xFeFeFeFe,0xFeFeFeFe,0x01010101,0x01010101, // QW2-3, DQ0-ODD
1259                                         0x01010101,0x01010101,0xFeFeFeFe,0xFeFeFeFe, // QW4-5, DQ0-ODD
1260                                         0xFeFeFeFe,0xFeFeFeFe,0x01010101,0x01010101, // QW6-7, DQ0-ODD
1261                                         0x02020202,0x02020202,0x02020202,0x02020202, // QW0-1, DQ1-ODD
1262                                         0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, // QW2-3, DQ1-ODD
1263                                         0xFdFdFdFd,0xFdFdFdFd,0x02020202,0x02020202, // QW4-5, DQ1-ODD
1264                                         0x02020202,0x02020202,0x02020202,0x02020202, // QW6-7, DQ1-ODD
1265                                         0x04040404,0x04040404,0xfBfBfBfB,0xfBfBfBfB, // QW0-1, DQ2-ODD
1266                                         0x04040404,0x04040404,0x04040404,0x04040404, // QW2-3, DQ2-ODD
1267                                         0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW4-5, DQ2-ODD
1268                                         0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW6-7, DQ2-ODD
1269                                         0x08080808,0x08080808,0xF7F7F7F7,0xF7F7F7F7, // QW0-1, DQ3-ODD
1270                                         0x08080808,0x08080808,0x08080808,0x08080808, // QW2-3, DQ3-ODD
1271                                         0xF7F7F7F7,0xF7F7F7F7,0x08080808,0x08080808, // QW4-5, DQ3-ODD
1272                                         0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW6-7, DQ3-ODD
1273                                         0x10101010,0x10101010,0x10101010,0x10101010, // QW0-1, DQ4-ODD
1274                                         0xeFeFeFeF,0xeFeFeFeF,0x10101010,0x10101010, // QW2-3, DQ4-ODD
1275                                         0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW4-5, DQ4-ODD
1276                                         0xeFeFeFeF,0xeFeFeFeF,0x10101010,0x10101010, // QW6-7, DQ4-ODD
1277                                         0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW0-1, DQ5-ODD
1278                                         0xdFdFdFdF,0xdFdFdFdF,0x20202020,0x20202020, // QW2-3, DQ5-ODD
1279                                         0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW4-5, DQ5-ODD
1280                                         0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW6-7, DQ5-ODD
1281                                         0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW0-1, DQ6-ODD
1282                                         0x40404040,0x40404040,0xBfBfBfBf,0xBfBfBfBf, // QW2-3, DQ6-ODD
1283                                         0x40404040,0x40404040,0xBfBfBfBf,0xBfBfBfBf, // QW4-5, DQ6-ODD
1284                                         0x40404040,0x40404040,0xBfBfBfBf,0xBfBfBfBf, // QW6-7, DQ6-ODD
1285                                         0x80808080,0x80808080,0x7F7F7F7F,0x7F7F7F7F, // QW0-1, DQ7-ODD
1286                                         0x80808080,0x80808080,0x7F7F7F7F,0x7F7F7F7F, // QW2-3, DQ7-ODD
1287                                         0x80808080,0x80808080,0x7F7F7F7F,0x7F7F7F7F, // QW4-5, DQ7-ODD
1288                                         0x80808080,0x80808080,0x80808080,0x80808080  // QW6-7, DQ7-ODD
1289                 };
1290         static const uint32_t TestPatternJD1b[] = {
1291                                         0x00000000,0x00000000,0x00000000,0x00000000, // QW0,CHA-B, ALL-EVEN
1292                                         0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, // QW1,CHA-B, ALL-EVEN
1293                                         0x00000000,0x00000000,0x00000000,0x00000000, // QW2,CHA-B, ALL-EVEN
1294                                         0x00000000,0x00000000,0x00000000,0x00000000, // QW3,CHA-B, ALL-EVEN
1295                                         0x00000000,0x00000000,0x00000000,0x00000000, // QW4,CHA-B, ALL-EVEN
1296                                         0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, // QW5,CHA-B, ALL-EVEN
1297                                         0x00000000,0x00000000,0x00000000,0x00000000, // QW6,CHA-B, ALL-EVEN
1298                                         0x00000000,0x00000000,0x00000000,0x00000000, // QW7,CHA-B, ALL-EVEN
1299                                         0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, // QW0,CHA-B, DQ0-ODD
1300                                         0x01010101,0x01010101,0x01010101,0x01010101, // QW1,CHA-B, DQ0-ODD
1301                                         0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, // QW2,CHA-B, DQ0-ODD
1302                                         0x01010101,0x01010101,0x01010101,0x01010101, // QW3,CHA-B, DQ0-ODD
1303                                         0x01010101,0x01010101,0x01010101,0x01010101, // QW4,CHA-B, DQ0-ODD
1304                                         0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, // QW5,CHA-B, DQ0-ODD
1305                                         0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, // QW6,CHA-B, DQ0-ODD
1306                                         0x01010101,0x01010101,0x01010101,0x01010101, // QW7,CHA-B, DQ0-ODD
1307                                         0x02020202,0x02020202,0x02020202,0x02020202, // QW0,CHA-B, DQ1-ODD
1308                                         0x02020202,0x02020202,0x02020202,0x02020202, // QW1,CHA-B, DQ1-ODD
1309                                         0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, // QW2,CHA-B, DQ1-ODD
1310                                         0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, // QW3,CHA-B, DQ1-ODD
1311                                         0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, // QW4,CHA-B, DQ1-ODD
1312                                         0x02020202,0x02020202,0x02020202,0x02020202, // QW5,CHA-B, DQ1-ODD
1313                                         0x02020202,0x02020202,0x02020202,0x02020202, // QW6,CHA-B, DQ1-ODD
1314                                         0x02020202,0x02020202,0x02020202,0x02020202, // QW7,CHA-B, DQ1-ODD
1315                                         0x04040404,0x04040404,0x04040404,0x04040404, // QW0,CHA-B, DQ2-ODD
1316                                         0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW1,CHA-B, DQ2-ODD
1317                                         0x04040404,0x04040404,0x04040404,0x04040404, // QW2,CHA-B, DQ2-ODD
1318                                         0x04040404,0x04040404,0x04040404,0x04040404, // QW3,CHA-B, DQ2-ODD
1319                                         0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW4,CHA-B, DQ2-ODD
1320                                         0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW5,CHA-B, DQ2-ODD
1321                                         0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW6,CHA-B, DQ2-ODD
1322                                         0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW7,CHA-B, DQ2-ODD
1323                                         0x08080808,0x08080808,0x08080808,0x08080808, // QW0,CHA-B, DQ3-ODD
1324                                         0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW1,CHA-B, DQ3-ODD
1325                                         0x08080808,0x08080808,0x08080808,0x08080808, // QW2,CHA-B, DQ3-ODD
1326                                         0x08080808,0x08080808,0x08080808,0x08080808, // QW3,CHA-B, DQ3-ODD
1327                                         0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW4,CHA-B, DQ3-ODD
1328                                         0x08080808,0x08080808,0x08080808,0x08080808, // QW5,CHA-B, DQ3-ODD
1329                                         0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW6,CHA-B, DQ3-ODD
1330                                         0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW7,CHA-B, DQ3-ODD
1331                                         0x10101010,0x10101010,0x10101010,0x10101010, // QW0,CHA-B, DQ4-ODD
1332                                         0x10101010,0x10101010,0x10101010,0x10101010, // QW1,CHA-B, DQ4-ODD
1333                                         0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW2,CHA-B, DQ4-ODD
1334                                         0x10101010,0x10101010,0x10101010,0x10101010, // QW3,CHA-B, DQ4-ODD
1335                                         0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW4,CHA-B, DQ4-ODD
1336                                         0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW5,CHA-B, DQ4-ODD
1337                                         0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW6,CHA-B, DQ4-ODD
1338                                         0x10101010,0x10101010,0x10101010,0x10101010, // QW7,CHA-B, DQ4-ODD
1339                                         0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW0,CHA-B, DQ5-ODD
1340                                         0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW1,CHA-B, DQ5-ODD
1341                                         0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW2,CHA-B, DQ5-ODD
1342                                         0x20202020,0x20202020,0x20202020,0x20202020, // QW3,CHA-B, DQ5-ODD
1343                                         0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW4,CHA-B, DQ5-ODD
1344                                         0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW5,CHA-B, DQ5-ODD
1345                                         0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW6,CHA-B, DQ5-ODD
1346                                         0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW7,CHA-B, DQ5-ODD
1347                                         0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW0,CHA-B, DQ6-ODD
1348                                         0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW1,CHA-B, DQ6-ODD
1349                                         0x40404040,0x40404040,0x40404040,0x40404040, // QW2,CHA-B, DQ6-ODD
1350                                         0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW3,CHA-B, DQ6-ODD
1351                                         0x40404040,0x40404040,0x40404040,0x40404040, // QW4,CHA-B, DQ6-ODD
1352                                         0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW5,CHA-B, DQ6-ODD
1353                                         0x40404040,0x40404040,0x40404040,0x40404040, // QW6,CHA-B, DQ6-ODD
1354                                         0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW7,CHA-B, DQ6-ODD
1355                                         0x80808080,0x80808080,0x80808080,0x80808080, // QW0,CHA-B, DQ7-ODD
1356                                         0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F, // QW1,CHA-B, DQ7-ODD
1357                                         0x80808080,0x80808080,0x80808080,0x80808080, // QW2,CHA-B, DQ7-ODD
1358                                         0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F, // QW3,CHA-B, DQ7-ODD
1359                                         0x80808080,0x80808080,0x80808080,0x80808080, // QW4,CHA-B, DQ7-ODD
1360                                         0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F, // QW5,CHA-B, DQ7-ODD
1361                                         0x80808080,0x80808080,0x80808080,0x80808080, // QW6,CHA-B, DQ7-ODD
1362                                         0x80808080,0x80808080,0x80808080,0x80808080  // QW7,CHA-B, DQ7-ODD
1363                 };
1364         uint8_t pattern_buf_x[64 * 18 + 16]; // We need to two cache line So have more 16 bytes to keep 16 byte alignment */
1365         uint8_t *buf_a;
1366
1367         unsigned pattern;
1368         uint32_t dword;
1369         uint32_t ecc_bit;
1370         unsigned Errors;
1371         unsigned channel;
1372         int i;
1373         unsigned DQSWrDelay;
1374         unsigned is_Width128 = sysinfo->meminfo[ctrl->node_id].is_Width128;
1375         uint8_t *dqs_delay_a = &sysinfo->dqs_delay_a[ctrl->node_id * 2*2*9]; //channel 2, direction 2 , bytelane *9
1376
1377         //enable SSE2
1378         enable_sse2();
1379
1380         //wrap32dis
1381         set_wrap32dis();
1382
1383         //disable ECC temp
1384         dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
1385         ecc_bit = dword & DCL_DimmEccEn;
1386         dword &= ~(DCL_DimmEccEn);
1387         pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dword);
1388
1389         //SetupDqsPattern
1390         buf_a = (uint8_t *)(((uint32_t)(&pattern_buf_x[0]) + 0x10) & (~0xf));
1391
1392         if(is_Width128){
1393                 pattern = 1;
1394                 for(i=0;i<16*18;i++) {
1395                         *((uint32_t *)(buf_a + i*4)) = TestPatternJD1b[i];
1396                  }
1397         }
1398         else {
1399                 pattern = 0;
1400                 for(i=0; i<16*9;i++) {
1401                         *((uint32_t *)(buf_a + i*4)) = TestPatternJD1a[i];
1402                 }
1403
1404         }
1405
1406         print_debug_dqs("\nTrainDQSRdWrPos: 0 ctrl ", ctrl->node_id, 0);
1407
1408         printk(BIOS_DEBUG, "TrainDQSRdWrPos: buf_a:%p\n", buf_a);
1409
1410         Errors = 0;
1411         channel = 0;
1412
1413         if (!(sysinfo->meminfo[ctrl->node_id].dimm_mask & 0x0F) &&
1414              (sysinfo->meminfo[ctrl->node_id].dimm_mask & 0xF0)) { /* channelB only? */
1415                 channel = 1;
1416         }
1417
1418         while( (channel<2) && (!Errors)) {
1419                 print_debug_dqs("\tTrainDQSRdWrPos: 1 channel ",channel, 1);
1420                 for(DQSWrDelay = 0; DQSWrDelay < 48; DQSWrDelay++) {
1421                         unsigned err;
1422                         SetDQSDelayAllCSR(ctrl, channel, DQS_WRITEDIR, DQSWrDelay);
1423                         print_debug_dqs("\t\tTrainDQSRdWrPos: 21 DQSWrDelay ", DQSWrDelay, 2);
1424                         err= TrainReadDQS(ctrl, channel, pattern, buf_a, dqs_delay_a, sysinfo);
1425                         print_debug_dqs("\t\tTrainDQSRdWrPos: 22 err ",err, 2);
1426                         if(err == 0) break;
1427                         Errors |= err;
1428                 }
1429
1430                 print_debug_dqs("\tTrainDQSRdWrPos: 3 DQSWrDelay ", DQSWrDelay, 1);
1431
1432                 if(DQSWrDelay < 48) {
1433                         Errors = TrainWriteDQS(ctrl, channel, pattern, buf_a, dqs_delay_a, sysinfo);
1434                         print_debug_dqs("\tTrainDQSRdWrPos: 4 Errors ", Errors, 1);
1435
1436                 }
1437                 channel++;
1438                 if(!is_Width128){
1439                         //FIXME: 64MuxMode??
1440                         channel++; // skip channel if 64-bit mode
1441                 }
1442         }
1443
1444         //Enable ECC again
1445         dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
1446         dword &= ~(DCL_DimmEccEn);
1447         dword |= ecc_bit;
1448         pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dword);
1449
1450         //Clear wrap32dis
1451
1452         clear_wrap32dis();
1453
1454         //restore SSE2 setting
1455         disable_sse2();
1456
1457         print_debug_dqs("TrainDQSRdWrPos: ", 5, 0);
1458
1459         return Errors;
1460
1461 }
/*
 * Look up the stored DQS delay of a single byte lane.
 *
 * dqs_delay_a is indexed as [channel][direction][bytelane], with two
 * directions per channel and nine byte lanes per direction.
 */
static inline uint8_t get_dqs_delay(unsigned channel, unsigned bytelane, unsigned direction, uint8_t *dqs_delay_a)
{
        const unsigned dirs_per_channel = 2;
        const unsigned lanes_per_dir = 9;
        unsigned slot = (channel * dirs_per_channel + direction) * lanes_per_dir + bytelane;

        return dqs_delay_a[slot];
}
1466
/*
 * Interpolate a DQS delay between the stored delays of two byte lanes.
 *
 * InterFactor is the blend weight in 1/256 steps:
 *      0x00: 100% ByteLane0
 *      0x80: 50% between ByteLane0 and ByteLane1
 *      0xff: 99.6% ByteLane1 and 0.4% ByteLane0
 *
 * The interpolation always starts from the smaller of the two delays; when
 * ByteLane0 holds the larger delay the weight is mirrored (0xff - InterFactor)
 * so that it still expresses the distance from ByteLane0.
 *
 * Returns the interpolated delay.
 */
static unsigned CalcEccDQSPos(unsigned channel,unsigned ByteLane0, unsigned ByteLane1, unsigned InterFactor, unsigned Direction, uint8_t *dqs_delay_a)
{
        unsigned delay0 = get_dqs_delay(channel, ByteLane0, Direction, dqs_delay_a);
        unsigned delay1 = get_dqs_delay(channel, ByteLane1, Direction, dqs_delay_a);
        unsigned lower, span, weight;

        if (delay0 > delay1) {
                lower = delay1;
                span = delay0 - delay1;
                weight = 0xff - InterFactor;
        } else {
                lower = delay0;
                span = delay1 - delay0;
                weight = InterFactor;
        }

        /* The shift scales by 1/256 (not 1/255), so the result rounds down. */
        return lower + ((span * weight) >> 8);
}
1501
1502 static void SetEccDQSRdWrPos(const struct mem_controller *ctrl, struct sys_info *sysinfo)
1503 {
1504         unsigned channel;
1505         unsigned ByteLane;
1506         unsigned Direction;
1507         unsigned lane0, lane1, ratio;
1508         unsigned dqs_delay;
1509
1510         unsigned direction[] = { DQS_READDIR, DQS_WRITEDIR };
1511         int i;
1512         uint8_t *dqs_delay_a = &sysinfo->dqs_delay_a[ctrl->node_id * 2*2*9]; //channel 2, direction 2 , bytelane *9
1513
1514         ByteLane = 8;
1515
1516         for(channel = 0; channel < 2; channel++) {
1517                 for(i=0;i<2;i++) {
1518                         Direction = direction[i];
1519                         lane0 = 4; lane1 = 5; ratio = 0;
1520                         dqs_delay = CalcEccDQSPos(channel, lane0, lane1, ratio, Direction, dqs_delay_a);
1521                         print_debug_dqs_pair("\t\tSetEccDQSRdWrPos: channel ", channel, Direction==DQS_READDIR? " R dqs_delay":" W dqs_delay",  dqs_delay, 2);
1522                         SetDQSDelayCSR(ctrl, channel, ByteLane, Direction, dqs_delay);
1523                         save_dqs_delay(channel, ByteLane, Direction, dqs_delay_a, dqs_delay);
1524                 }
1525         }
1526 }
1527
1528 static unsigned train_DqsRcvrEn(const struct mem_controller *ctrl, unsigned Pass, struct sys_info *sysinfo)
1529 {
1530         print_debug_dqs("\ntrain_DqsRcvrEn: begin ctrl ", ctrl->node_id, 0);
1531         if(TrainRcvrEn(ctrl, Pass, sysinfo)) {
1532                 return 1;
1533         }
1534         print_debug_dqs("\ntrain_DqsRcvrEn: end ctrl ", ctrl->node_id, 0);
1535         return 0;
1536
1537 }
1538 static unsigned train_DqsPos(const struct mem_controller *ctrl, struct sys_info *sysinfo)
1539 {
1540         print_debug_dqs("\ntrain_DqsPos: begin ctrl ", ctrl->node_id, 0);
1541         if(TrainDQSRdWrPos(ctrl, sysinfo) != 0) {
1542                 printk(BIOS_ERR, "\nDQS Training Rd Wr failed ctrl%02x\n", ctrl->node_id);
1543                 return 1;
1544         }
1545         else {
1546                 SetEccDQSRdWrPos(ctrl, sysinfo);
1547         }
1548         print_debug_dqs("\ntrain_DqsPos: end ctrl ", ctrl->node_id, 0);
1549         return 0;
1550
1551 }
1552
1553 #if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
1554 static void f0_svm_workaround(int controllers, const struct mem_controller *ctrl, tsc_t *tsc0, struct sys_info *sysinfo)
1555 {
1556         tsc_t tsc1[8];
1557         unsigned cpu_f0_f1[8];
1558         int i;
1559
1560         print_debug_addr("dqs_timing: tsc1[8] :", tsc1);
1561
1562         for(i = 0; i < controllers; i++) {
1563                 if (!sysinfo->ctrl_present[i])
1564                         continue;
1565
1566                 /* Skip everything if I don't have any memory on this controller */
1567                 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1568
1569                 uint32_t dword;
1570
1571                 cpu_f0_f1[i] = is_cpu_pre_f2_in_bsp(i);
1572
1573                 if(!cpu_f0_f1[i]) continue;
1574
1575                 dword = pci_read_config32(ctrl[i].f2, DRAM_CTRL);
1576                 dword &= ~DC_DqsRcvEnTrain;
1577                 pci_write_config32(ctrl[i].f2, DRAM_CTRL, dword);
1578
1579                 dword = pci_read_config32(ctrl[i].f2, DRAM_INIT);
1580                 dword |= DI_EnDramInit;
1581                 pci_write_config32(ctrl[i].f2, DRAM_INIT, dword);
1582                 dword &= ~DI_EnDramInit;
1583                 pci_write_config32(ctrl[i].f2, DRAM_INIT, dword);
1584
1585                 tsc1[i] = rdtsc();
1586                 print_debug_dqs_tsc("begin: tsc1", i, tsc1[i].hi, tsc1[i].lo, 2);
1587
1588                 dword = tsc1[i].lo + tsc0[i].lo;
1589                 if((dword<tsc1[i].lo) || (dword<tsc0[i].lo)) {
1590                         tsc1[i].hi++;
1591                 }
1592                 tsc1[i].lo = dword;
1593                 tsc1[i].hi+= tsc0[i].hi;
1594
1595                 print_debug_dqs_tsc("end  : tsc1", i, tsc1[i].hi, tsc1[i].lo, 2);
1596
1597         }
1598
1599         for(i = 0; i < controllers; i++) {
1600                 if (!sysinfo->ctrl_present[i])
1601                         continue;
1602
1603                 /* Skip everything if I don't have any memory on this controller */
1604                 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1605
1606                 if(!cpu_f0_f1[i]) continue;
1607
1608                 tsc_t tsc;
1609
1610                 do {
1611                         tsc = rdtsc();
1612                 } while ((tsc1[i].hi>tsc.hi) || ((tsc1[i].hi==tsc.hi) && (tsc1[i].lo>tsc.lo)));
1613
1614                 print_debug_dqs_tsc("end  : tsc ", i, tsc.hi, tsc.lo, 2);
1615         }
1616
1617 }
1618
1619 #endif
1620
1621
1622 /* setting variable mtrr, comes from linux kernel source */
1623 static void set_var_mtrr_dqs(
1624         unsigned int reg, unsigned long basek, unsigned long sizek,
1625         unsigned char type, unsigned address_bits)
1626 {
1627         msr_t base, mask;
1628         unsigned address_mask_high;
1629
1630         address_mask_high = ((1u << (address_bits - 32u)) - 1u);
1631
1632         base.hi = basek >> 22;
1633         base.lo  = basek << 10;
1634
1635         if (sizek < 4*1024*1024) {
1636                 mask.hi = address_mask_high;
1637                 mask.lo = ~((sizek << 10) -1);
1638         }
1639         else {
1640                 mask.hi = address_mask_high & (~((sizek >> 22) -1));
1641                 mask.lo = 0;
1642         }
1643
1644         if (reg >= 8)
1645                 return;
1646
1647         if (sizek == 0) {
1648                 msr_t zero;
1649                 zero.lo = zero.hi = 0;
1650                 /* The invalid bit is kept in the mask, so we simply clear the
1651                    relevant mask register to disable a range. */
1652                 wrmsr (MTRRphysMask_MSR(reg), zero);
1653         } else {
1654                 /* Bit 32-35 of MTRRphysMask should be set to 1 */
1655                 base.lo |= type;
1656                 mask.lo |= 0x800;
1657                 wrmsr (MTRRphysBase_MSR(reg), base);
1658                 wrmsr (MTRRphysMask_MSR(reg), mask);
1659         }
1660 }
1661
1662
/*
 * fms: find most significant bit set (derived from the Linux kernel source).
 *
 * Returns the index (0..31) of the highest set bit in x.
 * Returns 0 when x is 0, which is indistinguishable from x == 1.
 */
static inline unsigned int fms(unsigned int x)
{
        int r;

        /*
         * BSR sets ZF when the source is zero and leaves the destination
         * undefined, hence the explicit fallback load of 0.
         * The input constraint is "rm" rather than "g": "g" also allows an
         * immediate, which BSR cannot encode, so a constant argument could
         * otherwise produce an assembler error. The flags are modified, so
         * "cc" is listed as clobbered.
         */
        __asm__("bsrl %1,%0\n\t"
                "jnz 1f\n\t"
                "movl $0,%0\n"
                "1:" : "=r" (r) : "rm" (x) : "cc");
        return r;
}
1674
/*
 * fls: find least significant bit set.
 *
 * Returns the index (0..31) of the lowest set bit in x, or 32 when x is 0.
 * Note that this is NOT the BSD/Linux "find last set" despite the name.
 */
static inline unsigned int fls(unsigned int x)
{
        int r;

        /*
         * BSF sets ZF when the source is zero and leaves the destination
         * undefined, hence the explicit fallback load of 32.
         * The input constraint is "rm" rather than "g": "g" also allows an
         * immediate, which BSF cannot encode, so a constant argument could
         * otherwise produce an assembler error. The flags are modified, so
         * "cc" is listed as clobbered.
         */
        __asm__("bsfl %1,%0\n\t"
                "jnz 1f\n\t"
                "movl $32,%0\n"
                "1:" : "=r" (r) : "rm" (x) : "cc");
        return r;
}
1686
1687 static unsigned int range_to_mtrr(unsigned int reg,
1688         unsigned long range_startk, unsigned long range_sizek,
1689         unsigned long next_range_startk, unsigned char type, unsigned address_bits)
1690 {
1691         if (!range_sizek || (reg >= 8)) {
1692                 return reg;
1693         }
1694         while(range_sizek) {
1695                 unsigned long max_align, align;
1696                 unsigned long sizek;
1697                 /* Compute the maximum size I can make a range */
1698                 max_align = fls(range_startk);
1699                 align = fms(range_sizek);
1700                 if (align > max_align) {
1701                         align = max_align;
1702                 }
1703                 sizek = 1 << align;
1704 #if CONFIG_MEM_TRAIN_SEQ != 1
1705                 printk(BIOS_DEBUG, "Setting variable MTRR %d, base: %4ldMB, range: %4ldMB, type %s\n",
1706                         reg, range_startk >>10, sizek >> 10,
1707                         (type==MTRR_TYPE_UNCACHEABLE)?"UC":
1708                             ((type==MTRR_TYPE_WRBACK)?"WB":"Other")
1709                         );
1710 #endif
1711                 set_var_mtrr_dqs(reg++, range_startk, sizek, type, address_bits);
1712                 range_startk += sizek;
1713                 range_sizek -= sizek;
1714                 if (reg >= 8)
1715                         break;
1716         }
1717         return reg;
1718 }
1719
1720 static void set_top_mem_ap(unsigned tom_k, unsigned tom2_k)
1721 {
1722         msr_t msr;
1723
1724         /* Now set top of memory */
1725         msr.lo = (tom2_k & 0x003fffff) << 10;
1726         msr.hi = (tom2_k & 0xffc00000) >> 22;
1727         wrmsr(TOP_MEM2, msr);
1728
1729         msr.lo = (tom_k & 0x003fffff) << 10;
1730         msr.hi = (tom_k & 0xffc00000) >> 22;
1731         wrmsr(TOP_MEM, msr);
1732 }
1733
1734 static void setup_mtrr_dqs(unsigned tom_k, unsigned tom2_k)
1735 {
1736         unsigned reg;
1737         msr_t msr;
1738
1739 #if 0
1740         //still enable from cache_as_ram.inc
1741         msr = rdmsr(SYSCFG_MSR);
1742         msr.lo |= SYSCFG_MSR_MtrrFixDramModEn;
1743         wrmsr(SYSCFG_MSR,msr);
1744 #endif
1745
1746         //[0,512k), [512k, 640k)
1747         msr.hi = 0x1e1e1e1e;
1748         msr.lo = msr.hi;
1749         wrmsr(0x250, msr);
1750         wrmsr(0x258, msr);
1751
1752         //[1M, TOM)
1753         reg = range_to_mtrr(2, 0, tom_k,4*1024*1024, MTRR_TYPE_WRBACK, 40);
1754
1755         //[4G, TOM2)
1756         if(tom2_k) {
1757                 //enable tom2 and type
1758                 msr = rdmsr(SYSCFG_MSR);
1759                 msr.lo |= (1<<21) | (1<<22); //MtrrTom2En and Tom2ForceMemTypeWB
1760                 wrmsr(SYSCFG_MSR, msr);
1761         }
1762
1763 }
1764
1765 static void clear_mtrr_dqs(unsigned tom2_k)
1766 {
1767         msr_t msr;
1768         unsigned i;
1769
1770         //still enable from cache_as_ram.inc
1771         msr = rdmsr(SYSCFG_MSR);
1772         msr.lo |= SYSCFG_MSR_MtrrFixDramModEn;
1773         wrmsr(SYSCFG_MSR,msr);
1774
1775         //[0,512k), [512k, 640k)
1776         msr.hi = 0;
1777         msr.lo = msr.hi;
1778         wrmsr(0x250, msr);
1779         wrmsr(0x258, msr);
1780
1781         //[1M, TOM)
1782         for(i=0x204;i<0x210;i++) {
1783                 wrmsr(i, msr);
1784         }
1785
1786         //[4G, TOM2)
1787         if(tom2_k) {
1788                 //enable tom2 and type
1789                 msr = rdmsr(SYSCFG_MSR);
1790                 msr.lo &= ~((1<<21) | (1<<22)); //MtrrTom2En and Tom2ForceMemTypeWB
1791                 wrmsr(SYSCFG_MSR, msr);
1792         }
1793 }
1794
1795 static void set_htic_bit(unsigned i, unsigned val, unsigned bit)
1796 {
1797         uint32_t dword;
1798         dword = pci_read_config32(PCI_DEV(0, 0x18+i, 0), HT_INIT_CONTROL);
1799         dword &= ~(1<<bit);
1800         dword |= ((val & 1) <<bit);
1801         pci_write_config32(PCI_DEV(0, 0x18+i, 0), HT_INIT_CONTROL, dword);
1802 }
1803
1804
1805 static unsigned get_htic_bit(unsigned i, unsigned bit)
1806 {
1807         uint32_t dword;
1808         dword = pci_read_config32(PCI_DEV(0, 0x18+i, 0), HT_INIT_CONTROL);
1809         dword &= (1<<bit);
1810         return dword;
1811 }
1812
/*
 * Spin until bit 9 of node 0's HT_INIT_CONTROL register reads as set.
 * That bit is the flag written by set_sysinfo_in_ram(). There is no timeout.
 */
static void wait_till_sysinfo_in_ram(void)
{
        while (!get_htic_bit(0, 9))
                ;
}
1819
/*
 * Publish the "sysinfo in RAM" flag: store the low bit of val in bit 9 of
 * node 0's HT_INIT_CONTROL register.
 */
static void set_sysinfo_in_ram(unsigned val)
{
        const unsigned node = 0;
        const unsigned flag_bit = 9;

        set_htic_bit(node, val, flag_bit);
}
1824
1825 #ifdef S3_NVRAM_EARLY
1826 // Don't define these prototypes as the real functions are already included
1827 // at this point.
1828 //
1829 //int s3_save_nvram_early(u32 dword, int size, int  nvram_pos);
1830 //int s3_load_nvram_early(int size, u32 *old_dword, int nvram_pos);
1831 #else
1832 static inline int s3_save_nvram_early(u32 dword, int size, int  nvram_pos)
1833 {
1834         return nvram_pos;
1835 }
1836
1837 static inline int s3_load_nvram_early(int size, u32 *old_dword, int nvram_pos)
1838 {
1839         die("No memory NVRAM loader for DQS data! Unable to restore memory state\n");
1840
1841         return nvram_pos; /* Make GCC happy */
1842 }
1843 #endif
1844
1845 #if CONFIG_MEM_TRAIN_SEQ == 0
1846 static int save_index_to_pos(unsigned int dev, int size, int index, int nvram_pos)
1847 {
1848         u32 dword = pci_read_config32_index_wait(dev, 0x98, index);
1849
1850         return s3_save_nvram_early(dword, size, nvram_pos);
1851 }
1852 #endif
1853
1854 static int load_index_to_pos(unsigned int dev, int size, int index, int nvram_pos)
1855 {
1856
1857         u32 old_dword = pci_read_config32_index_wait(dev, 0x98, index);
1858         nvram_pos = s3_load_nvram_early(size, &old_dword, nvram_pos);
1859         pci_write_config32_index_wait(dev, 0x98, index, old_dword);
1860         return nvram_pos;
1861 }
1862
/*
 * Restore the indexed DQS registers of one channel from NVRAM.
 *
 * Channel ch uses the index window starting at ch * 0x20. The table lists
 * each register offset with the number of bytes restored for it; the sizes
 * add up to 30 bytes per channel.
 *
 * Returns the NVRAM position after the last register.
 */
static int dqs_load_MC_NVRAM_ch(unsigned int dev, int ch, int pos)
{
        static const struct {
                unsigned char offset;
                unsigned char size;
        } regs[] = {
                { 0x00, 4 }, { 0x01, 4 }, { 0x02, 4 }, { 0x03, 1 },
                { 0x04, 4 }, { 0x05, 4 }, { 0x06, 4 }, { 0x07, 1 },
                { 0x10, 1 }, { 0x13, 1 }, { 0x16, 1 }, { 0x19, 1 },
        };
        const int base = ch * 0x20;
        unsigned k;

        for (k = 0; k < sizeof(regs) / sizeof(regs[0]); k++) {
                pos = load_index_to_pos(dev, regs[k].size, regs[k].offset + base, pos);
        }
        return pos;
}
1881
1882 #if CONFIG_MEM_TRAIN_SEQ == 0
/*
 * Save the indexed DQS registers of one channel to NVRAM.
 *
 * Channel ch uses the index window starting at ch * 0x20. The table lists
 * each register offset with the number of bytes saved for it; the sizes
 * add up to 30 bytes per channel.
 *
 * Returns the NVRAM position after the last register.
 */
static int dqs_save_MC_NVRAM_ch(unsigned int dev, int ch, int pos)
{
        static const struct {
                unsigned char offset;
                unsigned char size;
        } regs[] = {
                { 0x00, 4 }, { 0x01, 4 }, { 0x02, 4 }, { 0x03, 1 },
                { 0x04, 4 }, { 0x05, 4 }, { 0x06, 4 }, { 0x07, 1 },
                { 0x10, 1 }, { 0x13, 1 }, { 0x16, 1 }, { 0x19, 1 },
        };
        const int base = ch * 0x20;
        unsigned k;

        for (k = 0; k < sizeof(regs) / sizeof(regs[0]); k++) {
                pos = save_index_to_pos(dev, regs[k].size, regs[k].offset + base, pos);
        }
        return pos;
}
1901
1902 static void dqs_save_MC_NVRAM(unsigned int dev)
1903 {
1904         int pos = 0;
1905         u32 reg;
1906         printk(BIOS_DEBUG, "DQS SAVE NVRAM: %x\n", dev);
1907         pos = dqs_save_MC_NVRAM_ch(dev, 0, pos);
1908         pos = dqs_save_MC_NVRAM_ch(dev, 1, pos);
1909         /* save the maxasync lat here */
1910         reg = pci_read_config32(dev, DRAM_CONFIG_HIGH);
1911         pos = s3_save_nvram_early(reg, 4, pos);
1912 }
1913 #endif
1914
1915 static void dqs_restore_MC_NVRAM(unsigned int dev)
1916 {
1917         int pos = 0;
1918         u32 reg;
1919
1920         printk(BIOS_DEBUG, "DQS RESTORE FROM NVRAM: %x\n", dev);
1921         pos = dqs_load_MC_NVRAM_ch(dev, 0, pos);
1922         pos = dqs_load_MC_NVRAM_ch(dev, 1, pos);
1923         /* load the maxasync lat here */
1924         pos = s3_load_nvram_early(4, &reg, pos);
1925         reg &= (DCH_MaxAsyncLat_MASK <<DCH_MaxAsyncLat_SHIFT);
1926         reg |= pci_read_config32(dev, DRAM_CONFIG_HIGH);
1927         pci_write_config32(dev, DRAM_CONFIG_HIGH, reg);
1928 }
1929
1930 #if CONFIG_MEM_TRAIN_SEQ == 0
1931 #if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
1932 static void dqs_timing(int controllers, const struct mem_controller *ctrl, tsc_t *tsc0, struct sys_info *sysinfo)
1933 #else
1934 static void dqs_timing(int controllers, const struct mem_controller *ctrl, struct sys_info *sysinfo)
1935 #endif
1936 {
1937         int  i;
1938
1939         tsc_t tsc[5];
1940
1941         //need to enable mtrr, so dqs training could access the test address
1942         setup_mtrr_dqs(sysinfo->tom_k, sysinfo->tom2_k);
1943
1944         for(i = 0; i < controllers; i++) {
1945                 if (!sysinfo->ctrl_present[ i ])
1946                         continue;
1947
1948                 /* Skip everything if I don't have any memory on this controller */
1949                 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1950
1951                 fill_mem_cs_sysinfo(i, ctrl+i, sysinfo);
1952         }
1953
1954         tsc[0] = rdtsc();
1955         for(i = 0; i < controllers; i++) {
1956                 if (!sysinfo->ctrl_present[ i ])
1957                         continue;
1958
1959                 /* Skip everything if I don't have any memory on this controller */
1960                 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1961
1962                 printk(BIOS_DEBUG, "DQS Training:RcvrEn:Pass1: %02x\n", i);
1963                 if(train_DqsRcvrEn(ctrl+i, 1, sysinfo)) goto out;
1964                 printk(BIOS_DEBUG, " done\n");
1965         }
1966
1967         tsc[1] = rdtsc();
1968 #if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1
1969         f0_svm_workaround(controllers, ctrl, tsc0, sysinfo);
1970 #endif
1971
1972         tsc[2] = rdtsc();
1973         for(i = 0; i < controllers; i++) {
1974                 if (!sysinfo->ctrl_present[i])
1975                         continue;
1976
1977                 /* Skip everything if I don't have any memory on this controller */
1978                 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1979
1980                 printk(BIOS_DEBUG, "DQS Training:DQSPos: %02x\n", i);
1981                 if(train_DqsPos(ctrl+i, sysinfo)) goto out;
1982                 printk(BIOS_DEBUG, " done\n");
1983         }
1984
1985         tsc[3] = rdtsc();
1986         for(i = 0; i < controllers; i++) {
1987                 if (!sysinfo->ctrl_present[i])
1988                         continue;
1989
1990                 /* Skip everything if I don't have any memory on this controller */
1991                 if(sysinfo->meminfo[i].dimm_mask==0x00) continue;
1992
1993                 printk(BIOS_DEBUG, "DQS Training:RcvrEn:Pass2: %02x\n", i);
1994                 if(train_DqsRcvrEn(ctrl+i, 2, sysinfo)) goto out;
1995                 printk(BIOS_DEBUG, " done\n");
1996                 sysinfo->mem_trained[i]=1;
1997                 dqs_save_MC_NVRAM((ctrl+i)->f2);
1998         }
1999
2000 out:
2001         tsc[4] = rdtsc();
2002         clear_mtrr_dqs(sysinfo->tom2_k);
2003
2004
2005         for(i=0;i<5;i++) {
2006                 print_debug_dqs_tsc_x("DQS Training:tsc", i,  tsc[i].hi, tsc[i].lo);
2007         }
2008
2009
2010
2011 }
2012
2013 #endif
2014
2015
2016 #if CONFIG_MEM_TRAIN_SEQ > 0
2017
2018 static void dqs_timing(int i, const struct mem_controller *ctrl, struct sys_info *sysinfo, unsigned v)
2019 {
2020
2021         int ii;
2022
2023          tsc_t tsc[4];
2024
2025         if(sysinfo->mem_trained[i] != 0x80) return;
2026
2027 #if CONFIG_MEM_TRAIN_SEQ == 1
2028         //need to enable mtrr, so dqs training could access the test address
2029         setup_mtrr_dqs(sysinfo->tom_k, sysinfo->tom2_k);
2030 #endif
2031
2032         fill_mem_cs_sysinfo(i, ctrl, sysinfo);
2033
2034         if(v) {
2035                 tsc[0] = rdtsc();
2036
2037                 printk(BIOS_DEBUG, "set DQS timing:RcvrEn:Pass1: %02x\n", i);
2038         }
2039         if(train_DqsRcvrEn(ctrl, 1,  sysinfo)) {
2040                 sysinfo->mem_trained[i]=0x81; //
2041                 goto out;
2042         }
2043
2044         if(v) {
2045                 printk(BIOS_DEBUG, " done\n");
2046                 tsc[1] = rdtsc();
2047                 printk(BIOS_DEBUG, "set DQS timing:DQSPos: %02x\n", i);
2048         }
2049
2050         if(train_DqsPos(ctrl, sysinfo)) {
2051                 sysinfo->mem_trained[i]=0x82; //
2052                 goto out;
2053         }
2054
2055         if(v) {
2056                 printk(BIOS_DEBUG, " done\n");
2057                 tsc[2] = rdtsc();
2058
2059                 printk(BIOS_DEBUG, "set DQS timing:RcvrEn:Pass2: %02x\n", i);
2060         }
2061         if(train_DqsRcvrEn(ctrl, 2,  sysinfo)){
2062                 sysinfo->mem_trained[i]=0x83; //
2063                 goto out;
2064         }
2065
2066         if(v) {
2067                 printk(BIOS_DEBUG, " done\n");
2068
2069                 tsc[3] = rdtsc();
2070         }
2071
2072 out:
2073 #if CONFIG_MEM_TRAIN_SEQ == 1
2074         clear_mtrr_dqs(sysinfo->tom2_k);
2075 #endif
2076
2077         if(v) {
2078                 for(ii=0;ii<4;ii++) {
2079                       print_debug_dqs_tsc_x("Total DQS Training : tsc ", ii,  tsc[ii].hi, tsc[ii].lo);
2080                 }
2081         }
2082
2083         if(sysinfo->mem_trained[i] == 0x80) {
2084                 sysinfo->mem_trained[i]=1;
2085         }
2086
2087 }
2088 #endif
2089
2090 #if CONFIG_MEM_TRAIN_SEQ == 1
2091 static void train_ram(unsigned nodeid, struct sys_info *sysinfo, struct sys_info *sysinfox)
2092 {
2093         dqs_timing(nodeid, &sysinfo->ctrl[nodeid], sysinfo, 0); // keep the output tidy
2094 //      memcpy(&sysinfox->dqs_rcvr_dly_a[nodeid * 2 * 8],&sysinfo->dqs_rcvr_dly_a[nodeid * 2 * 8], 2*8);
2095 //      memcpy(&sysinfox->dqs_delay_a[nodeid * 2 * 2 * 9], &sysinfo->dqs_delay_a[nodeid * 2 * 2 * 9], 2 * 2 * 9);
2096         sysinfox->mem_trained[nodeid] = sysinfo->mem_trained[nodeid];
2097
2098 }
2099
2100 static inline void train_ram_on_node(unsigned nodeid, unsigned coreid, struct sys_info *sysinfo, unsigned retcall)
2101 {
2102         if(coreid) return; // only do it on core0
2103         struct sys_info *sysinfox = (void*)((CONFIG_RAMTOP) - CONFIG_DCACHE_RAM_GLOBAL_VAR_SIZE);
2104         wait_till_sysinfo_in_ram(); // use pci to get it
2105
2106         if(sysinfox->mem_trained[nodeid] == 0x80) {
2107         #if 0
2108                 sysinfo->tom_k = sysinfox->tom_k;
2109                 sysinfo->tom2_k = sysinfox->tom2_k;
2110                 sysinfo->meminfo[nodeid].is_Width128 = sysinfox->meminfo[nodeid].is_Width128;
2111                 sysinfo->mem_trained[nodeid] = sysinfox->mem_trained[nodeid];
2112                 memcpy(&sysinfo->ctrl[nodeid], &sysinfox->ctrl[nodeid], sizeof(struct mem_controller));
2113         #else
2114                 memcpy(sysinfo, sysinfox, CONFIG_DCACHE_RAM_GLOBAL_VAR_SIZE);
2115         #endif
2116                 set_top_mem_ap(sysinfo->tom_k, sysinfo->tom2_k); // keep the ap's tom consistent with bsp's
2117         #if CONFIG_AP_CODE_IN_CAR == 0
2118                 printk(BIOS_DEBUG, "CODE IN ROM AND RUN ON NODE: %02x\n", nodeid);
2119                 train_ram(nodeid, sysinfo, sysinfox);
2120         #else
2121                 /* Can copy dqs_timing to ap cache and run from cache?
2122                 * we need coreboot_ap_car.rom? and treat it as coreboot_ram.rom for ap ?
2123                 */
2124                 copy_and_run_ap_code_in_car(retcall);
2125                 // will go back by jump
2126         #endif
2127         }
2128 }
2129 #endif