Please bear with me - another rename checkin. This qualifies as trivial, no
[coreboot.git] / src / northbridge / amd / amdmct / mct / mctsrc.c
1 /*
2  * This file is part of the coreboot project.
3  *
4  * Copyright (C) 2007 Advanced Micro Devices, Inc.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
19  */
20
21 /******************************************************************************
22  Description: Receiver En and DQS Timing Training feature for DDR 2 MCT
23 ******************************************************************************/
24
25 static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat,
26                                 struct DCTStatStruc *pDCTstat, u8 Pass);
27 static u8 mct_SavePassRcvEnDly_D(struct DCTStatStruc *pDCTstat,
28                                         u8 rcvrEnDly, u8 Channel,
29                                         u8 receiver, u8 Pass);
30 static u8 mct_CompareTestPatternQW0_D(struct MCTStatStruc *pMCTstat,
31                                         struct DCTStatStruc *pDCTstat,
32                                         u32 addr, u8 channel,
33                                         u8 pattern, u8 Pass);
34 static void mct_InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
35                                          struct DCTStatStruc *pDCTstat);
36 static void InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
37                                 struct DCTStatStruc *pDCTstat, u8 Channel);
38 static void CalcEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat,
39                                 struct DCTStatStruc *pDCTstat, u8 Channel);
40 static void mct_SetFinalRcvrEnDly_D(struct DCTStatStruc *pDCTstat,
41                                 u8 RcvrEnDly, u8 where,
42                                 u8 Channel, u8 Receiver,
43                                 u32 dev, u32 index_reg,
44                                 u8 Addl_Index, u8 Pass);
45 static void CalcMaxLatency_D(struct DCTStatStruc *pDCTstat,
46                                 u8 DQSRcvrEnDly, u8 Channel);
47 static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u8 DQSRcvEnDly);
48 static void mct_SetDQSRcvEn_D(struct DCTStatStruc *pDCTstat, u32 val);
49 static void fenceDynTraining_D(struct MCTStatStruc *pMCTstat,
50                         struct DCTStatStruc *pDCTstat, u8 dct);
51 static void mct_DisableDQSRcvEn_D(struct DCTStatStruc *pDCTstat);
52
53
54 /* Warning:  These must be located so they do not cross a logical 16-bit
55    segment boundary! */
56 const static u32 TestPattern0_D[] = {
57         0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
58         0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
59         0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
60         0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
61 };
62 const static u32 TestPattern1_D[] = {
63         0x55555555, 0x55555555, 0x55555555, 0x55555555,
64         0x55555555, 0x55555555, 0x55555555, 0x55555555,
65         0x55555555, 0x55555555, 0x55555555, 0x55555555,
66         0x55555555, 0x55555555, 0x55555555, 0x55555555,
67 };
68 const static u32 TestPattern2_D[] = {
69         0x12345678, 0x87654321, 0x23456789, 0x98765432,
70         0x59385824, 0x30496724, 0x24490795, 0x99938733,
71         0x40385642, 0x38465245, 0x29432163, 0x05067894,
72         0x12349045, 0x98723467, 0x12387634, 0x34587623,
73 };
74
75 static void SetupRcvrPattern(struct MCTStatStruc *pMCTstat,
76                 struct DCTStatStruc *pDCTstat, u32 *buffer, u8 pass)
77 {
78         /*
79          * 1. Copy the alpha and Beta patterns from ROM to Cache,
80          *     aligning on 16 byte boundary
81          * 2. Set the ptr to DCTStatstruc.PtrPatternBufA for Alpha
82          * 3. Set the ptr to DCTStatstruc.PtrPatternBufB for Beta
83          */
84
85         u32 *buf_a;
86         u32 *buf_b;
87         u32 *p_A;
88         u32 *p_B;
89         u8 i;
90
91         buf_a = (u32 *)(((u32)buffer + 0x10) & (0xfffffff0));
92         buf_b = buf_a + 32; //??
93         p_A = (u32 *)SetupDqsPattern_1PassB(pass);
94         p_B = (u32 *)SetupDqsPattern_1PassA(pass);
95
96         for(i=0;i<16;i++) {
97                 buf_a[i] = p_A[i];
98                 buf_b[i] = p_B[i];
99         }
100
101         pDCTstat->PtrPatternBufA = (u32)buf_a;
102         pDCTstat->PtrPatternBufB = (u32)buf_b;
103 }
104
105
106 void mct_TrainRcvrEn_D(struct MCTStatStruc *pMCTstat,
107                         struct DCTStatStruc *pDCTstat, u8 Pass)
108 {
109         if(mct_checkNumberOfDqsRcvEn_1Pass(Pass))
110                 dqsTrainRcvrEn_SW(pMCTstat, pDCTstat, Pass);
111 }
112
113
114 static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat,
115                                 struct DCTStatStruc *pDCTstat, u8 Pass)
116 {
117         u8 Channel, RcvrEnDly, RcvrEnDlyRmin;
118         u8 Test0, Test1, CurrTest, CurrTestSide0, CurrTestSide1;
119         u8 CTLRMaxDelay, _2Ranks, PatternA, PatternB;
120         u8 Addl_Index = 0;
121         u8 Receiver;
122         u8 _DisableDramECC = 0, _Wrap32Dis = 0, _SSE2 = 0;
123         u8 RcvrEnDlyLimit, Final_Value, MaxDelay_CH[2];
124         u32 TestAddr0, TestAddr1, TestAddr0B, TestAddr1B;
125         u32 PatternBuffer[64+4]; /* FIXME: need increase 8? */
126         u32 Errors;
127
128         u32 val;
129         u32 reg;
130         u32 dev;
131         u32 index_reg;
132         u32 ch_start, ch_end, ch;
133         u32 msr;
134         u32 cr4;
135         u32 lo, hi;
136
137         u8 valid;
138         u32 tmp;
139         u8 LastTest;
140
141         print_debug_dqs("\nTrainRcvEn: Node", pDCTstat->Node_ID, 0);
142         print_debug_dqs("TrainRcvEn: Pass", Pass, 0);
143
144
145         dev = pDCTstat->dev_dct;
146         ch_start = 0;
147         if(!pDCTstat->GangedMode) {
148                 ch_end = 2;
149         } else {
150                 ch_end = 1;
151         }
152
153         for (ch = ch_start; ch < ch_end; ch++) {
154                 reg = 0x78 + (0x100 * ch);
155                 val = Get_NB32(dev, reg);
156                 val &= ~(0x3ff << 22);
157                 val |= (0x0c8 << 22);           /* Max Rd Lat */
158                 Set_NB32(dev, reg, val);
159         }
160
161         Final_Value = 1;
162         if (Pass == FirstPass) {
163                 mct_InitDQSPos4RcvrEn_D(pMCTstat, pDCTstat);
164         } else {
165                 pDCTstat->DimmTrainFail = 0;
166                 pDCTstat->CSTrainFail = ~pDCTstat->CSPresent;
167         }
168         print_t("TrainRcvrEn: 1\n");
169
170         cr4 = read_cr4();
171         if(cr4 & ( 1 << 9)) {   /* save the old value */
172                 _SSE2 = 1;
173         }
174         cr4 |= (1 << 9);        /* OSFXSR enable SSE2 */
175         write_cr4(cr4);
176         print_t("TrainRcvrEn: 2\n");
177
178         msr = HWCR;
179         _RDMSR(msr, &lo, &hi);
180         //FIXME: Why use SSEDIS
181         if(lo & (1 << 17)) {    /* save the old value */
182                 _Wrap32Dis = 1;
183         }
184         lo |= (1 << 17);        /* HWCR.wrap32dis */
185         lo &= ~(1 << 15);       /* SSEDIS */
186         _WRMSR(msr, lo, hi);    /* Setting wrap32dis allows 64-bit memory references in real mode */
187         print_t("TrainRcvrEn: 3\n");
188
189         _DisableDramECC = mct_DisableDimmEccEn_D(pMCTstat, pDCTstat);
190
191
192         if(pDCTstat->Speed == 1) {
193                 pDCTstat->T1000 = 5000; /* get the T1000 figure (cycle time (ns)*1K */
194         } else if(pDCTstat->Speed == 2) {
195                 pDCTstat->T1000 = 3759;
196         } else if(pDCTstat->Speed == 3) {
197                 pDCTstat->T1000 = 3003;
198         } else if(pDCTstat->Speed == 4) {
199                 pDCTstat->T1000 = 2500;
200         } else if(pDCTstat->Speed  == 5) {
201                 pDCTstat->T1000 = 1876;
202         } else {
203                 pDCTstat->T1000 = 0;
204         }
205
206         SetupRcvrPattern(pMCTstat, pDCTstat, PatternBuffer, Pass);
207         print_t("TrainRcvrEn: 4\n");
208
209         Errors = 0;
210         dev = pDCTstat->dev_dct;
211         CTLRMaxDelay = 0;
212
213         for (Channel = 0; Channel < 2; Channel++) {
214                 print_debug_dqs("\tTrainRcvEn51: Node ", pDCTstat->Node_ID, 1);
215                 print_debug_dqs("\tTrainRcvEn51: Channel ", Channel, 1);
216                 pDCTstat->Channel = Channel;
217
218                 MaxDelay_CH[Channel] = 0;
219                 index_reg = 0x98 + 0x100 * Channel;
220
221                 Receiver = mct_InitReceiver_D(pDCTstat, Channel);
222                 /* There are four receiver pairs, loosely associated with chipselects. */
223                 for (; Receiver < 8; Receiver += 2) {
224                         Addl_Index = (Receiver >> 1) * 3 + 0x10;
225                         LastTest = DQS_FAIL;
226
227                         /* mct_ModifyIndex_D */
228                         RcvrEnDlyRmin = RcvrEnDlyLimit = 0xff;
229
230                         print_debug_dqs("\t\tTrainRcvEnd52: index ", Addl_Index, 2);
231
232                         if(!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver)) {
233                                 print_t("\t\t\tRank not enabled_D\n");
234                                 continue;
235                         }
236
237                         TestAddr0 = mct_GetRcvrSysAddr_D(pMCTstat, pDCTstat, Channel, Receiver, &valid);
238                         if(!valid) {    /* Address not supported on current CS */
239                                 print_t("\t\t\tAddress not supported on current CS\n");
240                                 continue;
241                         }
242
243                         TestAddr0B = TestAddr0 + (BigPagex8_RJ8 << 3);
244
245                         if(mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver+1)) {
246                                 TestAddr1 = mct_GetRcvrSysAddr_D(pMCTstat, pDCTstat, Channel, Receiver+1, &valid);
247                                 if(!valid) {    /* Address not supported on current CS */
248                                         print_t("\t\t\tAddress not supported on current CS+1\n");
249                                         continue;
250                                 }
251                                 TestAddr1B = TestAddr1 + (BigPagex8_RJ8 << 3);
252                                 _2Ranks = 1;
253                         } else {
254                                 _2Ranks = TestAddr1 = TestAddr1B = 0;
255                         }
256
257                         print_debug_dqs("\t\tTrainRcvEn53: TestAddr0 ", TestAddr0, 2);
258                         print_debug_dqs("\t\tTrainRcvEn53: TestAddr0B ", TestAddr0B, 2);
259                         print_debug_dqs("\t\tTrainRcvEn53: TestAddr1 ", TestAddr1, 2);
260                         print_debug_dqs("\t\tTrainRcvEn53: TestAddr1B ", TestAddr1B, 2);
261
262                         /*
263                          * Get starting RcvrEnDly value
264                          */
265                         RcvrEnDly = mct_Get_Start_RcvrEnDly_1Pass(Pass);
266
267                         /* mct_GetInitFlag_D*/
268                         if (Pass == FirstPass) {
269                                 pDCTstat->DqsRcvEn_Pass = 0;
270                         } else {
271                                 pDCTstat->DqsRcvEn_Pass=0xFF;
272                         }
273                         pDCTstat->DqsRcvEn_Saved = 0;
274
275
276                         while(RcvrEnDly < RcvrEnDlyLimit) {     /* sweep Delay value here */
277                                 print_debug_dqs("\t\t\tTrainRcvEn541: RcvrEnDly ", RcvrEnDly, 3);
278
279                                 /* callback not required
280                                 if(mct_AdjustDelay_D(pDCTstat, RcvrEnDly))
281                                         goto skipDly;
282                                 */
283
284                                 /* Odd steps get another pattern such that even
285                                  and odd steps alternate. The pointers to the
286                                  patterns will be swaped at the end of the loop
287                                  so that they correspond. */
288                                 if(RcvrEnDly & 1) {
289                                         PatternA = 1;
290                                         PatternB = 0;
291                                 } else {
292                                         /* Even step */
293                                         PatternA = 0;
294                                         PatternB = 1;
295                                 }
296
297                                 mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0, PatternA); /* rank 0 of DIMM, testpattern 0 */
298                                 mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0B, PatternB); /* rank 0 of DIMM, testpattern 1 */
299                                 if(_2Ranks) {
300                                         mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1, PatternA); /*rank 1 of DIMM, testpattern 0 */
301                                         mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1B, PatternB); /*rank 1 of DIMM, testpattern 1 */
302                                 }
303
304                                 mct_SetRcvrEnDly_D(pDCTstat, RcvrEnDly, 0, Channel, Receiver, dev, index_reg, Addl_Index, Pass);
305
306                                 CurrTest = DQS_FAIL;
307                                 CurrTestSide0 = DQS_FAIL;
308                                 CurrTestSide1 = DQS_FAIL;
309
310                                 mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0); /*cache fills */
311                                 Test0 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr0, Channel, PatternA, Pass);/* ROM vs cache compare */
312                                 proc_IOCLFLUSH_D(TestAddr0);
313                                 ResetDCTWrPtr_D(dev, index_reg, Addl_Index);
314
315                                 print_debug_dqs("\t\t\tTrainRcvEn542: Test0 result ", Test0, 3);
316
317                                 // != 0x00 mean pass
318
319                                 if(Test0 == DQS_PASS) {
320                                         mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0B);        /*cache fills */
321                                         /* ROM vs cache compare */
322                                         Test1 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr0B, Channel, PatternB, Pass);
323                                         proc_IOCLFLUSH_D(TestAddr0B);
324                                         ResetDCTWrPtr_D(dev, index_reg, Addl_Index);
325
326                                         print_debug_dqs("\t\t\tTrainRcvEn543: Test1 result ", Test1, 3);
327
328                                         if(Test1 == DQS_PASS) {
329                                                 CurrTestSide0 = DQS_PASS;
330                                         }
331                                 }
332                                 if(_2Ranks) {
333                                         mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1); /*cache fills */
334                                         /* ROM vs cache compare */
335                                         Test0 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr1, Channel, PatternA, Pass);
336                                         proc_IOCLFLUSH_D(TestAddr1);
337                                         ResetDCTWrPtr_D(dev, index_reg, Addl_Index);
338
339                                         print_debug_dqs("\t\t\tTrainRcvEn544: Test0 result ", Test0, 3);
340
341                                         if(Test0 == DQS_PASS) {
342                                                 mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1B);        /*cache fills */
343                                                 /* ROM vs cache compare */
344                                                 Test1 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr1B, Channel, PatternB, Pass);
345                                                 proc_IOCLFLUSH_D(TestAddr1B);
346                                                 ResetDCTWrPtr_D(dev, index_reg, Addl_Index);
347
348                                                 print_debug_dqs("\t\t\tTrainRcvEn545: Test1 result ", Test1, 3);
349                                                 if(Test1 == DQS_PASS) {
350                                                         CurrTestSide1 = DQS_PASS;
351                                                 }
352                                         }
353                                 }
354
355                                 if(_2Ranks) {
356                                         if ((CurrTestSide0 == DQS_PASS) && (CurrTestSide1 == DQS_PASS)) {
357                                                 CurrTest = DQS_PASS;
358                                         }
359                                 } else if (CurrTestSide0 == DQS_PASS) {
360                                         CurrTest = DQS_PASS;
361                                 }
362
363
364                                 /* record first pass DqsRcvEn to stack */
365                                 valid = mct_SavePassRcvEnDly_D(pDCTstat, RcvrEnDly, Channel, Receiver, Pass);
366
367                                 /* Break(1:RevF,2:DR) or not(0) FIXME: This comment deosn't make sense */
368                                 if(valid == 2 || (LastTest == DQS_FAIL && valid == 1)) {
369                                         RcvrEnDlyRmin = RcvrEnDly;
370                                         break;
371                                 }
372
373                                 LastTest = CurrTest;
374
375                                 /* swap the rank 0 pointers */
376                                 tmp = TestAddr0;
377                                 TestAddr0 = TestAddr0B;
378                                 TestAddr0B = tmp;
379
380                                 /* swap the rank 1 pointers */
381                                 tmp = TestAddr1;
382                                 TestAddr1 = TestAddr1B;
383                                 TestAddr1B = tmp;
384
385                                 print_debug_dqs("\t\t\tTrainRcvEn56: RcvrEnDly ", RcvrEnDly, 3);
386
387                                 RcvrEnDly++;
388
389                         }       /* while RcvrEnDly */
390
391                         print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDly ", RcvrEnDly, 2);
392                         print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDlyRmin ", RcvrEnDlyRmin, 3);
393                         print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDlyLimit ", RcvrEnDlyLimit, 3);
394                         if(RcvrEnDlyRmin == RcvrEnDlyLimit) {
395                                 /* no passing window */
396                                 pDCTstat->ErrStatus |= 1 << SB_NORCVREN;
397                                 Errors |= 1 << SB_NORCVREN;
398                                 pDCTstat->ErrCode = SC_FatalErr;
399                         }
400
401                         if(RcvrEnDly > (RcvrEnDlyLimit - 1)) {
402                                 /* passing window too narrow, too far delayed*/
403                                 pDCTstat->ErrStatus |= 1 << SB_SmallRCVR;
404                                 Errors |= 1 << SB_SmallRCVR;
405                                 pDCTstat->ErrCode = SC_FatalErr;
406                                 RcvrEnDly = RcvrEnDlyLimit - 1;
407                                 pDCTstat->CSTrainFail |= 1 << Receiver;
408                                 pDCTstat->DimmTrainFail |= 1 << (Receiver + Channel);
409                         }
410
411                         // CHB_D0_B0_RCVRDLY set in mct_Average_RcvrEnDly_Pass
412                         mct_Average_RcvrEnDly_Pass(pDCTstat, RcvrEnDly, RcvrEnDlyLimit, Channel, Receiver, Pass);
413
414                         mct_SetFinalRcvrEnDly_D(pDCTstat, RcvrEnDly, Final_Value, Channel, Receiver, dev, index_reg, Addl_Index, Pass);
415
416                         if(pDCTstat->ErrStatus & (1 << SB_SmallRCVR)) {
417                                 Errors |= 1 << SB_SmallRCVR;
418                         }
419
420                         RcvrEnDly += Pass1MemClkDly;
421                         if(RcvrEnDly > CTLRMaxDelay) {
422                                 CTLRMaxDelay = RcvrEnDly;
423                         }
424
425                 }       /* while Receiver */
426
427                 MaxDelay_CH[Channel] = CTLRMaxDelay;
428         }       /* for Channel */
429
430         CTLRMaxDelay = MaxDelay_CH[0];
431         if (MaxDelay_CH[1] > CTLRMaxDelay)
432                 CTLRMaxDelay = MaxDelay_CH[1];
433
434         for (Channel = 0; Channel < 2; Channel++) {
435                 mct_SetMaxLatency_D(pDCTstat, Channel, CTLRMaxDelay); /* program Ch A/B MaxAsyncLat to correspond with max delay */
436         }
437
438         ResetDCTWrPtr_D(dev, index_reg, Addl_Index);
439
440         if(_DisableDramECC) {
441                 mct_EnableDimmEccEn_D(pMCTstat, pDCTstat, _DisableDramECC);
442         }
443
444         if (Pass == FirstPass) {
445                 /*Disable DQSRcvrEn training mode */
446                 print_t("TrainRcvrEn: mct_DisableDQSRcvEn_D\n");
447                 mct_DisableDQSRcvEn_D(pDCTstat);
448         }
449
450         if(!_Wrap32Dis) {
451                 msr = HWCR;
452                 _RDMSR(msr, &lo, &hi);
453                 lo &= ~(1<<17);         /* restore HWCR.wrap32dis */
454                 _WRMSR(msr, lo, hi);
455         }
456         if(!_SSE2){
457                 cr4 = read_cr4();
458                 cr4 &= ~(1<<9);         /* restore cr4.OSFXSR */
459                 write_cr4(cr4);
460         }
461
462 #if DQS_TRAIN_DEBUG > 0
463         {
464                 u8 Channel;
465                 print_debug("TrainRcvrEn: CH_MaxRdLat:\n");
466                 for(Channel = 0; Channel<2; Channel++) {
467                         print_debug("Channel:"); print_debug_hex8(Channel);
468                         print_debug(": ");
469                         print_debug_hex8( pDCTstat->CH_MaxRdLat[Channel] );
470                         print_debug("\n");
471                 }
472         }
473 #endif
474
475 #if DQS_TRAIN_DEBUG > 0
476         {
477                 u8 val;
478                 u8 Channel, Receiver;
479                 u8 i;
480                 u8 *p;
481
482                 print_debug("TrainRcvrEn: CH_D_B_RCVRDLY:\n");
483                 for(Channel = 0; Channel < 2; Channel++) {
484                         print_debug("Channel:"); print_debug_hex8(Channel); print_debug("\n");
485                         for(Receiver = 0; Receiver<8; Receiver+=2) {
486                                 print_debug("\t\tReceiver:");
487                                 print_debug_hex8(Receiver);
488                                 p = pDCTstat->CH_D_B_RCVRDLY[Channel][Receiver>>1];
489                                 print_debug(": ");
490                                 for (i=0;i<8; i++) {
491                                         val  = p[i];
492                                         print_debug_hex8(val);
493                                         print_debug(" ");
494                                 }
495                         print_debug("\n");
496                         }
497                 }
498         }
499 #endif
500
501         print_tx("TrainRcvrEn: Status ", pDCTstat->Status);
502         print_tx("TrainRcvrEn: ErrStatus ", pDCTstat->ErrStatus);
503         print_tx("TrainRcvrEn: ErrCode ", pDCTstat->ErrCode);
504         print_t("TrainRcvrEn: Done\n");
505 }
506
507
508 static u8 mct_InitReceiver_D(struct DCTStatStruc *pDCTstat, u8 dct)
509 {
510         if (pDCTstat->DIMMValidDCT[dct] == 0 ) {
511                 return 8;
512         } else {
513                 return 0;
514         }
515 }
516
517
518 static void mct_SetFinalRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u8 RcvrEnDly, u8 where, u8 Channel, u8 Receiver, u32 dev, u32 index_reg, u8 Addl_Index, u8 Pass/*, u8 *p*/)
519 {
520         /*
521          * Program final DqsRcvEnDly to additional index for DQS receiver
522          *  enabled delay
523          */
524         mct_SetRcvrEnDly_D(pDCTstat, RcvrEnDly, where, Channel, Receiver, dev, index_reg, Addl_Index, Pass);
525 }
526
527
528 static void mct_DisableDQSRcvEn_D(struct DCTStatStruc *pDCTstat)
529 {
530         u8 ch_end, ch;
531         u32 reg;
532         u32 dev;
533         u32 val;
534
535         dev = pDCTstat->dev_dct;
536         if (pDCTstat->GangedMode) {
537                 ch_end = 1;
538         } else {
539                 ch_end = 2;
540         }
541
542         for (ch=0; ch<ch_end; ch++) {
543                 reg = 0x78 + 0x100 * ch;
544                 val = Get_NB32(dev, reg);
545                 val &= ~(1 << DqsRcvEnTrain);
546                 Set_NB32(dev, reg, val);
547         }
548 }
549
550
551 /* mct_ModifyIndex_D
552  * Function only used once so it was inlined.
553  */
554
555
556 /* mct_GetInitFlag_D
557  * Function only used once so it was inlined.
558  */
559
560
561 void mct_SetRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u8 RcvrEnDly,
562                         u8 FinalValue, u8 Channel, u8 Receiver, u32 dev,
563                         u32 index_reg, u8 Addl_Index, u8 Pass)
564 {
565         u32 index;
566         u8 i;
567         u8 *p;
568         u32 val;
569
570         if(RcvrEnDly == 0xFE) {
571                 /*set the boudary flag */
572                 pDCTstat->Status |= 1 << SB_DQSRcvLimit;
573         }
574
575         /* DimmOffset not needed for CH_D_B_RCVRDLY array */
576
577
578         for(i=0; i < 8; i++) {
579                 if(FinalValue) {
580                         /*calculate dimm offset */
581                         p = pDCTstat->CH_D_B_RCVRDLY[Channel][Receiver >> 1];
582                         RcvrEnDly = p[i];
583                 }
584
585                 /* if flag=0, set DqsRcvEn value to reg. */
586                 /* get the register index from table */
587                 index = Table_DQSRcvEn_Offset[i >> 1];
588                 index += Addl_Index;    /* DIMMx DqsRcvEn byte0 */
589                 val = Get_NB32_index_wait(dev, index_reg, index);
590                 if(i & 1) {
591                         /* odd byte lane */
592                         val &= ~(0xFF << 16);
593                         val |= (RcvrEnDly << 16);
594                 } else {
595                         /* even byte lane */
596                         val &= ~0xFF;
597                         val |= RcvrEnDly;
598                 }
599                 Set_NB32_index_wait(dev, index_reg, index, val);
600         }
601
602 }
603
604 static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u8 DQSRcvEnDly)
605 {
606         u32 dev;
607         u32 reg;
608         u16 SubTotal;
609         u32 index_reg;
610         u32 reg_off;
611         u32 val;
612         u32 valx;
613
614         if(pDCTstat->GangedMode)
615                 Channel = 0;
616
617         dev = pDCTstat->dev_dct;
618         reg_off = 0x100 * Channel;
619         index_reg = 0x98 + reg_off;
620
621         /* Multiply the CAS Latency by two to get a number of 1/2 MEMCLKs units.*/
622         val = Get_NB32(dev, 0x88 + reg_off);
623         SubTotal = ((val & 0x0f) + 1) << 1;     /* SubTotal is 1/2 Memclk unit */
624
625         /* If registered DIMMs are being used then
626          *  add 1 MEMCLK to the sub-total.
627          */
628         val = Get_NB32(dev, 0x90 + reg_off);
629         if(!(val & (1 << UnBuffDimm)))
630                 SubTotal += 2;
631
632         /* If the address prelaunch is setup for 1/2 MEMCLKs then
633          *  add 1, else add 2 to the sub-total.
634          *  if (AddrCmdSetup || CsOdtSetup || CkeSetup) then K := K + 2;
635          */
636         val = Get_NB32_index_wait(dev, index_reg, 0x04);
637         if(!(val & 0x00202020))
638                 SubTotal += 1;
639         else
640                 SubTotal += 2;
641
642         /* If the F2x[1, 0]78[RdPtrInit] field is 4, 5, 6 or 7 MEMCLKs,
643          * then add 4, 3, 2, or 1 MEMCLKs, respectively to the sub-total. */
644         val = Get_NB32(dev, 0x78 + reg_off);
645         SubTotal += 8 - (val & 0x0f);
646
647         /* Convert bits 7-5 (also referred to as the course delay) of
648          * the current (or worst case) DQS receiver enable delay to
649          * 1/2 MEMCLKs units, rounding up, and add this to the sub-total.
650          */
651         SubTotal += DQSRcvEnDly >> 5;   /*BOZO-no rounding up */
652
653         /* Add 5.5 to the sub-total. 5.5 represents part of the
654          * processor specific constant delay value in the DRAM
655          * clock domain.
656          */
657         SubTotal <<= 1;         /*scale 1/2 MemClk to 1/4 MemClk */
658         SubTotal += 11;         /*add 5.5 1/2MemClk */
659
660         /* Convert the sub-total (in 1/2 MEMCLKs) to northbridge
661          * clocks (NCLKs) as follows (assuming DDR400 and assuming
662          * that no P-state or link speed changes have occurred).
663          */
664
665         /* New formula:
666          * SubTotal *= 3*(Fn2xD4[NBFid]+4)/(3+Fn2x94[MemClkFreq])/2 */
667         val = Get_NB32(dev, 0x94 + reg_off);
668
669         /* SubTotal div 4 to scale 1/4 MemClk back to MemClk */
670         val &= 7;
671         if (val == 4) {
672                 val++;          /* adjust for DDR2-1066 */
673         }
674         valx = (val + 3) << 2;
675
676         val = Get_NB32(pDCTstat->dev_nbmisc, 0xD4);
677         SubTotal *= ((val & 0x1f) + 4 ) * 3;
678
679         SubTotal /= valx;
680         if (SubTotal % valx) {  /* round up */
681                 SubTotal++;
682         }
683
684         /* Add 5 NCLKs to the sub-total. 5 represents part of the
685          * processor specific constant value in the northbridge
686          * clock domain.
687          */
688         SubTotal += 5;
689
690         pDCTstat->CH_MaxRdLat[Channel] = SubTotal;
691         if(pDCTstat->GangedMode) {
692                 pDCTstat->CH_MaxRdLat[1] = SubTotal;
693         }
694
695         /* Program the F2x[1, 0]78[MaxRdLatency] register with
696          * the total delay value (in NCLKs).
697          */
698
699         reg = 0x78 + reg_off;
700         val = Get_NB32(dev, reg);
701         val &= ~(0x3ff << 22);
702         val |= (SubTotal & 0x3ff) << 22;
703
704         /* program MaxRdLatency to correspond with current delay */
705         Set_NB32(dev, reg, val);
706 }
707
708
709 static u8 mct_SavePassRcvEnDly_D(struct DCTStatStruc *pDCTstat,
710                         u8 rcvrEnDly, u8 Channel,
711                         u8 receiver, u8 Pass)
712 {
713         u8 i;
714         u8 mask_Saved, mask_Pass;
715         u8 *p;
716
717         /* calculate dimm offset
718          * not needed for CH_D_B_RCVRDLY array
719          */
720
721         /* cmp if there has new DqsRcvEnDly to be recorded */
722         mask_Pass = pDCTstat->DqsRcvEn_Pass;
723
724         if(Pass == SecondPass) {
725                 mask_Pass = ~mask_Pass;
726         }
727
728         mask_Saved = pDCTstat->DqsRcvEn_Saved;
729         if(mask_Pass != mask_Saved) {
730
731                 /* find desired stack offset according to channel/dimm/byte */
732                 if(Pass == SecondPass) {
733                         // FIXME: SecondPass is never used for Barcelona p = pDCTstat->CH_D_B_RCVRDLY_1[Channel][receiver>>1];
734                         p = 0; // Keep the compiler happy.
735                 } else {
736                         mask_Saved &= mask_Pass;
737                         p = pDCTstat->CH_D_B_RCVRDLY[Channel][receiver>>1];
738                 }
739                 for(i=0; i < 8; i++) {
740                         /* cmp per byte lane */
741                         if(mask_Pass & (1 << i)) {
742                                 if(!(mask_Saved & (1 << i))) {
743                                         /* save RcvEnDly to stack, according to
744                                         the related Dimm/byte lane */
745                                         p[i] = (u8)rcvrEnDly;
746                                         mask_Saved |= 1 << i;
747                                 }
748                         }
749                 }
750                 pDCTstat->DqsRcvEn_Saved = mask_Saved;
751         }
752         return mct_SaveRcvEnDly_D_1Pass(pDCTstat, Pass);
753 }
754
755
756 static u8 mct_CompareTestPatternQW0_D(struct MCTStatStruc *pMCTstat,
757                                         struct DCTStatStruc *pDCTstat,
758                                         u32 addr, u8 channel,
759                                         u8 pattern, u8 Pass)
760 {
761         /* Compare only the first beat of data.  Since target addrs are cache
762          * line aligned, the Channel parameter is used to determine which
763          * cache QW to compare.
764          */
765
766         u8 *test_buf;
767         u8 i;
768         u8 result;
769         u8 *addr_lo_buf;
770
771         SetUpperFSbase(addr);   // needed?
772
773         if(Pass == FirstPass) {
774                 if(pattern==1) {
775                         test_buf = (u8 *)TestPattern1_D;
776                 } else {
777                         test_buf = (u8 *)TestPattern0_D;
778                 }
779         } else {                // Second Pass
780                 test_buf = (u8 *)TestPattern2_D;
781         }
782
783         addr_lo_buf = (u8 *) (addr << 8);
784         result = DQS_FAIL;
785
786         if((pDCTstat->Status & (1<<SB_128bitmode)) && channel ) {
787                 addr_lo_buf += 8;       /* second channel */
788                 test_buf += 8;
789         }
790
791
792 #if DQS_TRAIN_DEBUG > 4
793         print_debug("\t\t\t\t\t\tQW0 :   test_buf  = ");
794         print_debug_hex32((unsigned)test_buf);
795         print_debug(": ");
796         for (i=0; i<8; i++) {
797                 print_debug_hex8(test_buf[i]); print_debug(" ");
798         }
799         print_debug("\n");
800
801         print_debug("\t\t\t\t\t\tQW0 : addr_lo_buf = ");
802         print_debug_hex32((unsigned)addr_lo_buf);
803         print_debug(": ");
804         for (i=0; i<8; i++) {
805                 print_debug_hex8(addr_lo_buf[i]); print_debug(" ");
806         }
807         print_debug("\n");
808 #endif
809
810         /* prevent speculative execution of following instructions */
811         _EXECFENCE;
812
813         for (i=0; i<8; i++) {
814                 if(addr_lo_buf[i] == test_buf[i]) {
815                         pDCTstat->DqsRcvEn_Pass |= (1<<i);
816                 } else {
817                         pDCTstat->DqsRcvEn_Pass &= ~(1<<i);
818                 }
819         }
820
821
822         if (Pass == FirstPass) {
823                 /* if first pass, at least one byte lane pass
824                  * ,then DQS_PASS=1 and will set to related reg.
825                  */
826                 if(pDCTstat->DqsRcvEn_Pass != 0) {
827                         result = DQS_PASS;
828                 } else {
829                         result = DQS_FAIL;
830                 }
831
832         } else {
833                 /* if second pass, at least one byte lane fail
834                  * ,then DQS_FAIL=1 and will set to related reg.
835                  */
836                 if(pDCTstat->DqsRcvEn_Pass != 0xFF) {
837                         result = DQS_FAIL;
838                 } else {
839                         result = DQS_PASS;
840                 }
841         }
842
843         /* if second pass, we can't find the fail until FFh,
844          * then let it fail to save the final delay
845          */
846         if((Pass == SecondPass) && (pDCTstat->Status & (1 << SB_DQSRcvLimit))) {
847                 result = DQS_FAIL;
848                 pDCTstat->DqsRcvEn_Pass = 0;
849         }
850
851         /* second pass needs to be inverted
852          * FIXME? this could be inverted in the above code to start with...
853          */
854         if(Pass == SecondPass) {
855                 if (result == DQS_PASS) {
856                         result = DQS_FAIL;
857                 } else if (result == DQS_FAIL) { /* FIXME: doesn't need to be else if */
858                         result = DQS_PASS;
859                 }
860         }
861
862
863         return result;
864 }
865
866
867
868 static void mct_InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
869                                 struct DCTStatStruc *pDCTstat)
870 {
871         /* Initialize the DQS Positions in preparation for
872          * Reciever Enable Training.
873          * Write Position is 1/2 Memclock Delay
874          * Read Position is 1/2 Memclock Delay
875          */
876         u8 i;
877         for(i=0;i<2; i++){
878                 InitDQSPos4RcvrEn_D(pMCTstat, pDCTstat, i);
879         }
880 }
881
882
883 static void InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
884                                 struct DCTStatStruc *pDCTstat, u8 Channel)
885 {
886         /* Initialize the DQS Positions in preparation for
887          * Reciever Enable Training.
888          * Write Position is no Delay
889          * Read Position is 1/2 Memclock Delay
890          */
891
892         u8 i, j;
893         u32 dword;
894         u8 dn = 2; // TODO: Rev C could be 4
895         u32 dev = pDCTstat->dev_dct;
896         u32 index_reg = 0x98 + 0x100 * Channel;
897
898
899         // FIXME: add Cx support
900         dword = 0x00000000;
901         for(i=1; i<=3; i++) {
902                 for(j=0; j<dn; j++)
903                         /* DIMM0 Write Data Timing Low */
904                         /* DIMM0 Write ECC Timing */
905                         Set_NB32_index_wait(dev, index_reg, i + 0x100 * j, dword);
906         }
907
908         /* errata #180 */
909         dword = 0x2f2f2f2f;
910         for(i=5; i<=6; i++) {
911                 for(j=0; j<dn; j++)
912                         /* DIMM0 Read DQS Timing Control Low */
913                         Set_NB32_index_wait(dev, index_reg, i + 0x100 * j, dword);
914         }
915
916         dword = 0x0000002f;
917         for(j=0; j<dn; j++)
918                 /* DIMM0 Read DQS ECC Timing Control */
919                 Set_NB32_index_wait(dev, index_reg, 7 + 0x100 * j, dword);
920 }
921
922
923 void SetEccDQSRcvrEn_D(struct DCTStatStruc *pDCTstat, u8 Channel)
924 {
925         u32 dev;
926         u32 index_reg;
927         u32 index;
928         u8 ChipSel;
929         u8 *p;
930         u32 val;
931
932         dev = pDCTstat->dev_dct;
933         index_reg = 0x98 + Channel * 0x100;
934         index = 0x12;
935         p = pDCTstat->CH_D_BC_RCVRDLY[Channel];
936         print_debug_dqs("\t\tSetEccDQSRcvrPos: Channel ", Channel,  2);
937         for(ChipSel = 0; ChipSel < MAX_CS_SUPPORTED; ChipSel += 2) {
938                 val = p[ChipSel>>1];
939                 Set_NB32_index_wait(dev, index_reg, index, val);
940                 print_debug_dqs_pair("\t\tSetEccDQSRcvrPos: ChipSel ",
941                                         ChipSel, " rcvr_delay ",  val, 2);
942                 index += 3;
943         }
944 }
945
946
947 static void CalcEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat,
948                                 struct DCTStatStruc *pDCTstat, u8 Channel)
949 {
950         u8 ChipSel;
951         u16 EccDQSLike;
952         u8 EccDQSScale;
953         u32 val, val0, val1;
954
955         EccDQSLike = pDCTstat->CH_EccDQSLike[Channel];
956         EccDQSScale = pDCTstat->CH_EccDQSScale[Channel];
957
958         for (ChipSel = 0; ChipSel < MAX_CS_SUPPORTED; ChipSel += 2) {
959                 if(mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, ChipSel)) {
960                         u8 *p;
961                         p = pDCTstat->CH_D_B_RCVRDLY[Channel][ChipSel>>1];
962
963                         /* DQS Delay Value of Data Bytelane
964                          * most like ECC byte lane */
965                         val0 = p[EccDQSLike & 0x07];
966                         /* DQS Delay Value of Data Bytelane
967                          * 2nd most like ECC byte lane */
968                         val1 = p[(EccDQSLike>>8) & 0x07];
969
970                         if(val0 > val1) {
971                                 val = val0 - val1;
972                         } else {
973                                 val = val1 - val0;
974                         }
975
976                         val *= ~EccDQSScale;
977                         val >>= 8; // /256
978
979                         if(val0 > val1) {
980                                 val -= val1;
981                         } else {
982                                 val += val0;
983                         }
984
985                 pDCTstat->CH_D_BC_RCVRDLY[Channel][ChipSel>>1] = val;
986                 }
987         }
988         SetEccDQSRcvrEn_D(pDCTstat, Channel);
989 }
990
991 void mctSetEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat,
992                         struct DCTStatStruc *pDCTstatA)
993 {
994         u8 Node;
995         u8 i;
996
997         for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
998                 struct DCTStatStruc *pDCTstat;
999                 pDCTstat = pDCTstatA + Node;
1000                 if (!pDCTstat->NodePresent)
1001                         break;
1002                 if (pDCTstat->DCTSysLimit) {
1003                 for(i=0; i<2; i++)
1004                 CalcEccDQSRcvrEn_D(pMCTstat, pDCTstat, i);
1005                 }
1006         }
1007 }
1008
1009
1010 void phyAssistedMemFnceTraining(struct MCTStatStruc *pMCTstat,
1011                         struct DCTStatStruc *pDCTstatA)
1012 {
1013
1014         u8 Node = 0;
1015         struct DCTStatStruc *pDCTstat;
1016
1017         // FIXME: skip for Ax
1018         while (Node < MAX_NODES_SUPPORTED) {
1019                 pDCTstat = pDCTstatA + Node;
1020
1021                 if(pDCTstat->DCTSysLimit) {
1022                         fenceDynTraining_D(pMCTstat, pDCTstat, 0);
1023                         fenceDynTraining_D(pMCTstat, pDCTstat, 1);
1024                 }
1025                 Node++;
1026         }
1027 }
1028
1029
1030 static void fenceDynTraining_D(struct MCTStatStruc *pMCTstat,
1031                         struct DCTStatStruc *pDCTstat, u8 dct)
1032 {
1033         u16 avRecValue;
1034         u32 val;
1035         u32 dev;
1036         u32 index_reg = 0x98 + 0x100 * dct;
1037         u32 index;
1038
1039         /* BIOS first programs a seed value to the phase recovery engine
1040          *  (recommended 19) registers.
1041          * Dram Phase Recovery Control Register (F2x[1,0]9C_x[51:50] and
1042          * F2x[1,0]9C_x52.) .
1043          */
1044
1045         dev = pDCTstat->dev_dct;
1046         for (index = 0x50; index <= 0x52; index ++) {
1047                 val = Get_NB32_index_wait(dev, index_reg, index);
1048                 val |= (FenceTrnFinDlySeed & 0x1F);
1049                 if (index != 0x52) {
1050                         val &= ~(0xFF << 8);
1051                         val |= (val & 0xFF) << 8;
1052                         val &= 0xFFFF;
1053                         val |= val << 16;
1054                 }
1055                 Set_NB32_index_wait(dev, index_reg, index, val);
1056         }
1057
1058
1059         /* Set F2x[1,0]9C_x08[PhyFenceTrEn]=1. */
1060         val = Get_NB32_index_wait(dev, index_reg, 0x08);
1061         val |= 1 << PhyFenceTrEn;
1062         Set_NB32_index_wait(dev, index_reg, 0x08, val);
1063
1064         /* Wait 200 MEMCLKs. */
1065         mct_Wait_10ns (20000);          /* wait 200us */
1066
1067         /* Clear F2x[1,0]9C_x08[PhyFenceTrEn]=0. */
1068         val = Get_NB32_index_wait(dev, index_reg, 0x08);
1069         val &= ~(1 << PhyFenceTrEn);
1070         Set_NB32_index_wait(dev, index_reg, 0x08, val);
1071
1072         /* BIOS reads the phase recovery engine registers
1073          * F2x[1,0]9C_x[51:50] and F2x[1,0]9C_x52. */
1074         avRecValue = 0;
1075         for (index = 0x50; index <= 0x52; index ++) {
1076                 val = Get_NB32_index_wait(dev, index_reg, index);
1077                 avRecValue += val & 0x7F;
1078                 if (index != 0x52) {
1079                         avRecValue += (val >> 8) & 0x7F;
1080                         avRecValue += (val >> 16) & 0x7F;
1081                         avRecValue += (val >> 24) & 0x7F;
1082                 }
1083         }
1084
1085         val = avRecValue / 9;
1086         if (avRecValue % 9)
1087                 val++;
1088         avRecValue = val;
1089
1090         /* Write the (averaged value -8) to F2x[1,0]9C_x0C[PhyFence]. */
1091         avRecValue -= 8;
1092         val = Get_NB32_index_wait(dev, index_reg, 0x0C);
1093         val &= ~(0x1F << 16);
1094         val |= (avRecValue & 0x1F) << 16;
1095         Set_NB32_index_wait(dev, index_reg, 0x0C, val);
1096
1097         /* Rewrite F2x[1,0]9C_x04-DRAM Address/Command Timing Control Register
1098          * delays (both channels). */
1099         val = Get_NB32_index_wait(dev, index_reg, 0x04);
1100         Set_NB32_index_wait(dev, index_reg, 0x04, val);
1101 }
1102
1103
1104 static void mct_Wait_10ns (u32 cycles)
1105 {
1106         u32 saved, i;
1107         u32 hi, lo, msr;
1108
1109         /* cycles = number of 10ns cycles(or longer) to delay */
1110         /* FIXME: Need to calibrate to CPU/NCLK speed? */
1111
1112         msr = 0x10;                             /* TSC */
1113         for (i = 0; i < cycles; i++) {
1114                 _RDMSR(msr, &lo, &hi);
1115                 saved = lo;
1116
1117                 do {
1118                         _RDMSR(msr, &lo, &hi);
1119                 } while (lo - saved < 8);       /* 8 x 1.25 ns as NCLK is  at 1.25ns */
1120         }
1121 }