Trivial. Re-indent the code.
[coreboot.git] / src / northbridge / amd / amdmct / mct_ddr3 / mctsrc.c
1 /*
2  * This file is part of the coreboot project.
3  *
4  * Copyright (C) 2010 Advanced Micro Devices, Inc.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
19  */
20
21 /******************************************************************************
22  Description: Receiver En and DQS Timing Training feature for DDR 3 MCT
23 ******************************************************************************/
24
25 static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat,
26                                 struct DCTStatStruc *pDCTstat, u8 Pass);
27 static u8 mct_SavePassRcvEnDly_D(struct DCTStatStruc *pDCTstat,
28                                         u8 rcvrEnDly, u8 Channel,
29                                         u8 receiver, u8 Pass);
30 static u8 mct_CompareTestPatternQW0_D(struct MCTStatStruc *pMCTstat,
31                                         struct DCTStatStruc *pDCTstat,
32                                         u32 addr, u8 channel,
33                                         u8 pattern, u8 Pass);
34 static void mct_InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
35                                          struct DCTStatStruc *pDCTstat);
36 static void InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
37                                 struct DCTStatStruc *pDCTstat, u8 Channel);
38 static void CalcEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat,
39                                 struct DCTStatStruc *pDCTstat, u8 Channel);
40 static void mct_SetFinalRcvrEnDly_D(struct DCTStatStruc *pDCTstat,
41                                 u8 RcvrEnDly, u8 where,
42                                 u8 Channel, u8 Receiver,
43                                 u32 dev, u32 index_reg,
44                                 u8 Addl_Index, u8 Pass);
45 static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u8 DQSRcvEnDly);
46 static void fenceDynTraining_D(struct MCTStatStruc *pMCTstat,
47                         struct DCTStatStruc *pDCTstat, u8 dct);
48 static void mct_DisableDQSRcvEn_D(struct DCTStatStruc *pDCTstat);
49
50 /* Warning:  These must be located so they do not cross a logical 16-bit
51    segment boundary! */
52 const static u32 TestPattern0_D[] = {
53         0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
54         0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
55         0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
56         0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
57 };
58 const static u32 TestPattern1_D[] = {
59         0x55555555, 0x55555555, 0x55555555, 0x55555555,
60         0x55555555, 0x55555555, 0x55555555, 0x55555555,
61         0x55555555, 0x55555555, 0x55555555, 0x55555555,
62         0x55555555, 0x55555555, 0x55555555, 0x55555555,
63 };
64 const static u32 TestPattern2_D[] = {
65         0x12345678, 0x87654321, 0x23456789, 0x98765432,
66         0x59385824, 0x30496724, 0x24490795, 0x99938733,
67         0x40385642, 0x38465245, 0x29432163, 0x05067894,
68         0x12349045, 0x98723467, 0x12387634, 0x34587623,
69 };
70
71 static void SetupRcvrPattern(struct MCTStatStruc *pMCTstat,
72                 struct DCTStatStruc *pDCTstat, u32 *buffer, u8 pass)
73 {
74         /*
75          * 1. Copy the alpha and Beta patterns from ROM to Cache,
76          *     aligning on 16 byte boundary
77          * 2. Set the ptr to DCTStatstruc.PtrPatternBufA for Alpha
78          * 3. Set the ptr to DCTStatstruc.PtrPatternBufB for Beta
79          */
80         u32 *buf_a;
81         u32 *buf_b;
82         u32 *p_A;
83         u32 *p_B;
84         u8 i;
85
86         buf_a = (u32 *)(((u32)buffer + 0x10) & (0xfffffff0));
87         buf_b = buf_a + 32; /* ?? */
88         p_A = (u32 *)SetupDqsPattern_1PassB(pass);
89         p_B = (u32 *)SetupDqsPattern_1PassA(pass);
90
91         for(i=0;i<16;i++) {
92                 buf_a[i] = p_A[i];
93                 buf_b[i] = p_B[i];
94         }
95
96         pDCTstat->PtrPatternBufA = (u32)buf_a;
97         pDCTstat->PtrPatternBufB = (u32)buf_b;
98 }
99
100 void mct_TrainRcvrEn_D(struct MCTStatStruc *pMCTstat,
101                         struct DCTStatStruc *pDCTstat, u8 Pass)
102 {
103         if(mct_checkNumberOfDqsRcvEn_1Pass(Pass))
104                 dqsTrainRcvrEn_SW(pMCTstat, pDCTstat, Pass);
105 }
106
/*
 * Software DQS receiver enable delay training for one node.
 *
 * pMCTstat - passed through to the helper routines
 * pDCTstat - per-node DCT state; error/status fields and the saved
 *            per-lane delays are updated as training proceeds
 * Pass     - FirstPass or another pass value; selects initialisation,
 *            the start delay and whether training mode is disabled at
 *            the end
 *
 * Outline, as implemented below:
 *  1. Program bits 31:22 of register 0x78 (+0x100 per DCT) to 0x0c8.
 *  2. Set CR4 bit 9 and HWCR bit 17, clear HWCR bit 15, remembering
 *     whether CR4 bit 9 and HWCR bit 17 were already set.
 *  3. For both channels and each receiver pair (step of 2, up to 8):
 *     sweep RcvrEnDly upwards, writing two alternating patterns to each
 *     enabled rank, reading them back and comparing, until
 *     mct_SavePassRcvEnDly_D() reports a stop condition or the limit
 *     (0xff) is reached.  Record errors, then program the final delay.
 *  4. Program the max latency of both channels from the largest delay
 *     seen, re-enable DIMM ECC if it was disabled, and restore CR4 bit 9
 *     and HWCR bit 17 if they were originally clear.
 *
 * Note: HWCR bit 15 (labelled SSEDIS below) is cleared in step 2 but is
 * not restored at the end of this function.
 */
107 static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat,
108                                 struct DCTStatStruc *pDCTstat, u8 Pass)
109 {
110         u8 Channel, RcvrEnDly, RcvrEnDlyRmin;
111         u8 Test0, Test1, CurrTest, CurrTestSide0, CurrTestSide1;
112         u8 CTLRMaxDelay, _2Ranks, PatternA, PatternB;
113         u8 Addl_Index = 0;
114         u8 Receiver;
115         u8 _DisableDramECC = 0, _Wrap32Dis = 0, _SSE2 = 0;
116         u8 RcvrEnDlyLimit, Final_Value, MaxDelay_CH[2];
117         u32 TestAddr0, TestAddr1, TestAddr0B, TestAddr1B;
118         u32 PatternBuffer[64+4]; /* FIXME: need increase 8? */
            /* Errors is accumulated below but never read in this function. */
119         u32 Errors;
120
121         u32 val;
122         u32 reg;
123         u32 dev;
124         u32 index_reg;
125         u32 ch_start, ch_end, ch;
126         u32 msr;
127         u32 cr4;
128         u32 lo, hi;
129
130         u8 valid;
131         u32 tmp;
132         u8 LastTest;
133
134         print_debug_dqs("\nTrainRcvEn: Node", pDCTstat->Node_ID, 0);
135         print_debug_dqs("TrainRcvEn: Pass", Pass, 0);
136
137         dev = pDCTstat->dev_dct;
138         ch_start = 0;
            /* Only one DCT's register is programmed in ganged mode. */
139         if(!pDCTstat->GangedMode) {
140                 ch_end = 2;
141         } else {
142                 ch_end = 1;
143         }
144
145         for (ch = ch_start; ch < ch_end; ch++) {
146                 reg = 0x78 + (0x100 * ch);
147                 val = Get_NB32(dev, reg);
148                 val &= ~(0x3ff << 22);
149                 val |= (0x0c8 << 22);           /* Max Rd Lat */
150                 Set_NB32(dev, reg, val);
151         }
152
            /* Final_Value is passed as 'where' to mct_SetFinalRcvrEnDly_D(). */
153         Final_Value = 1;
154         if (Pass == FirstPass) {
155                 mct_InitDQSPos4RcvrEn_D(pMCTstat, pDCTstat);
156         } else {
157                 pDCTstat->DimmTrainFail = 0;
158                 pDCTstat->CSTrainFail = ~pDCTstat->CSPresent;
159         }
160
161         cr4 = read_cr4();
162         if(cr4 & ( 1 << 9)) {   /* save the old value */
163                 _SSE2 = 1;
164         }
165         cr4 |= (1 << 9);        /* OSFXSR enable SSE2 */
166         write_cr4(cr4);
167
168         msr = HWCR;
169         _RDMSR(msr, &lo, &hi);
170         /* FIXME: Why use SSEDIS */
171         if(lo & (1 << 17)) {    /* save the old value */
172                 _Wrap32Dis = 1;
173         }
174         lo |= (1 << 17);        /* HWCR.wrap32dis */
175         lo &= ~(1 << 15);       /* SSEDIS */
176         _WRMSR(msr, lo, hi);    /* Setting wrap32dis allows 64-bit memory references in real mode */
177
            /* The return value is kept so ECC can be re-enabled after training. */
178         _DisableDramECC = mct_DisableDimmEccEn_D(pMCTstat, pDCTstat);
179
180         SetupRcvrPattern(pMCTstat, pDCTstat, PatternBuffer, Pass);
181
182         Errors = 0;
183         dev = pDCTstat->dev_dct;
            /*
             * CTLRMaxDelay is not reset between channels, so it is a running
             * maximum and MaxDelay_CH[1] is never smaller than MaxDelay_CH[0].
             */
184         CTLRMaxDelay = 0;
185
            /*
             * Both channels are always visited; mct_InitReceiver_D() returns 8
             * for a DCT without valid DIMMs, which makes the receiver loop a
             * no-op for that channel.
             */
186         for (Channel = 0; Channel < 2; Channel++) {
187                 print_debug_dqs("\tTrainRcvEn51: Node ", pDCTstat->Node_ID, 1);
188                 print_debug_dqs("\tTrainRcvEn51: Channel ", Channel, 1);
189                 pDCTstat->Channel = Channel;
190
191                 MaxDelay_CH[Channel] = 0;
192                 index_reg = 0x98 + 0x100 * Channel;
193
194                 Receiver = mct_InitReceiver_D(pDCTstat, Channel);
195                 /* There are four receiver pairs, loosely associated with chipselects. */
196                 for (; Receiver < 8; Receiver += 2) {
197                         Addl_Index = (Receiver >> 1) * 3 + 0x10;
198                         LastTest = DQS_FAIL;
199
200                         /* mct_ModifyIndex_D */
201                         RcvrEnDlyRmin = RcvrEnDlyLimit = 0xff;
202
203                         print_debug_dqs("\t\tTrainRcvEnd52: index ", Addl_Index, 2);
204
205                         if(!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver)) {
206                                 continue;
207                         }
208
209                         TestAddr0 = mct_GetRcvrSysAddr_D(pMCTstat, pDCTstat, Channel, Receiver, &valid);
210                         if(!valid) {    /* Address not supported on current CS */
211                                 continue;
212                         }
213
                            /* The "B" address is the second test location for the same rank. */
214                         TestAddr0B = TestAddr0 + (BigPagex8_RJ8 << 3);
215
                            /* The odd receiver of the pair is tested too when it is enabled. */
216                         if(mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver+1)) {
217                                 TestAddr1 = mct_GetRcvrSysAddr_D(pMCTstat, pDCTstat, Channel, Receiver+1, &valid);
218                                 if(!valid) {    /* Address not supported on current CS */
219                                         continue;
220                                 }
221                                 TestAddr1B = TestAddr1 + (BigPagex8_RJ8 << 3);
222                                 _2Ranks = 1;
223                         } else {
224                                 _2Ranks = TestAddr1 = TestAddr1B = 0;
225                         }
226
227                         print_debug_dqs("\t\tTrainRcvEn53: TestAddr0 ", TestAddr0, 2);
228                         print_debug_dqs("\t\tTrainRcvEn53: TestAddr0B ", TestAddr0B, 2);
229                         print_debug_dqs("\t\tTrainRcvEn53: TestAddr1 ", TestAddr1, 2);
230                         print_debug_dqs("\t\tTrainRcvEn53: TestAddr1B ", TestAddr1B, 2);
231
232                         /*
233                          * Get starting RcvrEnDly value
234                          */
235                         RcvrEnDly = mct_Get_Start_RcvrEnDly_1Pass(Pass);
236
237                         /* mct_GetInitFlag_D*/
238                         if (Pass == FirstPass) {
239                                 pDCTstat->DqsRcvEn_Pass = 0;
240                         } else {
241                                 pDCTstat->DqsRcvEn_Pass=0xFF;
242                         }
243                         pDCTstat->DqsRcvEn_Saved = 0;
244
245
246                         while(RcvrEnDly < RcvrEnDlyLimit) {     /* sweep Delay value here */
247                                 print_debug_dqs("\t\t\tTrainRcvEn541: RcvrEnDly ", RcvrEnDly, 3);
248
249                                 /* callback not required
250                                 if(mct_AdjustDelay_D(pDCTstat, RcvrEnDly))
251                                         goto skipDly;
252                                 */
253
254                                 /* Odd steps get another pattern such that even
255                                  and odd steps alternate. The pointers to the
256                                  patterns will be swapped at the end of the loop
257                                  so that they correspond. */
258                                 if(RcvrEnDly & 1) {
259                                         PatternA = 1;
260                                         PatternB = 0;
261                                 } else {
262                                         /* Even step */
263                                         PatternA = 0;
264                                         PatternB = 1;
265                                 }
266
267                                 mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0, PatternA); /* rank 0 of DIMM, testpattern 0 */
268                                 mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0B, PatternB); /* rank 0 of DIMM, testpattern 1 */
269                                 if(_2Ranks) {
270                                         mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1, PatternA); /*rank 1 of DIMM, testpattern 0 */
271                                         mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1B, PatternB); /*rank 1 of DIMM, testpattern 1 */
272                                 }
273
                                    /* Program the candidate delay (FinalValue = 0) before reading back. */
274                                 mct_SetRcvrEnDly_D(pDCTstat, RcvrEnDly, 0, Channel, Receiver, dev, index_reg, Addl_Index, Pass);
275
276                                 CurrTest = DQS_FAIL;
277                                 CurrTestSide0 = DQS_FAIL;
278                                 CurrTestSide1 = DQS_FAIL;
279
280                                 mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0); /*cache fills */
281                                 Test0 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr0, Channel, PatternA, Pass);/* ROM vs cache compare */
282                                 proc_IOCLFLUSH_D(TestAddr0);
283                                 ResetDCTWrPtr_D(dev, index_reg, Addl_Index);
284
285                                 print_debug_dqs("\t\t\tTrainRcvEn542: Test0 result ", Test0, 3);
286
287                                 /* != 0x00 mean pass */
288
                                    /* A side passes only when both of its test addresses compare OK. */
289                                 if(Test0 == DQS_PASS) {
290                                         mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0B);        /*cache fills */
291                                         /* ROM vs cache compare */
292                                         Test1 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr0B, Channel, PatternB, Pass);
293                                         proc_IOCLFLUSH_D(TestAddr0B);
294                                         ResetDCTWrPtr_D(dev, index_reg, Addl_Index);
295
296                                         print_debug_dqs("\t\t\tTrainRcvEn543: Test1 result ", Test1, 3);
297
298                                         if(Test1 == DQS_PASS) {
299                                                 CurrTestSide0 = DQS_PASS;
300                                         }
301                                 }
302                                 if(_2Ranks) {
303                                         mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1); /*cache fills */
304                                         /* ROM vs cache compare */
305                                         Test0 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr1, Channel, PatternA, Pass);
306                                         proc_IOCLFLUSH_D(TestAddr1);
307                                         ResetDCTWrPtr_D(dev, index_reg, Addl_Index);
308
309                                         print_debug_dqs("\t\t\tTrainRcvEn544: Test0 result ", Test0, 3);
310
311                                         if(Test0 == DQS_PASS) {
312                                                 mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1B);        /*cache fills */
313                                                 /* ROM vs cache compare */
314                                                 Test1 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr1B, Channel, PatternB, Pass);
315                                                 proc_IOCLFLUSH_D(TestAddr1B);
316                                                 ResetDCTWrPtr_D(dev, index_reg, Addl_Index);
317
318                                                 print_debug_dqs("\t\t\tTrainRcvEn545: Test1 result ", Test1, 3);
319                                                 if(Test1 == DQS_PASS) {
320                                                         CurrTestSide1 = DQS_PASS;
321                                                 }
322                                         }
323                                 }
324
                                    /* With two ranks both sides must pass; otherwise side 0 decides. */
325                                 if(_2Ranks) {
326                                         if ((CurrTestSide0 == DQS_PASS) && (CurrTestSide1 == DQS_PASS)) {
327                                                 CurrTest = DQS_PASS;
328                                         }
329                                 } else if (CurrTestSide0 == DQS_PASS) {
330                                         CurrTest = DQS_PASS;
331                                 }
332
333                                 /* record first pass DqsRcvEn to stack */
334                                 valid = mct_SavePassRcvEnDly_D(pDCTstat, RcvrEnDly, Channel, Receiver, Pass);
335
336                                 /* Break(1:RevF,2:DR) or not(0) FIXME: This comment doesn't make sense */
                                    /*
                                     * Stop sweeping when the helper returns 2, or returns 1
                                     * while the previous step's overall result was DQS_FAIL.
                                     */
337                                 if(valid == 2 || (LastTest == DQS_FAIL && valid == 1)) {
338                                         RcvrEnDlyRmin = RcvrEnDly;
339                                         break;
340                                 }
341
342                                 LastTest = CurrTest;
343
344                                 /* swap the rank 0 pointers */
345                                 tmp = TestAddr0;
346                                 TestAddr0 = TestAddr0B;
347                                 TestAddr0B = tmp;
348
349                                 /* swap the rank 1 pointers */
350                                 tmp = TestAddr1;
351                                 TestAddr1 = TestAddr1B;
352                                 TestAddr1B = tmp;
353
354                                 print_debug_dqs("\t\t\tTrainRcvEn56: RcvrEnDly ", RcvrEnDly, 3);
355
356                                 RcvrEnDly++;
357
358                         }       /* while RcvrEnDly */
359
360                         print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDly ", RcvrEnDly, 2);
361                         print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDlyRmin ", RcvrEnDlyRmin, 3);
362                         print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDlyLimit ", RcvrEnDlyLimit, 3);
                            /* RcvrEnDlyRmin still holds its initial 0xff when the sweep never broke out. */
363                         if(RcvrEnDlyRmin == RcvrEnDlyLimit) {
364                                 /* no passing window */
365                                 pDCTstat->ErrStatus |= 1 << SB_NORCVREN;
366                                 Errors |= 1 << SB_NORCVREN;
367                                 pDCTstat->ErrCode = SC_FatalErr;
368                         }
369
370                         if(RcvrEnDly > (RcvrEnDlyLimit - 1)) {
371                                 /* passing window too narrow, too far delayed*/
372                                 pDCTstat->ErrStatus |= 1 << SB_SmallRCVR;
373                                 Errors |= 1 << SB_SmallRCVR;
374                                 pDCTstat->ErrCode = SC_FatalErr;
375                                 RcvrEnDly = RcvrEnDlyLimit - 1;
376                                 pDCTstat->CSTrainFail |= 1 << Receiver;
377                                 pDCTstat->DimmTrainFail |= 1 << (Receiver + Channel);
378                         }
379
380                         /* CHB_D0_B0_RCVRDLY set in mct_Average_RcvrEnDly_Pass */
381                         mct_Average_RcvrEnDly_Pass(pDCTstat, RcvrEnDly, RcvrEnDlyLimit, Channel, Receiver, Pass);
382
383                         mct_SetFinalRcvrEnDly_D(pDCTstat, RcvrEnDly, Final_Value, Channel, Receiver, dev, index_reg, Addl_Index, Pass);
384
385                         if(pDCTstat->ErrStatus & (1 << SB_SmallRCVR)) {
386                                 Errors |= 1 << SB_SmallRCVR;
387                         }
388
                            /* NOTE(review): RcvrEnDly is a u8, so this addition can wrap - confirm Pass1MemClkDly's range. */
389                         RcvrEnDly += Pass1MemClkDly;
390                         if(RcvrEnDly > CTLRMaxDelay) {
391                                 CTLRMaxDelay = RcvrEnDly;
392                         }
393
394                 }       /* while Receiver */
395                 MaxDelay_CH[Channel] = CTLRMaxDelay;
396         }       /* for Channel */
397
398         CTLRMaxDelay = MaxDelay_CH[0];
399         if (MaxDelay_CH[1] > CTLRMaxDelay)
400                 CTLRMaxDelay = MaxDelay_CH[1];
401
402         for (Channel = 0; Channel < 2; Channel++) {
403                 mct_SetMaxLatency_D(pDCTstat, Channel, CTLRMaxDelay); /* program Ch A/B MaxAsyncLat to correspond with max delay */
404         }
405
            /* index_reg and Addl_Index still hold the values from the last loop iteration. */
406         ResetDCTWrPtr_D(dev, index_reg, Addl_Index);
407
408         if(_DisableDramECC) {
409                 mct_EnableDimmEccEn_D(pMCTstat, pDCTstat, _DisableDramECC);
410         }
411
412         if (Pass == FirstPass) {
413                 /*Disable DQSRcvrEn training mode */
414                 mct_DisableDQSRcvEn_D(pDCTstat);
415         }
416
            /* Undo the HWCR / CR4 changes only if the bits were clear on entry. */
417         if(!_Wrap32Dis) {
418                 msr = HWCR;
419                 _RDMSR(msr, &lo, &hi);
420                 lo &= ~(1<<17);         /* restore HWCR.wrap32dis */
421                 _WRMSR(msr, lo, hi);
422         }
423         if(!_SSE2){
424                 cr4 = read_cr4();
425                 cr4 &= ~(1<<9);         /* restore cr4.OSFXSR */
426                 write_cr4(cr4);
427         }
428
            /* Debug dump of the per-channel max read latency; the local Channel shadows the outer one. */
429 #if DQS_TRAIN_DEBUG > 0
430         {
431                 u8 Channel;
432                 printk(BIOS_DEBUG, "TrainRcvrEn: CH_MaxRdLat:\n");
433                 for(Channel = 0; Channel<2; Channel++) {
434                         printk(BIOS_DEBUG, "Channel:%x: %x\n",
435                                Channel, pDCTstat->CH_MaxRdLat[Channel]);
436                 }
437         }
438 #endif
439
            /* Debug dump of the eight saved per-lane delays for each receiver pair. */
440 #if DQS_TRAIN_DEBUG > 0
441         {
442                 u8 val;
443                 u8 Channel, Receiver;
444                 u8 i;
445                 u8 *p;
446
447                 printk(BIOS_DEBUG, "TrainRcvrEn: CH_D_B_RCVRDLY:\n");
448                 for(Channel = 0; Channel < 2; Channel++) {
449                         printk(BIOS_DEBUG, "Channel:%x\n", Channel);
450                         for(Receiver = 0; Receiver<8; Receiver+=2) {
451                                 printk(BIOS_DEBUG, "\t\tReceiver:%x:", Receiver);
452                                 p = pDCTstat->CH_D_B_RCVRDLY[Channel][Receiver>>1];
453                                 for (i=0;i<8; i++) {
454                                         val  = p[i];
455                                         printk(BIOS_DEBUG, "%x ", val);
456                                 }
457                                 printk(BIOS_DEBUG, "\n");
458                         }
459                 }
460         }
461 #endif
462
463         printk(BIOS_DEBUG, "TrainRcvrEn: Status %x\n", pDCTstat->Status);
464         printk(BIOS_DEBUG, "TrainRcvrEn: ErrStatus %x\n", pDCTstat->ErrStatus);
465         printk(BIOS_DEBUG, "TrainRcvrEn: ErrCode %x\n", pDCTstat->ErrCode);
466         printk(BIOS_DEBUG, "TrainRcvrEn: Done\n\n");
467 }
468
469 u8 mct_InitReceiver_D(struct DCTStatStruc *pDCTstat, u8 dct)
470 {
471         if (pDCTstat->DIMMValidDCT[dct] == 0 ) {
472                 return 8;
473         } else {
474                 return 0;
475         }
476 }
477
478 static void mct_SetFinalRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u8 RcvrEnDly, u8 where, u8 Channel, u8 Receiver, u32 dev, u32 index_reg, u8 Addl_Index, u8 Pass/*, u8 *p*/)
479 {
480         /*
481          * Program final DqsRcvEnDly to additional index for DQS receiver
482          *  enabled delay
483          */
484         mct_SetRcvrEnDly_D(pDCTstat, RcvrEnDly, where, Channel, Receiver, dev, index_reg, Addl_Index, Pass);
485 }
486
487 static void mct_DisableDQSRcvEn_D(struct DCTStatStruc *pDCTstat)
488 {
489         u8 ch_end, ch;
490         u32 reg;
491         u32 dev;
492         u32 val;
493
494         dev = pDCTstat->dev_dct;
495         if (pDCTstat->GangedMode) {
496                 ch_end = 1;
497         } else {
498                 ch_end = 2;
499         }
500
501         for (ch=0; ch<ch_end; ch++) {
502                 reg = 0x78 + 0x100 * ch;
503                 val = Get_NB32(dev, reg);
504                 val &= ~(1 << DqsRcvEnTrain);
505                 Set_NB32(dev, reg, val);
506         }
507 }
508
509 /* mct_ModifyIndex_D
510  * Function only used once so it was inlined.
511  */
512
513 /* mct_GetInitFlag_D
514  * Function only used once so it was inlined.
515  */
516
517 void mct_SetRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u8 RcvrEnDly,
518                         u8 FinalValue, u8 Channel, u8 Receiver, u32 dev,
519                         u32 index_reg, u8 Addl_Index, u8 Pass)
520 {
521         u32 index;
522         u8 i;
523         u8 *p;
524         u32 val;
525
526         if(RcvrEnDly == 0xFE) {
527                 /*set the boudary flag */
528                 pDCTstat->Status |= 1 << SB_DQSRcvLimit;
529         }
530
531         /* DimmOffset not needed for CH_D_B_RCVRDLY array */
532         for(i=0; i < 8; i++) {
533                 if(FinalValue) {
534                         /*calculate dimm offset */
535                         p = pDCTstat->CH_D_B_RCVRDLY[Channel][Receiver >> 1];
536                         RcvrEnDly = p[i];
537                 }
538
539                 /* if flag=0, set DqsRcvEn value to reg. */
540                 /* get the register index from table */
541                 index = Table_DQSRcvEn_Offset[i >> 1];
542                 index += Addl_Index;    /* DIMMx DqsRcvEn byte0 */
543                 val = Get_NB32_index_wait(dev, index_reg, index);
544                 if(i & 1) {
545                         /* odd byte lane */
546                         val &= ~(0xFF << 16);
547                         val |= (RcvrEnDly << 16);
548                 } else {
549                         /* even byte lane */
550                         val &= ~0xFF;
551                         val |= RcvrEnDly;
552                 }
553                 Set_NB32_index_wait(dev, index_reg, index, val);
554         }
555
556 }
557
558 static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u8 DQSRcvEnDly)
559 {
560         u32 dev;
561         u32 reg;
562         u16 SubTotal;
563         u32 index_reg;
564         u32 reg_off;
565         u32 val;
566         u32 valx;
567
568         if(pDCTstat->GangedMode)
569                 Channel = 0;
570
571         dev = pDCTstat->dev_dct;
572         reg_off = 0x100 * Channel;
573         index_reg = 0x98 + reg_off;
574
575         /* Multiply the CAS Latency by two to get a number of 1/2 MEMCLKs units.*/
576         val = Get_NB32(dev, 0x88 + reg_off);
577         SubTotal = ((val & 0x0f) + 4) << 1;     /* SubTotal is 1/2 Memclk unit */
578
579         /* If registered DIMMs are being used then
580          *  add 1 MEMCLK to the sub-total.
581          */
582         val = Get_NB32(dev, 0x90 + reg_off);
583         if(!(val & (1 << UnBuffDimm)))
584                 SubTotal += 2;
585
586         /* If the address prelaunch is setup for 1/2 MEMCLKs then
587          *  add 1, else add 2 to the sub-total.
588          *  if (AddrCmdSetup || CsOdtSetup || CkeSetup) then K := K + 2;
589          */
590         val = Get_NB32_index_wait(dev, index_reg, 0x04);
591         if(!(val & 0x00202020))
592                 SubTotal += 1;
593         else
594                 SubTotal += 2;
595
596         /* If the F2x[1, 0]78[RdPtrInit] field is 4, 5, 6 or 7 MEMCLKs,
597          * then add 4, 3, 2, or 1 MEMCLKs, respectively to the sub-total. */
598         val = Get_NB32(dev, 0x78 + reg_off);
599         SubTotal += 8 - (val & 0x0f);
600
601         /* Convert bits 7-5 (also referred to as the course delay) of
602          * the current (or worst case) DQS receiver enable delay to
603          * 1/2 MEMCLKs units, rounding up, and add this to the sub-total.
604          */
605         SubTotal += DQSRcvEnDly >> 5;   /*BOZO-no rounding up */
606
607         /* Add 5.5 to the sub-total. 5.5 represents part of the
608          * processor specific constant delay value in the DRAM
609          * clock domain.
610          */
611         SubTotal <<= 1;         /*scale 1/2 MemClk to 1/4 MemClk */
612         SubTotal += 11;         /*add 5.5 1/2MemClk */
613
614         /* Convert the sub-total (in 1/2 MEMCLKs) to northbridge
615          * clocks (NCLKs) as follows (assuming DDR400 and assuming
616          * that no P-state or link speed changes have occurred).
617          */
618
619         /* New formula:
620          * SubTotal *= 3*(Fn2xD4[NBFid]+4)/(3+Fn2x94[MemClkFreq])/2 */
621         val = Get_NB32(dev, 0x94 + reg_off);
622
623         /* SubTotal div 4 to scale 1/4 MemClk back to MemClk */
624         val &= 7;
625         if (val >= 3) {
626                 val <<= 1;
627         } else
628                 val += 3;
629         valx = val << 2;
630
631         val = Get_NB32(pDCTstat->dev_nbmisc, 0xD4);
632         SubTotal *= ((val & 0x1f) + 4 ) * 3;
633
634         SubTotal /= valx;
635         if (SubTotal % valx) {  /* round up */
636                 SubTotal++;
637         }
638
639         /* Add 5 NCLKs to the sub-total. 5 represents part of the
640          * processor specific constant value in the northbridge
641          * clock domain.
642          */
643         SubTotal += 5;
644
645         pDCTstat->CH_MaxRdLat[Channel] = SubTotal;
646         if(pDCTstat->GangedMode) {
647                 pDCTstat->CH_MaxRdLat[1] = SubTotal;
648         }
649
650         /* Program the F2x[1, 0]78[MaxRdLatency] register with
651          * the total delay value (in NCLKs).
652          */
653         reg = 0x78 + reg_off;
654         val = Get_NB32(dev, reg);
655         val &= ~(0x3ff << 22);
656         val |= (SubTotal & 0x3ff) << 22;
657
658         /* program MaxRdLatency to correspond with current delay */
659         Set_NB32(dev, reg, val);
660 }
661
662 static u8 mct_SavePassRcvEnDly_D(struct DCTStatStruc *pDCTstat,
663                         u8 rcvrEnDly, u8 Channel,
664                         u8 receiver, u8 Pass)
665 {
666         u8 i;
667         u8 mask_Saved, mask_Pass;
668         u8 *p;
669
670         /* calculate dimm offset
671          * not needed for CH_D_B_RCVRDLY array
672          */
673
674         /* cmp if there has new DqsRcvEnDly to be recorded */
675         mask_Pass = pDCTstat->DqsRcvEn_Pass;
676
677         if(Pass == SecondPass) {
678                 mask_Pass = ~mask_Pass;
679         }
680
681         mask_Saved = pDCTstat->DqsRcvEn_Saved;
682         if(mask_Pass != mask_Saved) {
683
684                 /* find desired stack offset according to channel/dimm/byte */
685                 if(Pass == SecondPass) {
686                         /* FIXME: SecondPass is never used for Barcelona p = pDCTstat->CH_D_B_RCVRDLY_1[Channel][receiver>>1]; */
687                         p = 0; /* Keep the compiler happy. */
688                 } else {
689                         mask_Saved &= mask_Pass;
690                         p = pDCTstat->CH_D_B_RCVRDLY[Channel][receiver>>1];
691                 }
692                 for(i=0; i < 8; i++) {
693                         /* cmp per byte lane */
694                         if(mask_Pass & (1 << i)) {
695                                 if(!(mask_Saved & (1 << i))) {
696                                         /* save RcvEnDly to stack, according to
697                                         the related Dimm/byte lane */
698                                         p[i] = (u8)rcvrEnDly;
699                                         mask_Saved |= 1 << i;
700                                 }
701                         }
702                 }
703                 pDCTstat->DqsRcvEn_Saved = mask_Saved;
704         }
705         return mct_SaveRcvEnDly_D_1Pass(pDCTstat, Pass);
706 }
707
708 static u8 mct_CompareTestPatternQW0_D(struct MCTStatStruc *pMCTstat,
709                                         struct DCTStatStruc *pDCTstat,
710                                         u32 addr, u8 channel,
711                                         u8 pattern, u8 Pass)
712 {
713         /* Compare only the first beat of data.  Since target addrs are cache
714          * line aligned, the Channel parameter is used to determine which
715          * cache QW to compare.
716          */
717
718         u8 *test_buf;
719         u8 i;
720         u8 result;
721         u8 value;
722
723         if(Pass == FirstPass) {
724                 if(pattern==1) {
725                         test_buf = (u8 *)TestPattern1_D;
726                 } else {
727                         test_buf = (u8 *)TestPattern0_D;
728                 }
729         } else {                /* Second Pass */
730                 test_buf = (u8 *)TestPattern2_D;
731         }
732
733         SetUpperFSbase(addr);
734         addr <<= 8;
735
736         if((pDCTstat->Status & (1<<SB_128bitmode)) && channel ) {
737                 addr += 8;      /* second channel */
738                 test_buf += 8;
739         }
740
741         print_debug_dqs_pair("\t\t\t\t\t\t  test_buf = ", (u32)test_buf, "  |  addr_lo = ", addr,  4);
742         for (i=0; i<8; i++, addr ++) {
743                 value = read32_fs(addr);
744                 print_debug_dqs_pair("\t\t\t\t\t\t\t\t ", test_buf[i], "  |  ", value, 4);
745
746                 if (value == test_buf[i]) {
747                         pDCTstat->DqsRcvEn_Pass |= (1<<i);
748                 } else {
749                         pDCTstat->DqsRcvEn_Pass &= ~(1<<i);
750                 }
751         }
752
753         result = DQS_FAIL;
754
755         if (Pass == FirstPass) {
756                 /* if first pass, at least one byte lane pass
757                  * ,then DQS_PASS=1 and will set to related reg.
758                  */
759                 if(pDCTstat->DqsRcvEn_Pass != 0) {
760                         result = DQS_PASS;
761                 } else {
762                         result = DQS_FAIL;
763                 }
764
765         } else {
766                 /* if second pass, at least one byte lane fail
767                  * ,then DQS_FAIL=1 and will set to related reg.
768                  */
769                 if(pDCTstat->DqsRcvEn_Pass != 0xFF) {
770                         result = DQS_FAIL;
771                 } else {
772                         result = DQS_PASS;
773                 }
774         }
775
776         /* if second pass, we can't find the fail until FFh,
777          * then let it fail to save the final delay
778          */
779         if((Pass == SecondPass) && (pDCTstat->Status & (1 << SB_DQSRcvLimit))) {
780                 result = DQS_FAIL;
781                 pDCTstat->DqsRcvEn_Pass = 0;
782         }
783
784         /* second pass needs to be inverted
785          * FIXME? this could be inverted in the above code to start with...
786          */
787         if(Pass == SecondPass) {
788                 if (result == DQS_PASS) {
789                         result = DQS_FAIL;
790                 } else if (result == DQS_FAIL) { /* FIXME: doesn't need to be else if */
791                         result = DQS_PASS;
792                 }
793         }
794
795
796         return result;
797 }
798
/*
 * Initialize the DQS positions in preparation for Receiver Enable
 * Training by running InitDQSPos4RcvrEn_D() for channel 0 and then
 * channel 1.
 */
static void mct_InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
				struct DCTStatStruc *pDCTstat)
{
	InitDQSPos4RcvrEn_D(pMCTstat, pDCTstat, 0);
	InitDQSPos4RcvrEn_D(pMCTstat, pDCTstat, 1);
}
812
813 static void InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
814                                 struct DCTStatStruc *pDCTstat, u8 Channel)
815 {
816         /* Initialize the DQS Positions in preparation for
817          * Reciever Enable Training.
818          * Write Position is no Delay
819          * Read Position is 1/2 Memclock Delay
820          */
821
822         u8 i, j;
823         u32 dword;
824         u8 dn = 4; /* TODO: Rev C could be 4 */
825         u32 dev = pDCTstat->dev_dct;
826         u32 index_reg = 0x98 + 0x100 * Channel;
827
828         /* FIXME: add Cx support */
829         dword = 0x00000000;
830         for(i=1; i<=3; i++) {
831                 for(j=0; j<dn; j++)
832                         /* DIMM0 Write Data Timing Low */
833                         /* DIMM0 Write ECC Timing */
834                         Set_NB32_index_wait(dev, index_reg, i + 0x100 * j, dword);
835         }
836
837         /* errata #180 */
838         dword = 0x2f2f2f2f;
839         for(i=5; i<=6; i++) {
840                 for(j=0; j<dn; j++)
841                         /* DIMM0 Read DQS Timing Control Low */
842                         Set_NB32_index_wait(dev, index_reg, i + 0x100 * j, dword);
843         }
844
845         dword = 0x0000002f;
846         for(j=0; j<dn; j++)
847                 /* DIMM0 Read DQS ECC Timing Control */
848                 Set_NB32_index_wait(dev, index_reg, 7 + 0x100 * j, dword);
849 }
850
851 void SetEccDQSRcvrEn_D(struct DCTStatStruc *pDCTstat, u8 Channel)
852 {
853         u32 dev;
854         u32 index_reg;
855         u32 index;
856         u8 ChipSel;
857         u8 *p;
858         u32 val;
859
860         dev = pDCTstat->dev_dct;
861         index_reg = 0x98 + Channel * 0x100;
862         index = 0x12;
863         p = pDCTstat->CH_D_BC_RCVRDLY[Channel];
864         print_debug_dqs("\t\tSetEccDQSRcvrPos: Channel ", Channel,  2);
865         for(ChipSel = 0; ChipSel < MAX_CS_SUPPORTED; ChipSel += 2) {
866                 val = p[ChipSel>>1];
867                 Set_NB32_index_wait(dev, index_reg, index, val);
868                 print_debug_dqs_pair("\t\tSetEccDQSRcvrPos: ChipSel ",
869                                         ChipSel, " rcvr_delay ",  val, 2);
870                 index += 3;
871         }
872 }
873
874 static void CalcEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat,
875                                 struct DCTStatStruc *pDCTstat, u8 Channel)
876 {
877         u8 ChipSel;
878         u16 EccDQSLike;
879         u8 EccDQSScale;
880         u32 val, val0, val1;
881
882         EccDQSLike = pDCTstat->CH_EccDQSLike[Channel];
883         EccDQSScale = pDCTstat->CH_EccDQSScale[Channel];
884
885         for (ChipSel = 0; ChipSel < MAX_CS_SUPPORTED; ChipSel += 2) {
886                 if(mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, ChipSel)) {
887                         u8 *p;
888                         p = pDCTstat->CH_D_B_RCVRDLY[Channel][ChipSel>>1];
889
890                         /* DQS Delay Value of Data Bytelane
891                          * most like ECC byte lane */
892                         val0 = p[EccDQSLike & 0x07];
893                         /* DQS Delay Value of Data Bytelane
894                          * 2nd most like ECC byte lane */
895                         val1 = p[(EccDQSLike>>8) & 0x07];
896
897                         if (!(pDCTstat->Status & (1 << SB_Registered))) {
898                                 if(val0 > val1) {
899                                         val = val0 - val1;
900                                 } else {
901                                         val = val1 - val0;
902                                 }
903
904                                 val *= ~EccDQSScale;
905                                 val >>= 8; /* /256 */
906
907                                 if(val0 > val1) {
908                                         val -= val1;
909                                 } else {
910                                         val += val0;
911                                 }
912                         } else {
913                                 val = val1 - val0;
914                                 val += val1;
915                         }
916
917                         pDCTstat->CH_D_BC_RCVRDLY[Channel][ChipSel>>1] = val;
918                 }
919         }
920         SetEccDQSRcvrEn_D(pDCTstat, Channel);
921 }
922
923 void mctSetEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat,
924                         struct DCTStatStruc *pDCTstatA)
925 {
926         u8 Node;
927         u8 i;
928
929         for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
930                 struct DCTStatStruc *pDCTstat;
931                 pDCTstat = pDCTstatA + Node;
932                 if (!pDCTstat->NodePresent)
933                         break;
934                 if (pDCTstat->DCTSysLimit) {
935                         for(i=0; i<2; i++)
936                                 CalcEccDQSRcvrEn_D(pMCTstat, pDCTstat, i);
937                 }
938         }
939 }
940
941 void phyAssistedMemFnceTraining(struct MCTStatStruc *pMCTstat,
942                         struct DCTStatStruc *pDCTstatA)
943 {
944         u8 Node = 0;
945         struct DCTStatStruc *pDCTstat;
946
947         /* FIXME: skip for Ax */
948         while (Node < MAX_NODES_SUPPORTED) {
949                 pDCTstat = pDCTstatA + Node;
950
951                 if(pDCTstat->DCTSysLimit) {
952                         fenceDynTraining_D(pMCTstat, pDCTstat, 0);
953                         fenceDynTraining_D(pMCTstat, pDCTstat, 1);
954                 }
955                 Node++;
956         }
957 }
958
959 static void fenceDynTraining_D(struct MCTStatStruc *pMCTstat,
960                         struct DCTStatStruc *pDCTstat, u8 dct)
961 {
962         u16 avRecValue;
963         u32 val;
964         u32 dev;
965         u32 index_reg = 0x98 + 0x100 * dct;
966         u32 index;
967
968         /* BIOS first programs a seed value to the phase recovery engine
969          *  (recommended 19) registers.
970          * Dram Phase Recovery Control Register (F2x[1,0]9C_x[51:50] and
971          * F2x[1,0]9C_x52.) .
972          */
973         dev = pDCTstat->dev_dct;
974         for (index = 0x50; index <= 0x52; index ++) {
975                 val = Get_NB32_index_wait(dev, index_reg, index) & ~0xFF;
976                 val |= (FenceTrnFinDlySeed & 0x1F);
977                 if (index != 0x52) {
978                         val &= ~(0xFF << 8);
979                         val |= (val & 0xFF) << 8;
980                         val &= 0xFFFF;
981                         val |= val << 16;
982                 }
983                 Set_NB32_index_wait(dev, index_reg, index, val);
984         }
985
986         /* Set F2x[1,0]9C_x08[PhyFenceTrEn]=1. */
987         val = Get_NB32_index_wait(dev, index_reg, 0x08);
988         val |= 1 << PhyFenceTrEn;
989         Set_NB32_index_wait(dev, index_reg, 0x08, val);
990
991         /* Wait 200 MEMCLKs. */
992         mct_Wait(50000);                /* wait 200us */
993
994         /* Clear F2x[1,0]9C_x08[PhyFenceTrEn]=0. */
995         val = Get_NB32_index_wait(dev, index_reg, 0x08);
996         val &= ~(1 << PhyFenceTrEn);
997         Set_NB32_index_wait(dev, index_reg, 0x08, val);
998
999         /* BIOS reads the phase recovery engine registers
1000          * F2x[1,0]9C_x[51:50] and F2x[1,0]9C_x52. */
1001         avRecValue = 0;
1002         for (index = 0x50; index <= 0x52; index ++) {
1003                 val = Get_NB32_index_wait(dev, index_reg, index);
1004                 avRecValue += val & 0x7F;
1005                 if (index != 0x52) {
1006                         avRecValue += (val >> 8) & 0x7F;
1007                         avRecValue += (val >> 16) & 0x7F;
1008                         avRecValue += (val >> 24) & 0x7F;
1009                 }
1010         }
1011
1012         val = avRecValue / 9;
1013         if (avRecValue % 9)
1014                 val++;
1015         avRecValue = val;
1016
1017         /* Write the (averaged value -8) to F2x[1,0]9C_x0C[PhyFence]. */
1018         /* inlined mct_AdjustFenceValue() */
1019         /* The RBC0 is not supported. */
1020         /* if (pDCTstat->LogicalCPUID & AMD_RB_C0)
1021                 avRecValue -= 3;
1022         else
1023         */
1024         if (pDCTstat->LogicalCPUID & AMD_DR_Cx)
1025                 avRecValue -= 8;
1026         else if (pDCTstat->LogicalCPUID & AMD_DR_Bx)
1027                 avRecValue -= 8;
1028
1029         val = Get_NB32_index_wait(dev, index_reg, 0x0C);
1030         val &= ~(0x1F << 16);
1031         val |= (avRecValue & 0x1F) << 16;
1032         Set_NB32_index_wait(dev, index_reg, 0x0C, val);
1033
1034         /* Rewrite F2x[1,0]9C_x04-DRAM Address/Command Timing Control Register
1035          * delays (both channels). */
1036         val = Get_NB32_index_wait(dev, index_reg, 0x04);
1037         Set_NB32_index_wait(dev, index_reg, 0x04, val);
1038 }
1039
1040 void mct_Wait(u32 cycles)
1041 {
1042         u32 saved;
1043         u32 hi, lo, msr;
1044
1045         /* Wait # of 50ns cycles
1046            This seems like a hack to me...  */
1047
1048         cycles <<= 3;           /* x8 (number of 1.25ns ticks) */
1049
1050         msr = 0x10;                     /* TSC */
1051         _RDMSR(msr, &lo, &hi);
1052         saved = lo;
1053         do {
1054                 _RDMSR(msr, &lo, &hi);
1055         } while (lo - saved < cycles );
1056 }