More explicit and straightforward way to set the seed.
[coreboot.git] / src / northbridge / amd / amdmct / mct / mctsrc.c
1 /*
2  * This file is part of the coreboot project.
3  *
4  * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
19  */
20
21 /******************************************************************************
22  Description: Receiver En and DQS Timing Training feature for DDR 2 MCT
23 ******************************************************************************/
24
/*
 * Forward declarations of the file-local (static) routines, so that they can
 * be referenced before the point where they are defined.
 */
25 static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat,
26                                 struct DCTStatStruc *pDCTstat, u8 Pass);
27 static u8 mct_SavePassRcvEnDly_D(struct DCTStatStruc *pDCTstat,
28                                         u8 rcvrEnDly, u8 Channel,
29                                         u8 receiver, u8 Pass);
30 static u8 mct_CompareTestPatternQW0_D(struct MCTStatStruc *pMCTstat,
31                                         struct DCTStatStruc *pDCTstat,
32                                         u32 addr, u8 channel,
33                                         u8 pattern, u8 Pass);
34 static void mct_InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
35                                          struct DCTStatStruc *pDCTstat);
36 static void InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
37                                 struct DCTStatStruc *pDCTstat, u8 Channel);
38 static void CalcEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat,
39                                 struct DCTStatStruc *pDCTstat, u8 Channel);
40 static void mct_SetFinalRcvrEnDly_D(struct DCTStatStruc *pDCTstat,
41                                 u8 RcvrEnDly, u8 where,
42                                 u8 Channel, u8 Receiver,
43                                 u32 dev, u32 index_reg,
44                                 u8 Addl_Index, u8 Pass);
45 static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u8 DQSRcvEnDly);
46 static void fenceDynTraining_D(struct MCTStatStruc *pMCTstat,
47                         struct DCTStatStruc *pDCTstat, u8 dct);
48 static void mct_DisableDQSRcvEn_D(struct DCTStatStruc *pDCTstat);
49
50
51 /* Warning:  These must be located so they do not cross a logical 16-bit
52    segment boundary! */
/* Pattern 0: sixteen dwords (64 bytes), every dword 0xaaaaaaaa. */
53 const static u32 TestPattern0_D[] = {
54         0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
55         0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
56         0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
57         0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
58 };
/* Pattern 1: sixteen dwords of 0x55555555, the bitwise complement of
 * pattern 0. */
59 const static u32 TestPattern1_D[] = {
60         0x55555555, 0x55555555, 0x55555555, 0x55555555,
61         0x55555555, 0x55555555, 0x55555555, 0x55555555,
62         0x55555555, 0x55555555, 0x55555555, 0x55555555,
63         0x55555555, 0x55555555, 0x55555555, 0x55555555,
64 };
/* Pattern 2: sixteen dwords of mixed, non-repeating values. */
65 const static u32 TestPattern2_D[] = {
66         0x12345678, 0x87654321, 0x23456789, 0x98765432,
67         0x59385824, 0x30496724, 0x24490795, 0x99938733,
68         0x40385642, 0x38465245, 0x29432163, 0x05067894,
69         0x12349045, 0x98723467, 0x12387634, 0x34587623,
70 };
71
/*
 * SetupRcvrPattern - copy the two receiver-enable training patterns into a
 * caller-supplied scratch buffer and record where they were placed.
 *
 * pMCTstat: not used by this routine.
 * pDCTstat: receives the two buffer addresses in PtrPatternBufA/PtrPatternBufB.
 * buffer:   scratch area; must be large enough for the alignment slack plus
 *           48 dwords (pattern A at the aligned start, pattern B 32 dwords later,
 *           16 dwords each).
 * pass:     forwarded to the pattern-selection helpers.
 */
72 static void SetupRcvrPattern(struct MCTStatStruc *pMCTstat,
73                 struct DCTStatStruc *pDCTstat, u32 *buffer, u8 pass)
74 {
75         /*
76          * 1. Copy the alpha and Beta patterns from ROM to Cache,
77          *     aligning on 16 byte boundary
78          * 2. Set the ptr to DCTStatstruc.PtrPatternBufA for Alpha
79          * 3. Set the ptr to DCTStatstruc.PtrPatternBufB for Beta
80          */
81
82         u32 *buf_a;
83         u32 *buf_b;
84         u32 *p_A;
85         u32 *p_B;
86         u8 i;
87
           /* Adding 0x10 and masking the low nibble yields the next 16-byte
            * boundary strictly above 'buffer' (it always advances 1..16 bytes).
            * The cast to u32 treats the pointer as a 32-bit value. */
88         buf_a = (u32 *)(((u32)buffer + 0x10) & (0xfffffff0));
           /* Pattern B sits 32 dwords (128 bytes) after pattern A. */
89         buf_b = buf_a + 32; //??
           /* Note the pairing: buffer A is filled from the "...PassB" helper and
            * buffer B from the "...PassA" helper. */
90         p_A = (u32 *)SetupDqsPattern_1PassB(pass);
91         p_B = (u32 *)SetupDqsPattern_1PassA(pass);
92
           /* Copy 16 dwords (64 bytes) of each pattern. */
93         for(i=0;i<16;i++) {
94                 buf_a[i] = p_A[i];
95                 buf_b[i] = p_B[i];
96         }
97
98         pDCTstat->PtrPatternBufA = (u32)buf_a;
99         pDCTstat->PtrPatternBufB = (u32)buf_b;
100 }
101
102
/*
 * mct_TrainRcvrEn_D - entry point for receiver-enable training.
 *
 * Runs the software training routine for the given Pass only when
 * mct_checkNumberOfDqsRcvEn_1Pass(Pass) returns non-zero; otherwise it does
 * nothing.
 */
103 void mct_TrainRcvrEn_D(struct MCTStatStruc *pMCTstat,
104                         struct DCTStatStruc *pDCTstat, u8 Pass)
105 {
106         if(mct_checkNumberOfDqsRcvEn_1Pass(Pass))
107                 dqsTrainRcvrEn_SW(pMCTstat, pDCTstat, Pass);
108 }
109
110
/*
 * dqsTrainRcvrEn_SW - software DQS receiver-enable delay training for one node.
 *
 * pMCTstat: passed through to the helper routines called below.
 * pDCTstat: per-node DCT state; supplies the device handle and mode flags and
 *           receives the error/status bits set here.
 * Pass:     training pass selector (compared against FirstPass below).
 *
 * Outline, in the order the code performs it:
 *  - write 0x0c8 into bits 31:22 of register 0x78 (+0x100 per channel),
 *  - set CR4 bit 9 and HWCR bit 17 (and clear HWCR bit 15), remembering whether
 *    CR4 bit 9 / HWCR bit 17 were already set so they can be restored,
 *  - turn DIMM ECC off through mct_DisableDimmEccEn_D(),
 *  - for each channel and each receiver pair, sweep RcvrEnDly upward while
 *    writing, reading back and comparing test patterns,
 *  - program the resulting delays and the maximum latency, then undo the
 *    CR4/HWCR/ECC changes.
 */
111 static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat,
112                                 struct DCTStatStruc *pDCTstat, u8 Pass)
113 {
114         u8 Channel, RcvrEnDly, RcvrEnDlyRmin;
115         u8 Test0, Test1, CurrTest, CurrTestSide0, CurrTestSide1;
116         u8 CTLRMaxDelay, _2Ranks, PatternA, PatternB;
117         u8 Addl_Index = 0;
118         u8 Receiver;
119         u8 _DisableDramECC = 0, _Wrap32Dis = 0, _SSE2 = 0;
120         u8 RcvrEnDlyLimit, Final_Value, MaxDelay_CH[2];
121         u32 TestAddr0, TestAddr1, TestAddr0B, TestAddr1B;
122         u32 PatternBuffer[64+4]; /* FIXME: need increase 8? */
123         u32 Errors;
124
125         u32 val;
126         u32 reg;
127         u32 dev;
128         u32 index_reg;
129         u32 ch_start, ch_end, ch;
130         u32 msr;
131         u32 cr4;
132         u32 lo, hi;
133
134         u8 valid;
135         u32 tmp;
136         u8 LastTest;
137
138         print_debug_dqs("\nTrainRcvEn: Node", pDCTstat->Node_ID, 0);
139         print_debug_dqs("TrainRcvEn: Pass", Pass, 0);
140
141
142         dev = pDCTstat->dev_dct;
143         ch_start = 0;
            /* Ganged mode programs only the first channel's register. */
144         if(!pDCTstat->GangedMode) {
145                 ch_end = 2;
146         } else {
147                 ch_end = 1;
148         }
149
            /* Replace the 10-bit field at bits 31:22 of register 0x78 with 0x0c8. */
150         for (ch = ch_start; ch < ch_end; ch++) {
151                 reg = 0x78 + (0x100 * ch);
152                 val = Get_NB32(dev, reg);
153                 val &= ~(0x3ff << 22);
154                 val |= (0x0c8 << 22);           /* Max Rd Lat */
155                 Set_NB32(dev, reg, val);
156         }
157
158         Final_Value = 1;
159         if (Pass == FirstPass) {
160                 mct_InitDQSPos4RcvrEn_D(pMCTstat, pDCTstat);
161         } else {
162                 pDCTstat->DimmTrainFail = 0;
163                 pDCTstat->CSTrainFail = ~pDCTstat->CSPresent;
164         }
165         print_t("TrainRcvrEn: 1\n");
166
167         cr4 = read_cr4();
168         if(cr4 & ( 1 << 9)) {   /* save the old value */
169                 _SSE2 = 1;
170         }
171         cr4 |= (1 << 9);        /* OSFXSR enable SSE2 */
172         write_cr4(cr4);
173         print_t("TrainRcvrEn: 2\n");
174
175         msr = HWCR;
176         _RDMSR(msr, &lo, &hi);
177         //FIXME: Why use SSEDIS
178         if(lo & (1 << 17)) {    /* save the old value */
179                 _Wrap32Dis = 1;
180         }
181         lo |= (1 << 17);        /* HWCR.wrap32dis */
182         lo &= ~(1 << 15);       /* SSEDIS */
183         _WRMSR(msr, lo, hi);    /* Setting wrap32dis allows 64-bit memory references in real mode */
184         print_t("TrainRcvrEn: 3\n");
185
186         _DisableDramECC = mct_DisableDimmEccEn_D(pMCTstat, pDCTstat);
187
188
            /* Map the Speed code to T1000; unknown codes yield 0. */
189         if(pDCTstat->Speed == 1) {
190                 pDCTstat->T1000 = 5000; /* get the T1000 figure (cycle time (ns)*1K */
191         } else if(pDCTstat->Speed == 2) {
192                 pDCTstat->T1000 = 3759;
193         } else if(pDCTstat->Speed == 3) {
194                 pDCTstat->T1000 = 3003;
195         } else if(pDCTstat->Speed == 4) {
196                 pDCTstat->T1000 = 2500;
197         } else if(pDCTstat->Speed  == 5) {
198                 pDCTstat->T1000 = 1876;
199         } else {
200                 pDCTstat->T1000 = 0;
201         }
202
203         SetupRcvrPattern(pMCTstat, pDCTstat, PatternBuffer, Pass);
204         print_t("TrainRcvrEn: 4\n");
205
206         Errors = 0;
207         dev = pDCTstat->dev_dct;
            /* Not reset between channels, so it accumulates the maximum over
             * every receiver processed so far. */
208         CTLRMaxDelay = 0;
209
210         for (Channel = 0; Channel < 2; Channel++) {
211                 print_debug_dqs("\tTrainRcvEn51: Node ", pDCTstat->Node_ID, 1);
212                 print_debug_dqs("\tTrainRcvEn51: Channel ", Channel, 1);
213                 pDCTstat->Channel = Channel;
214
215                 MaxDelay_CH[Channel] = 0;
216                 index_reg = 0x98 + 0x100 * Channel;
217
                    /* mct_InitReceiver_D() returns 8 for a channel without valid
                     * DIMMs, which makes the loop below run zero times. */
218                 Receiver = mct_InitReceiver_D(pDCTstat, Channel);
219                 /* There are four receiver pairs, loosely associated with chipselects. */
220                 for (; Receiver < 8; Receiver += 2) {
221                         Addl_Index = (Receiver >> 1) * 3 + 0x10;
222                         LastTest = DQS_FAIL;
223
224                         /* mct_ModifyIndex_D */
225                         RcvrEnDlyRmin = RcvrEnDlyLimit = 0xff;
226
227                         print_debug_dqs("\t\tTrainRcvEnd52: index ", Addl_Index, 2);
228
229                         if(!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver)) {
230                                 print_t("\t\t\tRank not enabled_D\n");
231                                 continue;
232                         }
233
234                         TestAddr0 = mct_GetRcvrSysAddr_D(pMCTstat, pDCTstat, Channel, Receiver, &valid);
235                         if(!valid) {    /* Address not supported on current CS */
236                                 print_t("\t\t\tAddress not supported on current CS\n");
237                                 continue;
238                         }
239
240                         TestAddr0B = TestAddr0 + (BigPagex8_RJ8 << 3);
241
                            /* The odd receiver of the pair is tested too when its rank
                             * is enabled; otherwise its addresses are zeroed and
                             * _2Ranks is cleared. */
242                         if(mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver+1)) {
243                                 TestAddr1 = mct_GetRcvrSysAddr_D(pMCTstat, pDCTstat, Channel, Receiver+1, &valid);
244                                 if(!valid) {    /* Address not supported on current CS */
245                                         print_t("\t\t\tAddress not supported on current CS+1\n");
246                                         continue;
247                                 }
248                                 TestAddr1B = TestAddr1 + (BigPagex8_RJ8 << 3);
249                                 _2Ranks = 1;
250                         } else {
251                                 _2Ranks = TestAddr1 = TestAddr1B = 0;
252                         }
253
254                         print_debug_dqs("\t\tTrainRcvEn53: TestAddr0 ", TestAddr0, 2);
255                         print_debug_dqs("\t\tTrainRcvEn53: TestAddr0B ", TestAddr0B, 2);
256                         print_debug_dqs("\t\tTrainRcvEn53: TestAddr1 ", TestAddr1, 2);
257                         print_debug_dqs("\t\tTrainRcvEn53: TestAddr1B ", TestAddr1B, 2);
258
259                         /*
260                          * Get starting RcvrEnDly value
261                          */
262                         RcvrEnDly = mct_Get_Start_RcvrEnDly_1Pass(Pass);
263
264                         /* mct_GetInitFlag_D*/
265                         if (Pass == FirstPass) {
266                                 pDCTstat->DqsRcvEn_Pass = 0;
267                         } else {
268                                 pDCTstat->DqsRcvEn_Pass=0xFF;
269                         }
270                         pDCTstat->DqsRcvEn_Saved = 0;
271
272
273                         while(RcvrEnDly < RcvrEnDlyLimit) {     /* sweep Delay value here */
274                                 print_debug_dqs("\t\t\tTrainRcvEn541: RcvrEnDly ", RcvrEnDly, 3);
275
276                                 /* callback not required
277                                 if(mct_AdjustDelay_D(pDCTstat, RcvrEnDly))
278                                         goto skipDly;
279                                 */
280
281                                 /* Odd steps get another pattern such that even
282                                  and odd steps alternate. The pointers to the
283                                  patterns will be swapped at the end of the loop
284                                  so that they correspond. */
285                                 if(RcvrEnDly & 1) {
286                                         PatternA = 1;
287                                         PatternB = 0;
288                                 } else {
289                                         /* Even step */
290                                         PatternA = 0;
291                                         PatternB = 1;
292                                 }
293
294                                 mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0, PatternA); /* rank 0 of DIMM, testpattern 0 */
295                                 mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0B, PatternB); /* rank 0 of DIMM, testpattern 1 */
296                                 if(_2Ranks) {
297                                         mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1, PatternA); /*rank 1 of DIMM, testpattern 0 */
298                                         mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1B, PatternB); /*rank 1 of DIMM, testpattern 1 */
299                                 }
300
                                    /* FinalValue argument is 0 here: program the
                                     * candidate delay itself. */
301                                 mct_SetRcvrEnDly_D(pDCTstat, RcvrEnDly, 0, Channel, Receiver, dev, index_reg, Addl_Index, Pass);
302
303                                 CurrTest = DQS_FAIL;
304                                 CurrTestSide0 = DQS_FAIL;
305                                 CurrTestSide1 = DQS_FAIL;
306
307                                 mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0); /*cache fills */
308                                 Test0 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr0, Channel, PatternA, Pass);/* ROM vs cache compare */
309                                 proc_IOCLFLUSH_D(TestAddr0);
310                                 ResetDCTWrPtr_D(dev, index_reg, Addl_Index);
311
312                                 print_debug_dqs("\t\t\tTrainRcvEn542: Test0 result ", Test0, 3);
313
314                                 // != 0x00 mean pass
315
                                    /* The second address of a rank is only tested when
                                     * the first one passed. */
316                                 if(Test0 == DQS_PASS) {
317                                         mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0B);        /*cache fills */
318                                         /* ROM vs cache compare */
319                                         Test1 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr0B, Channel, PatternB, Pass);
320                                         proc_IOCLFLUSH_D(TestAddr0B);
321                                         ResetDCTWrPtr_D(dev, index_reg, Addl_Index);
322
323                                         print_debug_dqs("\t\t\tTrainRcvEn543: Test1 result ", Test1, 3);
324
325                                         if(Test1 == DQS_PASS) {
326                                                 CurrTestSide0 = DQS_PASS;
327                                         }
328                                 }
329                                 if(_2Ranks) {
330                                         mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1); /*cache fills */
331                                         /* ROM vs cache compare */
332                                         Test0 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr1, Channel, PatternA, Pass);
333                                         proc_IOCLFLUSH_D(TestAddr1);
334                                         ResetDCTWrPtr_D(dev, index_reg, Addl_Index);
335
336                                         print_debug_dqs("\t\t\tTrainRcvEn544: Test0 result ", Test0, 3);
337
338                                         if(Test0 == DQS_PASS) {
339                                                 mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1B);        /*cache fills */
340                                                 /* ROM vs cache compare */
341                                                 Test1 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr1B, Channel, PatternB, Pass);
342                                                 proc_IOCLFLUSH_D(TestAddr1B);
343                                                 ResetDCTWrPtr_D(dev, index_reg, Addl_Index);
344
345                                                 print_debug_dqs("\t\t\tTrainRcvEn545: Test1 result ", Test1, 3);
346                                                 if(Test1 == DQS_PASS) {
347                                                         CurrTestSide1 = DQS_PASS;
348                                                 }
349                                         }
350                                 }
351
                                    /* Overall result: with two ranks both sides must
                                     * pass, otherwise side 0 alone decides. */
352                                 if(_2Ranks) {
353                                         if ((CurrTestSide0 == DQS_PASS) && (CurrTestSide1 == DQS_PASS)) {
354                                                 CurrTest = DQS_PASS;
355                                         }
356                                 } else if (CurrTestSide0 == DQS_PASS) {
357                                         CurrTest = DQS_PASS;
358                                 }
359
360
361                                 /* record first pass DqsRcvEn to stack */
362                                 valid = mct_SavePassRcvEnDly_D(pDCTstat, RcvrEnDly, Channel, Receiver, Pass);
363
364                                 /* Break(1:RevF,2:DR) or not(0) FIXME: This comment doesn't make sense */
365                                 if(valid == 2 || (LastTest == DQS_FAIL && valid == 1)) {
366                                         RcvrEnDlyRmin = RcvrEnDly;
367                                         break;
368                                 }
369
370                                 LastTest = CurrTest;
371
372                                 /* swap the rank 0 pointers */
373                                 tmp = TestAddr0;
374                                 TestAddr0 = TestAddr0B;
375                                 TestAddr0B = tmp;
376
377                                 /* swap the rank 1 pointers */
378                                 tmp = TestAddr1;
379                                 TestAddr1 = TestAddr1B;
380                                 TestAddr1B = tmp;
381
382                                 print_debug_dqs("\t\t\tTrainRcvEn56: RcvrEnDly ", RcvrEnDly, 3);
383
384                                 RcvrEnDly++;
385
386                         }       /* while RcvrEnDly */
387
388                         print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDly ", RcvrEnDly, 2);
389                         print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDlyRmin ", RcvrEnDlyRmin, 3);
390                         print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDlyLimit ", RcvrEnDlyLimit, 3);
                            /* RcvrEnDlyRmin still holds its initial 0xff only when the
                             * sweep never took the break above. */
391                         if(RcvrEnDlyRmin == RcvrEnDlyLimit) {
392                                 /* no passing window */
393                                 pDCTstat->ErrStatus |= 1 << SB_NORCVREN;
394                                 Errors |= 1 << SB_NORCVREN;
395                                 pDCTstat->ErrCode = SC_FatalErr;
396                         }
397
398                         if(RcvrEnDly > (RcvrEnDlyLimit - 1)) {
399                                 /* passing window too narrow, too far delayed*/
400                                 pDCTstat->ErrStatus |= 1 << SB_SmallRCVR;
401                                 Errors |= 1 << SB_SmallRCVR;
402                                 pDCTstat->ErrCode = SC_FatalErr;
403                                 RcvrEnDly = RcvrEnDlyLimit - 1;
404                                 pDCTstat->CSTrainFail |= 1 << Receiver;
405                                 pDCTstat->DimmTrainFail |= 1 << (Receiver + Channel);
406                         }
407
408                         // CHB_D0_B0_RCVRDLY set in mct_Average_RcvrEnDly_Pass
409                         mct_Average_RcvrEnDly_Pass(pDCTstat, RcvrEnDly, RcvrEnDlyLimit, Channel, Receiver, Pass);
410
411                         mct_SetFinalRcvrEnDly_D(pDCTstat, RcvrEnDly, Final_Value, Channel, Receiver, dev, index_reg, Addl_Index, Pass);
412
413                         if(pDCTstat->ErrStatus & (1 << SB_SmallRCVR)) {
414                                 Errors |= 1 << SB_SmallRCVR;
415                         }
416
417                         RcvrEnDly += Pass1MemClkDly;
418                         if(RcvrEnDly > CTLRMaxDelay) {
419                                 CTLRMaxDelay = RcvrEnDly;
420                         }
421
422                 }       /* while Receiver */
423
424                 MaxDelay_CH[Channel] = CTLRMaxDelay;
425         }       /* for Channel */
426
            /* Use the larger of the two per-channel maxima for both channels. */
427         CTLRMaxDelay = MaxDelay_CH[0];
428         if (MaxDelay_CH[1] > CTLRMaxDelay)
429                 CTLRMaxDelay = MaxDelay_CH[1];
430
431         for (Channel = 0; Channel < 2; Channel++) {
432                 mct_SetMaxLatency_D(pDCTstat, Channel, CTLRMaxDelay); /* program Ch A/B MaxAsyncLat to correspond with max delay */
433         }
434
            /* index_reg and Addl_Index still hold whatever the last loop
             * iteration above left in them. */
435         ResetDCTWrPtr_D(dev, index_reg, Addl_Index);
436
437         if(_DisableDramECC) {
438                 mct_EnableDimmEccEn_D(pMCTstat, pDCTstat, _DisableDramECC);
439         }
440
441         if (Pass == FirstPass) {
442                 /*Disable DQSRcvrEn training mode */
443                 print_t("TrainRcvrEn: mct_DisableDQSRcvEn_D\n");
444                 mct_DisableDQSRcvEn_D(pDCTstat);
445         }
446
            /* Clear the bits again only if they were clear on entry. HWCR bit 15
             * (SSEDIS), cleared above, is not restored here. */
447         if(!_Wrap32Dis) {
448                 msr = HWCR;
449                 _RDMSR(msr, &lo, &hi);
450                 lo &= ~(1<<17);         /* restore HWCR.wrap32dis */
451                 _WRMSR(msr, lo, hi);
452         }
453         if(!_SSE2){
454                 cr4 = read_cr4();
455                 cr4 &= ~(1<<9);         /* restore cr4.OSFXSR */
456                 write_cr4(cr4);
457         }
458
            /* Debug dump of CH_MaxRdLat[] for both channels. */
459 #if DQS_TRAIN_DEBUG > 0
460         {
461                 u8 Channel;
462                 print_debug("TrainRcvrEn: CH_MaxRdLat:\n");
463                 for(Channel = 0; Channel<2; Channel++) {
464                         print_debug("Channel:"); print_debug_hex8(Channel);
465                         print_debug(": ");
466                         print_debug_hex8( pDCTstat->CH_MaxRdLat[Channel] );
467                         print_debug("\n");
468                 }
469         }
470 #endif
471
            /* Debug dump of the eight per-byte-lane delays stored for every
             * channel / receiver pair. */
472 #if DQS_TRAIN_DEBUG > 0
473         {
474                 u8 val;
475                 u8 Channel, Receiver;
476                 u8 i;
477                 u8 *p;
478
479                 print_debug("TrainRcvrEn: CH_D_B_RCVRDLY:\n");
480                 for(Channel = 0; Channel < 2; Channel++) {
481                         print_debug("Channel:"); print_debug_hex8(Channel); print_debug("\n");
482                         for(Receiver = 0; Receiver<8; Receiver+=2) {
483                                 print_debug("\t\tReceiver:");
484                                 print_debug_hex8(Receiver);
485                                 p = pDCTstat->CH_D_B_RCVRDLY[Channel][Receiver>>1];
486                                 print_debug(": ");
487                                 for (i=0;i<8; i++) {
488                                         val  = p[i];
489                                         print_debug_hex8(val);
490                                         print_debug(" ");
491                                 }
492                         print_debug("\n");
493                         }
494                 }
495         }
496 #endif
497
498         print_tx("TrainRcvrEn: Status ", pDCTstat->Status);
499         print_tx("TrainRcvrEn: ErrStatus ", pDCTstat->ErrStatus);
500         print_tx("TrainRcvrEn: ErrCode ", pDCTstat->ErrCode);
501         print_t("TrainRcvrEn: Done\n");
502 }
503
504
/*
 * mct_InitReceiver_D - pick the first receiver index to train on a DCT.
 *
 * Returns 8 when pDCTstat->DIMMValidDCT[dct] is zero and 0 otherwise. The
 * training loop in this file iterates while the receiver index is below 8, so
 * a return value of 8 makes it skip the channel entirely.
 */
505 u8 mct_InitReceiver_D(struct DCTStatStruc *pDCTstat, u8 dct)
506 {
507         if (pDCTstat->DIMMValidDCT[dct] == 0 ) {
508                 return 8;
509         } else {
510                 return 0;
511         }
512 }
513
514
/*
 * mct_SetFinalRcvrEnDly_D - thin wrapper around mct_SetRcvrEnDly_D().
 *
 * Every argument is forwarded unchanged; 'where' is passed in the position of
 * that routine's FinalValue parameter.
 */
515 static void mct_SetFinalRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u8 RcvrEnDly, u8 where, u8 Channel, u8 Receiver, u32 dev, u32 index_reg, u8 Addl_Index, u8 Pass/*, u8 *p*/)
516 {
517         /*
518          * Program final DqsRcvEnDly to additional index for DQS receiver
519          *  enabled delay
520          */
521         mct_SetRcvrEnDly_D(pDCTstat, RcvrEnDly, where, Channel, Receiver, dev, index_reg, Addl_Index, Pass);
522 }
523
524
/*
 * mct_DisableDQSRcvEn_D - clear the DqsRcvEnTrain bit in register 0x78.
 *
 * The register of channel 0 is always updated; the register of channel 1
 * (offset +0x100) is updated as well unless pDCTstat->GangedMode is set.
 */
525 static void mct_DisableDQSRcvEn_D(struct DCTStatStruc *pDCTstat)
526 {
527         u8 ch_end, ch;
528         u32 reg;
529         u32 dev;
530         u32 val;
531
532         dev = pDCTstat->dev_dct;
533         if (pDCTstat->GangedMode) {
534                 ch_end = 1;
535         } else {
536                 ch_end = 2;
537         }
538
            /* Read-modify-write so that all other bits of the register are kept. */
539         for (ch=0; ch<ch_end; ch++) {
540                 reg = 0x78 + 0x100 * ch;
541                 val = Get_NB32(dev, reg);
542                 val &= ~(1 << DqsRcvEnTrain);
543                 Set_NB32(dev, reg, val);
544         }
545 }
546
547
548 /* mct_ModifyIndex_D
549  * Function only used once so it was inlined.
550  */
551
552
553 /* mct_GetInitFlag_D
554  * Function only used once so it was inlined.
555  */
556
557
/*
 * mct_SetRcvrEnDly_D - write a receiver-enable delay for all eight byte lanes
 * of one receiver through the indexed register interface.
 *
 * pDCTstat:   DCT state; Status gets SB_DQSRcvLimit set when RcvrEnDly is 0xFE,
 *             and CH_D_B_RCVRDLY[][] supplies the values when FinalValue is set.
 * RcvrEnDly:  delay to program when FinalValue is 0.
 * FinalValue: non-zero selects the stored per-lane delays instead of RcvrEnDly.
 * Channel, Receiver: select the row CH_D_B_RCVRDLY[Channel][Receiver >> 1].
 * dev, index_reg:    device and index register used for the indexed accesses.
 * Addl_Index: offset added to the table-derived register index.
 * Pass:       not used by this routine.
 */
558 void mct_SetRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u8 RcvrEnDly,
559                         u8 FinalValue, u8 Channel, u8 Receiver, u32 dev,
560                         u32 index_reg, u8 Addl_Index, u8 Pass)
561 {
562         u32 index;
563         u8 i;
564         u8 *p;
565         u32 val;
566
567         if(RcvrEnDly == 0xFE) {
568                 /*set the boundary flag */
569                 pDCTstat->Status |= 1 << SB_DQSRcvLimit;
570         }
571
572         /* DimmOffset not needed for CH_D_B_RCVRDLY array */
573
574
575         for(i=0; i < 8; i++) {
576                 if(FinalValue) {
577                         /*calculate dimm offset */
578                         p = pDCTstat->CH_D_B_RCVRDLY[Channel][Receiver >> 1];
                            /* From here on the caller-supplied RcvrEnDly is
                             * replaced by the stored value for lane i. */
579                         RcvrEnDly = p[i];
580                 }
581
582                 /* if flag=0, set DqsRcvEn value to reg. */
583                 /* get the register index from table */
                    /* Two byte lanes share one register, hence i >> 1. */
584                 index = Table_DQSRcvEn_Offset[i >> 1];
585                 index += Addl_Index;    /* DIMMx DqsRcvEn byte0 */
586                 val = Get_NB32_index_wait(dev, index_reg, index);
587                 if(i & 1) {
588                         /* odd byte lane */
                            /* odd lanes occupy bits 23:16 */
589                         val &= ~(0xFF << 16);
590                         val |= (RcvrEnDly << 16);
591                 } else {
592                         /* even byte lane */
                            /* even lanes occupy bits 7:0 */
593                         val &= ~0xFF;
594                         val |= RcvrEnDly;
595                 }
596                 Set_NB32_index_wait(dev, index_reg, index, val);
597         }
598
599 }
600
/*
 * mct_SetMaxLatency_D - derive MaxRdLatency from the worst-case DQS receiver
 * enable delay and program it into bits 31:22 of register 0x78 of the channel.
 *
 * pDCTstat:    DCT state; supplies dev_dct, dev_nbmisc and GangedMode, and
 *              receives the result in CH_MaxRdLat[].
 * Channel:     DCT to program; forced to 0 when GangedMode is set.
 * DQSRcvEnDly: delay value whose bits 7:5 are added to the sub-total.
 *
 * The sub-total is accumulated in 1/2 MEMCLK units, scaled to 1/4 MEMCLK,
 * converted with the NB/memory clock ratio (rounded up), and finally gets 5
 * added before it is stored and written to the register.
 */
601 static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u8 DQSRcvEnDly)
602 {
603         u32 dev;
604         u32 reg;
605         u16 SubTotal;
606         u32 index_reg;
607         u32 reg_off;
608         u32 val;
609         u32 valx;
610
611         if(pDCTstat->GangedMode)
612                 Channel = 0;
613
614         dev = pDCTstat->dev_dct;
615         reg_off = 0x100 * Channel;
616         index_reg = 0x98 + reg_off;
617
618         /* Multiply the CAS Latency by two to get a number of 1/2 MEMCLKs units.*/
619         val = Get_NB32(dev, 0x88 + reg_off);
620         SubTotal = ((val & 0x0f) + 1) << 1;     /* SubTotal is 1/2 Memclk unit */
621
622         /* If registered DIMMs are being used then
623          *  add 1 MEMCLK to the sub-total.
624          */
625         val = Get_NB32(dev, 0x90 + reg_off);
626         if(!(val & (1 << UnBuffDimm)))
627                 SubTotal += 2;
628
629         /* If the address prelaunch is setup for 1/2 MEMCLKs then
630          *  add 1, else add 2 to the sub-total.
631          *  if (AddrCmdSetup || CsOdtSetup || CkeSetup) then K := K + 2;
632          */
633         val = Get_NB32_index_wait(dev, index_reg, 0x04);
634         if(!(val & 0x00202020))
635                 SubTotal += 1;
636         else
637                 SubTotal += 2;
638
639         /* If the F2x[1, 0]78[RdPtrInit] field is 4, 5, 6 or 7 MEMCLKs,
640          * then add 4, 3, 2, or 1 MEMCLKs, respectively to the sub-total. */
641         val = Get_NB32(dev, 0x78 + reg_off);
642         SubTotal += 8 - (val & 0x0f);
643
644         /* Convert bits 7-5 (also referred to as the coarse delay) of
645          * the current (or worst case) DQS receiver enable delay to
646          * 1/2 MEMCLKs units, rounding up, and add this to the sub-total.
647          */
648         SubTotal += DQSRcvEnDly >> 5;   /*BOZO-no rounding up */
649
650         /* Add 5.5 to the sub-total. 5.5 represents part of the
651          * processor specific constant delay value in the DRAM
652          * clock domain.
653          */
654         SubTotal <<= 1;         /*scale 1/2 MemClk to 1/4 MemClk */
655         SubTotal += 11;         /*add 5.5 1/2MemClk */
656
657         /* Convert the sub-total (in 1/2 MEMCLKs) to northbridge
658          * clocks (NCLKs) as follows (assuming DDR400 and assuming
659          * that no P-state or link speed changes have occurred).
660          */
661
662         /* New formula:
663          * SubTotal *= 3*(Fn2xD4[NBFid]+4)/(3+Fn2x94[MemClkFreq])/2 */
664         val = Get_NB32(dev, 0x94 + reg_off);
665
666         /* SubTotal div 4 to scale 1/4 MemClk back to MemClk */
667         val &= 7;
668         if (val == 4) {
669                 val++;          /* adjust for DDR2-1066 */
670         }
671         valx = (val + 3) << 2;
672
673         val = Get_NB32(pDCTstat->dev_nbmisc, 0xD4);
674         SubTotal *= ((val & 0x1f) + 4 ) * 3;
675
676         /* Round up: bias the dividend by (divisor - 1) so that any
677          * non-zero remainder bumps the truncated quotient by one. */
678         SubTotal += valx - 1;
679         SubTotal /= valx;
680
681         /* Add 5 NCLKs to the sub-total. 5 represents part of the
682          * processor specific constant value in the northbridge
683          * clock domain.
684          */
685         SubTotal += 5;
686
687         pDCTstat->CH_MaxRdLat[Channel] = SubTotal;
            /* In ganged mode Channel was forced to 0 above, so mirror the value
             * into the slot of channel 1 as well. */
688         if(pDCTstat->GangedMode) {
689                 pDCTstat->CH_MaxRdLat[1] = SubTotal;
690         }
691
692         /* Program the F2x[1, 0]78[MaxRdLatency] register with
693          * the total delay value (in NCLKs).
694          */
695
696         reg = 0x78 + reg_off;
697         val = Get_NB32(dev, reg);
698         val &= ~(0x3ff << 22);
699         val |= (SubTotal & 0x3ff) << 22;
700
701         /* program MaxRdLatency to correspond with current delay */
702         Set_NB32(dev, reg, val);
703 }
704
705
/*
 * mct_SavePassRcvEnDly_D - record the current delay for every byte lane that
 * is newly flagged in the pass mask.
 *
 * pDCTstat:  DCT state; DqsRcvEn_Pass is read, DqsRcvEn_Saved is read and
 *            updated, CH_D_B_RCVRDLY[Channel][receiver >> 1][] receives the
 *            delay values.
 * rcvrEnDly: delay value to store for each newly flagged lane.
 * Channel, receiver: select the destination row of the delay table.
 * Pass:      SecondPass inverts the pass mask before it is used.
 *
 * Returns whatever mct_SaveRcvEnDly_D_1Pass(pDCTstat, Pass) returns.
 */
706 static u8 mct_SavePassRcvEnDly_D(struct DCTStatStruc *pDCTstat,
707                         u8 rcvrEnDly, u8 Channel,
708                         u8 receiver, u8 Pass)
709 {
710         u8 i;
711         u8 mask_Saved, mask_Pass;
712         u8 *p;
713
714         /* calculate dimm offset
715          * not needed for CH_D_B_RCVRDLY array
716          */
717
718         /* cmp if there has new DqsRcvEnDly to be recorded */
719         mask_Pass = pDCTstat->DqsRcvEn_Pass;
720
721         if(Pass == SecondPass) {
722                 mask_Pass = ~mask_Pass;
723         }
724
725         mask_Saved = pDCTstat->DqsRcvEn_Saved;
            /* Nothing to record when both masks are equal. */
726         if(mask_Pass != mask_Saved) {
727
728                 /* find desired stack offset according to channel/dimm/byte */
729                 if(Pass == SecondPass) {
730                         // FIXME: SecondPass is never used for Barcelona p = pDCTstat->CH_D_B_RCVRDLY_1[Channel][receiver>>1];
                            /* NOTE(review): p is a null pointer on this path and the
                             * loop below stores through it for every lane that is set
                             * in mask_Pass but not in mask_Saved - TODO confirm this
                             * path is really unreachable. */
731                         p = 0; // Keep the compiler happy.
732                 } else {
                            /* Drop saved bits for lanes that are no longer flagged. */
733                         mask_Saved &= mask_Pass;
734                         p = pDCTstat->CH_D_B_RCVRDLY[Channel][receiver>>1];
735                 }
736                 for(i=0; i < 8; i++) {
737                         /* cmp per byte lane */
738                         if(mask_Pass & (1 << i)) {
739                                 if(!(mask_Saved & (1 << i))) {
740                                         /* save RcvEnDly to stack, according to
741                                         the related Dimm/byte lane */
742                                         p[i] = (u8)rcvrEnDly;
743                                         mask_Saved |= 1 << i;
744                                 }
745                         }
746                 }
747                 pDCTstat->DqsRcvEn_Saved = mask_Saved;
748         }
749         return mct_SaveRcvEnDly_D_1Pass(pDCTstat, Pass);
750 }
751
752
753 static u8 mct_CompareTestPatternQW0_D(struct MCTStatStruc *pMCTstat,
754                                         struct DCTStatStruc *pDCTstat,
755                                         u32 addr, u8 channel,
756                                         u8 pattern, u8 Pass)
757 {
758         /* Compare only the first beat of data.  Since target addrs are cache
759          * line aligned, the Channel parameter is used to determine which
760          * cache QW to compare.
761          */
762
763         u8 *test_buf;
764         u8 i;
765         u8 result;
766         u8 value;
767
768
769         if(Pass == FirstPass) {
770                 if(pattern==1) {
771                         test_buf = (u8 *)TestPattern1_D;
772                 } else {
773                         test_buf = (u8 *)TestPattern0_D;
774                 }
775         } else {                // Second Pass
776                 test_buf = (u8 *)TestPattern2_D;
777         }
778
779         SetUpperFSbase(addr);
780         addr <<= 8;
781
782         if((pDCTstat->Status & (1<<SB_128bitmode)) && channel ) {
783                 addr += 8;      /* second channel */
784                 test_buf += 8;
785         }
786
787         print_debug_dqs_pair("\t\t\t\t\t\t  test_buf = ", (u32)test_buf, "  |  addr_lo = ", addr,  4);
788         for (i=0; i<8; i++) {
789                 value = read32_fs(addr);
790                 print_debug_dqs_pair("\t\t\t\t\t\t\t\t ", test_buf[i], "  |  ", value, 4);
791
792                 if (value == test_buf[i]) {
793                         pDCTstat->DqsRcvEn_Pass |= (1<<i);
794                 } else {
795                         pDCTstat->DqsRcvEn_Pass &= ~(1<<i);
796                 }
797         }
798
799         result = DQS_FAIL;
800
801         if (Pass == FirstPass) {
802                 /* if first pass, at least one byte lane pass
803                  * ,then DQS_PASS=1 and will set to related reg.
804                  */
805                 if(pDCTstat->DqsRcvEn_Pass != 0) {
806                         result = DQS_PASS;
807                 } else {
808                         result = DQS_FAIL;
809                 }
810
811         } else {
812                 /* if second pass, at least one byte lane fail
813                  * ,then DQS_FAIL=1 and will set to related reg.
814                  */
815                 if(pDCTstat->DqsRcvEn_Pass != 0xFF) {
816                         result = DQS_FAIL;
817                 } else {
818                         result = DQS_PASS;
819                 }
820         }
821
822         /* if second pass, we can't find the fail until FFh,
823          * then let it fail to save the final delay
824          */
825         if((Pass == SecondPass) && (pDCTstat->Status & (1 << SB_DQSRcvLimit))) {
826                 result = DQS_FAIL;
827                 pDCTstat->DqsRcvEn_Pass = 0;
828         }
829
830         /* second pass needs to be inverted
831          * FIXME? this could be inverted in the above code to start with...
832          */
833         if(Pass == SecondPass) {
834                 if (result == DQS_PASS) {
835                         result = DQS_FAIL;
836                 } else if (result == DQS_FAIL) { /* FIXME: doesn't need to be else if */
837                         result = DQS_PASS;
838                 }
839         }
840
841
842         return result;
843 }
844
845
846
847 static void mct_InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
848                                 struct DCTStatStruc *pDCTstat)
849 {
850         /* Initialize the DQS Positions in preparation for
851          * Receiver Enable Training.
852          * Write Position is 1/2 Memclock Delay
853          * Read Position is 1/2 Memclock Delay
854          */
855         u8 i;
856         for(i=0;i<2; i++){
857                 InitDQSPos4RcvrEn_D(pMCTstat, pDCTstat, i);
858         }
859 }
860
861
862 static void InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
863                                 struct DCTStatStruc *pDCTstat, u8 Channel)
864 {
865         /* Initialize the DQS Positions in preparation for
866          * Receiver Enable Training.
867          * Write Position is no Delay
868          * Read Position is 1/2 Memclock Delay
869          */
870
871         u8 i, j;
872         u32 dword;
873         u8 dn = 2; // TODO: Rev C could be 4
874         u32 dev = pDCTstat->dev_dct;
875         u32 index_reg = 0x98 + 0x100 * Channel;
876
877
878         // FIXME: add Cx support
879         dword = 0x00000000;
880         for(i=1; i<=3; i++) {
881                 for(j=0; j<dn; j++)
882                         /* DIMM0 Write Data Timing Low */
883                         /* DIMM0 Write ECC Timing */
884                         Set_NB32_index_wait(dev, index_reg, i + 0x100 * j, dword);
885         }
886
887         /* errata #180 */
888         dword = 0x2f2f2f2f;
889         for(i=5; i<=6; i++) {
890                 for(j=0; j<dn; j++)
891                         /* DIMM0 Read DQS Timing Control Low */
892                         Set_NB32_index_wait(dev, index_reg, i + 0x100 * j, dword);
893         }
894
895         dword = 0x0000002f;
896         for(j=0; j<dn; j++)
897                 /* DIMM0 Read DQS ECC Timing Control */
898                 Set_NB32_index_wait(dev, index_reg, 7 + 0x100 * j, dword);
899 }
900
901
902 void SetEccDQSRcvrEn_D(struct DCTStatStruc *pDCTstat, u8 Channel)
903 {
904         u32 dev;
905         u32 index_reg;
906         u32 index;
907         u8 ChipSel;
908         u8 *p;
909         u32 val;
910
911         dev = pDCTstat->dev_dct;
912         index_reg = 0x98 + Channel * 0x100;
913         index = 0x12;
914         p = pDCTstat->CH_D_BC_RCVRDLY[Channel];
915         print_debug_dqs("\t\tSetEccDQSRcvrPos: Channel ", Channel,  2);
916         for(ChipSel = 0; ChipSel < MAX_CS_SUPPORTED; ChipSel += 2) {
917                 val = p[ChipSel>>1];
918                 Set_NB32_index_wait(dev, index_reg, index, val);
919                 print_debug_dqs_pair("\t\tSetEccDQSRcvrPos: ChipSel ",
920                                         ChipSel, " rcvr_delay ",  val, 2);
921                 index += 3;
922         }
923 }
924
925
926 static void CalcEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat,
927                                 struct DCTStatStruc *pDCTstat, u8 Channel)
928 {
929         u8 ChipSel;
930         u16 EccDQSLike;
931         u8 EccDQSScale;
932         u32 val, val0, val1;
933
934         EccDQSLike = pDCTstat->CH_EccDQSLike[Channel];
935         EccDQSScale = pDCTstat->CH_EccDQSScale[Channel];
936
937         for (ChipSel = 0; ChipSel < MAX_CS_SUPPORTED; ChipSel += 2) {
938                 if(mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, ChipSel)) {
939                         u8 *p;
940                         p = pDCTstat->CH_D_B_RCVRDLY[Channel][ChipSel>>1];
941
942                         /* DQS Delay Value of Data Bytelane
943                          * most like ECC byte lane */
944                         val0 = p[EccDQSLike & 0x07];
945                         /* DQS Delay Value of Data Bytelane
946                          * 2nd most like ECC byte lane */
947                         val1 = p[(EccDQSLike>>8) & 0x07];
948
949                         if(val0 > val1) {
950                                 val = val0 - val1;
951                         } else {
952                                 val = val1 - val0;
953                         }
954
955                         val *= ~EccDQSScale;
956                         val >>= 8; // /256
957
958                         if(val0 > val1) {
959                                 val -= val1;
960                         } else {
961                                 val += val0;
962                         }
963
964                         pDCTstat->CH_D_BC_RCVRDLY[Channel][ChipSel>>1] = val;
965                 }
966         }
967         SetEccDQSRcvrEn_D(pDCTstat, Channel);
968 }
969
970 void mctSetEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat,
971                         struct DCTStatStruc *pDCTstatA)
972 {
973         u8 Node;
974         u8 i;
975
976         for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
977                 struct DCTStatStruc *pDCTstat;
978                 pDCTstat = pDCTstatA + Node;
979                 if (!pDCTstat->NodePresent)
980                         break;
981                 if (pDCTstat->DCTSysLimit) {
982                         for(i=0; i<2; i++)
983                                 CalcEccDQSRcvrEn_D(pMCTstat, pDCTstat, i);
984                 }
985         }
986 }
987
988
989 void phyAssistedMemFnceTraining(struct MCTStatStruc *pMCTstat,
990                         struct DCTStatStruc *pDCTstatA)
991 {
992
993         u8 Node = 0;
994         struct DCTStatStruc *pDCTstat;
995
996         // FIXME: skip for Ax
997         while (Node < MAX_NODES_SUPPORTED) {
998                 pDCTstat = pDCTstatA + Node;
999
1000                 if(pDCTstat->DCTSysLimit) {
1001                         fenceDynTraining_D(pMCTstat, pDCTstat, 0);
1002                         fenceDynTraining_D(pMCTstat, pDCTstat, 1);
1003                 }
1004                 Node++;
1005         }
1006 }
1007
1008
1009 static void fenceDynTraining_D(struct MCTStatStruc *pMCTstat,
1010                         struct DCTStatStruc *pDCTstat, u8 dct)
1011 {
1012         u16 avRecValue;
1013         u32 val;
1014         u32 dev;
1015         u32 index_reg = 0x98 + 0x100 * dct;
1016         u32 index;
1017
1018         /* BIOS first programs a seed value to the phase recovery engine
1019          *  (recommended 19) registers.
1020          * Dram Phase Recovery Control Register (F2x[1,0]9C_x[51:50] and
1021          * F2x[1,0]9C_x52.) .
1022          */
1023
1024         dev = pDCTstat->dev_dct;
1025         for (index = 0x50; index <= 0x52; index ++) {
1026                 val = (FenceTrnFinDlySeed & 0x1F);
1027                 if (index != 0x52) {
1028                         val |= val << 8 | val << 16 | val << 24;
1029                 }
1030                 Set_NB32_index_wait(dev, index_reg, index, val);
1031         }
1032
1033
1034         /* Set F2x[1,0]9C_x08[PhyFenceTrEn]=1. */
1035         val = Get_NB32_index_wait(dev, index_reg, 0x08);
1036         val |= 1 << PhyFenceTrEn;
1037         Set_NB32_index_wait(dev, index_reg, 0x08, val);
1038
1039         /* Wait 200 MEMCLKs. */
1040         mct_Wait(50000);                /* wait 200us */
1041
1042         /* Clear F2x[1,0]9C_x08[PhyFenceTrEn]=0. */
1043         val = Get_NB32_index_wait(dev, index_reg, 0x08);
1044         val &= ~(1 << PhyFenceTrEn);
1045         Set_NB32_index_wait(dev, index_reg, 0x08, val);
1046
1047         /* BIOS reads the phase recovery engine registers
1048          * F2x[1,0]9C_x[51:50] and F2x[1,0]9C_x52. */
1049         avRecValue = 0;
1050         for (index = 0x50; index <= 0x52; index ++) {
1051                 val = Get_NB32_index_wait(dev, index_reg, index);
1052                 avRecValue += val & 0x7F;
1053                 if (index != 0x52) {
1054                         avRecValue += (val >> 8) & 0x7F;
1055                         avRecValue += (val >> 16) & 0x7F;
1056                         avRecValue += (val >> 24) & 0x7F;
1057                 }
1058         }
1059
1060         val = avRecValue / 9;
1061         if (avRecValue % 9)
1062                 val++;
1063         avRecValue = val;
1064
1065         /* Write the (averaged value -8) to F2x[1,0]9C_x0C[PhyFence]. */
1066         avRecValue -= 8;
1067         val = Get_NB32_index_wait(dev, index_reg, 0x0C);
1068         val &= ~(0x1F << 16);
1069         val |= (avRecValue & 0x1F) << 16;
1070         Set_NB32_index_wait(dev, index_reg, 0x0C, val);
1071
1072         /* Rewrite F2x[1,0]9C_x04-DRAM Address/Command Timing Control Register
1073          * delays (both channels). */
1074         val = Get_NB32_index_wait(dev, index_reg, 0x04);
1075         Set_NB32_index_wait(dev, index_reg, 0x04, val);
1076 }
1077
1078
1079 void mct_Wait(u32 cycles)
1080 {
1081         u32 saved;
1082         u32 hi, lo, msr;
1083
1084         /* Wait # of 50ns cycles
1085            This seems like a hack to me...  */
1086
1087         cycles <<= 3;           /* x8 (number of 1.25ns ticks) */
1088
1089         msr = 0x10;                     /* TSC */
1090         _RDMSR(msr, &lo, &hi);
1091         saved = lo;
1092         do {
1093                 _RDMSR(msr, &lo, &hi);
1094         } while (lo - saved < cycles );
1095 }