2 * This file is part of the coreboot project.
4 * Copyright (C) 2007 Advanced Micro Devices, Inc.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 /******************************************************************************
22 Description: Receiver En and DQS Timing Training feature for DDR 2 MCT
23 ******************************************************************************/
/* Forward declarations for the software DQS receiver-enable training
 * feature (DDR2 MCT). All helpers are file-local except where noted. */
static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat,
			struct DCTStatStruc *pDCTstat, u8 Pass);
static u8 mct_SavePassRcvEnDly_D(struct DCTStatStruc *pDCTstat,
			u8 rcvrEnDly, u8 Channel,
			u8 receiver, u8 Pass);
/* NOTE(review): remainder of this prototype's parameter list is not
 * visible in this view — see the definition below for the full signature. */
static u8 mct_CompareTestPatternQW0_D(struct MCTStatStruc *pMCTstat,
			struct DCTStatStruc *pDCTstat,
static void mct_InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
			struct DCTStatStruc *pDCTstat);
static void InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
			struct DCTStatStruc *pDCTstat, u8 Channel);
static void CalcEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat,
			struct DCTStatStruc *pDCTstat, u8 Channel);
static void mct_SetFinalRcvrEnDly_D(struct DCTStatStruc *pDCTstat,
			u8 RcvrEnDly, u8 where,
			u8 Channel, u8 Receiver,
			u32 dev, u32 index_reg,
			u8 Addl_Index, u8 Pass);
static void CalcMaxLatency_D(struct DCTStatStruc *pDCTstat,
			u8 DQSRcvrEnDly, u8 Channel);
static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u8 DQSRcvEnDly);
static void mct_SetDQSRcvEn_D(struct DCTStatStruc *pDCTstat, u32 val);
static void fenceDynTraining_D(struct MCTStatStruc *pMCTstat,
			struct DCTStatStruc *pDCTstat, u8 dct);
static void mct_DisableDQSRcvEn_D(struct DCTStatStruc *pDCTstat);
/* Warning: These must be located so they do not cross a logical 16-bit
 * segment boundary!
 */
/* First-pass "alpha" test pattern: all bit lanes driven 1010... so every
 * byte lane toggles on each beat. (Closing brace of the initializer is
 * outside this view.) */
const static u32 TestPattern0_D[] = {
	0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
	0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
	0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
	0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
/* First-pass "beta" test pattern: complement of TestPattern0_D (0101...),
 * so alternating sweep steps see inverted data on every lane. */
const static u32 TestPattern1_D[] = {
	0x55555555, 0x55555555, 0x55555555, 0x55555555,
	0x55555555, 0x55555555, 0x55555555, 0x55555555,
	0x55555555, 0x55555555, 0x55555555, 0x55555555,
	0x55555555, 0x55555555, 0x55555555, 0x55555555,
/* Second-pass test pattern: pseudo-random words rather than a simple
 * toggle, used once the coarse receiver-enable delay is known. */
const static u32 TestPattern2_D[] = {
	0x12345678, 0x87654321, 0x23456789, 0x98765432,
	0x59385824, 0x30496724, 0x24490795, 0x99938733,
	0x40385642, 0x38465245, 0x29432163, 0x05067894,
	0x12349045, 0x98723467, 0x12387634, 0x34587623,
/* Prepare the cache-resident copies of the training patterns for the
 * given pass and record their addresses in the DCT status structure.
 * NOTE(review): the local declarations and the actual ROM->cache copy
 * loop are not visible in this view. */
static void SetupRcvrPattern(struct MCTStatStruc *pMCTstat,
		struct DCTStatStruc *pDCTstat, u32 *buffer, u8 pass)
	/*
	 * 1. Copy the alpha and Beta patterns from ROM to Cache,
	 *    aligning on 16 byte boundary
	 * 2. Set the ptr to DCTStatstruc.PtrPatternBufA for Alpha
	 * 3. Set the ptr to DCTStatstruc.PtrPatternBufB for Beta
	 */
	/* Round the caller's buffer up to the next 16-byte boundary. */
	buf_a = (u32 *)(((u32)buffer + 0x10) & (0xfffffff0));
	buf_b = buf_a + 32; /* ?? buf_b is 32 dwords (128 bytes) past buf_a */
	p_A = (u32 *)SetupDqsPattern_1PassB(pass);
	p_B = (u32 *)SetupDqsPattern_1PassA(pass);
	/* Publish the aligned pattern buffers for later training reads. */
	pDCTstat->PtrPatternBufA = (u32)buf_a;
	pDCTstat->PtrPatternBufB = (u32)buf_b;
/* Public entry point: run software DQS receiver-enable training for this
 * node/pass, if the platform callback says this pass needs it. */
void mct_TrainRcvrEn_D(struct MCTStatStruc *pMCTstat,
		struct DCTStatStruc *pDCTstat, u8 Pass)
	if(mct_checkNumberOfDqsRcvEn_1Pass(Pass))
		dqsTrainRcvrEn_SW(pMCTstat, pDCTstat, Pass);
/* Software DQS receiver-enable training.
 *
 * For each channel and each receiver (chip-select pair), sweep the
 * DqsRcvEnDly value; at each step write alternating test patterns to two
 * addresses per rank, read them back through the cache, and compare the
 * first quadword. A step passes when both sides (both ranks, both
 * pattern addresses) compare clean. The passing window edge is recorded
 * via mct_SavePassRcvEnDly_D / mct_Average_RcvrEnDly_Pass, the final
 * delay is programmed, and the per-channel max delay feeds
 * mct_SetMaxLatency_D.
 *
 * NOTE(review): many interior lines (some declarations, else-arms,
 * closing braces) are not visible in this extract; indentation below is
 * reconstructed and approximate. */
static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat,
		struct DCTStatStruc *pDCTstat, u8 Pass)
	u8 Channel, RcvrEnDly, RcvrEnDlyRmin;
	u8 Test0, Test1, CurrTest, CurrTestSide0, CurrTestSide1;
	u8 CTLRMaxDelay, _2Ranks, PatternA, PatternB;
	u8 _DisableDramECC = 0, _Wrap32Dis = 0, _SSE2 = 0;
	u8 RcvrEnDlyLimit, Final_Value, MaxDelay_CH[2];
	u32 TestAddr0, TestAddr1, TestAddr0B, TestAddr1B;
	u32 PatternBuffer[64+4]; /* FIXME: need increase 8? */
	u32 ch_start, ch_end, ch;

	print_debug_dqs("\nTrainRcvEn: Node", pDCTstat->Node_ID, 0);
	print_debug_dqs("TrainRcvEn: Pass", Pass, 0);

	dev = pDCTstat->dev_dct;

	if(!pDCTstat->GangedMode) {

	/* Pre-program a conservative MaxRdLatency (F2x[1,0]78[31:22]) for
	 * each channel before training starts. */
	for (ch = ch_start; ch < ch_end; ch++) {
		reg = 0x78 + (0x100 * ch);
		val = Get_NB32(dev, reg);
		val &= ~(0x3ff << 22);
		val |= (0x0c8 << 22); /* Max Rd Lat */
		Set_NB32(dev, reg, val);

	if (Pass == FirstPass) {
		mct_InitDQSPos4RcvrEn_D(pMCTstat, pDCTstat);
		/* Start pessimistic: every absent CS counts as "failed". */
		pDCTstat->DimmTrainFail = 0;
		pDCTstat->CSTrainFail = ~pDCTstat->CSPresent;

	print_t("TrainRcvrEn: 1\n");

	/* Save/enable CR4.OSFXSR so SSE2 loads/stores may be used below. */
	if(cr4 & ( 1 << 9)) { /* save the old value */
	cr4 |= (1 << 9); /* OSFXSR enable SSE2 */

	print_t("TrainRcvrEn: 2\n");

	_RDMSR(msr, &lo, &hi);
	//FIXME: Why use SSEDIS
	if(lo & (1 << 17)) { /* save the old value */
	lo |= (1 << 17); /* HWCR.wrap32dis */
	lo &= ~(1 << 15); /* SSEDIS */
	_WRMSR(msr, lo, hi); /* Setting wrap32dis allows 64-bit memory references in real mode */

	print_t("TrainRcvrEn: 3\n");

	/* ECC checking must be off while deliberately mis-timed reads run. */
	_DisableDramECC = mct_DisableDimmEccEn_D(pMCTstat, pDCTstat);

	/* Map the DCT speed code to T1000 = cycle time in ns * 1000. */
	if(pDCTstat->Speed == 1) {
		pDCTstat->T1000 = 5000; /* get the T1000 figure (cycle time (ns)*1K */
	} else if(pDCTstat->Speed == 2) {
		pDCTstat->T1000 = 3759;
	} else if(pDCTstat->Speed == 3) {
		pDCTstat->T1000 = 3003;
	} else if(pDCTstat->Speed == 4) {
		pDCTstat->T1000 = 2500;
	} else if(pDCTstat->Speed == 5) {
		pDCTstat->T1000 = 1876;

	SetupRcvrPattern(pMCTstat, pDCTstat, PatternBuffer, Pass);
	print_t("TrainRcvrEn: 4\n");

	dev = pDCTstat->dev_dct;
	for (Channel = 0; Channel < 2; Channel++) {
		print_debug_dqs("\tTrainRcvEn51: Node ", pDCTstat->Node_ID, 1);
		print_debug_dqs("\tTrainRcvEn51: Channel ", Channel, 1);
		pDCTstat->Channel = Channel;

		MaxDelay_CH[Channel] = 0;
		index_reg = 0x98 + 0x100 * Channel;

		Receiver = mct_InitReceiver_D(pDCTstat, Channel);
		/* There are four receiver pairs, loosely associated with chipselects. */
		for (; Receiver < 8; Receiver += 2) {
			Addl_Index = (Receiver >> 1) * 3 + 0x10;

			/* mct_ModifyIndex_D */
			RcvrEnDlyRmin = RcvrEnDlyLimit = 0xff;

			print_debug_dqs("\t\tTrainRcvEnd52: index ", Addl_Index, 2);

			if(!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver)) {
				print_t("\t\t\tRank not enabled_D\n");

			/* Resolve a testable system address for rank 0 of this pair. */
			TestAddr0 = mct_GetRcvrSysAddr_D(pMCTstat, pDCTstat, Channel, Receiver, &valid);
			if(!valid) { /* Address not supported on current CS */
				print_t("\t\t\tAddress not supported on current CS\n");

			/* Second pattern address: one "big page" further on. */
			TestAddr0B = TestAddr0 + (BigPagex8_RJ8 << 3);

			if(mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver+1)) {
				TestAddr1 = mct_GetRcvrSysAddr_D(pMCTstat, pDCTstat, Channel, Receiver+1, &valid);
				if(!valid) { /* Address not supported on current CS */
					print_t("\t\t\tAddress not supported on current CS+1\n");
				TestAddr1B = TestAddr1 + (BigPagex8_RJ8 << 3);
				/* Single-rank pair: zero the rank-1 addresses. */
				_2Ranks = TestAddr1 = TestAddr1B = 0;

			print_debug_dqs("\t\tTrainRcvEn53: TestAddr0 ", TestAddr0, 2);
			print_debug_dqs("\t\tTrainRcvEn53: TestAddr0B ", TestAddr0B, 2);
			print_debug_dqs("\t\tTrainRcvEn53: TestAddr1 ", TestAddr1, 2);
			print_debug_dqs("\t\tTrainRcvEn53: TestAddr1B ", TestAddr1B, 2);

			/*
			 * Get starting RcvrEnDly value
			 */
			RcvrEnDly = mct_Get_Start_RcvrEnDly_1Pass(Pass);

			/* mct_GetInitFlag_D*/
			if (Pass == FirstPass) {
				pDCTstat->DqsRcvEn_Pass = 0;
				pDCTstat->DqsRcvEn_Pass=0xFF;
			pDCTstat->DqsRcvEn_Saved = 0;

			while(RcvrEnDly < RcvrEnDlyLimit) { /* sweep Delay value here */
				print_debug_dqs("\t\t\tTrainRcvEn541: RcvrEnDly ", RcvrEnDly, 3);

				/* callback not required
				if(mct_AdjustDelay_D(pDCTstat, RcvrEnDly))

				/* Odd steps get another pattern such that even
				 and odd steps alternate. The pointers to the
				 patterns will be swaped at the end of the loop
				 so that they correspond. */

				mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0, PatternA); /* rank 0 of DIMM, testpattern 0 */
				mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0B, PatternB); /* rank 0 of DIMM, testpattern 1 */
				mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1, PatternA); /*rank 1 of DIMM, testpattern 0 */
				mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1B, PatternB); /*rank 1 of DIMM, testpattern 1 */

				/* Apply the candidate delay, then test both sides. */
				mct_SetRcvrEnDly_D(pDCTstat, RcvrEnDly, 0, Channel, Receiver, dev, index_reg, Addl_Index, Pass);

				CurrTestSide0 = DQS_FAIL;
				CurrTestSide1 = DQS_FAIL;

				mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0); /*cache fills */
				Test0 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr0, Channel, PatternA, Pass);/* ROM vs cache compare */
				proc_IOCLFLUSH_D(TestAddr0);
				ResetDCTWrPtr_D(dev, index_reg, Addl_Index);

				print_debug_dqs("\t\t\tTrainRcvEn542: Test0 result ", Test0, 3);

				/* Side 0 passes only if BOTH pattern addresses compare clean. */
				if(Test0 == DQS_PASS) {
					mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0B); /*cache fills */
					/* ROM vs cache compare */
					Test1 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr0B, Channel, PatternB, Pass);
					proc_IOCLFLUSH_D(TestAddr0B);
					ResetDCTWrPtr_D(dev, index_reg, Addl_Index);

					print_debug_dqs("\t\t\tTrainRcvEn543: Test1 result ", Test1, 3);

					if(Test1 == DQS_PASS) {
						CurrTestSide0 = DQS_PASS;

				/* Same two-address check for rank 1 (side 1). */
				mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1); /*cache fills */
				/* ROM vs cache compare */
				Test0 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr1, Channel, PatternA, Pass);
				proc_IOCLFLUSH_D(TestAddr1);
				ResetDCTWrPtr_D(dev, index_reg, Addl_Index);

				print_debug_dqs("\t\t\tTrainRcvEn544: Test0 result ", Test0, 3);

				if(Test0 == DQS_PASS) {
					mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1B); /*cache fills */
					/* ROM vs cache compare */
					Test1 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr1B, Channel, PatternB, Pass);
					proc_IOCLFLUSH_D(TestAddr1B);
					ResetDCTWrPtr_D(dev, index_reg, Addl_Index);

					print_debug_dqs("\t\t\tTrainRcvEn545: Test1 result ", Test1, 3);
					if(Test1 == DQS_PASS) {
						CurrTestSide1 = DQS_PASS;

				if ((CurrTestSide0 == DQS_PASS) && (CurrTestSide1 == DQS_PASS)) {
				} else if (CurrTestSide0 == DQS_PASS) {

				/* record first pass DqsRcvEn to stack */
				valid = mct_SavePassRcvEnDly_D(pDCTstat, RcvrEnDly, Channel, Receiver, Pass);

				/* Break(1:RevF,2:DR) or not(0) FIXME: This comment deosn't make sense */
				if(valid == 2 || (LastTest == DQS_FAIL && valid == 1)) {
					RcvrEnDlyRmin = RcvrEnDly;

				/* swap the rank 0 pointers */
				TestAddr0 = TestAddr0B;

				/* swap the rank 1 pointers */
				TestAddr1 = TestAddr1B;

				print_debug_dqs("\t\t\tTrainRcvEn56: RcvrEnDly ", RcvrEnDly, 3);
			} /* while RcvrEnDly */

			print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDly ", RcvrEnDly, 2);
			print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDlyRmin ", RcvrEnDlyRmin, 3);
			print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDlyLimit ", RcvrEnDlyLimit, 3);
			if(RcvrEnDlyRmin == RcvrEnDlyLimit) {
				/* no passing window */
				pDCTstat->ErrStatus |= 1 << SB_NORCVREN;
				Errors |= 1 << SB_NORCVREN;
				pDCTstat->ErrCode = SC_FatalErr;

			if(RcvrEnDly > (RcvrEnDlyLimit - 1)) {
				/* passing window too narrow, too far delayed*/
				pDCTstat->ErrStatus |= 1 << SB_SmallRCVR;
				Errors |= 1 << SB_SmallRCVR;
				pDCTstat->ErrCode = SC_FatalErr;
				RcvrEnDly = RcvrEnDlyLimit - 1;
				pDCTstat->CSTrainFail |= 1 << Receiver;
				pDCTstat->DimmTrainFail |= 1 << (Receiver + Channel);

			// CHB_D0_B0_RCVRDLY set in mct_Average_RcvrEnDly_Pass
			mct_Average_RcvrEnDly_Pass(pDCTstat, RcvrEnDly, RcvrEnDlyLimit, Channel, Receiver, Pass);

			mct_SetFinalRcvrEnDly_D(pDCTstat, RcvrEnDly, Final_Value, Channel, Receiver, dev, index_reg, Addl_Index, Pass);

			if(pDCTstat->ErrStatus & (1 << SB_SmallRCVR)) {
				Errors |= 1 << SB_SmallRCVR;

			/* Track the largest delay seen on this channel. */
			RcvrEnDly += Pass1MemClkDly;
			if(RcvrEnDly > CTLRMaxDelay) {
				CTLRMaxDelay = RcvrEnDly;

		} /* while Receiver */

		MaxDelay_CH[Channel] = CTLRMaxDelay;

	/* Use the worse (larger) of the two channels' max delays. */
	CTLRMaxDelay = MaxDelay_CH[0];
	if (MaxDelay_CH[1] > CTLRMaxDelay)
		CTLRMaxDelay = MaxDelay_CH[1];

	for (Channel = 0; Channel < 2; Channel++) {
		mct_SetMaxLatency_D(pDCTstat, Channel, CTLRMaxDelay); /* program Ch A/B MaxAsyncLat to correspond with max delay */

	ResetDCTWrPtr_D(dev, index_reg, Addl_Index);

	/* Re-enable ECC if it was disabled above. */
	if(_DisableDramECC) {
		mct_EnableDimmEccEn_D(pMCTstat, pDCTstat, _DisableDramECC);

	if (Pass == FirstPass) {
		/*Disable DQSRcvrEn training mode */
		print_t("TrainRcvrEn: mct_DisableDQSRcvEn_D\n");
		mct_DisableDQSRcvEn_D(pDCTstat);

	/* Restore the CPU state saved at entry. */
	_RDMSR(msr, &lo, &hi);
	lo &= ~(1<<17); /* restore HWCR.wrap32dis */

	cr4 &= ~(1<<9); /* restore cr4.OSFXSR */

#if DQS_TRAIN_DEBUG > 0
	print_debug("TrainRcvrEn: CH_MaxRdLat:\n");
	for(Channel = 0; Channel<2; Channel++) {
		print_debug("Channel:"); print_debug_hex8(Channel);
		print_debug_hex8( pDCTstat->CH_MaxRdLat[Channel] );

#if DQS_TRAIN_DEBUG > 0
	u8 Channel, Receiver;

	/* Dump the per-byte-lane receiver-enable delays that were trained. */
	print_debug("TrainRcvrEn: CH_D_B_RCVRDLY:\n");
	for(Channel = 0; Channel < 2; Channel++) {
		print_debug("Channel:"); print_debug_hex8(Channel); print_debug("\n");
		for(Receiver = 0; Receiver<8; Receiver+=2) {
			print_debug("\t\tReceiver:");
			print_debug_hex8(Receiver);
			p = pDCTstat->CH_D_B_RCVRDLY[Channel][Receiver>>1];
			print_debug_hex8(val);

	print_tx("TrainRcvrEn: Status ", pDCTstat->Status);
	print_tx("TrainRcvrEn: ErrStatus ", pDCTstat->ErrStatus);
	print_tx("TrainRcvrEn: ErrCode ", pDCTstat->ErrCode);
	print_t("TrainRcvrEn: Done\n");
/* Return the starting receiver index for training on this DCT.
 * NOTE(review): the return statements are not visible in this view;
 * presumably the no-DIMM case returns a value >= 8 so the caller's
 * receiver loop is skipped — confirm against the full source. */
static u8 mct_InitReceiver_D(struct DCTStatStruc *pDCTstat, u8 dct)
	if (pDCTstat->DIMMValidDCT[dct] == 0 ) {
/* Thin wrapper: commit the final trained delay for one receiver by
 * delegating to mct_SetRcvrEnDly_D with the "where"/final flag. */
static void mct_SetFinalRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u8 RcvrEnDly, u8 where, u8 Channel, u8 Receiver, u32 dev, u32 index_reg, u8 Addl_Index, u8 Pass/*, u8 *p*/)
	/*
	 * Program final DqsRcvEnDly to additional index for DQS receiver
	 */
	mct_SetRcvrEnDly_D(pDCTstat, RcvrEnDly, where, Channel, Receiver, dev, index_reg, Addl_Index, Pass);
/* Turn off DQS receiver-enable training mode: clear the DqsRcvEnTrain
 * bit in F2x[1,0]78 for each channel in use. (Ganged-mode branch body
 * and local declarations are outside this view.) */
static void mct_DisableDQSRcvEn_D(struct DCTStatStruc *pDCTstat)
	dev = pDCTstat->dev_dct;
	if (pDCTstat->GangedMode) {

	for (ch=0; ch<ch_end; ch++) {
		reg = 0x78 + 0x100 * ch;
		val = Get_NB32(dev, reg);
		val &= ~(1 << DqsRcvEnTrain);
		Set_NB32(dev, reg, val);
552 * Function only used once so it was inlined.
557 * Function only used once so it was inlined.
/* Program a DqsRcvEnDly value into the per-byte-lane receiver-enable
 * registers (indexed via Table_DQSRcvEn_Offset + Addl_Index) for one
 * receiver. RcvrEnDly == 0xFE marks that the sweep hit its boundary. */
void mct_SetRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u8 RcvrEnDly,
		u8 FinalValue, u8 Channel, u8 Receiver, u32 dev,
		u32 index_reg, u8 Addl_Index, u8 Pass)
	if(RcvrEnDly == 0xFE) {
		/*set the boundary flag */
		pDCTstat->Status |= 1 << SB_DQSRcvLimit;

	/* DimmOffset not needed for CH_D_B_RCVRDLY array */

	/* One iteration per byte lane. */
	for(i=0; i < 8; i++) {
		/*calculate dimm offset */
		p = pDCTstat->CH_D_B_RCVRDLY[Channel][Receiver >> 1];

		/* if flag=0, set DqsRcvEn value to reg. */
		/* get the register index from table */
		index = Table_DQSRcvEn_Offset[i >> 1];
		index += Addl_Index; /* DIMMx DqsRcvEn byte0 */
		val = Get_NB32_index_wait(dev, index_reg, index);
		/* Delay field occupies bits 23:16 of the phy register. */
		val &= ~(0xFF << 16);
		val |= (RcvrEnDly << 16);
		Set_NB32_index_wait(dev, index_reg, index, val);
/* Compute and program F2x[1,0]78[MaxRdLatency] for a channel from the
 * worst-case DQS receiver-enable delay. The sub-total is accumulated in
 * 1/2-MEMCLK units, scaled to 1/4 MEMCLK, converted to northbridge
 * clocks, and rounded up. (Local declarations and a few increment lines
 * are outside this view.) */
static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u8 DQSRcvEnDly)
	if(pDCTstat->GangedMode)

	dev = pDCTstat->dev_dct;
	reg_off = 0x100 * Channel;
	index_reg = 0x98 + reg_off;

	/* Multiply the CAS Latency by two to get a number of 1/2 MEMCLKs units.*/
	val = Get_NB32(dev, 0x88 + reg_off);
	SubTotal = ((val & 0x0f) + 1) << 1; /* SubTotal is 1/2 Memclk unit */

	/* If registered DIMMs are being used then
	 * add 1 MEMCLK to the sub-total.
	 */
	val = Get_NB32(dev, 0x90 + reg_off);
	if(!(val & (1 << UnBuffDimm)))

	/* If the address prelaunch is setup for 1/2 MEMCLKs then
	 * add 1, else add 2 to the sub-total.
	 * if (AddrCmdSetup || CsOdtSetup || CkeSetup) then K := K + 2;
	 */
	val = Get_NB32_index_wait(dev, index_reg, 0x04);
	if(!(val & 0x00202020))

	/* If the F2x[1, 0]78[RdPtrInit] field is 4, 5, 6 or 7 MEMCLKs,
	 * then add 4, 3, 2, or 1 MEMCLKs, respectively to the sub-total. */
	val = Get_NB32(dev, 0x78 + reg_off);
	SubTotal += 8 - (val & 0x0f);

	/* Convert bits 7-5 (also referred to as the coarse delay) of
	 * the current (or worst case) DQS receiver enable delay to
	 * 1/2 MEMCLKs units, rounding up, and add this to the sub-total.
	 */
	SubTotal += DQSRcvEnDly >> 5; /*BOZO-no rounding up */

	/* Add 5.5 to the sub-total. 5.5 represents part of the
	 * processor specific constant delay value in the DRAM
	 * path.
	 */
	SubTotal <<= 1; /*scale 1/2 MemClk to 1/4 MemClk */
	SubTotal += 11; /*add 5.5 1/2MemClk */

	/* Convert the sub-total (in 1/2 MEMCLKs) to northbridge
	 * clocks (NCLKs) as follows (assuming DDR400 and assuming
	 * that no P-state or link speed changes have occurred).
	 *
	 * SubTotal *= 3*(Fn2xD4[NBFid]+4)/(3+Fn2x94[MemClkFreq])/2 */
	val = Get_NB32(dev, 0x94 + reg_off);

	/* SubTotal div 4 to scale 1/4 MemClk back to MemClk */
	val++; /* adjust for DDR2-1066 */
	valx = (val + 3) << 2;

	val = Get_NB32(pDCTstat->dev_nbmisc, 0xD4);
	SubTotal *= ((val & 0x1f) + 4 ) * 3;

	if (SubTotal % valx) { /* round up */

	/* Add 5 NCLKs to the sub-total. 5 represents part of the
	 * processor specific constant value in the northbridge
	 * path.
	 */
	pDCTstat->CH_MaxRdLat[Channel] = SubTotal;
	if(pDCTstat->GangedMode) {
		pDCTstat->CH_MaxRdLat[1] = SubTotal;

	/* Program the F2x[1, 0]78[MaxRdLatency] register with
	 * the total delay value (in NCLKs).
	 */
	reg = 0x78 + reg_off;
	val = Get_NB32(dev, reg);
	val &= ~(0x3ff << 22);
	val |= (SubTotal & 0x3ff) << 22;

	/* program MaxRdLatency to correspond with current delay */
	Set_NB32(dev, reg, val);
/* Record newly-passing byte lanes for the current delay step.
 * DqsRcvEn_Pass is the per-byte-lane pass mask from the last compare;
 * lanes passing for the first time get rcvrEnDly stored in the
 * CH_D_B_RCVRDLY array. Returns the pass/break code from
 * mct_SaveRcvEnDly_D_1Pass. */
static u8 mct_SavePassRcvEnDly_D(struct DCTStatStruc *pDCTstat,
			u8 rcvrEnDly, u8 Channel,
			u8 receiver, u8 Pass)
	u8 mask_Saved, mask_Pass;

	/* calculate dimm offset
	 * not needed for CH_D_B_RCVRDLY array
	 */

	/* cmp if there has new DqsRcvEnDly to be recorded */
	mask_Pass = pDCTstat->DqsRcvEn_Pass;

	/* On the second pass the mask is inverted: it tracks failures. */
	if(Pass == SecondPass) {
		mask_Pass = ~mask_Pass;

	mask_Saved = pDCTstat->DqsRcvEn_Saved;
	if(mask_Pass != mask_Saved) {

		/* find desired stack offset according to channel/dimm/byte */
		if(Pass == SecondPass) {
			// FIXME: SecondPass is never used for Barcelona p = pDCTstat->CH_D_B_RCVRDLY_1[Channel][receiver>>1];
			p = 0; // Keep the compiler happy.

		mask_Saved &= mask_Pass;
		p = pDCTstat->CH_D_B_RCVRDLY[Channel][receiver>>1];

		for(i=0; i < 8; i++) {
			/* cmp per byte lane */
			if(mask_Pass & (1 << i)) {
				if(!(mask_Saved & (1 << i))) {
					/* save RcvEnDly to stack, according to
					the related Dimm/byte lane */
					p[i] = (u8)rcvrEnDly;
					mask_Saved |= 1 << i;

		pDCTstat->DqsRcvEn_Saved = mask_Saved;

	return mct_SaveRcvEnDly_D_1Pass(pDCTstat, Pass);
/* Compare the first quadword of a just-read cache line against the
 * expected ROM test pattern, updating the per-byte-lane pass mask in
 * pDCTstat->DqsRcvEn_Pass. Returns DQS_PASS/DQS_FAIL (second pass is
 * inverted at the end). NOTE(review): the pattern/Pass parameters and
 * some locals are outside this view. */
static u8 mct_CompareTestPatternQW0_D(struct MCTStatStruc *pMCTstat,
			struct DCTStatStruc *pDCTstat,
			u32 addr, u8 channel,
	/* Compare only the first beat of data. Since target addrs are cache
	 * line aligned, the Channel parameter is used to determine which
	 * cache QW to compare.
	 */
	SetUpperFSbase(addr); // needed?

	/* Select the expected pattern for this pass/step. */
	if(Pass == FirstPass) {
		test_buf = (u8 *)TestPattern1_D;
		test_buf = (u8 *)TestPattern0_D;
	} else { // Second Pass
		test_buf = (u8 *)TestPattern2_D;

	addr_lo_buf = (u8 *) (addr << 8);

	/* In 128-bit (ganged) mode, channel 1's data is the second QW. */
	if((pDCTstat->Status & (1<<SB_128bitmode)) && channel ) {
		addr_lo_buf += 8; /* second channel */

#if DQS_TRAIN_DEBUG > 4
	print_debug("\t\t\t\t\t\tQW0 : test_buf = ");
	print_debug_hex32((unsigned)test_buf);
	for (i=0; i<8; i++) {
		print_debug_hex8(test_buf[i]); print_debug(" ");

	print_debug("\t\t\t\t\t\tQW0 : addr_lo_buf = ");
	print_debug_hex32((unsigned)addr_lo_buf);
	for (i=0; i<8; i++) {
		print_debug_hex8(addr_lo_buf[i]); print_debug(" ");

	/* prevent speculative execution of following instructions */

	/* Per-byte compare: set the lane's bit on match, clear on mismatch. */
	for (i=0; i<8; i++) {
		if(addr_lo_buf[i] == test_buf[i]) {
			pDCTstat->DqsRcvEn_Pass |= (1<<i);
			pDCTstat->DqsRcvEn_Pass &= ~(1<<i);

	if (Pass == FirstPass) {
		/* if first pass, at least one byte lane pass
		 * ,then DQS_PASS=1 and will set to related reg.
		 */
		if(pDCTstat->DqsRcvEn_Pass != 0) {

		/* if second pass, at least one byte lane fail
		 * ,then DQS_FAIL=1 and will set to related reg.
		 */
		if(pDCTstat->DqsRcvEn_Pass != 0xFF) {

	/* if second pass, we can't find the fail until FFh,
	 * then let it fail to save the final delay
	 */
	if((Pass == SecondPass) && (pDCTstat->Status & (1 << SB_DQSRcvLimit))) {
		pDCTstat->DqsRcvEn_Pass = 0;

	/* second pass needs to be inverted
	 * FIXME? this could be inverted in the above code to start with...
	 */
	if(Pass == SecondPass) {
		if (result == DQS_PASS) {
		} else if (result == DQS_FAIL) { /* FIXME: doesn't need to be else if */
/* Initialize DQS positions on both channels before receiver-enable
 * training. (The channel loop header is outside this view; `i` is
 * presumably the channel index — confirm against the full source.) */
static void mct_InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
			struct DCTStatStruc *pDCTstat)
	/* Initialize the DQS Positions in preparation for
	 * Receiver Enable Training.
	 * Write Position is 1/2 Memclock Delay
	 * Read Position is 1/2 Memclock Delay
	 */
	InitDQSPos4RcvrEn_D(pMCTstat, pDCTstat, i);
/* Program initial DQS write/read timing values for one channel via the
 * phy index registers. Indices 1-3 cover write data/ECC timing, 5-6 the
 * read DQS timing, and 7 the read DQS ECC timing; each is replicated per
 * DIMM via the 0x100*j stride. (The `j`/`dword` setup lines are outside
 * this view.) */
static void InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
			struct DCTStatStruc *pDCTstat, u8 Channel)
	/* Initialize the DQS Positions in preparation for
	 * Receiver Enable Training.
	 * Write Position is no Delay
	 * Read Position is 1/2 Memclock Delay
	 */
	u8 dn = 2; // TODO: Rev C could be 4
	u32 dev = pDCTstat->dev_dct;
	u32 index_reg = 0x98 + 0x100 * Channel;

	// FIXME: add Cx support

	for(i=1; i<=3; i++) {
		/* DIMM0 Write Data Timing Low */
		/* DIMM0 Write ECC Timing */
		Set_NB32_index_wait(dev, index_reg, i + 0x100 * j, dword);

	for(i=5; i<=6; i++) {
		/* DIMM0 Read DQS Timing Control Low */
		Set_NB32_index_wait(dev, index_reg, i + 0x100 * j, dword);

	/* DIMM0 Read DQS ECC Timing Control */
	Set_NB32_index_wait(dev, index_reg, 7 + 0x100 * j, dword);
/* Write the computed ECC-lane receiver-enable delays (from
 * CH_D_BC_RCVRDLY) into the phy registers for each chip-select pair.
 * (Local declarations and the index/val computation per chip select are
 * outside this view.) */
void SetEccDQSRcvrEn_D(struct DCTStatStruc *pDCTstat, u8 Channel)
	dev = pDCTstat->dev_dct;
	index_reg = 0x98 + Channel * 0x100;

	p = pDCTstat->CH_D_BC_RCVRDLY[Channel];
	print_debug_dqs("\t\tSetEccDQSRcvrPos: Channel ", Channel, 2);
	for(ChipSel = 0; ChipSel < MAX_CS_SUPPORTED; ChipSel += 2) {
		Set_NB32_index_wait(dev, index_reg, index, val);
		print_debug_dqs_pair("\t\tSetEccDQSRcvrPos: ChipSel ",
					ChipSel, " rcvr_delay ", val, 2);
/* Derive the ECC byte lane's receiver-enable delay for each enabled
 * chip-select pair by interpolating between the two data byte lanes most
 * similar to the ECC lane (per CH_EccDQSLike/CH_EccDQSScale), then push
 * the results to hardware via SetEccDQSRcvrEn_D. (The interpolation
 * arithmetic itself is outside this view.) */
static void CalcEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat,
			struct DCTStatStruc *pDCTstat, u8 Channel)
	EccDQSLike = pDCTstat->CH_EccDQSLike[Channel];
	EccDQSScale = pDCTstat->CH_EccDQSScale[Channel];

	for (ChipSel = 0; ChipSel < MAX_CS_SUPPORTED; ChipSel += 2) {
		if(mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, ChipSel)) {
			p = pDCTstat->CH_D_B_RCVRDLY[Channel][ChipSel>>1];

			/* DQS Delay Value of Data Bytelane
			 * most like ECC byte lane */
			val0 = p[EccDQSLike & 0x07];
			/* DQS Delay Value of Data Bytelane
			 * 2nd most like ECC byte lane */
			val1 = p[(EccDQSLike>>8) & 0x07];

		pDCTstat->CH_D_BC_RCVRDLY[Channel][ChipSel>>1] = val;

	SetEccDQSRcvrEn_D(pDCTstat, Channel);
/* For every present node with trained DRAM, compute and program the ECC
 * lane receiver-enable delays on each channel. (`i` is presumably the
 * channel index from a loop header outside this view.) */
void mctSetEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat,
			struct DCTStatStruc *pDCTstatA)
	for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
		struct DCTStatStruc *pDCTstat;
		pDCTstat = pDCTstatA + Node;
		if (!pDCTstat->NodePresent)
		if (pDCTstat->DCTSysLimit) {
			CalcEccDQSRcvrEn_D(pMCTstat, pDCTstat, i);
/* Run dynamic phy fence training on both DCTs of every node whose DRAM
 * is configured. (The Node increment line is outside this view.) */
void phyAssistedMemFnceTraining(struct MCTStatStruc *pMCTstat,
			struct DCTStatStruc *pDCTstatA)
	struct DCTStatStruc *pDCTstat;

	// FIXME: skip for Ax
	while (Node < MAX_NODES_SUPPORTED) {
		pDCTstat = pDCTstatA + Node;

		if(pDCTstat->DCTSysLimit) {
			fenceDynTraining_D(pMCTstat, pDCTstat, 0);
			fenceDynTraining_D(pMCTstat, pDCTstat, 1);
/* Dynamic phy fence training for one DCT:
 * 1. Seed the phase recovery engine registers (F2x[1,0]9C_x50..x52).
 * 2. Pulse PhyFenceTrEn in F2x[1,0]9C_x08 for ~200 MEMCLKs.
 * 3. Read back the 9 recovered 7-bit values, average them, and program
 *    the result into F2x[1,0]9C_x0C[PhyFence].
 * 4. Rewrite F2x[1,0]9C_x04 so the address/command delays take effect.
 * (Some locals, the avRecValue init, and the -8 adjustment line are
 * outside this view.) */
static void fenceDynTraining_D(struct MCTStatStruc *pMCTstat,
			struct DCTStatStruc *pDCTstat, u8 dct)
	u32 index_reg = 0x98 + 0x100 * dct;

	/* BIOS first programs a seed value to the phase recovery engine
	 * (recommended 19) registers.
	 * Dram Phase Recovery Control Register (F2x[1,0]9C_x[51:50] and
	 * F2x[1,0]9C_x52.) .
	 */
	dev = pDCTstat->dev_dct;
	for (index = 0x50; index <= 0x52; index ++) {
		val = Get_NB32_index_wait(dev, index_reg, index);
		val |= (FenceTrnFinDlySeed & 0x1F);
		if (index != 0x52) {
			/* Replicate the seed byte into the next lane's field. */
			val &= ~(0xFF << 8);
			val |= (val & 0xFF) << 8;
		Set_NB32_index_wait(dev, index_reg, index, val);

	/* Set F2x[1,0]9C_x08[PhyFenceTrEn]=1. */
	val = Get_NB32_index_wait(dev, index_reg, 0x08);
	val |= 1 << PhyFenceTrEn;
	Set_NB32_index_wait(dev, index_reg, 0x08, val);

	/* Wait 200 MEMCLKs. */
	mct_Wait_10ns (20000); /* wait 200us */

	/* Clear F2x[1,0]9C_x08[PhyFenceTrEn]=0. */
	val = Get_NB32_index_wait(dev, index_reg, 0x08);
	val &= ~(1 << PhyFenceTrEn);
	Set_NB32_index_wait(dev, index_reg, 0x08, val);

	/* BIOS reads the phase recovery engine registers
	 * F2x[1,0]9C_x[51:50] and F2x[1,0]9C_x52. */
	for (index = 0x50; index <= 0x52; index ++) {
		val = Get_NB32_index_wait(dev, index_reg, index);
		avRecValue += val & 0x7F;
		if (index != 0x52) {
			avRecValue += (val >> 8) & 0x7F;
			avRecValue += (val >> 16) & 0x7F;
			avRecValue += (val >> 24) & 0x7F;

	/* Average over the 9 accumulated 7-bit values. */
	val = avRecValue / 9;

	/* Write the (averaged value -8) to F2x[1,0]9C_x0C[PhyFence]. */
	val = Get_NB32_index_wait(dev, index_reg, 0x0C);
	val &= ~(0x1F << 16);
	val |= (avRecValue & 0x1F) << 16;
	Set_NB32_index_wait(dev, index_reg, 0x0C, val);

	/* Rewrite F2x[1,0]9C_x04-DRAM Address/Command Timing Control Register
	 * delays (both channels). */
	val = Get_NB32_index_wait(dev, index_reg, 0x04);
	Set_NB32_index_wait(dev, index_reg, 0x04, val);
/* Busy-wait roughly `cycles` * 10 ns by polling the TSC (MSR 0x10):
 * for each outer iteration, spin until 8 TSC ticks have elapsed
 * (8 x 1.25 ns at the assumed NCLK rate). (The inner do-loop header and
 * the `saved = lo` capture are outside this view.) */
static void mct_Wait_10ns (u32 cycles)
	/* cycles = number of 10ns cycles(or longer) to delay */
	/* FIXME: Need to calibrate to CPU/NCLK speed? */

	msr = 0x10; /* TSC */
	for (i = 0; i < cycles; i++) {
		_RDMSR(msr, &lo, &hi);
			_RDMSR(msr, &lo, &hi);
		} while (lo - saved < 8); /* 8 x 1.25 ns as NCLK is at 1.25ns */