2 * This file is part of the coreboot project.
4 * Copyright (C) 2010 Advanced Micro Devices, Inc.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 /******************************************************************************
22 Description: Receiver En and DQS Timing Training feature for DDR 3 MCT
23 ******************************************************************************/
/* Forward declarations of the file-local (static) helpers that are defined
 * further down in this file. */
25 static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat,
26 struct DCTStatStruc *pDCTstat, u8 Pass);
27 static u8 mct_SavePassRcvEnDly_D(struct DCTStatStruc *pDCTstat,
28 u8 rcvrEnDly, u8 Channel,
29 u8 receiver, u8 Pass);
30 static u8 mct_CompareTestPatternQW0_D(struct MCTStatStruc *pMCTstat,
31 struct DCTStatStruc *pDCTstat,
34 static void mct_InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
35 struct DCTStatStruc *pDCTstat);
36 static void InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
37 struct DCTStatStruc *pDCTstat, u8 Channel);
38 static void CalcEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat,
39 struct DCTStatStruc *pDCTstat, u8 Channel);
40 static void mct_SetFinalRcvrEnDly_D(struct DCTStatStruc *pDCTstat,
41 u8 RcvrEnDly, u8 where,
42 u8 Channel, u8 Receiver,
43 u32 dev, u32 index_reg,
44 u8 Addl_Index, u8 Pass);
45 static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u8 DQSRcvEnDly);
46 static void fenceDynTraining_D(struct MCTStatStruc *pMCTstat,
47 struct DCTStatStruc *pDCTstat, u8 dct);
48 static void mct_DisableDQSRcvEn_D(struct DCTStatStruc *pDCTstat);
/* Expected-data tables for receiver enable training. They are used as the
 * reference bytes in mct_CompareTestPatternQW0_D: TestPattern0_D and
 * TestPattern1_D on the first pass, TestPattern2_D on the second pass.
 * TestPattern0_D is all 0xaaaaaaaa words, TestPattern1_D is all 0x55555555
 * words (the bitwise complement), TestPattern2_D is a mixed pattern. */
50 /* Warning: These must be located so they do not cross a logical 16-bit
52 const static u32 TestPattern0_D[] = {
53 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
54 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
55 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
56 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
58 const static u32 TestPattern1_D[] = {
59 0x55555555, 0x55555555, 0x55555555, 0x55555555,
60 0x55555555, 0x55555555, 0x55555555, 0x55555555,
61 0x55555555, 0x55555555, 0x55555555, 0x55555555,
62 0x55555555, 0x55555555, 0x55555555, 0x55555555,
64 const static u32 TestPattern2_D[] = {
65 0x12345678, 0x87654321, 0x23456789, 0x98765432,
66 0x59385824, 0x30496724, 0x24490795, 0x99938733,
67 0x40385642, 0x38465245, 0x29432163, 0x05067894,
68 0x12349045, 0x98723467, 0x12387634, 0x34587623,
71 static void SetupRcvrPattern(struct MCTStatStruc *pMCTstat,
72 struct DCTStatStruc *pDCTstat, u32 *buffer, u8 pass)
75 * 1. Copy the alpha and Beta patterns from ROM to Cache,
76 * aligning on 16 byte boundary
77 * 2. Set the ptr to DCTStatstruc.PtrPatternBufA for Alpha
78 * 3. Set the ptr to DCTStatstruc.PtrPatternBufB for Beta
86 buf_a = (u32 *)(((u32)buffer + 0x10) & (0xfffffff0));
87 buf_b = buf_a + 32; /* ?? */
/* buf_a (set above) is the first 16-byte aligned address strictly above
 * buffer, and buf_b starts 32 u32 words (128 bytes) after buf_a. The pointer
 * is round-tripped through u32, so this assumes 32-bit addresses.
 * NOTE(review): p_A is loaded from the helper named ...PassB and p_B from the
 * helper named ...PassA; confirm that this cross-naming is intentional.
 * The copy from p_A and p_B into the buffers is not visible in this listing. */
88 p_A = (u32 *)SetupDqsPattern_1PassB(pass);
89 p_B = (u32 *)SetupDqsPattern_1PassA(pass);
/* Publish both buffer addresses in the DCT status structure as u32 values. */
96 pDCTstat->PtrPatternBufA = (u32)buf_a;
97 pDCTstat->PtrPatternBufB = (u32)buf_b;
/* Public entry point for receiver enable training.
 * Runs the software training routine dqsTrainRcvrEn_SW only when
 * mct_checkNumberOfDqsRcvEn_1Pass(Pass) returns nonzero for the given pass;
 * otherwise nothing is done. */
100 void mct_TrainRcvrEn_D(struct MCTStatStruc *pMCTstat,
101 struct DCTStatStruc *pDCTstat, u8 Pass)
103 if(mct_checkNumberOfDqsRcvEn_1Pass(Pass))
104 dqsTrainRcvrEn_SW(pMCTstat, pDCTstat, Pass);
/* Software receiver enable (DqsRcvEn) training for the node described by
 * pDCTstat, executed for the given Pass.
 *
 * Outline, as far as it is visible in this listing:
 *  - preload the max read latency field of register 0x78 on each DCT;
 *  - on the first pass initialise the DQS positions and the failure masks;
 *  - enable CR4.OSFXSR and HWCR.wrap32dis (remembering the previous state),
 *    disable DIMM ECC and build the test pattern buffers;
 *  - for each of the two channels and each receiver pair, sweep RcvrEnDly:
 *    write the test patterns, program the delay, read the patterns back and
 *    compare them, and save the delay of lanes that pass;
 *  - flag SB_NORCVREN when no passing window was found and SB_SmallRCVR when
 *    the delay reached the limit;
 *  - program the max latency from the larger per-channel delay, then restore
 *    ECC, the MSR bit and the CR4 bit, and optionally dump debug state.
 *
 * NOTE(review): many lines of this function (declarations, braces, some
 * assignments) are not visible here, so the outline above should be
 * confirmed against the complete source. */
107 static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat,
108 struct DCTStatStruc *pDCTstat, u8 Pass)
110 u8 Channel, RcvrEnDly, RcvrEnDlyRmin;
111 u8 Test0, Test1, CurrTest, CurrTestSide0, CurrTestSide1;
112 u8 CTLRMaxDelay, _2Ranks, PatternA, PatternB;
115 u8 _DisableDramECC = 0, _Wrap32Dis = 0, _SSE2 = 0;
116 u8 RcvrEnDlyLimit, Final_Value, MaxDelay_CH[2];
117 u32 TestAddr0, TestAddr1, TestAddr0B, TestAddr1B;
118 u32 PatternBuffer[64+4]; /* FIXME: need increase 8? */
125 u32 ch_start, ch_end, ch;
134 print_debug_dqs("\nTrainRcvEn: Node", pDCTstat->Node_ID, 0);
135 print_debug_dqs("TrainRcvEn: Pass", Pass, 0);
137 dev = pDCTstat->dev_dct;
139 if(!pDCTstat->GangedMode) {
/* Preload the 10-bit field at bits 31:22 of register 0x78 (+0x100 per DCT)
 * with 0x0c8 before training starts. */
145 for (ch = ch_start; ch < ch_end; ch++) {
146 reg = 0x78 + (0x100 * ch);
147 val = Get_NB32(dev, reg);
148 val &= ~(0x3ff << 22);
149 val |= (0x0c8 << 22); /* Max Rd Lat */
150 Set_NB32(dev, reg, val);
154 if (Pass == FirstPass) {
155 mct_InitDQSPos4RcvrEn_D(pMCTstat, pDCTstat);
157 pDCTstat->DimmTrainFail = 0;
158 pDCTstat->CSTrainFail = ~pDCTstat->CSPresent;
162 if(cr4 & ( 1 << 9)) { /* save the old value */
165 cr4 |= (1 << 9); /* OSFXSR enable SSE2 */
169 _RDMSR(msr, &lo, &hi);
170 /* FIXME: Why use SSEDIS */
171 if(lo & (1 << 17)) { /* save the old value */
174 lo |= (1 << 17); /* HWCR.wrap32dis */
175 lo &= ~(1 << 15); /* SSEDIS */
176 _WRMSR(msr, lo, hi); /* Setting wrap32dis allows 64-bit memory references in real mode */
178 _DisableDramECC = mct_DisableDimmEccEn_D(pMCTstat, pDCTstat);
180 SetupRcvrPattern(pMCTstat, pDCTstat, PatternBuffer, Pass);
183 dev = pDCTstat->dev_dct;
/* Train channel 0 and channel 1 in turn. */
186 for (Channel = 0; Channel < 2; Channel++) {
187 print_debug_dqs("\tTrainRcvEn51: Node ", pDCTstat->Node_ID, 1);
188 print_debug_dqs("\tTrainRcvEn51: Channel ", Channel, 1);
189 pDCTstat->Channel = Channel;
191 MaxDelay_CH[Channel] = 0;
192 index_reg = 0x98 + 0x100 * Channel;
194 Receiver = mct_InitReceiver_D(pDCTstat, Channel);
195 /* There are four receiver pairs, loosely associated with chipselects. */
196 for (; Receiver < 8; Receiver += 2) {
197 Addl_Index = (Receiver >> 1) * 3 + 0x10;
200 /* mct_ModifyIndex_D */
201 RcvrEnDlyRmin = RcvrEnDlyLimit = 0xff;
203 print_debug_dqs("\t\tTrainRcvEnd52: index ", Addl_Index, 2);
205 if(!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver)) {
209 TestAddr0 = mct_GetRcvrSysAddr_D(pMCTstat, pDCTstat, Channel, Receiver, &valid);
210 if(!valid) { /* Address not supported on current CS */
214 TestAddr0B = TestAddr0 + (BigPagex8_RJ8 << 3);
216 if(mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver+1)) {
217 TestAddr1 = mct_GetRcvrSysAddr_D(pMCTstat, pDCTstat, Channel, Receiver+1, &valid);
218 if(!valid) { /* Address not supported on current CS */
221 TestAddr1B = TestAddr1 + (BigPagex8_RJ8 << 3);
224 _2Ranks = TestAddr1 = TestAddr1B = 0;
227 print_debug_dqs("\t\tTrainRcvEn53: TestAddr0 ", TestAddr0, 2);
228 print_debug_dqs("\t\tTrainRcvEn53: TestAddr0B ", TestAddr0B, 2);
229 print_debug_dqs("\t\tTrainRcvEn53: TestAddr1 ", TestAddr1, 2);
230 print_debug_dqs("\t\tTrainRcvEn53: TestAddr1B ", TestAddr1B, 2);
233 * Get starting RcvrEnDly value
235 RcvrEnDly = mct_Get_Start_RcvrEnDly_1Pass(Pass);
237 /* mct_GetInitFlag_D*/
238 if (Pass == FirstPass) {
239 pDCTstat->DqsRcvEn_Pass = 0;
241 pDCTstat->DqsRcvEn_Pass=0xFF;
243 pDCTstat->DqsRcvEn_Saved = 0;
246 while(RcvrEnDly < RcvrEnDlyLimit) { /* sweep Delay value here */
247 print_debug_dqs("\t\t\tTrainRcvEn541: RcvrEnDly ", RcvrEnDly, 3);
249 /* callback not required
250 if(mct_AdjustDelay_D(pDCTstat, RcvrEnDly))
254 /* Odd steps get another pattern such that even
255 and odd steps alternate. The pointers to the
256 patterns will be swapped at the end of the loop
257 so that they correspond. */
267 mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0, PatternA); /* rank 0 of DIMM, testpattern 0 */
268 mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0B, PatternB); /* rank 0 of DIMM, testpattern 1 */
270 mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1, PatternA); /*rank 1 of DIMM, testpattern 0 */
271 mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1B, PatternB); /*rank 1 of DIMM, testpattern 1 */
274 mct_SetRcvrEnDly_D(pDCTstat, RcvrEnDly, 0, Channel, Receiver, dev, index_reg, Addl_Index, Pass);
277 CurrTestSide0 = DQS_FAIL;
278 CurrTestSide1 = DQS_FAIL;
280 mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0); /*cache fills */
281 Test0 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr0, Channel, PatternA, Pass);/* ROM vs cache compare */
282 proc_IOCLFLUSH_D(TestAddr0);
283 ResetDCTWrPtr_D(dev, index_reg, Addl_Index);
285 print_debug_dqs("\t\t\tTrainRcvEn542: Test0 result ", Test0, 3);
287 /* != 0x00 mean pass */
289 if(Test0 == DQS_PASS) {
290 mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0B); /*cache fills */
291 /* ROM vs cache compare */
292 Test1 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr0B, Channel, PatternB, Pass);
293 proc_IOCLFLUSH_D(TestAddr0B);
294 ResetDCTWrPtr_D(dev, index_reg, Addl_Index);
296 print_debug_dqs("\t\t\tTrainRcvEn543: Test1 result ", Test1, 3);
298 if(Test1 == DQS_PASS) {
299 CurrTestSide0 = DQS_PASS;
303 mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1); /*cache fills */
304 /* ROM vs cache compare */
305 Test0 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr1, Channel, PatternA, Pass);
306 proc_IOCLFLUSH_D(TestAddr1);
307 ResetDCTWrPtr_D(dev, index_reg, Addl_Index);
309 print_debug_dqs("\t\t\tTrainRcvEn544: Test0 result ", Test0, 3);
311 if(Test0 == DQS_PASS) {
312 mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1B); /*cache fills */
313 /* ROM vs cache compare */
314 Test1 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr1B, Channel, PatternB, Pass);
315 proc_IOCLFLUSH_D(TestAddr1B);
316 ResetDCTWrPtr_D(dev, index_reg, Addl_Index);
318 print_debug_dqs("\t\t\tTrainRcvEn545: Test1 result ", Test1, 3);
319 if(Test1 == DQS_PASS) {
320 CurrTestSide1 = DQS_PASS;
326 if ((CurrTestSide0 == DQS_PASS) && (CurrTestSide1 == DQS_PASS)) {
329 } else if (CurrTestSide0 == DQS_PASS) {
333 /* record first pass DqsRcvEn to stack */
334 valid = mct_SavePassRcvEnDly_D(pDCTstat, RcvrEnDly, Channel, Receiver, Pass);
336 /* Break(1:RevF,2:DR) or not(0) FIXME: This comment doesn't make sense */
337 if(valid == 2 || (LastTest == DQS_FAIL && valid == 1)) {
338 RcvrEnDlyRmin = RcvrEnDly;
344 /* swap the rank 0 pointers */
346 TestAddr0 = TestAddr0B;
349 /* swap the rank 1 pointers */
351 TestAddr1 = TestAddr1B;
354 print_debug_dqs("\t\t\tTrainRcvEn56: RcvrEnDly ", RcvrEnDly, 3);
358 } /* while RcvrEnDly */
360 print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDly ", RcvrEnDly, 2);
361 print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDlyRmin ", RcvrEnDlyRmin, 3);
362 print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDlyLimit ", RcvrEnDlyLimit, 3);
363 if(RcvrEnDlyRmin == RcvrEnDlyLimit) {
364 /* no passing window */
365 pDCTstat->ErrStatus |= 1 << SB_NORCVREN;
366 Errors |= 1 << SB_NORCVREN;
367 pDCTstat->ErrCode = SC_FatalErr;
370 if(RcvrEnDly > (RcvrEnDlyLimit - 1)) {
371 /* passing window too narrow, too far delayed*/
372 pDCTstat->ErrStatus |= 1 << SB_SmallRCVR;
373 Errors |= 1 << SB_SmallRCVR;
374 pDCTstat->ErrCode = SC_FatalErr;
375 RcvrEnDly = RcvrEnDlyLimit - 1;
376 pDCTstat->CSTrainFail |= 1 << Receiver;
377 pDCTstat->DimmTrainFail |= 1 << (Receiver + Channel);
380 /* CHB_D0_B0_RCVRDLY set in mct_Average_RcvrEnDly_Pass */
381 mct_Average_RcvrEnDly_Pass(pDCTstat, RcvrEnDly, RcvrEnDlyLimit, Channel, Receiver, Pass);
383 mct_SetFinalRcvrEnDly_D(pDCTstat, RcvrEnDly, Final_Value, Channel, Receiver, dev, index_reg, Addl_Index, Pass);
385 if(pDCTstat->ErrStatus & (1 << SB_SmallRCVR)) {
386 Errors |= 1 << SB_SmallRCVR;
389 RcvrEnDly += Pass1MemClkDly;
390 if(RcvrEnDly > CTLRMaxDelay) {
391 CTLRMaxDelay = RcvrEnDly;
394 } /* while Receiver */
395 MaxDelay_CH[Channel] = CTLRMaxDelay;
/* Use the larger of the two per-channel maxima when programming the
 * max latency of both channels. */
398 CTLRMaxDelay = MaxDelay_CH[0];
399 if (MaxDelay_CH[1] > CTLRMaxDelay)
400 CTLRMaxDelay = MaxDelay_CH[1];
402 for (Channel = 0; Channel < 2; Channel++) {
403 mct_SetMaxLatency_D(pDCTstat, Channel, CTLRMaxDelay); /* program Ch A/B MaxAsyncLat to correspond with max delay */
406 ResetDCTWrPtr_D(dev, index_reg, Addl_Index);
408 if(_DisableDramECC) {
409 mct_EnableDimmEccEn_D(pMCTstat, pDCTstat, _DisableDramECC);
412 if (Pass == FirstPass) {
413 /*Disable DQSRcvrEn training mode */
414 mct_DisableDQSRcvEn_D(pDCTstat);
419 _RDMSR(msr, &lo, &hi);
420 lo &= ~(1<<17); /* restore HWCR.wrap32dis */
425 cr4 &= ~(1<<9); /* restore cr4.OSFXSR */
/* Optional debug output, compiled in only when DQS_TRAIN_DEBUG > 0:
 * per-channel CH_MaxRdLat, the saved CH_D_B_RCVRDLY values and the
 * final Status, ErrStatus and ErrCode fields. */
429 #if DQS_TRAIN_DEBUG > 0
432 printk(BIOS_DEBUG, "TrainRcvrEn: CH_MaxRdLat:\n");
433 for(Channel = 0; Channel<2; Channel++) {
434 printk(BIOS_DEBUG, "Channel:%x: %x\n",
435 Channel, pDCTstat->CH_MaxRdLat[Channel]);
440 #if DQS_TRAIN_DEBUG > 0
443 u8 Channel, Receiver;
447 printk(BIOS_DEBUG, "TrainRcvrEn: CH_D_B_RCVRDLY:\n");
448 for(Channel = 0; Channel < 2; Channel++) {
449 printk(BIOS_DEBUG, "Channel:%x\n", Channel);
450 for(Receiver = 0; Receiver<8; Receiver+=2) {
451 printk(BIOS_DEBUG, "\t\tReceiver:%x:", Receiver);
452 p = pDCTstat->CH_D_B_RCVRDLY[Channel][Receiver>>1];
455 printk(BIOS_DEBUG, "%x ", val);
457 printk(BIOS_DEBUG, "\n");
463 printk(BIOS_DEBUG, "TrainRcvrEn: Status %x\n", pDCTstat->Status);
464 printk(BIOS_DEBUG, "TrainRcvrEn: ErrStatus %x\n", pDCTstat->ErrStatus);
465 printk(BIOS_DEBUG, "TrainRcvrEn: ErrCode %x\n", pDCTstat->ErrCode);
466 printk(BIOS_DEBUG, "TrainRcvrEn: Done\n\n");
/* Returns the receiver index at which the training loop for DCT dct starts
 * (the caller iterates from this value up to 8 in steps of 2).
 * The result depends on whether DIMMValidDCT[dct] is zero.
 * NOTE(review): the returned values are not visible in this listing. */
469 u8 mct_InitReceiver_D(struct DCTStatStruc *pDCTstat, u8 dct)
471 if (pDCTstat->DIMMValidDCT[dct] == 0 ) {
/* Thin wrapper around mct_SetRcvrEnDly_D: every argument is forwarded
 * unchanged, with where taking the FinalValue position of the callee. */
478 static void mct_SetFinalRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u8 RcvrEnDly, u8 where, u8 Channel, u8 Receiver, u32 dev, u32 index_reg, u8 Addl_Index, u8 Pass/*, u8 *p*/)
481 * Program final DqsRcvEnDly to additional index for DQS receiver
484 mct_SetRcvrEnDly_D(pDCTstat, RcvrEnDly, where, Channel, Receiver, dev, index_reg, Addl_Index, Pass);
/* Clears the DqsRcvEnTrain bit in register 0x78 (+0x100 per channel) for
 * channels 0 up to ch_end - 1, using a read-modify-write per channel.
 * ch_end is selected from pDCTstat->GangedMode.
 * NOTE(review): the values assigned to ch_end are not visible in this
 * listing. */
487 static void mct_DisableDQSRcvEn_D(struct DCTStatStruc *pDCTstat)
494 dev = pDCTstat->dev_dct;
495 if (pDCTstat->GangedMode) {
501 for (ch=0; ch<ch_end; ch++) {
502 reg = 0x78 + 0x100 * ch;
503 val = Get_NB32(dev, reg);
504 val &= ~(1 << DqsRcvEnTrain);
505 Set_NB32(dev, reg, val);
510 * Function only used once so it was inlined.
514 * Function only used once so it was inlined.
/* Programs a receiver enable delay into the DqsRcvEn registers of one
 * receiver pair.
 *
 * A RcvrEnDly of 0xFE additionally sets the SB_DQSRcvLimit flag in
 * pDCTstat->Status. The loop walks eight byte lanes; for each lane the
 * indexed register number is Table_DQSRcvEn_Offset[i >> 1] + Addl_Index, so
 * two consecutive lanes map to the same 32-bit register, and p points at the
 * saved per-lane delays CH_D_B_RCVRDLY[Channel][Receiver >> 1].
 * NOTE(review): how FinalValue and the lane parity select the value and the
 * bit field is only partly visible here (only the bits 23:16 update is
 * shown); confirm against the complete source. */
517 void mct_SetRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u8 RcvrEnDly,
518 u8 FinalValue, u8 Channel, u8 Receiver, u32 dev,
519 u32 index_reg, u8 Addl_Index, u8 Pass)
526 if(RcvrEnDly == 0xFE) {
527 /*set the boundary flag */
528 pDCTstat->Status |= 1 << SB_DQSRcvLimit;
531 /* DimmOffset not needed for CH_D_B_RCVRDLY array */
532 for(i=0; i < 8; i++) {
534 /*calculate dimm offset */
535 p = pDCTstat->CH_D_B_RCVRDLY[Channel][Receiver >> 1];
539 /* if flag=0, set DqsRcvEn value to reg. */
540 /* get the register index from table */
541 index = Table_DQSRcvEn_Offset[i >> 1];
542 index += Addl_Index; /* DIMMx DqsRcvEn byte0 */
543 val = Get_NB32_index_wait(dev, index_reg, index);
546 val &= ~(0xFF << 16);
547 val |= (RcvrEnDly << 16);
553 Set_NB32_index_wait(dev, index_reg, index, val);
/* Computes the maximum read latency for Channel and programs it.
 *
 * The running SubTotal is built from: the CAS latency field of register 0x88,
 * the unbuffered/registered DIMM bit of register 0x90, the prelaunch bits of
 * indexed register 0x04, the low nibble of register 0x78, and bits 7:5 of
 * DQSRcvEnDly. It is then rescaled using register 0x94 and the low five bits
 * of the nbmisc register 0xD4. The result is stored in
 * pDCTstat->CH_MaxRdLat[Channel] (and in entry 1 as well in ganged mode) and
 * written to the 10-bit field at bits 31:22 of register 0x78
 * (+0x100 per channel).
 * NOTE(review): several arithmetic steps and the GangedMode handling at the
 * top are not visible in this listing; confirm units and rounding against
 * the complete source. */
558 static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u8 DQSRcvEnDly)
568 if(pDCTstat->GangedMode)
571 dev = pDCTstat->dev_dct;
572 reg_off = 0x100 * Channel;
573 index_reg = 0x98 + reg_off;
575 /* Multiply the CAS Latency by two to get a number of 1/2 MEMCLKs units.*/
576 val = Get_NB32(dev, 0x88 + reg_off);
577 SubTotal = ((val & 0x0f) + 4) << 1; /* SubTotal is 1/2 Memclk unit */
579 /* If registered DIMMs are being used then
580 * add 1 MEMCLK to the sub-total.
582 val = Get_NB32(dev, 0x90 + reg_off);
583 if(!(val & (1 << UnBuffDimm)))
586 /* If the address prelaunch is setup for 1/2 MEMCLKs then
587 * add 1, else add 2 to the sub-total.
588 * if (AddrCmdSetup || CsOdtSetup || CkeSetup) then K := K + 2;
590 val = Get_NB32_index_wait(dev, index_reg, 0x04);
591 if(!(val & 0x00202020))
596 /* If the F2x[1, 0]78[RdPtrInit] field is 4, 5, 6 or 7 MEMCLKs,
597 * then add 4, 3, 2, or 1 MEMCLKs, respectively to the sub-total. */
598 val = Get_NB32(dev, 0x78 + reg_off);
599 SubTotal += 8 - (val & 0x0f);
601 /* Convert bits 7-5 (also referred to as the coarse delay) of
602 * the current (or worst case) DQS receiver enable delay to
603 * 1/2 MEMCLKs units, rounding up, and add this to the sub-total.
605 SubTotal += DQSRcvEnDly >> 5; /*BOZO-no rounding up */
607 /* Add 5.5 to the sub-total. 5.5 represents part of the
608 * processor specific constant delay value in the DRAM
611 SubTotal <<= 1; /*scale 1/2 MemClk to 1/4 MemClk */
612 SubTotal += 11; /*add 5.5 1/2MemClk */
614 /* Convert the sub-total (in 1/2 MEMCLKs) to northbridge
615 * clocks (NCLKs) as follows (assuming DDR400 and assuming
616 * that no P-state or link speed changes have occurred).
620 * SubTotal *= 3*(Fn2xD4[NBFid]+4)/(3+Fn2x94[MemClkFreq])/2 */
621 val = Get_NB32(dev, 0x94 + reg_off);
623 /* SubTotal div 4 to scale 1/4 MemClk back to MemClk */
631 val = Get_NB32(pDCTstat->dev_nbmisc, 0xD4);
632 SubTotal *= ((val & 0x1f) + 4 ) * 3;
635 if (SubTotal % valx) { /* round up */
639 /* Add 5 NCLKs to the sub-total. 5 represents part of the
640 * processor specific constant value in the northbridge
645 pDCTstat->CH_MaxRdLat[Channel] = SubTotal;
646 if(pDCTstat->GangedMode) {
647 pDCTstat->CH_MaxRdLat[1] = SubTotal;
650 /* Program the F2x[1, 0]78[MaxRdLatency] register with
651 * the total delay value (in NCLKs).
653 reg = 0x78 + reg_off;
654 val = Get_NB32(dev, reg);
655 val &= ~(0x3ff << 22);
656 val |= (SubTotal & 0x3ff) << 22;
658 /* program MaxRdLatency to correspond with current delay */
659 Set_NB32(dev, reg, val);
/* Saves rcvrEnDly for every byte lane that newly passed at this delay.
 *
 * pDCTstat->DqsRcvEn_Pass is the per-lane pass bitmask (it is inverted when
 * Pass is SecondPass) and pDCTstat->DqsRcvEn_Saved marks lanes whose delay
 * has already been stored. When the two masks differ, each lane that is set
 * in the pass mask but not yet saved gets rcvrEnDly written to
 * CH_D_B_RCVRDLY[Channel][receiver >> 1][lane] and is marked as saved.
 *
 * Returns the value of mct_SaveRcvEnDly_D_1Pass(pDCTstat, Pass). */
662 static u8 mct_SavePassRcvEnDly_D(struct DCTStatStruc *pDCTstat,
663 u8 rcvrEnDly, u8 Channel,
664 u8 receiver, u8 Pass)
667 u8 mask_Saved, mask_Pass;
670 /* calculate dimm offset
671 * not needed for CH_D_B_RCVRDLY array
674 /* check whether there is a new DqsRcvEnDly to be recorded */
675 mask_Pass = pDCTstat->DqsRcvEn_Pass;
677 if(Pass == SecondPass) {
678 mask_Pass = ~mask_Pass;
681 mask_Saved = pDCTstat->DqsRcvEn_Saved;
682 if(mask_Pass != mask_Saved) {
684 /* find desired stack offset according to channel/dimm/byte */
685 if(Pass == SecondPass) {
686 /* FIXME: SecondPass is never used for Barcelona p = pDCTstat->CH_D_B_RCVRDLY_1[Channel][receiver>>1]; */
687 p = 0; /* Keep the compiler happy. */
689 mask_Saved &= mask_Pass;
690 p = pDCTstat->CH_D_B_RCVRDLY[Channel][receiver>>1];
/* NOTE(review): in the SecondPass branch above p is set to 0; if that branch
 * were ever taken, the stores through p in this loop would go through a null
 * pointer. Confirm that the SecondPass path is unreachable as the FIXME
 * states. */
692 for(i=0; i < 8; i++) {
693 /* cmp per byte lane */
694 if(mask_Pass & (1 << i)) {
695 if(!(mask_Saved & (1 << i))) {
696 /* save RcvEnDly to stack, according to
697 the related Dimm/byte lane */
698 p[i] = (u8)rcvrEnDly;
699 mask_Saved |= 1 << i;
703 pDCTstat->DqsRcvEn_Saved = mask_Saved;
705 return mct_SaveRcvEnDly_D_1Pass(pDCTstat, Pass);
/* Compares data read back from addr against the expected test pattern and
 * updates the per-lane result mask pDCTstat->DqsRcvEn_Pass: bit i is set when
 * lane i matched test_buf[i] and cleared otherwise (eight lanes).
 *
 * The reference bytes come from TestPattern1_D or TestPattern0_D on the first
 * pass and from TestPattern2_D on the second pass. When SB_128bitmode is set
 * in Status and channel is nonzero, the comparison starts at addr + 8.
 * On the second pass with SB_DQSRcvLimit set, DqsRcvEn_Pass is forced to 0.
 *
 * NOTE(review): the assignments to result and the return statement are not
 * visible in this listing; the visible comparisons suggest the return value
 * is DQS_PASS or DQS_FAIL, inverted on the second pass. Confirm. */
708 static u8 mct_CompareTestPatternQW0_D(struct MCTStatStruc *pMCTstat,
709 struct DCTStatStruc *pDCTstat,
710 u32 addr, u8 channel,
713 /* Compare only the first beat of data. Since target addrs are cache
714 * line aligned, the Channel parameter is used to determine which
715 * cache QW to compare.
723 if(Pass == FirstPass) {
725 test_buf = (u8 *)TestPattern1_D;
727 test_buf = (u8 *)TestPattern0_D;
729 } else { /* Second Pass */
730 test_buf = (u8 *)TestPattern2_D;
733 SetUpperFSbase(addr);
736 if((pDCTstat->Status & (1<<SB_128bitmode)) && channel ) {
737 addr += 8; /* second channel */
741 print_debug_dqs_pair("\t\t\t\t\t\t test_buf = ", (u32)test_buf, " | addr_lo = ", addr, 4);
/* NOTE(review): addr advances by one byte per lane while the read helper is
 * named read32_fs and the reference is a single byte; the declaration of
 * value is not visible here, so confirm that only one byte is compared. */
742 for (i=0; i<8; i++, addr ++) {
743 value = read32_fs(addr);
744 print_debug_dqs_pair("\t\t\t\t\t\t\t\t ", test_buf[i], " | ", value, 4);
746 if (value == test_buf[i]) {
747 pDCTstat->DqsRcvEn_Pass |= (1<<i);
749 pDCTstat->DqsRcvEn_Pass &= ~(1<<i);
755 if (Pass == FirstPass) {
756 /* if first pass, at least one byte lane pass
757 * ,then DQS_PASS=1 and will set to related reg.
759 if(pDCTstat->DqsRcvEn_Pass != 0) {
766 /* if second pass, at least one byte lane fail
767 * ,then DQS_FAIL=1 and will set to related reg.
769 if(pDCTstat->DqsRcvEn_Pass != 0xFF) {
776 /* if second pass, we can't find the fail until FFh,
777 * then let it fail to save the final delay
779 if((Pass == SecondPass) && (pDCTstat->Status & (1 << SB_DQSRcvLimit))) {
781 pDCTstat->DqsRcvEn_Pass = 0;
784 /* second pass needs to be inverted
785 * FIXME? this could be inverted in the above code to start with...
787 if(Pass == SecondPass) {
788 if (result == DQS_PASS) {
790 } else if (result == DQS_FAIL) { /* FIXME: doesn't need to be else if */
/* Calls InitDQSPos4RcvrEn_D for channel index i.
 * NOTE(review): the loop that produces i is not visible in this listing. */
799 static void mct_InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
800 struct DCTStatStruc *pDCTstat)
802 /* Initialize the DQS Positions in preparation for
803 * Receiver Enable Training.
804 * Write Position is 1/2 Memclock Delay
805 * Read Position is 1/2 Memclock Delay
809 InitDQSPos4RcvrEn_D(pMCTstat, pDCTstat, i);
813 static void InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
814 struct DCTStatStruc *pDCTstat, u8 Channel)
816 /* Initialize the DQS Positions in preparation for
817 * Receiver Enable Training.
818 * Write Position is no Delay
819 * Read Position is 1/2 Memclock Delay
824 u8 dn = 4; /* TODO: Rev C could be 4 */
/* All writes below go through the indexed register pair selected by
 * index_reg (0x98 + 0x100 per channel). Indexes 1 to 3 receive the write
 * timing value, indexes 5 and 6 and then index 7 receive the read DQS timing
 * value, each offset by 0x100 times j.
 * NOTE(review): the loop over j and the values assigned to dword are not
 * visible in this listing. */
825 u32 dev = pDCTstat->dev_dct;
826 u32 index_reg = 0x98 + 0x100 * Channel;
828 /* FIXME: add Cx support */
830 for(i=1; i<=3; i++) {
832 /* DIMM0 Write Data Timing Low */
833 /* DIMM0 Write ECC Timing */
834 Set_NB32_index_wait(dev, index_reg, i + 0x100 * j, dword);
839 for(i=5; i<=6; i++) {
841 /* DIMM0 Read DQS Timing Control Low */
842 Set_NB32_index_wait(dev, index_reg, i + 0x100 * j, dword);
847 /* DIMM0 Read DQS ECC Timing Control */
848 Set_NB32_index_wait(dev, index_reg, 7 + 0x100 * j, dword);
/* Writes one value per chip select pair (ChipSel 0, 2, ... below
 * MAX_CS_SUPPORTED) of Channel to the indexed DCT registers selected by
 * index_reg (0x98 + 0x100 per channel), and logs each value.
 * p points at pDCTstat->CH_D_BC_RCVRDLY[Channel].
 * NOTE(review): the computation of index and val is not visible in this
 * listing; presumably val is read through p. */
851 void SetEccDQSRcvrEn_D(struct DCTStatStruc *pDCTstat, u8 Channel)
860 dev = pDCTstat->dev_dct;
861 index_reg = 0x98 + Channel * 0x100;
863 p = pDCTstat->CH_D_BC_RCVRDLY[Channel];
864 print_debug_dqs("\t\tSetEccDQSRcvrPos: Channel ", Channel, 2);
865 for(ChipSel = 0; ChipSel < MAX_CS_SUPPORTED; ChipSel += 2) {
867 Set_NB32_index_wait(dev, index_reg, index, val);
868 print_debug_dqs_pair("\t\tSetEccDQSRcvrPos: ChipSel ",
869 ChipSel, " rcvr_delay ", val, 2);
/* Derives the ECC lane receiver enable delay for each enabled chip select
 * pair of Channel and then programs it via SetEccDQSRcvrEn_D.
 *
 * For each pair, val0 and val1 are the saved delays of the two data byte
 * lanes selected by bits 2:0 and bits 10:8 of CH_EccDQSLike[Channel]; the
 * result is stored in CH_D_BC_RCVRDLY[Channel][ChipSel >> 1].
 * NOTE(review): the arithmetic combining val0, val1 and EccDQSScale is only
 * partly visible (a right shift by 8 is shown); confirm against the complete
 * source. */
874 static void CalcEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat,
875 struct DCTStatStruc *pDCTstat, u8 Channel)
882 EccDQSLike = pDCTstat->CH_EccDQSLike[Channel];
883 EccDQSScale = pDCTstat->CH_EccDQSScale[Channel];
885 for (ChipSel = 0; ChipSel < MAX_CS_SUPPORTED; ChipSel += 2) {
886 if(mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, ChipSel)) {
888 p = pDCTstat->CH_D_B_RCVRDLY[Channel][ChipSel>>1];
890 /* DQS Delay Value of Data Bytelane
891 * most like ECC byte lane */
892 val0 = p[EccDQSLike & 0x07];
893 /* DQS Delay Value of Data Bytelane
894 * 2nd most like ECC byte lane */
895 val1 = p[(EccDQSLike>>8) & 0x07];
897 if (!(pDCTstat->Status & (1 << SB_Registered))) {
905 val >>= 8; /* /256 */
917 pDCTstat->CH_D_BC_RCVRDLY[Channel][ChipSel>>1] = val;
920 SetEccDQSRcvrEn_D(pDCTstat, Channel);
/* Walks all MAX_NODES_SUPPORTED entries of the pDCTstatA array and, for each
 * node with a nonzero DCTSysLimit, calls CalcEccDQSRcvrEn_D for channel i.
 * Nodes are first checked for NodePresent.
 * NOTE(review): the action taken for absent nodes and the loop over i are
 * not visible in this listing. */
923 void mctSetEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat,
924 struct DCTStatStruc *pDCTstatA)
929 for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
930 struct DCTStatStruc *pDCTstat;
931 pDCTstat = pDCTstatA + Node;
932 if (!pDCTstat->NodePresent)
934 if (pDCTstat->DCTSysLimit) {
936 CalcEccDQSRcvrEn_D(pMCTstat, pDCTstat, i);
/* Runs phy fence training (fenceDynTraining_D) on DCT 0 and DCT 1 of every
 * entry of the pDCTstatA array whose DCTSysLimit is nonzero.
 * NOTE(review): the initialisation and increment of Node are not visible in
 * this listing. */
941 void phyAssistedMemFnceTraining(struct MCTStatStruc *pMCTstat,
942 struct DCTStatStruc *pDCTstatA)
945 struct DCTStatStruc *pDCTstat;
947 /* FIXME: skip for Ax */
948 while (Node < MAX_NODES_SUPPORTED) {
949 pDCTstat = pDCTstatA + Node;
951 if(pDCTstat->DCTSysLimit) {
952 fenceDynTraining_D(pMCTstat, pDCTstat, 0);
953 fenceDynTraining_D(pMCTstat, pDCTstat, 1);
/* Phy fence training for one DCT (dct selects the indexed register pair at
 * 0x98 + 0x100 per DCT).
 *
 * Steps visible in this listing:
 *  1. seed the low byte of indexed registers 0x50 to 0x52 with
 *     FenceTrnFinDlySeed (5 bits);
 *  2. set PhyFenceTrEn in indexed register 0x08, wait, then clear it;
 *  3. read registers 0x50 to 0x52 back and average the 7-bit fields;
 *  4. write the low 5 bits of avRecValue to bits 20:16 of indexed
 *     register 0x0C;
 *  5. read indexed register 0x04 and write the same value back.
 * NOTE(review): the adjustment applied between steps 3 and 4, which depends
 * on LogicalCPUID (AMD_DR_Cx or AMD_DR_Bx), is not visible here. */
959 static void fenceDynTraining_D(struct MCTStatStruc *pMCTstat,
960 struct DCTStatStruc *pDCTstat, u8 dct)
965 u32 index_reg = 0x98 + 0x100 * dct;
968 /* BIOS first programs a seed value to the phase recovery engine
969 * (recommended 19) registers.
970 * Dram Phase Recovery Control Register (F2x[1,0]9C_x[51:50] and
973 dev = pDCTstat->dev_dct;
974 for (index = 0x50; index <= 0x52; index ++) {
975 val = Get_NB32_index_wait(dev, index_reg, index) & ~0xFF;
976 val |= (FenceTrnFinDlySeed & 0x1F);
979 val |= (val & 0xFF) << 8;
983 Set_NB32_index_wait(dev, index_reg, index, val);
986 /* Set F2x[1,0]9C_x08[PhyFenceTrEn]=1. */
987 val = Get_NB32_index_wait(dev, index_reg, 0x08);
988 val |= 1 << PhyFenceTrEn;
989 Set_NB32_index_wait(dev, index_reg, 0x08, val);
991 /* Wait 200 MEMCLKs. */
992 mct_Wait(50000); /* wait 200us */
994 /* Clear F2x[1,0]9C_x08[PhyFenceTrEn]=0. */
995 val = Get_NB32_index_wait(dev, index_reg, 0x08);
996 val &= ~(1 << PhyFenceTrEn);
997 Set_NB32_index_wait(dev, index_reg, 0x08, val);
999 /* BIOS reads the phase recovery engine registers
1000 * F2x[1,0]9C_x[51:50] and F2x[1,0]9C_x52. */
/* Four 7-bit fields are summed from each of registers 0x50 and 0x51 and one
 * from register 0x52, nine values in total, hence the division by 9 below. */
1002 for (index = 0x50; index <= 0x52; index ++) {
1003 val = Get_NB32_index_wait(dev, index_reg, index);
1004 avRecValue += val & 0x7F;
1005 if (index != 0x52) {
1006 avRecValue += (val >> 8) & 0x7F;
1007 avRecValue += (val >> 16) & 0x7F;
1008 avRecValue += (val >> 24) & 0x7F;
1012 val = avRecValue / 9;
1017 /* Write the (averaged value -8) to F2x[1,0]9C_x0C[PhyFence]. */
1018 /* inlined mct_AdjustFenceValue() */
1019 /* The RBC0 is not supported. */
1020 /* if (pDCTstat->LogicalCPUID & AMD_RB_C0)
1024 if (pDCTstat->LogicalCPUID & AMD_DR_Cx)
1026 else if (pDCTstat->LogicalCPUID & AMD_DR_Bx)
1029 val = Get_NB32_index_wait(dev, index_reg, 0x0C);
1030 val &= ~(0x1F << 16);
1031 val |= (avRecValue & 0x1F) << 16;
1032 Set_NB32_index_wait(dev, index_reg, 0x0C, val);
1034 /* Rewrite F2x[1,0]9C_x04-DRAM Address/Command Timing Control Register
1035 * delays (both channels). */
1036 val = Get_NB32_index_wait(dev, index_reg, 0x04);
1037 Set_NB32_index_wait(dev, index_reg, 0x04, val);
/* Busy-wait: polls the low 32 bits of MSR 0x10 (the TSC) until the unsigned
 * difference lo - saved reaches cycles * 8.
 * Only the low half of the counter is compared; the unsigned subtraction
 * keeps the comparison valid across a wrap of that half.
 * NOTE(review): the assignment to saved is not visible in this listing;
 * presumably it holds the low word of the first TSC read.
 * NOTE(review): the existing comments give the unit as 50 ns cycles and
 * 1.25 ns ticks, but 50 ns divided by 8 is 6.25 ns; the real duration depends
 * on the TSC frequency. Confirm the intended unit. */
1040 void mct_Wait(u32 cycles)
1045 /* Wait # of 50ns cycles
1046 This seems like a hack to me... */
1048 cycles <<= 3; /* x8 (number of 1.25ns ticks) */
1050 msr = 0x10; /* TSC */
1051 _RDMSR(msr, &lo, &hi);
1054 _RDMSR(msr, &lo, &hi);
1055 } while (lo - saved < cycles );