2 * This file is part of the coreboot project.
4 * Copyright (C) 2010 Advanced Micro Devices, Inc.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; version 2 of the License.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 * Description: Max Read Latency Training feature for DDR 3 MCT
/*
 * Forward declarations for the routines defined later in this file.
 * mct_GetStartMaxRdLat_D is the only one declared without static.
 *
 * NOTE(review): the last prototype below (mct_setMaxRdLatTrnVal_D) is cut
 * off in this listing; the definition further down takes
 * (struct DCTStatStruc *pDCTstat, u8 Channel, u16 MaxRdLatVal).
 */
24 static u8 CompareMaxRdLatTestPattern_D(u32 pattern_buf, u32 addr);
25 static u32 GetMaxRdLatTestAddr_D(struct MCTStatStruc *pMCTstat,
26 struct DCTStatStruc *pDCTstat, u8 Channel,
27 u8 *MaxRcvrEnDly, u8 *valid);
28 u8 mct_GetStartMaxRdLat_D(struct MCTStatStruc *pMCTstat,
29 struct DCTStatStruc *pDCTstat, u8 Channel,
30 u8 DQSRcvEnDly, u32 *Margin);
31 static void maxRdLatencyTrain_D(struct MCTStatStruc *pMCTstat,
32 struct DCTStatStruc *pDCTstat);
33 static void mct_setMaxRdLatTrnVal_D(struct DCTStatStruc *pDCTstat, u8 Channel,
/*
 * Read-only test data for max read latency training.
 *
 * 48 32-bit words are visible in this table, which matches the 16 * 3
 * words that SetupMaxRdPattern() copies out of it and that
 * CompareMaxRdLatTestPattern_D() checks against memory.
 *
 * NOTE(review): the end of the warning comment below and the closing of
 * the initializer are not visible in this listing.
 */
36 /*Warning: These must be located so they do not cross a logical 16-bit
38 static const u32 TestMaxRdLAtPattern_D[] = {
39 0x6E0E3FAC, 0x0C3CFF52,
40 0x4A688181, 0x49C5B613,
41 0x7C780BA6, 0x5C1650E3,
42 0x0C4F9D76, 0x0C6753E6,
43 0x205535A5, 0xBABFB6CA,
44 0x610E6E5F, 0x0C5F1C87,
45 0x488493CE, 0x14C9C383,
46 0xF5B9A5CD, 0x9CE8F615,
48 0xAAD714B5, 0xC38F1B4C,
49 0x72ED647C, 0x669F7562,
50 0x5233F802, 0x4A898B30,
51 0x10A40617, 0x3326B465,
52 0x55386E04, 0xC807E3D3,
53 0xAB49E193, 0x14B4E63A,
54 0x67DF2495, 0xEA517C45,
55 0x7624CE51, 0xF8140C51,
57 0x4824BD23, 0xB61DD0C9,
58 0x072BCFBE, 0xE8F3807D,
59 0x919EA373, 0x25E30C47,
60 0xFEB12958, 0x4DA80A5A,
61 0xE9A0DDF8, 0x792B0076,
62 0xE81C73DC, 0xF025B496,
63 0x1DB7E627, 0x808594FE,
64 0x82668268, 0x655C7783,
/*
 * Stage the training pattern in a caller-supplied scratch buffer.
 *
 * The body computes (buffer + 0x10) & 0xfffffff0, i.e. the next 16-byte
 * boundary strictly above 'buffer' (an already aligned buffer still loses
 * its first 16 bytes), and copies 16 * 3 words of TestMaxRdLAtPattern_D
 * to that location. The pointer is cast through u32, so this assumes
 * 32-bit pointers.
 *
 * pMCTstat and pDCTstat are not referenced in the visible lines.
 *
 * NOTE(review): the third parameter (used as 'buffer'), the local
 * declarations and the return statement are missing from this listing.
 * The only visible caller passes a u32[60] array and later treats the
 * returned u32 as the address of the pattern copy -- presumably the
 * aligned address is what is returned; confirm against the full source.
 */
67 static u32 SetupMaxRdPattern(struct MCTStatStruc *pMCTstat,
68 struct DCTStatStruc *pDCTstat,
71 /* 1. Copy the alpha and Beta patterns from ROM to Cache,
72 * aligning on 16 byte boundary
73 * 2. Set the ptr to Cacheable copy in DCTStatstruc.PtrPatternBufA
75 * 3. Set the ptr to Cacheable copy in DCTStatstruc.PtrPatternBufB
81 buf = (u32 *)(((u32)buffer + 0x10) & (0xfffffff0));
83 for(i = 0; i < (16 * 3); i++) {
84 buf[i] = TestMaxRdLAtPattern_D[i];
/*
 * Entry point: run max read latency training on each node.
 *
 * pDCTstatA is indexed as an array of per-node DCTStatStruc entries, one
 * for each Node in [0, MAX_NODES_SUPPORTED). maxRdLatencyTrain_D() is
 * called for a node only when its DCTSysLimit field is non-zero.
 *
 * NOTE(review): the statement executed when NodePresent is zero is
 * missing from this listing (as are the declaration of Node and the
 * braces), so whether an absent node ends the scan or is merely skipped
 * cannot be told from here.
 */
90 void TrainMaxReadLatency_D(struct MCTStatStruc *pMCTstat,
91 struct DCTStatStruc *pDCTstatA)
95 for(Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
96 struct DCTStatStruc *pDCTstat;
97 pDCTstat = pDCTstatA + Node;
99 if(!pDCTstat->NodePresent)
102 if(pDCTstat->DCTSysLimit)
103 maxRdLatencyTrain_D(pMCTstat, pDCTstat);
/*
 * Train the maximum read latency for the DCT channels of one node.
 *
 * Visible sequence:
 *  - CPU setup: CR4 bit 9 (OSFXSR) is tested and set; through the MSR
 *    selected by 'addr', HWCR bit 17 (wrap32dis) is tested and set and
 *    bit 15 (SSEDIS) is cleared;
 *  - DIMM ECC is turned off with mct_DisableDimmEccEn_D() and the value
 *    it returns is kept so ECC can be re-enabled afterwards;
 *  - the test pattern is staged in the local PatternBuffer;
 *  - for each of the two channels (channel 1 is not processed when the
 *    SB_128bitmode status bit is set) the pattern is written to a test
 *    address, MaxRdLatDly is swept from the computed start value while it
 *    is below MAX_RD_LAT, and MaxRdLatDly + Margin is programmed last;
 *  - ECC, HWCR.wrap32dis and CR4.OSFXSR are put back, and when
 *    DQS_TRAIN_DEBUG > 0 the CH_MaxRdLat values are printed.
 *
 * NOTE(review): several short lines are missing from this listing (local
 * declarations, the CR4 read and write, the value of 'addr', the bodies
 * of some if statements, the loop increment, closing braces). Presumably
 * the sweep stops at the first DQS_PASS and an invalid test address skips
 * the channel -- confirm against the complete source.
 * NOTE(review): only wrap32dis is restored in the visible lines; SSEDIS,
 * cleared during setup, is not set back -- confirm this is intended.
 */
107 static void maxRdLatencyTrain_D(struct MCTStatStruc *pMCTstat,
108 struct DCTStatStruc *pDCTstat)
112 u8 _DisableDramECC = 0, _Wrap32Dis = 0, _SSE2 = 0;
115 u32 PatternBuffer[60]; /* FIXME: why not 48 + 4 */
125 if(cr4 & (1<<9)) { /* save the old value */
128 cr4 |= (1<<9); /* OSFXSR enable SSE2 */
132 _RDMSR(addr, &lo, &hi);
133 if(lo & (1<<17)) { /* save the old value */
136 lo |= (1<<17); /* HWCR.wrap32dis */
137 lo &= ~(1<<15); /* SSEDIS */
138 /* Setting wrap32dis allows 64-bit memory references in
140 _WRMSR(addr, lo, hi);
142 _DisableDramECC = mct_DisableDimmEccEn_D(pMCTstat, pDCTstat);
144 pattern_buf = SetupMaxRdPattern(pMCTstat, pDCTstat, PatternBuffer);
146 for (Channel = 0; Channel < 2; Channel++) {
147 print_debug_dqs("\tMaxRdLatencyTrain51: Channel ",Channel, 1);
148 pDCTstat->Channel = Channel;
150 if( (pDCTstat->Status & (1 << SB_128bitmode)) && Channel)
151 break; /*if ganged mode, skip DCT 1 */
/* RcvrEnDly and valid are filled in by the address lookup. */
153 TestAddr0 = GetMaxRdLatTestAddr_D(pMCTstat, pDCTstat, Channel, &RcvrEnDly, &valid);
154 if(!valid) /* Address not supported on current CS */
156 /* rank 1 of DIMM, testpattern 0 */
157 WriteMaxRdLat1CLTestPattern_D(pattern_buf, TestAddr0);
/* Starting delay for the sweep; Margin is added to the final value. */
159 MaxRdLatDly = mct_GetStartMaxRdLat_D(pMCTstat, pDCTstat, Channel, RcvrEnDly, &Margin);
160 print_debug_dqs("\tMaxRdLatencyTrain52: MaxRdLatDly start ", MaxRdLatDly, 2);
161 print_debug_dqs("\tMaxRdLatencyTrain52: MaxRdLatDly Margin ", Margin, 2);
162 while(MaxRdLatDly < MAX_RD_LAT) { /* sweep Delay value here */
163 mct_setMaxRdLatTrnVal_D(pDCTstat, Channel, MaxRdLatDly);
164 ReadMaxRdLat1CLTestPattern_D(TestAddr0);
165 if( CompareMaxRdLatTestPattern_D(pattern_buf, TestAddr0) == DQS_PASS)
167 SetTargetWTIO_D(TestAddr0);
168 FlushMaxRdLatTestPattern_D(TestAddr0);
172 print_debug_dqs("\tMaxRdLatencyTrain53: MaxRdLatDly end ", MaxRdLatDly, 2);
173 mct_setMaxRdLatTrnVal_D(pDCTstat, Channel, MaxRdLatDly + Margin);
/* Undo the temporary ECC and CPU settings made during setup. */
176 if(_DisableDramECC) {
177 mct_EnableDimmEccEn_D(pMCTstat, pDCTstat, _DisableDramECC);
182 _RDMSR(addr, &lo, &hi);
183 lo &= ~(1<<17); /* restore HWCR.wrap32dis */
184 _WRMSR(addr, lo, hi);
188 cr4 &= ~(1<<9); /* restore cr4.OSFXSR */
/* Optional dump of the per-channel results. */
192 #if DQS_TRAIN_DEBUG > 0
195 print_debug("maxRdLatencyTrain: CH_MaxRdLat:\n");
196 for(Channel = 0; Channel<2; Channel++) {
197 print_debug("Channel:"); print_debug_hex8(Channel);
199 print_debug_hex8( pDCTstat->CH_MaxRdLat[Channel] );
/*
 * Record a max read latency value and program it into the DCT.
 *
 * The value is stored in pDCTstat->CH_MaxRdLat[] and then ORed, shifted
 * left by 22, into the register at offset 0x78 + Channel * 0x100 of
 * pDCTstat->dev_dct (read with Get_NB32, written back with Set_NB32).
 * In ganged mode Channel is forced to 0 before the register offset is
 * computed.
 *
 * NOTE(review): the loop that drives index 'i' in the ganged branch, the
 * local declarations, and the statement between the register read and
 * the OR (original line 225) are missing from this listing. Presumably
 * the old field is masked out there -- confirm, since an OR alone would
 * keep stale bits from a previous value.
 */
206 static void mct_setMaxRdLatTrnVal_D(struct DCTStatStruc *pDCTstat,
207 u8 Channel, u16 MaxRdLatVal)
214 if (pDCTstat->GangedMode) {
215 Channel = 0; /* for safety */
217 pDCTstat->CH_MaxRdLat[i] = MaxRdLatVal;
219 pDCTstat->CH_MaxRdLat[Channel] = MaxRdLatVal;
222 dev = pDCTstat->dev_dct;
223 reg = 0x78 + Channel * 0x100;
224 val = Get_NB32(dev, reg);
226 val |= MaxRdLatVal<<22;
227 /* program MaxRdLatency to correspond with current delay */
228 Set_NB32(dev, reg, val);
/*
 * Check memory at a test address against the staged pattern.
 *
 * pattern_buf is used as the address of a u32 array; 16 * 3 words of it
 * are compared with values fetched by read32_fs(addr_lo) after
 * SetUpperFSbase(addr) has been called. Both values are logged through
 * print_debug_dqs_pair before the comparison. The caller tests the
 * result against DQS_PASS.
 *
 * NOTE(review): the comment below mentions a Channel parameter, but this
 * function only takes pattern_buf and addr -- the comment looks stale.
 * NOTE(review): how addr_lo is derived, whether it advances between
 * iterations, what happens on a mismatch and the return statement are
 * all missing from this listing.
 */
231 static u8 CompareMaxRdLatTestPattern_D(u32 pattern_buf, u32 addr)
233 /* Compare only the first beat of data. Since target addrs are cache
234 * line aligned, the Channel parameter is used to determine which cache
238 u32 *test_buf = (u32 *)pattern_buf;
244 SetUpperFSbase(addr);
248 for (i=0; i<(16*3); i++) {
249 val = read32_fs(addr_lo);
250 val_test = test_buf[i];
252 print_debug_dqs_pair("\t\t\t\t\t\ttest_buf = ", (u32)test_buf, " value = ", val_test, 5);
253 print_debug_dqs_pair("\t\t\t\t\t\ttaddr_lo = ", addr_lo, " value = ", val, 5);
254 if(val != val_test) {
/*
 * Pick the system address used for max read latency testing.
 *
 * The visible loops read pDCTstat->CH_D_B_RCVRDLY[ch][d][Byte] for ch in
 * [ch_start, ch_end) and Byte in [0, bn). Afterwards, if rank d_Max << 1
 * on Channel_Max is enabled according to mct_RcvrRankEnabled_D(), the
 * address is obtained from mct_GetMCTSysAddr_D(), which also receives
 * the 'valid' pointer.
 *
 * NOTE(review): the remaining parameters, the loop over 'd', the
 * comparison that selects the maximum, the values of ch_start and bn,
 * the store through MaxRcvrEnDly and the return statement are missing
 * from this listing. Presumably the largest receiver enable delay and
 * its location are tracked -- confirm against the full source.
 * NOTE(review): inside the loop over 'ch' the code records
 * Channel_Max = Channel rather than ch; confirm this is intended for the
 * 128-bit (ganged) case.
 */
264 static u32 GetMaxRdLatTestAddr_D(struct MCTStatStruc *pMCTstat,
265 struct DCTStatStruc *pDCTstat,
266 u8 Channel, u8 *MaxRcvrEnDly,
277 u8 ch, ch_start, ch_end;
282 if(pDCTstat->Status & (1 << SB_128bitmode)) {
287 ch_end = Channel + 1;
292 for(ch = ch_start; ch < ch_end; ch++) {
294 for(Byte = 0; Byte<bn; Byte++) {
296 tmp = pDCTstat->CH_D_B_RCVRDLY[ch][d][Byte];
299 Channel_Max = Channel;
306 if(mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel_Max, d_Max << 1)) {
307 TestAddr0 = mct_GetMCTSysAddr_D(pMCTstat, pDCTstat, Channel_Max, d_Max << 1, valid);
/*
 * Compute the starting max read latency for a channel and the margin
 * that the caller adds to the trained value.
 *
 * Visible steps, using registers of pDCTstat->dev_dct at Channel * 0x100:
 *  - SubTotal starts as ((reg 0x88 & 0x0f) + 1) << 1;
 *  - reg 0x90 is tested for the UnBuffDimm bit;
 *  - indexed register 0x04 (via index port 0x98 + 0x100 * Channel) is
 *    tested against mask 0x00202020;
 *  - 8 - (reg 0x78 & 0x0f) is added, which gives 4, 3, 2, 1 for field
 *    values 4, 5, 6, 7 as the comment describes;
 *  - DQSRcvEnDly >> 5 is added, without rounding;
 *  - SubTotal is doubled, then reg 0x94 and reg 0xD4 of
 *    pDCTstat->dev_nbmisc (as ((val & 0x1f) + 4) * 3) enter the
 *    conversion to NCLKs;
 *  - *Margin receives valxx after it has been incremented twice.
 *
 * NOTE(review): the statement under the GangedMode test, the amounts
 * added in the UnBuffDimm and prelaunch branches, most of the NCLK
 * conversion arithmetic and the return statement are missing from this
 * listing; the function may continue past the last visible line.
 */
316 u8 mct_GetStartMaxRdLat_D(struct MCTStatStruc *pMCTstat,
317 struct DCTStatStruc *pDCTstat,
318 u8 Channel, u8 DQSRcvEnDly, u32 *Margin)
328 if(pDCTstat->GangedMode)
331 index_reg = 0x98 + 0x100 * Channel;
333 reg_off = 0x100 * Channel;
334 dev = pDCTstat->dev_dct;
336 /* Multiply the CAS Latency by two to get a number of 1/2 MEMCLKs units.*/
337 val = Get_NB32(dev, 0x88 + reg_off);
338 SubTotal = ((val & 0x0f) + 1) << 1; /* SubTotal is 1/2 Memclk unit */
340 /* If registered DIMMs are being used then add 1 MEMCLK to the sub-total*/
341 val = Get_NB32(dev, 0x90 + reg_off);
342 if(!(val & (1 << UnBuffDimm)))
345 /*If the address prelaunch is setup for 1/2 MEMCLKs then add 1,
346 * else add 2 to the sub-total. if (AddrCmdSetup || CsOdtSetup
347 * || CkeSetup) then K := K + 2; */
348 val = Get_NB32_index_wait(dev, index_reg, 0x04);
349 if(!(val & 0x00202020))
354 /* If the F2x[1, 0]78[RdPtrInit] field is 4, 5, 6 or 7 MEMCLKs,
355 * then add 4, 3, 2, or 1 MEMCLKs, respectively to the sub-total. */
356 val = Get_NB32(dev, 0x78 + reg_off);
357 SubTotal += 8 - (val & 0x0f);
359 /* Convert bits 7-5 (also referred to as the coarse delay) of the current
360 * (or worst case) DQS receiver enable delay to 1/2 MEMCLKs units,
361 * rounding up, and add this to the sub-total. */
362 SubTotal += DQSRcvEnDly >> 5; /*BOZO-no rounding up */
364 SubTotal <<= 1; /*scale 1/2 MemClk to 1/4 MemClk */
366 /* Convert the sub-total (in 1/2 MEMCLKs) to northbridge clocks (NCLKs)
367 * as follows (assuming DDR400 and assuming that no P-state or link speed
368 * changes have occurred). */
371 SubTotal *= 3*(Fn2xD4[NBFid]+4)/(3+Fn2x94[MemClkFreq])/2 */
372 val = Get_NB32(dev, 0x94 + reg_off);
373 /* SubTotal div 4 to scale 1/4 MemClk back to MemClk */
379 valx = (val) << 2; /* SubTotal div 4 to scale 1/4 MemClk back to MemClk */
381 val = Get_NB32(pDCTstat->dev_nbmisc, 0xD4);
382 val = ((val & 0x1f) + 4 ) * 3;
384 /* Calculate 1 MemClk + 1 NCLK delay in NCLKs for margin */
388 valxx++; /* round up */
389 valxx++; /* add 1NCLK */
390 *Margin = valxx; /* one MemClk delay in NCLKs and one additional NCLK */
396 val++; /* round up */