Fix ECC disable option for AMD Fam10 DDR2 and DDR3.
[coreboot.git] / src / northbridge / amd / amdmct / mct / mctdqs_d.c
1 /*
2  * This file is part of the coreboot project.
3  *
4  * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; version 2 of the License.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
18  */
19
20
21 static void CalcEccDQSPos_D(struct MCTStatStruc *pMCTstat,
22                                 struct DCTStatStruc *pDCTstat, u16 like,
23                                 u8 scale, u8 ChipSel);
24 static void GetDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat,
25                                 struct DCTStatStruc *pDCTstat, u8 ChipSel);
26 static u8 MiddleDQS_D(u8 min, u8 max);
27 static void TrainReadDQS_D(struct MCTStatStruc *pMCTstat,
28                                 struct DCTStatStruc *pDCTstat,
29                                 u8 cs_start);
30 static void TrainWriteDQS_D(struct MCTStatStruc *pMCTstat,
31                                 struct DCTStatStruc *pDCTstat,
32                                 u8 cs_start);
33 static void WriteDQSTestPattern_D(struct MCTStatStruc *pMCTstat,
34                                         struct DCTStatStruc *pDCTstat,
35                                         u32 TestAddr_lo);
36 static void WriteL18TestPattern_D(struct DCTStatStruc *pDCTstat,
37                                         u32 TestAddr_lo);
38 static void WriteL9TestPattern_D(struct DCTStatStruc *pDCTstat,
39                                         u32 TestAddr_lo);
40 static u8 CompareDQSTestPattern_D(struct MCTStatStruc *pMCTstat,
41                                         struct DCTStatStruc *pDCTstat,
42                                         u32 addr_lo);
43 static void FlushDQSTestPattern_D(struct DCTStatStruc *pDCTstat,
44                                         u32 addr_lo);
45 static void SetTargetWTIO_D(u32 TestAddr);
46 static void ResetTargetWTIO_D(void);
47 static void ReadDQSTestPattern_D(struct MCTStatStruc *pMCTstat,
48                                         struct DCTStatStruc *pDCTstat,
49                                         u32 TestAddr_lo);
50 void ResetDCTWrPtr_D(u32 dev, u32 index_reg, u32 index);
51 u8 mct_DisableDimmEccEn_D(struct MCTStatStruc *pMCTstat,
52                                 struct DCTStatStruc *pDCTstat);
53 static void mct_SetDQSDelayCSR_D(struct MCTStatStruc *pMCTstat,
54                                         struct DCTStatStruc *pDCTstat,
55                                         u8 ChipSel);
56 static void mct_SetDQSDelayAllCSR_D(struct MCTStatStruc *pMCTstat,
57                                         struct DCTStatStruc *pDCTstat,
58                                         u8 cs_start);
59 u32 mct_GetMCTSysAddr_D(struct MCTStatStruc *pMCTstat,
60                                 struct DCTStatStruc *pDCTstat, u8 Channel,
61                                 u8 receiver, u8 *valid);
62 static void SetupDqsPattern_D(struct MCTStatStruc *pMCTstat,
63                                 struct DCTStatStruc *pDCTstat,
64                                 u32 *buffer);
65
66 #define DQS_TRAIN_DEBUG 0
67
68 static void print_debug_dqs(const char *str, u32 val, u8 level)
69 {
70 #if DQS_TRAIN_DEBUG > 0
71         if (DQS_TRAIN_DEBUG >= level) {
72                 printk(BIOS_DEBUG, "%s%x\n", str, val);
73         }
74 #endif
75 }
76
77 static void print_debug_dqs_pair(const char *str, u32 val, const char *str2, u32 val2, u8 level)
78 {
79 #if DQS_TRAIN_DEBUG > 0
80         if (DQS_TRAIN_DEBUG >= level) {
81                 printk(BIOS_DEBUG, "%s%08x%s%08x\n", str, val, str2, val2);
82         }
83 #endif
84 }
85
/*
 * Warning: the test pattern tables must be located so they do not cross a
 * logical 16-bit segment boundary!
 *
 * TestPatternJD1a_D: 9 cache lines (16 dwords each, 144 dwords total).
 * SetupDqsPattern_D() copies this table when the DCT is not in 128-bit
 * mode ("sequential qwords").  Each row holds two quadwords; the row
 * comments name the quadwords and the pattern group they belong to.
 */
static const u32 TestPatternJD1a_D[] = {
	0x00000000,0x00000000,0xFFFFFFFF,0xFFFFFFFF, /* QW0-1, ALL-EVEN */
	0x00000000,0x00000000,0x00000000,0x00000000, /* QW2-3, ALL-EVEN */
	0x00000000,0x00000000,0xFFFFFFFF,0xFFFFFFFF, /* QW4-5, ALL-EVEN */
	0x00000000,0x00000000,0x00000000,0x00000000, /* QW6-7, ALL-EVEN */
	0xFeFeFeFe,0xFeFeFeFe,0x01010101,0x01010101, /* QW0-1, DQ0-ODD */
	0xFeFeFeFe,0xFeFeFeFe,0x01010101,0x01010101, /* QW2-3, DQ0-ODD */
	0x01010101,0x01010101,0xFeFeFeFe,0xFeFeFeFe, /* QW4-5, DQ0-ODD */
	0xFeFeFeFe,0xFeFeFeFe,0x01010101,0x01010101, /* QW6-7, DQ0-ODD */
	0x02020202,0x02020202,0x02020202,0x02020202, /* QW0-1, DQ1-ODD */
	0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, /* QW2-3, DQ1-ODD */
	0xFdFdFdFd,0xFdFdFdFd,0x02020202,0x02020202, /* QW4-5, DQ1-ODD */
	0x02020202,0x02020202,0x02020202,0x02020202, /* QW6-7, DQ1-ODD */
	0x04040404,0x04040404,0xfBfBfBfB,0xfBfBfBfB, /* QW0-1, DQ2-ODD */
	0x04040404,0x04040404,0x04040404,0x04040404, /* QW2-3, DQ2-ODD */
	0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, /* QW4-5, DQ2-ODD */
	0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, /* QW6-7, DQ2-ODD */
	0x08080808,0x08080808,0xF7F7F7F7,0xF7F7F7F7, /* QW0-1, DQ3-ODD */
	0x08080808,0x08080808,0x08080808,0x08080808, /* QW2-3, DQ3-ODD */
	0xF7F7F7F7,0xF7F7F7F7,0x08080808,0x08080808, /* QW4-5, DQ3-ODD */
	0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, /* QW6-7, DQ3-ODD */
	0x10101010,0x10101010,0x10101010,0x10101010, /* QW0-1, DQ4-ODD */
	0xeFeFeFeF,0xeFeFeFeF,0x10101010,0x10101010, /* QW2-3, DQ4-ODD */
	0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, /* QW4-5, DQ4-ODD */
	0xeFeFeFeF,0xeFeFeFeF,0x10101010,0x10101010, /* QW6-7, DQ4-ODD */
	0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, /* QW0-1, DQ5-ODD */
	0xdFdFdFdF,0xdFdFdFdF,0x20202020,0x20202020, /* QW2-3, DQ5-ODD */
	0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, /* QW4-5, DQ5-ODD */
	0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, /* QW6-7, DQ5-ODD */
	0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, /* QW0-1, DQ6-ODD */
	0x40404040,0x40404040,0xBfBfBfBf,0xBfBfBfBf, /* QW2-3, DQ6-ODD */
	0x40404040,0x40404040,0xBfBfBfBf,0xBfBfBfBf, /* QW4-5, DQ6-ODD */
	0x40404040,0x40404040,0xBfBfBfBf,0xBfBfBfBf, /* QW6-7, DQ6-ODD */
	0x80808080,0x80808080,0x7F7F7F7F,0x7F7F7F7F, /* QW0-1, DQ7-ODD */
	0x80808080,0x80808080,0x7F7F7F7F,0x7F7F7F7F, /* QW2-3, DQ7-ODD */
	0x80808080,0x80808080,0x7F7F7F7F,0x7F7F7F7F, /* QW4-5, DQ7-ODD */
	0x80808080,0x80808080,0x80808080,0x80808080  /* QW6-7, DQ7-ODD */
};
/*
 * TestPatternJD1b_D: 18 cache lines (16 dwords each, 288 dwords total).
 * SetupDqsPattern_D() copies this table when the DCT is in 128-bit mode
 * ("alternating qwords").  Each row holds one quadword repeated twice
 * (labelled "CHA-B" in the row comments); the comments also name the
 * quadword index and the pattern group.
 *
 * Like TestPatternJD1a_D, this table must not cross a logical 16-bit
 * segment boundary.
 */
static const u32 TestPatternJD1b_D[] = {
	0x00000000,0x00000000,0x00000000,0x00000000, /* QW0,CHA-B, ALL-EVEN */
	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, /* QW1,CHA-B, ALL-EVEN */
	0x00000000,0x00000000,0x00000000,0x00000000, /* QW2,CHA-B, ALL-EVEN */
	0x00000000,0x00000000,0x00000000,0x00000000, /* QW3,CHA-B, ALL-EVEN */
	0x00000000,0x00000000,0x00000000,0x00000000, /* QW4,CHA-B, ALL-EVEN */
	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, /* QW5,CHA-B, ALL-EVEN */
	0x00000000,0x00000000,0x00000000,0x00000000, /* QW6,CHA-B, ALL-EVEN */
	0x00000000,0x00000000,0x00000000,0x00000000, /* QW7,CHA-B, ALL-EVEN */
	0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, /* QW0,CHA-B, DQ0-ODD */
	0x01010101,0x01010101,0x01010101,0x01010101, /* QW1,CHA-B, DQ0-ODD */
	0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, /* QW2,CHA-B, DQ0-ODD */
	0x01010101,0x01010101,0x01010101,0x01010101, /* QW3,CHA-B, DQ0-ODD */
	0x01010101,0x01010101,0x01010101,0x01010101, /* QW4,CHA-B, DQ0-ODD */
	0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, /* QW5,CHA-B, DQ0-ODD */
	0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, /* QW6,CHA-B, DQ0-ODD */
	0x01010101,0x01010101,0x01010101,0x01010101, /* QW7,CHA-B, DQ0-ODD */
	0x02020202,0x02020202,0x02020202,0x02020202, /* QW0,CHA-B, DQ1-ODD */
	0x02020202,0x02020202,0x02020202,0x02020202, /* QW1,CHA-B, DQ1-ODD */
	0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, /* QW2,CHA-B, DQ1-ODD */
	0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, /* QW3,CHA-B, DQ1-ODD */
	0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, /* QW4,CHA-B, DQ1-ODD */
	0x02020202,0x02020202,0x02020202,0x02020202, /* QW5,CHA-B, DQ1-ODD */
	0x02020202,0x02020202,0x02020202,0x02020202, /* QW6,CHA-B, DQ1-ODD */
	0x02020202,0x02020202,0x02020202,0x02020202, /* QW7,CHA-B, DQ1-ODD */
	0x04040404,0x04040404,0x04040404,0x04040404, /* QW0,CHA-B, DQ2-ODD */
	0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, /* QW1,CHA-B, DQ2-ODD */
	0x04040404,0x04040404,0x04040404,0x04040404, /* QW2,CHA-B, DQ2-ODD */
	0x04040404,0x04040404,0x04040404,0x04040404, /* QW3,CHA-B, DQ2-ODD */
	0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, /* QW4,CHA-B, DQ2-ODD */
	0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, /* QW5,CHA-B, DQ2-ODD */
	0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, /* QW6,CHA-B, DQ2-ODD */
	0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, /* QW7,CHA-B, DQ2-ODD */
	0x08080808,0x08080808,0x08080808,0x08080808, /* QW0,CHA-B, DQ3-ODD */
	0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, /* QW1,CHA-B, DQ3-ODD */
	0x08080808,0x08080808,0x08080808,0x08080808, /* QW2,CHA-B, DQ3-ODD */
	0x08080808,0x08080808,0x08080808,0x08080808, /* QW3,CHA-B, DQ3-ODD */
	0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, /* QW4,CHA-B, DQ3-ODD */
	0x08080808,0x08080808,0x08080808,0x08080808, /* QW5,CHA-B, DQ3-ODD */
	0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, /* QW6,CHA-B, DQ3-ODD */
	0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, /* QW7,CHA-B, DQ3-ODD */
	0x10101010,0x10101010,0x10101010,0x10101010, /* QW0,CHA-B, DQ4-ODD */
	0x10101010,0x10101010,0x10101010,0x10101010, /* QW1,CHA-B, DQ4-ODD */
	0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, /* QW2,CHA-B, DQ4-ODD */
	0x10101010,0x10101010,0x10101010,0x10101010, /* QW3,CHA-B, DQ4-ODD */
	0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, /* QW4,CHA-B, DQ4-ODD */
	0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, /* QW5,CHA-B, DQ4-ODD */
	0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, /* QW6,CHA-B, DQ4-ODD */
	0x10101010,0x10101010,0x10101010,0x10101010, /* QW7,CHA-B, DQ4-ODD */
	0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, /* QW0,CHA-B, DQ5-ODD */
	0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, /* QW1,CHA-B, DQ5-ODD */
	0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, /* QW2,CHA-B, DQ5-ODD */
	0x20202020,0x20202020,0x20202020,0x20202020, /* QW3,CHA-B, DQ5-ODD */
	0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, /* QW4,CHA-B, DQ5-ODD */
	0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, /* QW5,CHA-B, DQ5-ODD */
	0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, /* QW6,CHA-B, DQ5-ODD */
	0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, /* QW7,CHA-B, DQ5-ODD */
	0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, /* QW0,CHA-B, DQ6-ODD */
	0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, /* QW1,CHA-B, DQ6-ODD */
	0x40404040,0x40404040,0x40404040,0x40404040, /* QW2,CHA-B, DQ6-ODD */
	0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, /* QW3,CHA-B, DQ6-ODD */
	0x40404040,0x40404040,0x40404040,0x40404040, /* QW4,CHA-B, DQ6-ODD */
	0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, /* QW5,CHA-B, DQ6-ODD */
	0x40404040,0x40404040,0x40404040,0x40404040, /* QW6,CHA-B, DQ6-ODD */
	0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, /* QW7,CHA-B, DQ6-ODD */
	0x80808080,0x80808080,0x80808080,0x80808080, /* QW0,CHA-B, DQ7-ODD */
	0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F, /* QW1,CHA-B, DQ7-ODD */
	0x80808080,0x80808080,0x80808080,0x80808080, /* QW2,CHA-B, DQ7-ODD */
	0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F, /* QW3,CHA-B, DQ7-ODD */
	0x80808080,0x80808080,0x80808080,0x80808080, /* QW4,CHA-B, DQ7-ODD */
	0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F, /* QW5,CHA-B, DQ7-ODD */
	0x80808080,0x80808080,0x80808080,0x80808080, /* QW6,CHA-B, DQ7-ODD */
	0x80808080,0x80808080,0x80808080,0x80808080  /* QW7,CHA-B, DQ7-ODD */
};
199
/*
 * Four-entry byte table with external linkage; it is not referenced in
 * the part of the file shown here.
 * NOTE(review): presumably DQS receiver-enable register offsets, going
 * by the name - confirm against the code that indexes it.
 */
const u8 Table_DQSRcvEn_Offset[] = {
	0x00,
	0x01,
	0x10,
	0x11
};
201
202
203 void TrainReceiverEn_D(struct MCTStatStruc *pMCTstat,
204                         struct DCTStatStruc *pDCTstatA, u8 Pass)
205 {
206         u8 Node;
207         struct DCTStatStruc *pDCTstat;
208
209         for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
210                 pDCTstat = pDCTstatA + Node;
211
212 /*FIXME: needed?                if (!pDCTstat->NodePresent)
213                         break;
214 */
215                 if (pDCTstat->DCTSysLimit) {
216                         mct_TrainRcvrEn_D(pMCTstat, pDCTstat, Pass);
217                 }
218         }
219 }
220
221
222 static void SetEccDQSRdWrPos_D(struct MCTStatStruc *pMCTstat,
223                                 struct DCTStatStruc *pDCTstat, u8 ChipSel)
224 {
225         u8 channel;
226         u8 direction;
227
228         for (channel = 0; channel < 2; channel++){
229                 for (direction = 0; direction < 2; direction++) {
230                         pDCTstat->Channel = channel;    /* Channel A or B */
231                         pDCTstat->Direction = direction; /* Read or write */
232                         CalcEccDQSPos_D(pMCTstat, pDCTstat, pDCTstat->CH_EccDQSLike[channel], pDCTstat->CH_EccDQSScale[channel], ChipSel);
233                         print_debug_dqs_pair("\t\tSetEccDQSRdWrPos: channel ", channel, direction==DQS_READDIR? " R dqs_delay":" W dqs_delay",  pDCTstat->DQSDelay, 2);
234                         pDCTstat->ByteLane = 8;
235                         StoreDQSDatStrucVal_D(pMCTstat, pDCTstat, ChipSel);
236                         mct_SetDQSDelayCSR_D(pMCTstat, pDCTstat, ChipSel);
237                 }
238         }
239 }
240
241
242
243 static void CalcEccDQSPos_D(struct MCTStatStruc *pMCTstat,
244                                 struct DCTStatStruc *pDCTstat,
245                                 u16 like, u8 scale, u8 ChipSel)
246 {
247         u8 DQSDelay0, DQSDelay1;
248         u16 DQSDelay;
249
250         pDCTstat->ByteLane = like & 0xff;
251         GetDQSDatStrucVal_D(pMCTstat, pDCTstat, ChipSel);
252         DQSDelay0 = pDCTstat->DQSDelay;
253
254         pDCTstat->ByteLane = (like >> 8) & 0xff;
255         GetDQSDatStrucVal_D(pMCTstat, pDCTstat, ChipSel);
256         DQSDelay1 = pDCTstat->DQSDelay;
257
258         if (DQSDelay0>DQSDelay1) {
259                 DQSDelay = DQSDelay0 - DQSDelay1;
260         } else {
261                 DQSDelay = DQSDelay1 - DQSDelay0;
262         }
263
264         DQSDelay = DQSDelay * (~scale);
265
266         DQSDelay += 0x80;       // round it
267
268         DQSDelay >>= 8;         // /256
269
270         if (DQSDelay0>DQSDelay1) {
271                 DQSDelay = DQSDelay1 - DQSDelay;
272         } else {
273                 DQSDelay += DQSDelay1;
274         }
275
276         pDCTstat->DQSDelay = (u8)DQSDelay;
277 }
278
279
280 static void TrainDQSRdWrPos_D(struct MCTStatStruc *pMCTstat,
281                                 struct DCTStatStruc *pDCTstat,
282                                 u8 cs_start)
283 {
284         u32 Errors;
285         u8 Channel, DQSWrDelay;
286         u8 _DisableDramECC = 0;
287         u32 PatternBuffer[292];
288         u8 _Wrap32Dis = 0, _SSE2 = 0;
289         u8 dqsWrDelay_end;
290
291         u32 addr;
292         u32 cr4;
293         u32 lo, hi;
294
295         print_debug_dqs("\nTrainDQSRdWrPos: Node_ID ", pDCTstat->Node_ID, 0);
296         cr4 = read_cr4();
297         if (cr4 & (1<<9)) {
298                 _SSE2 = 1;
299         }
300         cr4 |= (1<<9);          /* OSFXSR enable SSE2 */
301         write_cr4(cr4);
302
303         addr = HWCR;
304         _RDMSR(addr, &lo, &hi);
305         if (lo & (1<<17)) {
306                 _Wrap32Dis = 1;
307         }
308         lo |= (1<<17);          /* HWCR.wrap32dis */
309         _WRMSR(addr, lo, hi);   /* allow 64-bit memory references in real mode */
310
311         /* Disable ECC correction of reads on the dram bus. */
312         _DisableDramECC = mct_DisableDimmEccEn_D(pMCTstat, pDCTstat);
313
314         SetupDqsPattern_D(pMCTstat, pDCTstat, PatternBuffer);
315
316         /* mct_BeforeTrainDQSRdWrPos_D */
317         dqsWrDelay_end = 0x20;
318
319         Errors = 0;
320         for (Channel = 0; Channel < 2; Channel++) {
321                 print_debug_dqs("\tTrainDQSRdWrPos: 1 Channel ",Channel, 1);
322                 pDCTstat->Channel = Channel;
323
324                 if (pDCTstat->DIMMValidDCT[Channel] == 0)       /* mct_BeforeTrainDQSRdWrPos_D */
325                         continue;
326
327                 for ( DQSWrDelay = 0; DQSWrDelay < dqsWrDelay_end; DQSWrDelay++) {
328                         pDCTstat->DQSDelay = DQSWrDelay;
329                         pDCTstat->Direction = DQS_WRITEDIR;
330                         mct_SetDQSDelayAllCSR_D(pMCTstat, pDCTstat, cs_start);
331
332                         print_debug_dqs("\t\tTrainDQSRdWrPos: 21 DQSWrDelay ", DQSWrDelay, 2);
333                         TrainReadDQS_D(pMCTstat, pDCTstat, cs_start);
334
335                         print_debug_dqs("\t\tTrainDQSRdWrPos: 22 TrainErrors ",pDCTstat->TrainErrors, 2);
336                         if (pDCTstat->TrainErrors == 0) {
337                                         break;
338                         }
339                         Errors |= pDCTstat->TrainErrors;
340                 }
341                 if (DQSWrDelay < dqsWrDelay_end) {
342                         Errors = 0;
343
344                         print_debug_dqs("\tTrainDQSRdWrPos: 231 DQSWrDelay ", DQSWrDelay, 1);
345                         TrainWriteDQS_D(pMCTstat, pDCTstat, cs_start);
346                 }
347                 print_debug_dqs("\tTrainDQSRdWrPos: 232 Errors ", Errors, 1);
348                 pDCTstat->ErrStatus |= Errors;
349         }
350
351 #if DQS_TRAIN_DEBUG > 0
352         {
353                 u8 val;
354                 u8 i;
355                 u8 Channel, Receiver, Dir;
356                 u8 *p;
357
358                 for (Dir = 0; Dir < 2; Dir++) {
359                         if (Dir == 0) {
360                                 print_debug("TrainDQSRdWrPos: CH_D_DIR_B_DQS WR:\n");
361                         } else {
362                                 print_debug("TrainDQSRdWrPos: CH_D_DIR_B_DQS RD:\n");
363                         }
364                         for (Channel = 0; Channel < 2; Channel++) {
365                                 print_debug("Channel:"); print_debug_hex8(Channel); print_debug("\n");
366                                 for (Receiver = cs_start; Receiver < (cs_start + 2); Receiver += 2) {
367                                         print_debug("\t\tReceiver:"); print_debug_hex8(Receiver);
368                                         p = pDCTstat->CH_D_DIR_B_DQS[Channel][Receiver >> 1][Dir];
369                                         print_debug(": ");
370                                         for (i=0;i<8; i++) {
371                                                 val  = p[i];
372                                                 print_debug_hex8(val);
373                                                 print_debug(" ");
374                                         }
375                                         print_debug("\n");
376                                 }
377                         }
378                 }
379
380         }
381 #endif
382
383         if (_DisableDramECC) {
384                 mct_EnableDimmEccEn_D(pMCTstat, pDCTstat, _DisableDramECC);
385         }
386         if (!_Wrap32Dis) {
387                 addr = HWCR;
388                 _RDMSR(addr, &lo, &hi);
389                 lo &= ~(1<<17);         /* restore HWCR.wrap32dis */
390                 _WRMSR(addr, lo, hi);
391         }
392         if (!_SSE2){
393                 cr4 = read_cr4();
394                 cr4 &= ~(1<<9);         /* restore cr4.OSFXSR */
395                 write_cr4(cr4);
396         }
397
398         print_tx("TrainDQSRdWrPos: Status ", pDCTstat->Status);
399         print_tx("TrainDQSRdWrPos: TrainErrors ", pDCTstat->TrainErrors);
400         print_tx("TrainDQSRdWrPos: ErrStatus ", pDCTstat->ErrStatus);
401         print_tx("TrainDQSRdWrPos: ErrCode ", pDCTstat->ErrCode);
402         print_t("TrainDQSRdWrPos: Done\n");
403 }
404
405
406 static void SetupDqsPattern_D(struct MCTStatStruc *pMCTstat,
407                                 struct DCTStatStruc *pDCTstat, u32 *buffer)
408 {
409         /* 1. Set the Pattern type (0 or 1) in DCTStatstruc.Pattern
410          * 2. Copy the pattern from ROM to Cache, aligning on 16 byte boundary
411          * 3. Set the ptr to Cacheable copy in DCTStatstruc.PtrPatternBufA
412          */
413
414         u32 *buf;
415         u16 i;
416
417         buf = (u32 *)(((u32)buffer + 0x10) & (0xfffffff0));
418         if (pDCTstat->Status & (1 << SB_128bitmode)) {
419                 pDCTstat->Pattern = 1;  /* 18 cache lines, alternating qwords */
420                 for (i=0; i<16*18; i++)
421                         buf[i] = TestPatternJD1b_D[i];
422         } else {
423                 pDCTstat->Pattern = 0;  /* 9 cache lines, sequential qwords */
424                 for (i=0; i<16*9; i++)
425                         buf[i] = TestPatternJD1a_D[i];
426         }
427         pDCTstat->PtrPatternBufA = (u32)buf;
428 }
429
430
431 static void TrainDQSPos_D(struct MCTStatStruc *pMCTstat,
432                                 struct DCTStatStruc *pDCTstat,
433                                 u8 cs_start)
434 {
435         u32 Errors;
436         u8 ChipSel, DQSDelay;
437         u8 RnkDlySeqPassMin,RnkDlySeqPassMax, RnkDlyFilterMin, RnkDlyFilterMax;
438         u8 LastTest;
439         u32 TestAddr;
440         u8 ByteLane;
441         u8 MutualCSPassW[64];
442         u8 BanksPresent;
443         u8 dqsDelay_end;
444         u8 tmp, valid;
445
446 //      print_tx("TrainDQSPos: Node_ID", pDCTstat->Node_ID);
447 //      print_tx("TrainDQSPos: Direction", pDCTstat->Direction);
448
449         /* MutualCSPassW: each byte represents a bitmap of pass/fail per
450          * ByteLane.  The indext within MutualCSPassW is the delay value
451          * given the results.
452          */
453
454
455         print_debug_dqs("\t\t\tTrainDQSPos begin ", 0, 3);
456
457         Errors = 0;
458         BanksPresent = 0;
459
460         if (pDCTstat->Direction == DQS_READDIR) {
461                 dqsDelay_end = 64;
462                 mct_AdjustDelayRange_D(pMCTstat, pDCTstat, &dqsDelay_end);
463         } else {
464                 dqsDelay_end = 32;
465         }
466
467         /* Bitmapped status per delay setting, 0xff=All positions
468          * passing (1= PASS). Set the entire array.
469          */
470         for (DQSDelay=0; DQSDelay<64; DQSDelay++) {
471                 MutualCSPassW[DQSDelay] = 0xFF;
472         }
473
474         for (ChipSel = cs_start; ChipSel < (cs_start + 2); ChipSel++) { /* logical register chipselects 0..7 */
475                 print_debug_dqs("\t\t\t\tTrainDQSPos: 11 ChipSel ", ChipSel, 4);
476
477                 if (!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, pDCTstat->Channel, ChipSel)) {
478                         print_debug_dqs("\t\t\t\tmct_RcvrRankEnabled_D CS not enabled ", ChipSel, 4);
479                         continue;
480                 }
481
482                 BanksPresent = 1;       /* flag for at least one bank is present */
483                 TestAddr = mct_GetMCTSysAddr_D(pMCTstat, pDCTstat, pDCTstat->Channel, ChipSel, &valid);
484                 if (!valid) {
485                         print_debug_dqs("\t\t\t\tAddress not supported on current CS ", TestAddr, 4);
486                         continue;
487                 }
488
489                 print_debug_dqs("\t\t\t\tTrainDQSPos: 12 TestAddr ", TestAddr, 4);
490                 SetUpperFSbase(TestAddr);       /* fs:eax=far ptr to target */
491
492                 if (pDCTstat->Direction == DQS_READDIR) {
493                         print_debug_dqs("\t\t\t\tTrainDQSPos: 13 for read ", 0, 4);
494                         WriteDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8);
495                 }
496
497                 for (DQSDelay = 0; DQSDelay < dqsDelay_end; DQSDelay++) {
498                         print_debug_dqs("\t\t\t\t\tTrainDQSPos: 141 DQSDelay ", DQSDelay, 5);
499                         if (MutualCSPassW[DQSDelay] == 0)
500                                 continue; //skip current delay value if other chipselects have failed all 8 bytelanes
501                         pDCTstat->DQSDelay = DQSDelay;
502                         mct_SetDQSDelayAllCSR_D(pMCTstat, pDCTstat, cs_start);
503                         print_debug_dqs("\t\t\t\t\tTrainDQSPos: 142 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);
504
505                         if (pDCTstat->Direction == DQS_WRITEDIR) {
506                                 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 143 for write", 0, 5);
507                                 WriteDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8);
508                         }
509
510                         print_debug_dqs("\t\t\t\t\tTrainDQSPos: 144 Pattern ", pDCTstat->Pattern, 5);
511                         ReadDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8);
512                         /* print_debug_dqs("\t\t\t\t\tTrainDQSPos: 145 MutualCSPassW ", MutualCSPassW[DQSDelay], 5); */
513                         tmp = CompareDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8); /* 0=fail, 1=pass */
514
515                         if (mct_checkFenceHoleAdjust_D(pMCTstat, pDCTstat, DQSDelay, ChipSel, &tmp)) {
516                                 goto skipLocMiddle;
517                         }
518
519                         MutualCSPassW[DQSDelay] &= tmp;
520                         print_debug_dqs("\t\t\t\t\tTrainDQSPos: 146 \tMutualCSPassW ", MutualCSPassW[DQSDelay], 5);
521
522                         SetTargetWTIO_D(TestAddr);
523                         FlushDQSTestPattern_D(pDCTstat, TestAddr << 8);
524                         ResetTargetWTIO_D();
525                 }
526
527         }
528
529         if (BanksPresent) {
530                 for (ByteLane = 0; ByteLane < 8; ByteLane++) {
531                         print_debug_dqs("\t\t\t\tTrainDQSPos: 31 ByteLane ",ByteLane, 4);
532                         pDCTstat->ByteLane = ByteLane;
533                         LastTest = DQS_FAIL;            /* Analyze the results */
534                         RnkDlySeqPassMin = 0;
535                         RnkDlySeqPassMax = 0;
536                         RnkDlyFilterMax = 0;
537                         RnkDlyFilterMin = 0;
538                         for (DQSDelay = 0; DQSDelay < dqsDelay_end; DQSDelay++) {
539                                 if (MutualCSPassW[DQSDelay] & (1 << ByteLane)) {
540                                         print_debug_dqs("\t\t\t\t\tTrainDQSPos: 321 DQSDelay ", DQSDelay, 5);
541                                         print_debug_dqs("\t\t\t\t\tTrainDQSPos: 322 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);
542
543                                         RnkDlySeqPassMax = DQSDelay;
544                                         if (LastTest == DQS_FAIL) {
545                                                 RnkDlySeqPassMin = DQSDelay; //start sequential run
546                                         }
547                                         if ((RnkDlySeqPassMax - RnkDlySeqPassMin)>(RnkDlyFilterMax-RnkDlyFilterMin)){
548                                                 RnkDlyFilterMin = RnkDlySeqPassMin;
549                                                 RnkDlyFilterMax = RnkDlySeqPassMax;
550                                         }
551                                         LastTest = DQS_PASS;
552                                 } else {
553                                         LastTest = DQS_FAIL;
554                                 }
555                         }
556                         print_debug_dqs("\t\t\t\tTrainDQSPos: 33 RnkDlySeqPassMax ", RnkDlySeqPassMax, 4);
557                         if (RnkDlySeqPassMax == 0) {
558                                 Errors |= 1 << SB_NODQSPOS; /* no passing window */
559                         } else {
560                                 print_debug_dqs_pair("\t\t\t\tTrainDQSPos: 34 RnkDlyFilter: ", RnkDlyFilterMin, " ",  RnkDlyFilterMax, 4);
561                                 if (((RnkDlyFilterMax - RnkDlyFilterMin) < MIN_DQS_WNDW)){
562                                         Errors |= 1 << SB_SMALLDQS;
563                                 } else {
564                                         u8 middle_dqs;
565                                         /* mctEngDQSwindow_Save_D Not required for arrays */
566                                         middle_dqs = MiddleDQS_D(RnkDlyFilterMin, RnkDlyFilterMax);
567                                         pDCTstat->DQSDelay = middle_dqs;
568                                         mct_SetDQSDelayCSR_D(pMCTstat, pDCTstat, cs_start);  /* load the register with the value */
569                                         StoreDQSDatStrucVal_D(pMCTstat, pDCTstat, cs_start); /* store the value into the data structure */
570                                         print_debug_dqs("\t\t\t\tTrainDQSPos: 42 middle_dqs : ",middle_dqs, 4);
571                                 }
572                         }
573                 }
574         }
575 skipLocMiddle:
576         pDCTstat->TrainErrors = Errors;
577
578         print_debug_dqs("\t\t\tTrainDQSPos: Errors ", Errors, 3);
579
580 }
581
582
583 void StoreDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat,
584                         struct DCTStatStruc *pDCTstat, u8 ChipSel)
585 {
586         /* Store the DQSDelay value, found during a training sweep, into the DCT
587          * status structure for this node
588          */
589
590
591         /* When 400, 533, 667, it will support dimm0/1/2/3,
592          * and set conf for dimm0, hw will copy to dimm1/2/3
593          * set for dimm1, hw will copy to dimm3
594          * Rev A/B only support DIMM0/1 when 800Mhz and above + 0x100 to next dimm
595          * Rev C support DIMM0/1/2/3 when 800Mhz and above  + 0x100 to next dimm
596          */
597
598         /* FindDQSDatDimmVal_D is not required since we use an array */
599         u8 dn = 0;
600
601         if (pDCTstat->Status & (1 << SB_Over400MHz))
602                 dn = ChipSel>>1; /* if odd or even logical DIMM */
603
604         pDCTstat->CH_D_DIR_B_DQS[pDCTstat->Channel][dn][pDCTstat->Direction][pDCTstat->ByteLane] =
605                                         pDCTstat->DQSDelay;
606 }
607
608
609 static void GetDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat,
610                                 struct DCTStatStruc *pDCTstat, u8 ChipSel)
611 {
612         u8 dn = 0;
613
614
615         /* When 400, 533, 667, it will support dimm0/1/2/3,
616          * and set conf for dimm0, hw will copy to dimm1/2/3
617          * set for dimm1, hw will copy to dimm3
618          * Rev A/B only support DIMM0/1 when 800Mhz and above + 0x100 to next dimm
619          * Rev C support DIMM0/1/2/3 when 800Mhz and above  + 0x100 to next dimm
620          */
621
622         /* FindDQSDatDimmVal_D is not required since we use an array */
623         if (pDCTstat->Status & (1<<SB_Over400MHz))
624                 dn = ChipSel >> 1; /*if odd or even logical DIMM */
625
626         pDCTstat->DQSDelay =
627                 pDCTstat->CH_D_DIR_B_DQS[pDCTstat->Channel][dn][pDCTstat->Direction][pDCTstat->ByteLane];
628 }
629
630
631 /* FindDQSDatDimmVal_D is not required since we use an array */
632
633
/*
 * Return the midpoint of the window [min, max], rounding the half-width up
 * when the width (max - min) is odd.
 *
 * The width is taken modulo 256 (u8 arithmetic), so callers are expected to
 * pass min <= max.  The rounding step is done in a wider type: incrementing
 * a u8 width of 255 would wrap to 0 and yield `min` instead of the midpoint.
 */
static u8 MiddleDQS_D(u8 min, u8 max)
{
	u8 size = max - min;
	/* (size + 1) / 2 equals size / 2 for even sizes and rounds up odd ones */
	unsigned int half = ((unsigned int)size + 1) >> 1;

	return (u8)(min + half);
}
642
643
644 static void TrainReadDQS_D(struct MCTStatStruc *pMCTstat,
645                                 struct DCTStatStruc *pDCTstat,
646                                 u8 cs_start)
647 {
648         print_debug_dqs("\t\tTrainReadPos ", 0, 2);
649         pDCTstat->Direction = DQS_READDIR;
650         TrainDQSPos_D(pMCTstat, pDCTstat, cs_start);
651 }
652
653
654 static void TrainWriteDQS_D(struct MCTStatStruc *pMCTstat,
655                                 struct DCTStatStruc *pDCTstat,
656                                 u8 cs_start)
657 {
658         pDCTstat->Direction = DQS_WRITEDIR;
659         print_debug_dqs("\t\tTrainWritePos", 0, 2);
660         TrainDQSPos_D(pMCTstat, pDCTstat, cs_start);
661 }
662
663
664 static void proc_IOCLFLUSH_D(u32 addr_hi)
665 {
666         SetTargetWTIO_D(addr_hi);
667         proc_CLFLUSH(addr_hi);
668         ResetTargetWTIO_D();
669 }
670
671
672 static u8 ChipSelPresent_D(struct MCTStatStruc *pMCTstat,
673                                 struct DCTStatStruc *pDCTstat,
674                                 u8 Channel, u8 ChipSel)
675 {
676         u32 val;
677         u32 reg;
678         u32 dev = pDCTstat->dev_dct;
679         u32 reg_off;
680         u8 ret = 0;
681
682         if (!pDCTstat->GangedMode) {
683                 reg_off = 0x100 * Channel;
684         } else {
685                 reg_off = 0;
686         }
687
688         if (ChipSel < MAX_CS_SUPPORTED){
689                 reg = 0x40 + (ChipSel << 2) + reg_off;
690                 val = Get_NB32(dev, reg);
691                 if (val & ( 1 << 0))
692                         ret = 1;
693         }
694
695         return ret;
696 }
697
698
699 /* proc_CLFLUSH_D located in mct_gcc.h */
700
701
702 static void WriteDQSTestPattern_D(struct MCTStatStruc *pMCTstat,
703                                         struct DCTStatStruc *pDCTstat,
704                                         u32 TestAddr_lo)
705 {
706         /* Write a pattern of 72 bit times (per DQ), to test dram functionality.
707          * The pattern is a stress pattern which exercises both ISI and
708          * crosstalk.  The number of cache lines to fill is dependent on DCT
709          * width mode and burstlength.
710          * Mode BL  Lines Pattern no.
711          * ----+---+-------------------
712          * 64   4         9     0
713          * 64   8         9     0
714          * 64M  4         9     0
715          * 64M  8         9     0
716          * 128  4         18    1
717          * 128  8         N/A   -
718          */
719
720         if (pDCTstat->Pattern == 0)
721                 WriteL9TestPattern_D(pDCTstat, TestAddr_lo);
722         else
723                 WriteL18TestPattern_D(pDCTstat, TestAddr_lo);
724 }
725
726
727 static void WriteL18TestPattern_D(struct DCTStatStruc *pDCTstat,
728                                         u32 TestAddr_lo)
729 {
730         u8 *buf;
731
732         buf = (u8 *)pDCTstat->PtrPatternBufA;
733         WriteLNTestPattern(TestAddr_lo, buf, 18);
734
735 }
736
737
738 static void WriteL9TestPattern_D(struct DCTStatStruc *pDCTstat,
739                                         u32 TestAddr_lo)
740 {
741         u8 *buf;
742
743         buf = (u8 *)pDCTstat->PtrPatternBufA;
744         WriteLNTestPattern(TestAddr_lo, buf, 9);
745 }
746
747
748
749 static u8 CompareDQSTestPattern_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u32 addr_lo)
750 {
751         /* Compare a pattern of 72 bit times (per DQ), to test dram functionality.
752          * The pattern is a stress pattern which exercises both ISI and
753          * crosstalk.  The number of cache lines to fill is dependent on DCT
754          * width mode and burstlength.
755          * Mode BL  Lines Pattern no.
756          * ----+---+-------------------
757          * 64   4         9     0
758          * 64   8         9     0
759          * 64M  4         9     0
760          * 64M  8         9     0
761          * 128  4         18    1
762          * 128  8         N/A   -
763          */
764
765         u32 *test_buf;
766         u8 bitmap;
767         u8 bytelane;
768         u8 i;
769         u32 value;
770         u8 j;
771         u32 value_test;
772         u8 pattern, channel;
773
774         pattern = pDCTstat->Pattern;
775         channel = pDCTstat->Channel;
776         test_buf = (u32 *)pDCTstat->PtrPatternBufA;
777
778         if (pattern && channel) {
779                 addr_lo += 8; //second channel
780                 test_buf += 2;
781         }
782
783         bytelane = 0;           /* bytelane counter */
784         bitmap = 0xFF;          /* bytelane test bitmap, 1=pass */
785         for (i=0; i < (9 * 64 / 4); i++) { /* sizeof testpattern. /4 due to next loop */
786                 value = read32_fs(addr_lo);
787                 value_test = *test_buf;
788
789                 print_debug_dqs_pair("\t\t\t\t\t\ttest_buf = ", (u32)test_buf, " value = ", value_test, 7);
790                 print_debug_dqs_pair("\t\t\t\t\t\ttaddr_lo = ", addr_lo, " value = ", value, 7);
791
792                 for (j = 0; j < (4 * 8); j += 8) { /* go through a 32bit data, on 1 byte step. */
793                         if (((value >> j) & 0xff) != ((value_test >> j) & 0xff)) {
794                                 bitmap &= ~(1 << bytelane);
795                         }
796
797                         bytelane++;
798                         bytelane &= 0x7;
799                 }
800
801                 print_debug_dqs("\t\t\t\t\t\tbitmap = ", bitmap, 7);
802
803                 if (!bitmap)
804                         break;
805
806                 if (bytelane == 0){
807                         if (pattern == 1) { //dual channel
808                                 addr_lo += 8; //skip over other channel's data
809                                 test_buf += 2;
810                         }
811                 }
812                 addr_lo += 4;
813                 test_buf += 1;
814         }
815
816         return bitmap;
817 }
818
819
820 static void FlushDQSTestPattern_D(struct DCTStatStruc *pDCTstat,
821                                         u32 addr_lo)
822 {
823         /* Flush functions in mct_gcc.h */
824         if (pDCTstat->Pattern == 0){
825                 FlushDQSTestPattern_L9(addr_lo);
826         } else {
827                 FlushDQSTestPattern_L18(addr_lo);
828         }
829 }
830
831 static void SetTargetWTIO_D(u32 TestAddr)
832 {
833         u32 lo, hi;
834         hi = TestAddr >> 24;
835         lo = TestAddr << 8;
836         _WRMSR(0xC0010016, lo, hi);             /* IORR0 Base */
837         hi = 0xFF;
838         lo = 0xFC000800;                        /* 64MB Mask */
839         _WRMSR(0xC0010017, lo, hi);             /* IORR0 Mask */
840 }
841
842
/*
 * Clear the IORR0 mask MSR (0xc0010017) by writing zero to both halves.
 */
static void ResetTargetWTIO_D(void)
{
	_WRMSR(0xc0010017, 0, 0);	/* IORR0 Mask */
}
851
852
853 static void ReadDQSTestPattern_D(struct MCTStatStruc *pMCTstat,
854                                 struct DCTStatStruc *pDCTstat,
855                                 u32 TestAddr_lo)
856 {
857         /* Read a pattern of 72 bit times (per DQ), to test dram functionality.
858          * The pattern is a stress pattern which exercises both ISI and
859          * crosstalk.  The number of cache lines to fill is dependent on DCT
860          * width mode and burstlength.
861          * Mode BL  Lines Pattern no.
862          * ----+---+-------------------
863          * 64   4         9     0
864          * 64   8         9     0
865          * 64M  4         9     0
866          * 64M  8         9     0
867          * 128  4         18    1
868          * 128  8         N/A   -
869          */
870         if (pDCTstat->Pattern == 0)
871                 ReadL9TestPattern(TestAddr_lo);
872         else
873                 ReadL18TestPattern(TestAddr_lo);
874         _MFENCE;
875 }
876
877
878 u32 SetUpperFSbase(u32 addr_hi)
879 {
880         /* Set the upper 32-bits of the Base address, 4GB aligned) for the
881          * FS selector.
882          */
883
884         u32 lo, hi;
885         u32 addr;
886         lo = 0;
887         hi = addr_hi>>24;
888         addr = FS_Base;
889         _WRMSR(addr, lo, hi);
890         return addr_hi << 8;
891 }
892
893
894 void ResetDCTWrPtr_D(u32 dev, u32 index_reg, u32 index)
895 {
896         u32 val;
897
898         val = Get_NB32_index_wait(dev, index_reg, index);
899         Set_NB32_index_wait(dev, index_reg, index, val);
900 }
901
902
903 /* mctEngDQSwindow_Save_D not required with arrays */
904
905
906 void mct_TrainDQSPos_D(struct MCTStatStruc *pMCTstat,
907                         struct DCTStatStruc *pDCTstatA)
908 {
909         u8 Node;
910         u8 ChipSel;
911         struct DCTStatStruc *pDCTstat;
912
913         for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
914                 pDCTstat = pDCTstatA + Node;
915                 if (pDCTstat->DCTSysLimit) {
916                         /* when DCT speed >= 400MHz, we only support 2 DIMMs
917                          * and we have two sets registers for DIMM0 and DIMM1 so
918                          * here we must traning DQSRd/WrPos for DIMM0 and DIMM1
919                          */
920                         if (pDCTstat->Speed >= 4) {
921                                 pDCTstat->Status |= (1 << SB_Over400MHz);
922                         }
923                         for (ChipSel = 0; ChipSel < MAX_CS_SUPPORTED; ChipSel += 2) {
924                                 TrainDQSRdWrPos_D(pMCTstat, pDCTstat, ChipSel);
925                                 SetEccDQSRdWrPos_D(pMCTstat, pDCTstat, ChipSel);
926                         }
927                 }
928         }
929 }
930
931
932 /* mct_BeforeTrainDQSRdWrPos_D
933  * Function is inline.
934  */
935
936 u8 mct_DisableDimmEccEn_D(struct MCTStatStruc *pMCTstat,
937                                 struct DCTStatStruc *pDCTstat)
938 {
939         u8 _DisableDramECC = 0;
940         u32 val;
941         u32 reg;
942         u32 dev;
943
944         /*Disable ECC correction of reads on the dram bus. */
945
946         dev = pDCTstat->dev_dct;
947         reg = 0x90;
948         val = Get_NB32(dev, reg);
949         if (val & (1<<DimmEcEn)) {
950                 _DisableDramECC |= 0x01;
951                 val &= ~(1<<DimmEcEn);
952                 Set_NB32(dev, reg, val);
953         }
954         if (!pDCTstat->GangedMode) {
955                 reg = 0x190;
956                 val = Get_NB32(dev, reg);
957                 if (val & (1<<DimmEcEn)) {
958                         _DisableDramECC |= 0x02;
959                         val &= ~(1<<DimmEcEn);
960                         Set_NB32(dev, reg, val);
961                 }
962         }
963         return _DisableDramECC;
964 }
965
966
967
968 void mct_EnableDimmEccEn_D(struct MCTStatStruc *pMCTstat,
969                                 struct DCTStatStruc *pDCTstat, u8 _DisableDramECC)
970 {
971
972         u32 val;
973         u32 reg;
974         u32 dev;
975
976         /* Enable ECC correction if it was previously disabled */
977
978         dev = pDCTstat->dev_dct;
979
980         if ((_DisableDramECC & 0x01) == 0x01) {
981                 reg = 0x90;
982                 val = Get_NB32(dev, reg);
983                 val |= (1<<DimmEcEn);
984                 Set_NB32(dev, reg, val);
985         }
986         if ((_DisableDramECC & 0x02) == 0x02) {
987                 reg = 0x190;
988                 val = Get_NB32(dev, reg);
989                 val |= (1<<DimmEcEn);
990                 Set_NB32(dev, reg, val);
991         }
992 }
993
994
995 static void mct_SetDQSDelayCSR_D(struct MCTStatStruc *pMCTstat,
996                                         struct DCTStatStruc *pDCTstat, u8 ChipSel)
997 {
998         u8 ByteLane;
999         u32 val;
1000         u32 index_reg = 0x98 + 0x100 * pDCTstat->Channel;
1001         u8 shift;
1002         u32 dqs_delay = (u32)pDCTstat->DQSDelay;
1003         u32 dev = pDCTstat->dev_dct;
1004         u32 index;
1005
1006         ByteLane = pDCTstat->ByteLane;
1007
1008         /* Channel is offset */
1009         if (ByteLane < 4) {
1010                 index = 1;
1011         } else if (ByteLane <8) {
1012                 index = 2;
1013         } else {
1014                 index = 3;
1015         }
1016
1017         if (pDCTstat->Direction == DQS_READDIR) {
1018                 index += 4;
1019         }
1020
1021         /* get the proper register index */
1022         shift = ByteLane % 4;
1023         shift <<= 3; /* get bit position of bytelane, 8 bit */
1024
1025         if (pDCTstat->Status & (1 << SB_Over400MHz)) {
1026                 index += (ChipSel >> 1) * 0x100;        /* if logical DIMM1/DIMM3 */
1027         }
1028
1029         val = Get_NB32_index_wait(dev, index_reg, index);
1030         val &= ~(0x7f << shift);
1031         val |= (dqs_delay << shift);
1032         Set_NB32_index_wait(dev, index_reg, index, val);
1033 }
1034
1035
1036 static void mct_SetDQSDelayAllCSR_D(struct MCTStatStruc *pMCTstat,
1037                                         struct DCTStatStruc *pDCTstat,
1038                                         u8 cs_start)
1039 {
1040         u8 ByteLane;
1041         u8 ChipSel = cs_start;
1042
1043
1044         for (ChipSel = cs_start; ChipSel < (cs_start + 2); ChipSel++) {
1045                 if ( mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, pDCTstat->Channel, ChipSel)) {
1046                         for (ByteLane = 0; ByteLane < 8; ByteLane++) {
1047                                 pDCTstat->ByteLane = ByteLane;
1048                                 mct_SetDQSDelayCSR_D(pMCTstat, pDCTstat, ChipSel);
1049                         }
1050                 }
1051         }
1052 }
1053
1054
1055 u8 mct_RcvrRankEnabled_D(struct MCTStatStruc *pMCTstat,
1056                                 struct DCTStatStruc *pDCTstat,
1057                                 u8 Channel, u8 ChipSel)
1058 {
1059         u8 ret;
1060
1061         ret = ChipSelPresent_D(pMCTstat, pDCTstat, Channel, ChipSel);
1062         return ret;
1063 }
1064
1065
1066 u32 mct_GetRcvrSysAddr_D(struct MCTStatStruc *pMCTstat,
1067                                 struct DCTStatStruc *pDCTstat,
1068                                 u8 channel, u8 receiver, u8 *valid)
1069 {
1070         return mct_GetMCTSysAddr_D(pMCTstat, pDCTstat, channel, receiver, valid);
1071 }
1072
1073
1074 u32 mct_GetMCTSysAddr_D(struct MCTStatStruc *pMCTstat,
1075                                 struct DCTStatStruc *pDCTstat,
1076                                 u8 Channel, u8 receiver, u8 *valid)
1077 {
1078         u32 val;
1079         u32 reg_off = 0;
1080         u32 reg;
1081         u32 dword;
1082         u32 dev = pDCTstat->dev_dct;
1083
1084         *valid = 0;
1085
1086
1087         if (!pDCTstat->GangedMode) {
1088                 reg_off = 0x100 * Channel;
1089         }
1090
1091         /* get the local base addr of the chipselect */
1092         reg = 0x40 + (receiver << 2) + reg_off;
1093         val = Get_NB32(dev, reg);
1094
1095         val &= ~0x0F;
1096
1097         /* unganged mode DCT0+DCT1, sys addr of DCT1=node
1098          * base+DctSelBaseAddr+local ca base*/
1099         if ((Channel) && (pDCTstat->GangedMode == 0) && ( pDCTstat->DIMMValidDCT[0] > 0)) {
1100                 reg = 0x110;
1101                 dword = Get_NB32(dev, reg);
1102                 dword &= 0xfffff800;
1103                 dword <<= 8;    /* scale [47:27] of F2x110[31:11] to [39:8]*/
1104                 val += dword;
1105
1106                 /* if DCTSelBaseAddr < Hole, and eax > HoleBase, then add Hole size to test address */
1107                 if ((val >= pDCTstat->DCTHoleBase) && (pDCTstat->DCTHoleBase > dword)) {
1108                         dword = (~(pDCTstat->DCTHoleBase >> (24 - 8)) + 1) & 0xFF;
1109                         dword <<= (24 - 8);
1110                         val += dword;
1111                 }
1112         } else {
1113                 /* sys addr=node base+local cs base */
1114                 val += pDCTstat->DCTSysBase;
1115
1116                 /* New stuff */
1117                 if (pDCTstat->DCTHoleBase && (val >= pDCTstat->DCTHoleBase)) {
1118                         val -= pDCTstat->DCTSysBase;
1119                         dword = Get_NB32(pDCTstat->dev_map, 0xF0); /* get Hole Offset */
1120                         val += (dword & 0x0000ff00) << (24-8-8);
1121                 }
1122         }
1123
1124         /* New stuff */
1125         val += ((1 << 21) >> 8);        /* Add 2MB offset to avoid compat area */
1126         if (val >= MCT_TRNG_KEEPOUT_START) {
1127                 while(val < MCT_TRNG_KEEPOUT_END)
1128                         val += (1 << (15-8));   /* add 32K */
1129         }
1130
1131         /* Add a node seed */
1132         val += (((1 * pDCTstat->Node_ID) << 20) >> 8);  /* Add 1MB per node to avoid aliases */
1133
1134         /* HW remap disabled? */
1135         if (!(pDCTstat->Status & (1 << SB_HWHole))) {
1136                 if (!(pDCTstat->Status & (1 << SB_SWNodeHole))) {
1137                         /* SW memhole disabled */
1138                         u32 lo, hi;
1139                         _RDMSR(TOP_MEM, &lo, &hi);
1140                         lo >>= 8;
1141                         if ((val >= lo) && (val < _4GB_RJ8)) {
1142                                 val = 0;
1143                                 *valid = 0;
1144                                 goto exitGetAddr;
1145                         } else {
1146                                 *valid = 1;
1147                                 goto exitGetAddrWNoError;
1148                         }
1149                 } else {
1150                         *valid = 1;
1151                         goto exitGetAddrWNoError;
1152                 }
1153         } else {
1154                 *valid = 1;
1155                 goto exitGetAddrWNoError;
1156         }
1157
1158 exitGetAddrWNoError:
1159
1160         /* Skip if Address is in UMA region */
1161         dword = pMCTstat->Sub4GCacheTop;
1162         dword >>= 8;
1163         if (dword != 0) {
1164                 if ((val >= dword) && (val < _4GB_RJ8)) {
1165                         val = 0;
1166                         *valid = 0;
1167                 } else {
1168                         *valid = 1;
1169                 }
1170         }
1171         print_debug_dqs("mct_GetMCTSysAddr_D: receiver ", receiver, 2);
1172         print_debug_dqs("mct_GetMCTSysAddr_D: Channel ", Channel, 2);
1173         print_debug_dqs("mct_GetMCTSysAddr_D: base_addr ", val, 2);
1174         print_debug_dqs("mct_GetMCTSysAddr_D: valid ", *valid, 2);
1175         print_debug_dqs("mct_GetMCTSysAddr_D: status ", pDCTstat->Status, 2);
1176         print_debug_dqs("mct_GetMCTSysAddr_D: HoleBase ", pDCTstat->DCTHoleBase, 2);
1177         print_debug_dqs("mct_GetMCTSysAddr_D: Cachetop ", pMCTstat->Sub4GCacheTop, 2);
1178
1179 exitGetAddr:
1180         return val;
1181 }
1182
1183
1184 static void mct_Write1LTestPattern_D(struct MCTStatStruc *pMCTstat,
1185                                 struct DCTStatStruc *pDCTstat,
1186                                 u32 TestAddr, u8 pattern)
1187 {
1188
1189         u8 *buf;
1190
1191         /* Issue the stream of writes. When F2x11C[MctWrLimit] is reached
1192          * (or when F2x11C[FlushWr] is set again), all the writes are written
1193          * to DRAM.
1194          */
1195
1196         SetUpperFSbase(TestAddr);
1197
1198         if (pattern)
1199                 buf = (u8 *)pDCTstat->PtrPatternBufB;
1200         else
1201                 buf = (u8 *)pDCTstat->PtrPatternBufA;
1202
1203         WriteLNTestPattern(TestAddr << 8, buf, 1);
1204 }
1205
1206
1207 void mct_Read1LTestPattern_D(struct MCTStatStruc *pMCTstat,
1208                                 struct DCTStatStruc *pDCTstat, u32 addr)
1209 {
1210         u32 value;
1211
1212         /* BIOS issues the remaining (Ntrain - 2) reads after checking that
1213          * F2x11C[PrefDramTrainMode] is cleared. These reads must be to
1214          * consecutive cache lines (i.e., 64 bytes apart) and must not cross
1215          * a naturally aligned 4KB boundary. These reads hit the prefetches and
1216          * read the data from the prefetch buffer.
1217          */
1218
1219         /* get data from DIMM */
1220         SetUpperFSbase(addr);
1221
1222         /* 1st move causes read fill (to exclusive or shared)*/
1223         value = read32_fs(addr << 8);
1224 }