Please bear with me - another rename checkin. This qualifies as trivial, no
[coreboot.git] / src / northbridge / amd / amdmct / mct / mctdqs_d.c
1 /*
2  * This file is part of the coreboot project.
3  *
4  * Copyright (C) 2007 Advanced Micro Devices, Inc.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; version 2 of the License.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
18  */
19
20
21 static void CalcEccDQSPos_D(struct MCTStatStruc *pMCTstat,
22                                 struct DCTStatStruc *pDCTstat, u16 like,
23                                 u8 scale, u8 ChipSel);
24 static void GetDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat,
25                                 struct DCTStatStruc *pDCTstat, u8 ChipSel);
26 static u8 MiddleDQS_D(u8 min, u8 max);
27 static void TrainReadDQS_D(struct MCTStatStruc *pMCTstat,
28                                 struct DCTStatStruc *pDCTstat,
29                                 u8 cs_start);
30 static void TrainWriteDQS_D(struct MCTStatStruc *pMCTstat,
31                                 struct DCTStatStruc *pDCTstat,
32                                 u8 cs_start);
33 static void WriteDQSTestPattern_D(struct MCTStatStruc *pMCTstat,
34                                         struct DCTStatStruc *pDCTstat,
35                                         u32 TestAddr_lo);
36 static void WriteL18TestPattern_D(struct DCTStatStruc *pDCTstat,
37                                         u32 TestAddr_lo);
38 static void WriteL9TestPattern_D(struct DCTStatStruc *pDCTstat,
39                                         u32 TestAddr_lo);
40 static u8 CompareDQSTestPattern_D(struct MCTStatStruc *pMCTstat,
41                                         struct DCTStatStruc *pDCTstat,
42                                         u32 addr_lo);
43 static void FlushDQSTestPattern_D(struct DCTStatStruc *pDCTstat,
44                                         u32 addr_lo);
45 static void SetTargetWTIO_D(u32 TestAddr);
46 static void ResetTargetWTIO_D(void);
47 static void ReadDQSTestPattern_D(struct MCTStatStruc *pMCTstat,
48                                         struct DCTStatStruc *pDCTstat,
49                                         u32 TestAddr_lo);
50 void ResetDCTWrPtr_D(u32 dev, u32 index_reg, u32 index);
51 u8 mct_DisableDimmEccEn_D(struct MCTStatStruc *pMCTstat,
52                                 struct DCTStatStruc *pDCTstat);
53 static void mct_SetDQSDelayCSR_D(struct MCTStatStruc *pMCTstat,
54                                         struct DCTStatStruc *pDCTstat,
55                                         u8 ChipSel);
56 static void mct_SetDQSDelayAllCSR_D(struct MCTStatStruc *pMCTstat,
57                                         struct DCTStatStruc *pDCTstat,
58                                         u8 cs_start);
59 u32 mct_GetMCTSysAddr_D(struct MCTStatStruc *pMCTstat,
60                                 struct DCTStatStruc *pDCTstat, u8 Channel,
61                                 u8 receiver, u8 *valid);
62 static void SetupDqsPattern_D(struct MCTStatStruc *pMCTstat,
63                                 struct DCTStatStruc *pDCTstat,
64                                 u32 *buffer);
65
/* Verbosity of the DQS training trace; 0 compiles all trace output away. */
#define DQS_TRAIN_DEBUG 0

/*
 * Emit "<str><val in hex>\n" through printk_debug() when tracing is compiled
 * in and DQS_TRAIN_DEBUG is at least 'level'.  Does nothing otherwise.
 */
static inline void print_debug_dqs(const char *str, u32 val, u8 level)
{
#if DQS_TRAIN_DEBUG > 0
	if (level > DQS_TRAIN_DEBUG)
		return;

	printk_debug("%s%x\n", str, val);
#endif
}
76
/*
 * Emit "<str><val><str2><val2>\n" (both values as 8-digit hex) through
 * printk_debug() when tracing is compiled in and DQS_TRAIN_DEBUG is at
 * least 'level'.  Does nothing otherwise.
 */
static inline void print_debug_dqs_pair(const char *str, u32 val, const char *str2, u32 val2, u8 level)
{
#if DQS_TRAIN_DEBUG > 0
	if (level > DQS_TRAIN_DEBUG)
		return;

	printk_debug("%s%08x%s%08x\n", str, val, str2, val2);
#endif
}
85
/*
 * Warning: the test pattern tables must be located so they do not cross a
 * logical 16-bit segment boundary!
 *
 * TestPatternJD1a_D: 9 groups of 16 dwords (8 quadwords each), i.e. 144
 * dwords.  Each row below holds two consecutive quadwords (QWn, QWn+1).
 * SetupDqsPattern_D() copies this table when pattern type 0 is selected.
 */
static const u32 TestPatternJD1a_D[] = {
	/* ALL-EVEN */
	0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF,	/* QW0-1 */
	0x00000000, 0x00000000, 0x00000000, 0x00000000,	/* QW2-3 */
	0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF,	/* QW4-5 */
	0x00000000, 0x00000000, 0x00000000, 0x00000000,	/* QW6-7 */
	/* DQ0-ODD */
	0xFEFEFEFE, 0xFEFEFEFE, 0x01010101, 0x01010101,	/* QW0-1 */
	0xFEFEFEFE, 0xFEFEFEFE, 0x01010101, 0x01010101,	/* QW2-3 */
	0x01010101, 0x01010101, 0xFEFEFEFE, 0xFEFEFEFE,	/* QW4-5 */
	0xFEFEFEFE, 0xFEFEFEFE, 0x01010101, 0x01010101,	/* QW6-7 */
	/* DQ1-ODD */
	0x02020202, 0x02020202, 0x02020202, 0x02020202,	/* QW0-1 */
	0xFDFDFDFD, 0xFDFDFDFD, 0xFDFDFDFD, 0xFDFDFDFD,	/* QW2-3 */
	0xFDFDFDFD, 0xFDFDFDFD, 0x02020202, 0x02020202,	/* QW4-5 */
	0x02020202, 0x02020202, 0x02020202, 0x02020202,	/* QW6-7 */
	/* DQ2-ODD */
	0x04040404, 0x04040404, 0xFBFBFBFB, 0xFBFBFBFB,	/* QW0-1 */
	0x04040404, 0x04040404, 0x04040404, 0x04040404,	/* QW2-3 */
	0xFBFBFBFB, 0xFBFBFBFB, 0xFBFBFBFB, 0xFBFBFBFB,	/* QW4-5 */
	0xFBFBFBFB, 0xFBFBFBFB, 0xFBFBFBFB, 0xFBFBFBFB,	/* QW6-7 */
	/* DQ3-ODD */
	0x08080808, 0x08080808, 0xF7F7F7F7, 0xF7F7F7F7,	/* QW0-1 */
	0x08080808, 0x08080808, 0x08080808, 0x08080808,	/* QW2-3 */
	0xF7F7F7F7, 0xF7F7F7F7, 0x08080808, 0x08080808,	/* QW4-5 */
	0xF7F7F7F7, 0xF7F7F7F7, 0xF7F7F7F7, 0xF7F7F7F7,	/* QW6-7 */
	/* DQ4-ODD */
	0x10101010, 0x10101010, 0x10101010, 0x10101010,	/* QW0-1 */
	0xEFEFEFEF, 0xEFEFEFEF, 0x10101010, 0x10101010,	/* QW2-3 */
	0xEFEFEFEF, 0xEFEFEFEF, 0xEFEFEFEF, 0xEFEFEFEF,	/* QW4-5 */
	0xEFEFEFEF, 0xEFEFEFEF, 0x10101010, 0x10101010,	/* QW6-7 */
	/* DQ5-ODD */
	0xDFDFDFDF, 0xDFDFDFDF, 0xDFDFDFDF, 0xDFDFDFDF,	/* QW0-1 */
	0xDFDFDFDF, 0xDFDFDFDF, 0x20202020, 0x20202020,	/* QW2-3 */
	0xDFDFDFDF, 0xDFDFDFDF, 0xDFDFDFDF, 0xDFDFDFDF,	/* QW4-5 */
	0xDFDFDFDF, 0xDFDFDFDF, 0xDFDFDFDF, 0xDFDFDFDF,	/* QW6-7 */
	/* DQ6-ODD */
	0xBFBFBFBF, 0xBFBFBFBF, 0xBFBFBFBF, 0xBFBFBFBF,	/* QW0-1 */
	0x40404040, 0x40404040, 0xBFBFBFBF, 0xBFBFBFBF,	/* QW2-3 */
	0x40404040, 0x40404040, 0xBFBFBFBF, 0xBFBFBFBF,	/* QW4-5 */
	0x40404040, 0x40404040, 0xBFBFBFBF, 0xBFBFBFBF,	/* QW6-7 */
	/* DQ7-ODD */
	0x80808080, 0x80808080, 0x7F7F7F7F, 0x7F7F7F7F,	/* QW0-1 */
	0x80808080, 0x80808080, 0x7F7F7F7F, 0x7F7F7F7F,	/* QW2-3 */
	0x80808080, 0x80808080, 0x7F7F7F7F, 0x7F7F7F7F,	/* QW4-5 */
	0x80808080, 0x80808080, 0x80808080, 0x80808080	/* QW6-7 */
};
/*
 * TestPatternJD1b_D: 18 groups of 16 dwords, i.e. 288 dwords.  Each row
 * below is one quadword value repeated across four dwords (labelled
 * "CHA-B" in the original table).  SetupDqsPattern_D() copies this table
 * when pattern type 1 (128-bit mode) is selected.
 */
static const u32 TestPatternJD1b_D[] = {
	/* CHA-B, ALL-EVEN */
	0x00000000, 0x00000000, 0x00000000, 0x00000000,	/* QW0 */
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,	/* QW1 */
	0x00000000, 0x00000000, 0x00000000, 0x00000000,	/* QW2 */
	0x00000000, 0x00000000, 0x00000000, 0x00000000,	/* QW3 */
	0x00000000, 0x00000000, 0x00000000, 0x00000000,	/* QW4 */
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,	/* QW5 */
	0x00000000, 0x00000000, 0x00000000, 0x00000000,	/* QW6 */
	0x00000000, 0x00000000, 0x00000000, 0x00000000,	/* QW7 */
	/* CHA-B, DQ0-ODD */
	0xFEFEFEFE, 0xFEFEFEFE, 0xFEFEFEFE, 0xFEFEFEFE,	/* QW0 */
	0x01010101, 0x01010101, 0x01010101, 0x01010101,	/* QW1 */
	0xFEFEFEFE, 0xFEFEFEFE, 0xFEFEFEFE, 0xFEFEFEFE,	/* QW2 */
	0x01010101, 0x01010101, 0x01010101, 0x01010101,	/* QW3 */
	0x01010101, 0x01010101, 0x01010101, 0x01010101,	/* QW4 */
	0xFEFEFEFE, 0xFEFEFEFE, 0xFEFEFEFE, 0xFEFEFEFE,	/* QW5 */
	0xFEFEFEFE, 0xFEFEFEFE, 0xFEFEFEFE, 0xFEFEFEFE,	/* QW6 */
	0x01010101, 0x01010101, 0x01010101, 0x01010101,	/* QW7 */
	/* CHA-B, DQ1-ODD */
	0x02020202, 0x02020202, 0x02020202, 0x02020202,	/* QW0 */
	0x02020202, 0x02020202, 0x02020202, 0x02020202,	/* QW1 */
	0xFDFDFDFD, 0xFDFDFDFD, 0xFDFDFDFD, 0xFDFDFDFD,	/* QW2 */
	0xFDFDFDFD, 0xFDFDFDFD, 0xFDFDFDFD, 0xFDFDFDFD,	/* QW3 */
	0xFDFDFDFD, 0xFDFDFDFD, 0xFDFDFDFD, 0xFDFDFDFD,	/* QW4 */
	0x02020202, 0x02020202, 0x02020202, 0x02020202,	/* QW5 */
	0x02020202, 0x02020202, 0x02020202, 0x02020202,	/* QW6 */
	0x02020202, 0x02020202, 0x02020202, 0x02020202,	/* QW7 */
	/* CHA-B, DQ2-ODD */
	0x04040404, 0x04040404, 0x04040404, 0x04040404,	/* QW0 */
	0xFBFBFBFB, 0xFBFBFBFB, 0xFBFBFBFB, 0xFBFBFBFB,	/* QW1 */
	0x04040404, 0x04040404, 0x04040404, 0x04040404,	/* QW2 */
	0x04040404, 0x04040404, 0x04040404, 0x04040404,	/* QW3 */
	0xFBFBFBFB, 0xFBFBFBFB, 0xFBFBFBFB, 0xFBFBFBFB,	/* QW4 */
	0xFBFBFBFB, 0xFBFBFBFB, 0xFBFBFBFB, 0xFBFBFBFB,	/* QW5 */
	0xFBFBFBFB, 0xFBFBFBFB, 0xFBFBFBFB, 0xFBFBFBFB,	/* QW6 */
	0xFBFBFBFB, 0xFBFBFBFB, 0xFBFBFBFB, 0xFBFBFBFB,	/* QW7 */
	/* CHA-B, DQ3-ODD */
	0x08080808, 0x08080808, 0x08080808, 0x08080808,	/* QW0 */
	0xF7F7F7F7, 0xF7F7F7F7, 0xF7F7F7F7, 0xF7F7F7F7,	/* QW1 */
	0x08080808, 0x08080808, 0x08080808, 0x08080808,	/* QW2 */
	0x08080808, 0x08080808, 0x08080808, 0x08080808,	/* QW3 */
	0xF7F7F7F7, 0xF7F7F7F7, 0xF7F7F7F7, 0xF7F7F7F7,	/* QW4 */
	0x08080808, 0x08080808, 0x08080808, 0x08080808,	/* QW5 */
	0xF7F7F7F7, 0xF7F7F7F7, 0xF7F7F7F7, 0xF7F7F7F7,	/* QW6 */
	0xF7F7F7F7, 0xF7F7F7F7, 0xF7F7F7F7, 0xF7F7F7F7,	/* QW7 */
	/* CHA-B, DQ4-ODD */
	0x10101010, 0x10101010, 0x10101010, 0x10101010,	/* QW0 */
	0x10101010, 0x10101010, 0x10101010, 0x10101010,	/* QW1 */
	0xEFEFEFEF, 0xEFEFEFEF, 0xEFEFEFEF, 0xEFEFEFEF,	/* QW2 */
	0x10101010, 0x10101010, 0x10101010, 0x10101010,	/* QW3 */
	0xEFEFEFEF, 0xEFEFEFEF, 0xEFEFEFEF, 0xEFEFEFEF,	/* QW4 */
	0xEFEFEFEF, 0xEFEFEFEF, 0xEFEFEFEF, 0xEFEFEFEF,	/* QW5 */
	0xEFEFEFEF, 0xEFEFEFEF, 0xEFEFEFEF, 0xEFEFEFEF,	/* QW6 */
	0x10101010, 0x10101010, 0x10101010, 0x10101010,	/* QW7 */
	/* CHA-B, DQ5-ODD */
	0xDFDFDFDF, 0xDFDFDFDF, 0xDFDFDFDF, 0xDFDFDFDF,	/* QW0 */
	0xDFDFDFDF, 0xDFDFDFDF, 0xDFDFDFDF, 0xDFDFDFDF,	/* QW1 */
	0xDFDFDFDF, 0xDFDFDFDF, 0xDFDFDFDF, 0xDFDFDFDF,	/* QW2 */
	0x20202020, 0x20202020, 0x20202020, 0x20202020,	/* QW3 */
	0xDFDFDFDF, 0xDFDFDFDF, 0xDFDFDFDF, 0xDFDFDFDF,	/* QW4 */
	0xDFDFDFDF, 0xDFDFDFDF, 0xDFDFDFDF, 0xDFDFDFDF,	/* QW5 */
	0xDFDFDFDF, 0xDFDFDFDF, 0xDFDFDFDF, 0xDFDFDFDF,	/* QW6 */
	0xDFDFDFDF, 0xDFDFDFDF, 0xDFDFDFDF, 0xDFDFDFDF,	/* QW7 */
	/* CHA-B, DQ6-ODD */
	0xBFBFBFBF, 0xBFBFBFBF, 0xBFBFBFBF, 0xBFBFBFBF,	/* QW0 */
	0xBFBFBFBF, 0xBFBFBFBF, 0xBFBFBFBF, 0xBFBFBFBF,	/* QW1 */
	0x40404040, 0x40404040, 0x40404040, 0x40404040,	/* QW2 */
	0xBFBFBFBF, 0xBFBFBFBF, 0xBFBFBFBF, 0xBFBFBFBF,	/* QW3 */
	0x40404040, 0x40404040, 0x40404040, 0x40404040,	/* QW4 */
	0xBFBFBFBF, 0xBFBFBFBF, 0xBFBFBFBF, 0xBFBFBFBF,	/* QW5 */
	0x40404040, 0x40404040, 0x40404040, 0x40404040,	/* QW6 */
	0xBFBFBFBF, 0xBFBFBFBF, 0xBFBFBFBF, 0xBFBFBFBF,	/* QW7 */
	/* CHA-B, DQ7-ODD */
	0x80808080, 0x80808080, 0x80808080, 0x80808080,	/* QW0 */
	0x7F7F7F7F, 0x7F7F7F7F, 0x7F7F7F7F, 0x7F7F7F7F,	/* QW1 */
	0x80808080, 0x80808080, 0x80808080, 0x80808080,	/* QW2 */
	0x7F7F7F7F, 0x7F7F7F7F, 0x7F7F7F7F, 0x7F7F7F7F,	/* QW3 */
	0x80808080, 0x80808080, 0x80808080, 0x80808080,	/* QW4 */
	0x7F7F7F7F, 0x7F7F7F7F, 0x7F7F7F7F, 0x7F7F7F7F,	/* QW5 */
	0x80808080, 0x80808080, 0x80808080, 0x80808080,	/* QW6 */
	0x80808080, 0x80808080, 0x80808080, 0x80808080	/* QW7 */
};
199
/* Four-entry byte table of DQS receiver-enable offsets (not referenced in this part of the file). */
const u8 Table_DQSRcvEn_Offset[] = {
	0x00, 0x01, 0x10, 0x11
};
201
202
203 void TrainReceiverEn_D(struct MCTStatStruc *pMCTstat,
204                         struct DCTStatStruc *pDCTstatA, u8 Pass)
205 {
206         u8 Node;
207         struct DCTStatStruc *pDCTstat;
208
209         for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
210                 pDCTstat = pDCTstatA + Node;
211
212 /*FIXME: needed?                if (!pDCTstat->NodePresent)
213                         break;
214 */
215                 if (pDCTstat->DCTSysLimit) {
216                         mct_TrainRcvrEn_D(pMCTstat, pDCTstat, Pass);
217                 }
218         }
219 }
220
221
222 static void SetEccDQSRdWrPos_D(struct MCTStatStruc *pMCTstat,
223                                 struct DCTStatStruc *pDCTstat, u8 ChipSel)
224 {
225         u8 channel;
226         u8 direction;
227
228         for (channel = 0; channel < 2; channel++){
229                 for (direction = 0; direction < 2; direction++) {
230                         pDCTstat->Channel = channel;    /* Channel A or B */
231                         pDCTstat->Direction = direction; /* Read or write */
232                         CalcEccDQSPos_D(pMCTstat, pDCTstat, pDCTstat->CH_EccDQSLike[channel], pDCTstat->CH_EccDQSScale[channel], ChipSel);
233                         print_debug_dqs_pair("\t\tSetEccDQSRdWrPos: channel ", channel, direction==DQS_READDIR? " R dqs_delay":" W dqs_delay",  pDCTstat->DQSDelay, 2);
234                         pDCTstat->ByteLane = 8;
235                         StoreDQSDatStrucVal_D(pMCTstat, pDCTstat, ChipSel);
236                         mct_SetDQSDelayCSR_D(pMCTstat, pDCTstat, ChipSel);
237                 }
238         }
239 }
240
241
242
243 static void CalcEccDQSPos_D(struct MCTStatStruc *pMCTstat,
244                                 struct DCTStatStruc *pDCTstat,
245                                 u16 like, u8 scale, u8 ChipSel)
246 {
247         u8 DQSDelay0, DQSDelay1;
248         u16 DQSDelay;
249
250         pDCTstat->ByteLane = like & 0xff;
251         GetDQSDatStrucVal_D(pMCTstat, pDCTstat, ChipSel);
252         DQSDelay0 = pDCTstat->DQSDelay;
253
254         pDCTstat->ByteLane = (like >> 8) & 0xff;
255         GetDQSDatStrucVal_D(pMCTstat, pDCTstat, ChipSel);
256         DQSDelay1 = pDCTstat->DQSDelay;
257
258         if (DQSDelay0>DQSDelay1) {
259                 DQSDelay = DQSDelay0 - DQSDelay1;
260         } else {
261                 DQSDelay = DQSDelay1 - DQSDelay0;
262         }
263
264         DQSDelay = DQSDelay * (~scale);
265
266         DQSDelay += 0x80;       // round it
267
268         DQSDelay >>= 8;         // /256
269
270         if (DQSDelay0>DQSDelay1) {
271                 DQSDelay = DQSDelay1 - DQSDelay;
272         } else {
273                 DQSDelay += DQSDelay1;
274         }
275
276         pDCTstat->DQSDelay = (u8)DQSDelay;
277 }
278
279
280 static void TrainDQSRdWrPos_D(struct MCTStatStruc *pMCTstat,
281                                 struct DCTStatStruc *pDCTstat,
282                                 u8 cs_start)
283 {
284         u32 Errors;
285         u8 Channel, DQSWrDelay;
286         u8 _DisableDramECC = 0;
287         u32 PatternBuffer[292];
288         u8 _Wrap32Dis = 0, _SSE2 = 0;
289         u8 dqsWrDelay_end;
290
291         u32 addr;
292         u32 cr4;
293         u32 lo, hi;
294
295         print_debug_dqs("\nTrainDQSRdWrPos: Node_ID ", pDCTstat->Node_ID, 0);
296         cr4 = read_cr4();
297         if (cr4 & (1<<9)) {
298                 _SSE2 = 1;
299         }
300         cr4 |= (1<<9);          /* OSFXSR enable SSE2 */
301         write_cr4(cr4);
302
303         addr = HWCR;
304         _RDMSR(addr, &lo, &hi);
305         if (lo & (1<<17)) {
306                 _Wrap32Dis = 1;
307         }
308         lo |= (1<<17);          /* HWCR.wrap32dis */
309         _WRMSR(addr, lo, hi);   /* allow 64-bit memory references in real mode */
310
311         /* Disable ECC correction of reads on the dram bus. */
312         _DisableDramECC = mct_DisableDimmEccEn_D(pMCTstat, pDCTstat);
313
314         SetupDqsPattern_D(pMCTstat, pDCTstat, PatternBuffer);
315
316         /* mct_BeforeTrainDQSRdWrPos_D */
317         dqsWrDelay_end = 0x20;
318
319         Errors = 0;
320         for (Channel = 0; Channel < 2; Channel++) {
321                 print_debug_dqs("\tTrainDQSRdWrPos: 1 Channel ",Channel, 1);
322                 pDCTstat->Channel = Channel;
323
324                 if (pDCTstat->DIMMValidDCT[Channel] == 0)       /* mct_BeforeTrainDQSRdWrPos_D */
325                         continue;
326
327                 for ( DQSWrDelay = 0; DQSWrDelay < dqsWrDelay_end; DQSWrDelay++) {
328                         pDCTstat->DQSDelay = DQSWrDelay;
329                         pDCTstat->Direction = DQS_WRITEDIR;
330                         mct_SetDQSDelayAllCSR_D(pMCTstat, pDCTstat, cs_start);
331
332                         print_debug_dqs("\t\tTrainDQSRdWrPos: 21 DQSWrDelay ", DQSWrDelay, 2);
333                         TrainReadDQS_D(pMCTstat, pDCTstat, cs_start);
334
335                         print_debug_dqs("\t\tTrainDQSRdWrPos: 22 TrainErrors ",pDCTstat->TrainErrors, 2);
336                         if (pDCTstat->TrainErrors == 0) {
337                                         break;
338                         }
339                         Errors |= pDCTstat->TrainErrors;
340                 }
341                 if (DQSWrDelay < dqsWrDelay_end) {
342                         Errors = 0;
343
344                         print_debug_dqs("\tTrainDQSRdWrPos: 231 DQSWrDelay ", DQSWrDelay, 1);
345                         TrainWriteDQS_D(pMCTstat, pDCTstat, cs_start);
346                 }
347                 print_debug_dqs("\tTrainDQSRdWrPos: 232 Errors ", Errors, 1);
348                 pDCTstat->ErrStatus |= Errors;
349         }
350
351 #if DQS_TRAIN_DEBUG > 0
352         {
353                 u8 val;
354                 u8 i;
355                 u8 Channel, Receiver, Dir;
356                 u8 *p;
357
358                 for (Dir = 0; Dir < 2; Dir++) {
359                         if (Dir == 0) {
360                                 print_debug("TrainDQSRdWrPos: CH_D_DIR_B_DQS WR:\n");
361                         } else {
362                                 print_debug("TrainDQSRdWrPos: CH_D_DIR_B_DQS RD:\n");
363                         }
364                         for (Channel = 0; Channel < 2; Channel++) {
365                                 print_debug("Channel:"); print_debug_hex8(Channel); print_debug("\n");
366                                 for (Receiver = cs_start; Receiver < (cs_start + 2); Receiver += 2) {
367                                         print_debug("\t\tReceiver:"); print_debug_hex8(Receiver);
368                                         p = pDCTstat->CH_D_DIR_B_DQS[Channel][Receiver >> 1][Dir];
369                                         print_debug(": ");
370                                         for (i=0;i<8; i++) {
371                                                 val  = p[i];
372                                                 print_debug_hex8(val);
373                                                 print_debug(" ");
374                                         }
375                                         print_debug("\n");
376                                 }
377                         }
378                 }
379
380         }
381 #endif
382
383         if (_DisableDramECC) {
384                 mct_EnableDimmEccEn_D(pMCTstat, pDCTstat, _DisableDramECC);
385         }
386         if (!_Wrap32Dis) {
387                 addr = HWCR;
388                 _RDMSR(addr, &lo, &hi);
389                 lo &= ~(1<<17);         /* restore HWCR.wrap32dis */
390                 _WRMSR(addr, lo, hi);
391         }
392         if (!_SSE2){
393                 cr4 = read_cr4();
394                 cr4 &= ~(1<<9);         /* restore cr4.OSFXSR */
395                 write_cr4(cr4);
396         }
397
398         print_tx("TrainDQSRdWrPos: Status ", pDCTstat->Status);
399         print_tx("TrainDQSRdWrPos: TrainErrors ", pDCTstat->TrainErrors);
400         print_tx("TrainDQSRdWrPos: ErrStatus ", pDCTstat->ErrStatus);
401         print_tx("TrainDQSRdWrPos: ErrCode ", pDCTstat->ErrCode);
402         print_t("TrainDQSRdWrPos: Done\n");
403 }
404
405
406 static void SetupDqsPattern_D(struct MCTStatStruc *pMCTstat,
407                                 struct DCTStatStruc *pDCTstat, u32 *buffer)
408 {
409         /* 1. Set the Pattern type (0 or 1) in DCTStatstruc.Pattern
410          * 2. Copy the pattern from ROM to Cache, aligning on 16 byte boundary
411          * 3. Set the ptr to Cacheable copy in DCTStatstruc.PtrPatternBufA
412          */
413
414         u32 *buf;
415         u16 i;
416
417         buf = (u32 *)(((u32)buffer + 0x10) & (0xfffffff0));
418         if (pDCTstat->Status & (1<<SB_128bitmode)) {
419                 pDCTstat->Pattern = 1;  /* 18 cache lines, alternating qwords */
420                 for (i=0; i<16*18; i++)
421                         buf[i] = TestPatternJD1b_D[i];
422         } else {
423                 pDCTstat->Pattern = 0;  /* 9 cache lines, sequential qwords */
424                 for (i=0; i<16*9; i++)
425                         buf[i] = TestPatternJD1a_D[i];
426         }
427         pDCTstat->PtrPatternBufA = (u32)buf;
428 }
429
430
431 static void TrainDQSPos_D(struct MCTStatStruc *pMCTstat,
432                                 struct DCTStatStruc *pDCTstat,
433                                 u8 cs_start)
434 {
435         u32 Errors;
436         u8 ChipSel, DQSDelay;
437         u8 RnkDlySeqPassMin,RnkDlySeqPassMax, RnkDlyFilterMin, RnkDlyFilterMax;
438         u8 LastTest;
439         u32 TestAddr;
440         u8 ByteLane;
441         u8 MutualCSPassW[64];
442         u8 BanksPresent;
443         u8 dqsDelay_end;
444         u8 tmp, valid;
445
446 //      print_tx("TrainDQSPos: Node_ID", pDCTstat->Node_ID);
447 //      print_tx("TrainDQSPos: Direction", pDCTstat->Direction);
448
449         /* MutualCSPassW: each byte represents a bitmap of pass/fail per
450          * ByteLane.  The indext within MutualCSPassW is the delay value
451          * given the results.
452          */
453
454
455         print_debug_dqs("\t\t\tTrainDQSPos begin ", 0, 3);
456
457         Errors = 0;
458         BanksPresent = 0;
459
460         if (pDCTstat->Direction == DQS_READDIR) {
461                 dqsDelay_end = 64;
462                 mct_AdjustDelayRange_D(pMCTstat, pDCTstat, &dqsDelay_end);
463         } else {
464                 dqsDelay_end = 32;
465         }
466
467         /* Bitmapped status per delay setting, 0xff=All positions
468          * passing (1= PASS). Set the entire array.
469          */
470         for (DQSDelay=0; DQSDelay<64; DQSDelay++) {
471                 MutualCSPassW[DQSDelay] = 0xFF;
472         }
473
474         for (ChipSel = cs_start; ChipSel < (cs_start + 2); ChipSel++) { /* logical register chipselects 0..7 */
475                 print_debug_dqs("\t\t\t\tTrainDQSPos: 11 ChipSel ", ChipSel, 4);
476
477                 if (!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, pDCTstat->Channel, ChipSel)) {
478                         print_debug_dqs("\t\t\t\tmct_RcvrRankEnabled_D CS not enabled ", ChipSel, 4);
479                         continue;
480                 }
481
482                 BanksPresent = 1;       /* flag for atleast one bank is present */
483                 TestAddr = mct_GetMCTSysAddr_D(pMCTstat, pDCTstat, pDCTstat->Channel, ChipSel, &valid);
484                 if (!valid) {
485                         print_debug_dqs("\t\t\t\tAddress not supported on current CS ", TestAddr, 4);
486                         continue;
487                 }
488
489                 print_debug_dqs("\t\t\t\tTrainDQSPos: 12 TestAddr ", TestAddr, 4);
490                 SetUpperFSbase(TestAddr);       /* fs:eax=far ptr to target */
491
492                 if (pDCTstat->Direction==DQS_READDIR) {
493                         print_debug_dqs("\t\t\t\tTrainDQSPos: 13 for read ", 0, 4);
494                         WriteDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr<<8);
495                 }
496
497                 for (DQSDelay = 0; DQSDelay < dqsDelay_end; DQSDelay++) {
498                         print_debug_dqs("\t\t\t\t\tTrainDQSPos: 141 DQSDelay ", DQSDelay, 5);
499                         if (MutualCSPassW[DQSDelay] == 0)
500                                 continue; //skip current delay value if other chipselects have failed all 8 bytelanes
501                         pDCTstat->DQSDelay = DQSDelay;
502                         mct_SetDQSDelayAllCSR_D(pMCTstat, pDCTstat, cs_start);
503                         print_debug_dqs("\t\t\t\t\tTrainDQSPos: 142 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);
504
505                         if (pDCTstat->Direction == DQS_WRITEDIR) {
506                                 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 143 for write", 0, 5);
507                                 WriteDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr<<8);
508                         }
509
510                         print_debug_dqs("\t\t\t\t\tTrainDQSPos: 144 Pattern ", pDCTstat->Pattern, 5);
511                         ReadDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr<<8);
512 //                      print_debug_dqs("\t\t\t\t\tTrainDQSPos: 145 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);
513                         tmp = CompareDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8); /* 0=fail, 1=pass */
514
515                         if (mct_checkFenceHoleAdjust_D(pMCTstat, pDCTstat, DQSDelay, ChipSel, &tmp)) {
516                                 goto skipLocMiddle;
517                         }
518
519                         MutualCSPassW[DQSDelay] &= tmp;
520                         print_debug_dqs("\t\t\t\t\tTrainDQSPos: 146 \tMutualCSPassW ", MutualCSPassW[DQSDelay], 5);
521
522                         SetTargetWTIO_D(TestAddr);
523                         FlushDQSTestPattern_D(pDCTstat, TestAddr<<8);
524                         ResetTargetWTIO_D();
525                 }
526
527         }
528
529         if (BanksPresent) {
530                 u8 mask_pass = 0;
531                 for (ByteLane = 0; ByteLane < 8; ByteLane++) {
532                         print_debug_dqs("\t\t\t\tTrainDQSPos: 31 ByteLane ",ByteLane, 4);
533                         pDCTstat->ByteLane = ByteLane;
534                         LastTest = DQS_FAIL;            /* Analyze the results */
535                         RnkDlySeqPassMin = 0;
536                         RnkDlySeqPassMax = 0;
537                         RnkDlyFilterMax = 0;
538                         RnkDlyFilterMin = 0;
539                         for (DQSDelay = 0; DQSDelay < dqsDelay_end; DQSDelay++) {
540                                 if (MutualCSPassW[DQSDelay] & (1 << ByteLane)) {
541                                         print_debug_dqs("\t\t\t\t\tTrainDQSPos: 321 DQSDelay ", DQSDelay, 5);
542                                         print_debug_dqs("\t\t\t\t\tTrainDQSPos: 322 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);
543
544                                         RnkDlySeqPassMax = DQSDelay;
545                                         if (LastTest == DQS_FAIL) {
546                                                 RnkDlySeqPassMin = DQSDelay; //start sequential run
547                                         }
548                                         if ((RnkDlySeqPassMax - RnkDlySeqPassMin)>(RnkDlyFilterMax-RnkDlyFilterMin)){
549                                                 RnkDlyFilterMin = RnkDlySeqPassMin;
550                                                 RnkDlyFilterMax = RnkDlySeqPassMax;
551                                         }
552                                         LastTest = DQS_PASS;
553                                 } else {
554                                         LastTest = DQS_FAIL;
555                                 }
556                         }
557                         print_debug_dqs("\t\t\t\tTrainDQSPos: 33 RnkDlySeqPassMax ", RnkDlySeqPassMax, 4);
558                         if (RnkDlySeqPassMax == 0) {
559                                 Errors |= 1<<SB_NODQSPOS; /* no passing window */
560                         } else {
561                                 print_debug_dqs_pair("\t\t\t\tTrainDQSPos: 34 RnkDlyFilter: ", RnkDlyFilterMin, " ",  RnkDlyFilterMax, 4);
562                                 if (((RnkDlyFilterMax - RnkDlyFilterMin) < MIN_DQS_WNDW)){
563                                         Errors |= 1 << SB_SMALLDQS;
564                                 } else {
565                                         u8 middle_dqs;
566                                         /* mctEngDQSwindow_Save_D Not required for arrays */
567                                         middle_dqs = MiddleDQS_D(RnkDlyFilterMin, RnkDlyFilterMax);
568                                         pDCTstat->DQSDelay = middle_dqs;
569                                         mct_SetDQSDelayCSR_D(pMCTstat, pDCTstat, cs_start);  /* load the register with the value */
570                                         StoreDQSDatStrucVal_D(pMCTstat, pDCTstat, cs_start); /* store the value into the data structure */
571                                         print_debug_dqs("\t\t\t\tTrainDQSPos: 42 middle_dqs : ",middle_dqs, 4);
572                                 }
573                         }
574                 }
575                 print_debug_dqs("\t\t\t\tTrainDQSPos: 41 mask_pass ",mask_pass, 3);
576         }
577 skipLocMiddle:
578         pDCTstat->TrainErrors = Errors;
579
580         print_debug_dqs("\t\t\tTrainDQSPos: Errors ", Errors, 3);
581
582 }
583
584
585 static void StoreDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat,
586                                         struct DCTStatStruc *pDCTstat, u8 ChipSel)
587 {
588         /* Store the DQSDelay value, found during a training sweep, into the DCT
589          * status structure for this node
590          */
591
592
593         /* When 400, 533, 667, it will support dimm0/1/2/3,
594          * and set conf for dimm0, hw will copy to dimm1/2/3
595          * set for dimm1, hw will copy to dimm3
596          * Rev A/B only support DIMM0/1 when 800Mhz and above + 0x100 to next dimm
597          * Rev C support DIMM0/1/2/3 when 800Mhz and above  + 0x100 to next dimm
598          */
599
600         /* FindDQSDatDimmVal_D is not required since we use an array */
601         u8 dn = 0;
602
603         if (pDCTstat->Status & (1 << SB_Over400MHz))
604                 dn = ChipSel>>1; /* if odd or even logical DIMM */
605
606         pDCTstat->CH_D_DIR_B_DQS[pDCTstat->Channel][dn][pDCTstat->Direction][pDCTstat->ByteLane] =
607                                         pDCTstat->DQSDelay;
608 }
609
610
611 static void GetDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat,
612                                 struct DCTStatStruc *pDCTstat, u8 ChipSel)
613 {
614         u8 dn = 0;
615
616
617         /* When 400, 533, 667, it will support dimm0/1/2/3,
618          * and set conf for dimm0, hw will copy to dimm1/2/3
619          * set for dimm1, hw will copy to dimm3
620          * Rev A/B only support DIMM0/1 when 800Mhz and above + 0x100 to next dimm
621          * Rev C support DIMM0/1/2/3 when 800Mhz and above  + 0x100 to next dimm
622          */
623
624         /* FindDQSDatDimmVal_D is not required since we use an array */
625         if (pDCTstat->Status & (1<<SB_Over400MHz))
626                 dn = ChipSel >> 1; /*if odd or even logical DIMM */
627
628         pDCTstat->DQSDelay =
629                 pDCTstat->CH_D_DIR_B_DQS[pDCTstat->Channel][dn][pDCTstat->Direction][pDCTstat->ByteLane];
630 }
631
632
633 /* FindDQSDatDimmVal_D is not required since we use an array */
634
635
/*
 * Return the middle of the delay window [min, max], rounding up when the
 * window width is odd.
 *
 * The width is computed modulo 256 (u8 subtraction), so the result for
 * max < min is unchanged from the previous implementation.  The rounding is
 * done in a wider type so that a width of 255 no longer wraps to 0 (which
 * used to make the function return min instead of the midpoint).
 */
static u8 MiddleDQS_D(u8 min, u8 max)
{
	unsigned int width = (u8)(max - min);

	/* (width + 1) / 2 == width / 2 for even widths, rounds up for odd. */
	return (u8)(min + ((width + 1) >> 1));
}
644
645
646 static void TrainReadDQS_D(struct MCTStatStruc *pMCTstat,
647                                 struct DCTStatStruc *pDCTstat,
648                                 u8 cs_start)
649 {
650         print_debug_dqs("\t\tTrainReadPos ", 0, 2);
651         pDCTstat->Direction = DQS_READDIR;
652         TrainDQSPos_D(pMCTstat, pDCTstat, cs_start);
653 }
654
655
656 static void TrainWriteDQS_D(struct MCTStatStruc *pMCTstat,
657                                 struct DCTStatStruc *pDCTstat,
658                                 u8 cs_start)
659 {
660         pDCTstat->Direction = DQS_WRITEDIR;
661         print_debug_dqs("\t\tTrainWritePos", 0, 2);
662         TrainDQSPos_D(pMCTstat, pDCTstat, cs_start);
663 }
664
665
666 static void proc_IOCLFLUSH_D(u32 addr_hi)
667 {
668         SetTargetWTIO_D(addr_hi);
669         proc_CLFLUSH(addr_hi);
670         ResetTargetWTIO_D();
671 }
672
673
674 static u8 ChipSelPresent_D(struct MCTStatStruc *pMCTstat,
675                                 struct DCTStatStruc *pDCTstat,
676                                 u8 Channel, u8 ChipSel)
677 {
678         u32 val;
679         u32 reg;
680         u32 dev = pDCTstat->dev_dct;
681         u32 reg_off;
682         u8 ret = 0;
683
684         if (!pDCTstat->GangedMode) {
685                 reg_off = 0x100 * Channel;
686         } else {
687                 reg_off = 0;
688         }
689
690         if (ChipSel < MAX_CS_SUPPORTED){
691                 reg = 0x40 + (ChipSel << 2) + reg_off;
692                 val = Get_NB32(dev, reg);
693                 if (val & ( 1 << 0))
694                         ret = 1;
695         }
696
697         return ret;
698 }
699
700
701 /* proc_CLFLUSH_D located in mct_gcc.h */
702
703
704 static void WriteDQSTestPattern_D(struct MCTStatStruc *pMCTstat,
705                                         struct DCTStatStruc *pDCTstat,
706                                         u32 TestAddr_lo)
707 {
708         /* Write a pattern of 72 bit times (per DQ), to test dram functionality.
709          * The pattern is a stress pattern which exercises both ISI and
710          * crosstalk.  The number of cache lines to fill is dependent on DCT
711          * width mode and burstlength.
712          * Mode BL  Lines Pattern no.
713          * ----+---+-------------------
714          * 64   4         9     0
715          * 64   8         9     0
716          * 64M  4         9     0
717          * 64M  8         9     0
718          * 128  4         18    1
719          * 128  8         N/A   -
720          */
721
722         if (pDCTstat->Pattern == 0)
723                 WriteL9TestPattern_D(pDCTstat, TestAddr_lo);
724         else
725                 WriteL18TestPattern_D(pDCTstat, TestAddr_lo);
726 }
727
728
729 static void WriteL18TestPattern_D(struct DCTStatStruc *pDCTstat,
730                                         u32 TestAddr_lo)
731 {
732         u8 *buf;
733
734         buf = (u8 *)pDCTstat->PtrPatternBufA;
735         WriteLNTestPattern(TestAddr_lo, buf, 18);
736
737 }
738
739
740 static void WriteL9TestPattern_D(struct DCTStatStruc *pDCTstat,
741                                         u32 TestAddr_lo)
742 {
743         u8 *buf;
744
745         buf = (u8 *)pDCTstat->PtrPatternBufA;
746         WriteLNTestPattern(TestAddr_lo, buf, 9);
747 }
748
749
750
751 static u8 CompareDQSTestPattern_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u32 addr_lo)
752 {
753         /* Compare a pattern of 72 bit times (per DQ), to test dram functionality.
754          * The pattern is a stress pattern which exercises both ISI and
755          * crosstalk.  The number of cache lines to fill is dependent on DCT
756          * width mode and burstlength.
757          * Mode BL  Lines Pattern no.
758          * ----+---+-------------------
759          * 64   4         9     0
760          * 64   8         9     0
761          * 64M  4         9     0
762          * 64M  8         9     0
763          * 128  4         18    1
764          * 128  8         N/A   -
765          */
766
767         u32 *test_buf;
768         u8 bitmap;
769         u8 bytelane;
770         u8 i;
771         u32 value;
772         u8 j;
773         u32 value_test;
774         u8 pattern, channel;
775
776         pattern = pDCTstat->Pattern;
777         channel = pDCTstat->Channel;
778         test_buf = (u32 *)pDCTstat->PtrPatternBufA;
779
780         if (pattern && channel) {
781                 addr_lo += 8; //second channel
782                 test_buf+= 2;
783         }
784
785         bytelane = 0;
786         bitmap = 0xFF;
787         for (i=0; i < (9 * 64 / 4); i++) { /* /4 due to next loop */
788                 value = read32_fs(addr_lo);
789                 value_test = *test_buf;
790
791                 print_debug_dqs_pair("\t\t\t\t\t\ttest_buf = ", (u32)test_buf, " value = ", value_test, 7);
792                 print_debug_dqs_pair("\t\t\t\t\t\ttaddr_lo = ", addr_lo, " value = ", value, 7);
793
794                 for (j = 0; j < (4 * 8); j += 8) {
795                         if (((value >> j) & 0xff) != ((value_test >> j) & 0xff)) {
796                                 bitmap &= ~(1 << bytelane);
797                         }
798
799                         bytelane++;
800                         bytelane &= 0x7;
801                 }
802
803                 print_debug_dqs("\t\t\t\t\t\tbitmap = ", bitmap, 7);
804
805                 if (!bitmap)
806                         break;
807
808                 if (bytelane == 0){
809                         if (pattern == 1) { //dual channel
810                                 addr_lo += 8; //skip over other channel's data
811                                 test_buf += 2;
812                         }
813                 }
814                 addr_lo += 4;
815                 test_buf += 1;
816         }
817
818         return bitmap;
819 }
820
821
822 static void FlushDQSTestPattern_D(struct DCTStatStruc *pDCTstat,
823                                         u32 addr_lo)
824 {
825         /* Flush functions in mct_gcc.h */
826         if (pDCTstat->Pattern == 0){
827                 FlushDQSTestPattern_L9(addr_lo);
828         } else {
829                 FlushDQSTestPattern_L18(addr_lo);
830         }
831 }
832
833 static void SetTargetWTIO_D(u32 TestAddr)
834 {
835         u32 lo, hi;
836         hi = TestAddr >> 24;
837         lo = TestAddr << 8;
838         _WRMSR(0xC0010016, lo, hi);             /* IORR0 Base */
839         hi = 0xFF;
840         lo = 0xFC000800;                        /* 64MB Mask */
841         _WRMSR(0xC0010017, lo, hi);             /* IORR0 Mask */
842 }
843
844
845 static void ResetTargetWTIO_D(void)
846 {
847         u32 lo, hi;
848
849         hi = 0;
850         lo = 0;
851         _WRMSR(0xc0010017, lo, hi); // IORR0 Mask
852 }
853
854
855 static void ReadDQSTestPattern_D(struct MCTStatStruc *pMCTstat,
856                                 struct DCTStatStruc *pDCTstat,
857                                 u32 TestAddr_lo)
858 {
859         /* Read a pattern of 72 bit times (per DQ), to test dram functionality.
860          * The pattern is a stress pattern which exercises both ISI and
861          * crosstalk.  The number of cache lines to fill is dependent on DCT
862          * width mode and burstlength.
863          * Mode BL  Lines Pattern no.
864          * ----+---+-------------------
865          * 64   4         9     0
866          * 64   8         9     0
867          * 64M  4         9     0
868          * 64M  8         9     0
869          * 128  4         18    1
870          * 128  8         N/A   -
871          */
872         if (pDCTstat->Pattern == 0)
873                 ReadL9TestPattern(TestAddr_lo);
874         else
875                 ReadL18TestPattern(TestAddr_lo);
876         _MFENCE;
877 }
878
879
880 u32 SetUpperFSbase(u32 addr_hi)
881 {
882         /* Set the upper 32-bits of the Base address, 4GB aligned) for the
883          * FS selector.
884          */
885
886         u32 lo, hi;
887         u32 addr;
888         lo = 0;
889         hi = addr_hi>>24;
890         addr = FS_Base;
891         _WRMSR(addr, lo, hi);
892         return addr_hi<<8;
893 }
894
895
896 void ResetDCTWrPtr_D(u32 dev, u32 index_reg, u32 index)
897 {
898         u32 val;
899
900         val = Get_NB32_index_wait(dev, index_reg, index);
901         Set_NB32_index_wait(dev, index_reg, index, val);
902 }
903
904
905 /* mctEngDQSwindow_Save_D not required with arrays */
906
907
908 void mct_TrainDQSPos_D(struct MCTStatStruc *pMCTstat,
909                         struct DCTStatStruc *pDCTstatA)
910 {
911         u8 Node;
912         u8 ChipSel;
913         struct DCTStatStruc *pDCTstat;
914
915         for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
916                 pDCTstat = pDCTstatA + Node;
917                 if (pDCTstat->DCTSysLimit) {
918                         /* when DCT speed >= 400MHz, we only support 2 DIMMs
919                          * and we have two sets registers for DIMM0 and DIMM1 so
920                          * here we must traning DQSRd/WrPos for DIMM0 and DIMM1
921                          */
922                         if (pDCTstat->Speed >= 4) {
923                                 pDCTstat->Status |= (1 << SB_Over400MHz);
924                         }
925                         for (ChipSel = 0; ChipSel < MAX_CS_SUPPORTED; ChipSel += 2) {
926                                 TrainDQSRdWrPos_D(pMCTstat, pDCTstat, ChipSel);
927                                 SetEccDQSRdWrPos_D(pMCTstat, pDCTstat, ChipSel);
928                         }
929                 }
930         }
931 }
932
933
934 /* mct_BeforeTrainDQSRdWrPos_D
935  * Function is inline.
936  */
937
938 u8 mct_DisableDimmEccEn_D(struct MCTStatStruc *pMCTstat,
939                                 struct DCTStatStruc *pDCTstat)
940 {
941         u8 _DisableDramECC = 0;
942         u32 val;
943         u32 reg;
944         u32 dev;
945
946         /*Disable ECC correction of reads on the dram bus. */
947
948         dev = pDCTstat->dev_dct;
949         reg = 0x90;
950         val = Get_NB32(dev, reg);
951         if (val & (1<<DimmEcEn)) {
952                 _DisableDramECC |= 0x01;
953                 val &= ~(1<<DimmEcEn);
954                 Set_NB32(dev, reg, val);
955         }
956         if (!pDCTstat->GangedMode) {
957                 reg = 0x190;
958                 val = Get_NB32(dev, reg);
959                 if (val & (1<<DimmEcEn)) {
960                         _DisableDramECC |= 0x02;
961                         val &= ~(1<<DimmEcEn);
962                         Set_NB32(dev, reg, val);
963                 }
964         }
965         return _DisableDramECC;
966 }
967
968
969
970 void mct_EnableDimmEccEn_D(struct MCTStatStruc *pMCTstat,
971                                 struct DCTStatStruc *pDCTstat, u8 _DisableDramECC)
972 {
973
974         u32 val;
975         u32 reg;
976         u32 dev;
977
978         /* Enable ECC correction if it was previously disabled */
979
980         dev = pDCTstat->dev_dct;
981
982         if ((_DisableDramECC & 0x01) == 0x01) {
983                 reg = 0x90;
984                 val = Get_NB32(dev, reg);
985                 val |= (1<<DimmEcEn);
986                 Set_NB32(dev, reg, val);
987         }
988         if ((_DisableDramECC & 0x02) == 0x02) {
989                 reg = 0x190;
990                 val = Get_NB32(dev, reg);
991                 val |= (1<<DimmEcEn);
992                 Set_NB32(dev, reg, val);
993         }
994 }
995
996
997 static void mct_SetDQSDelayCSR_D(struct MCTStatStruc *pMCTstat,
998                                         struct DCTStatStruc *pDCTstat, u8 ChipSel)
999 {
1000         u8 ByteLane;
1001         u32 val;
1002         u32 index_reg = 0x98 + 0x100 * pDCTstat->Channel;
1003         u8 shift;
1004         u32 dqs_delay = (u32)pDCTstat->DQSDelay;
1005         u32 dev = pDCTstat->dev_dct;
1006         u32 index;
1007
1008         ByteLane = pDCTstat->ByteLane;
1009
1010         /* Channel is offset */
1011         if (ByteLane < 4) {
1012                 index = 1;
1013         } else if (ByteLane <8) {
1014                 index = 2;
1015         } else {
1016                 index = 3;
1017         }
1018
1019         if (pDCTstat->Direction == DQS_READDIR) {
1020                 index += 4;
1021         }
1022
1023         /* get the proper register index */
1024         shift = ByteLane%4;
1025         shift <<= 3; /* get bit position of bytelane, 8 bit */
1026
1027         if (pDCTstat->Status & (1 << SB_Over400MHz)) {
1028                 index += (ChipSel >> 1) * 0x100;        /* if logical DIMM1/DIMM3 */
1029         }
1030
1031         val = Get_NB32_index_wait(dev, index_reg, index);
1032         val &= ~(0x7f << shift);
1033         val |= (dqs_delay << shift);
1034         Set_NB32_index_wait(dev, index_reg, index, val);
1035 }
1036
1037
1038 static void mct_SetDQSDelayAllCSR_D(struct MCTStatStruc *pMCTstat,
1039                                         struct DCTStatStruc *pDCTstat,
1040                                         u8 cs_start)
1041 {
1042         u8 ByteLane;
1043         u8 ChipSel = cs_start;
1044
1045
1046         for (ChipSel = cs_start; ChipSel < (cs_start + 2); ChipSel++) {
1047                 if ( mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, pDCTstat->Channel, ChipSel)) {
1048                         for (ByteLane = 0; ByteLane < 8; ByteLane++) {
1049                                 pDCTstat->ByteLane = ByteLane;
1050                                 mct_SetDQSDelayCSR_D(pMCTstat, pDCTstat, ChipSel);
1051                         }
1052                 }
1053         }
1054 }
1055
1056
1057 u8 mct_RcvrRankEnabled_D(struct MCTStatStruc *pMCTstat,
1058                                 struct DCTStatStruc *pDCTstat,
1059                                 u8 Channel, u8 ChipSel)
1060 {
1061         u8 ret;
1062
1063         ret = ChipSelPresent_D(pMCTstat, pDCTstat, Channel, ChipSel);
1064         return ret;
1065 }
1066
1067
1068 u32 mct_GetRcvrSysAddr_D(struct MCTStatStruc *pMCTstat,
1069                                 struct DCTStatStruc *pDCTstat,
1070                                 u8 channel, u8 receiver, u8 *valid)
1071 {
1072         return mct_GetMCTSysAddr_D(pMCTstat, pDCTstat, channel, receiver, valid);
1073 }
1074
1075
1076 u32 mct_GetMCTSysAddr_D(struct MCTStatStruc *pMCTstat,
1077                                 struct DCTStatStruc *pDCTstat,
1078                                 u8 Channel, u8 receiver, u8 *valid)
1079 {
1080         u32 val;
1081         u32 reg_off = 0;
1082         u32 reg;
1083         u32 dword;
1084         u32 dev = pDCTstat->dev_dct;
1085
1086         *valid = 0;
1087
1088
1089         if (!pDCTstat->GangedMode)  {   // FIXME: not used.
1090                 reg_off = 0x100 * Channel;
1091         }
1092
1093         /* get the local base addr of the chipselect */
1094         reg = 0x40 + (receiver << 2);
1095         val = Get_NB32(dev, reg);
1096
1097         val &= ~0x0F;
1098
1099         /* unganged mode DCT0+DCT1, sys addr of DCT1=node
1100          * base+DctSelBaseAddr+local ca base*/
1101         if ((Channel) && (pDCTstat->GangedMode == 0) && ( pDCTstat->DIMMValidDCT[0] > 0)) {
1102                 reg = 0x110;
1103                 dword = Get_NB32(dev, reg);
1104                 dword &= 0xfffff800;
1105                 dword <<= 8;    /* scale [47:27] of F2x110[31:11] to [39:8]*/
1106                 val += dword;
1107
1108                 /* if DCTSelBaseAddr < Hole, and eax > HoleBase, then add Hole size to test address */
1109                 if ((val >= pDCTstat->DCTHoleBase) && (pDCTstat->DCTHoleBase > dword)) {
1110                         dword = (~(pDCTstat->DCTHoleBase >> (24 - 8)) + 1) & 0xFF;
1111                         dword <<= (24 - 8);
1112                         val += dword;
1113                 }
1114         } else {
1115                 /* sys addr=node base+local cs base */
1116                 val += pDCTstat->DCTSysBase;
1117
1118                 /* New stuff */
1119                 if (pDCTstat->DCTHoleBase && (val >= pDCTstat->DCTHoleBase)) {
1120                         val -= pDCTstat->DCTSysBase;
1121                         dword = Get_NB32(pDCTstat->dev_map, 0xF0); /* get Hole Offset */
1122                         val += (dword & 0x0000ff00) << (24-8-8);
1123                 }
1124         }
1125
1126         /* New stuff */
1127         val += ((1 << 21) >> 8);        /* Add 2MB offset to avoid compat area */
1128         if (val >= MCT_TRNG_KEEPOUT_START) {
1129                 while(val < MCT_TRNG_KEEPOUT_END)
1130                         val += (1 << (15-8));   /* add 32K */
1131         }
1132
1133         /* HW remap disabled? */
1134         if (!(pDCTstat->Status & (1 << SB_HWHole))) {
1135                 if (!(pDCTstat->Status & (1 << SB_SWNodeHole))) {
1136                         /* SW memhole disabled */
1137                         u32 lo, hi;
1138                         _RDMSR(TOP_MEM, &lo, &hi);
1139                         lo >>= 8;
1140                         if ((val >= lo) && (val < _4GB_RJ8)) {
1141                                 val = 0;
1142                                 *valid = 0;
1143                                 goto exitGetAddr;
1144                         } else {
1145                                 *valid = 1;
1146                                 goto exitGetAddrWNoError;
1147                         }
1148                 } else {
1149                         *valid = 1;
1150                         goto exitGetAddrWNoError;
1151                 }
1152         } else {
1153                 *valid = 1;
1154                 goto exitGetAddrWNoError;
1155         }
1156
1157 exitGetAddrWNoError:
1158
1159         /* Skip if Address is in UMA region */
1160         dword = pMCTstat->Sub4GCacheTop;
1161         dword >>= 8;
1162         if (dword != 0) {
1163                 if ((val >= dword) && (val < _4GB_RJ8)) {
1164                         val = 0;
1165                         *valid = 0;
1166                 } else {
1167                         *valid = 1;
1168                 }
1169         }
1170
1171 exitGetAddr:
1172         return val;
1173 }
1174
1175
1176 void mct_Write1LTestPattern_D(struct MCTStatStruc *pMCTstat,
1177                                 struct DCTStatStruc *pDCTstat,
1178                                 u32 TestAddr, u8 pattern)
1179 {
1180
1181         u8 *buf;
1182
1183         /* Issue the stream of writes. When F2x11C[MctWrLimit] is reached
1184          * (or when F2x11C[FlushWr] is set again), all the writes are written
1185          * to DRAM.
1186          */
1187
1188         SetUpperFSbase(TestAddr);
1189
1190         if (pattern)
1191                 buf = (u8 *)pDCTstat->PtrPatternBufB;
1192         else
1193                 buf = (u8 *)pDCTstat->PtrPatternBufA;
1194
1195         WriteLNTestPattern(TestAddr << 8, buf, 1);
1196 }
1197
1198
1199 void mct_Read1LTestPattern_D(struct MCTStatStruc *pMCTstat,
1200                                 struct DCTStatStruc *pDCTstat, u32 addr)
1201 {
1202         u32 value;
1203
1204         /* BIOS issues the remaining (Ntrain - 2) reads after checking that
1205          * F2x11C[PrefDramTrainMode] is cleared. These reads must be to
1206          * consecutive cache lines (i.e., 64 bytes apart) and must not cross
1207          * a naturally aligned 4KB boundary. These reads hit the prefetches and
1208          * read the data from the prefetch buffer.
1209          */
1210
1211         /* get data from DIMM */
1212         SetUpperFSbase(addr);
1213
1214         /* 1st move causes read fill (to exclusive or shared)*/
1215         value = read32_fs(addr<<8);
1216 }