DDR3 support for AMD Fam10.
[coreboot.git] / src / northbridge / amd / amdmct / mct_ddr3 / mctdqs_d.c
1 /*
2  * This file is part of the coreboot project.
3  *
4  * Copyright (C) 2010 Advanced Micro Devices, Inc.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; version 2 of the License.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
18  */
19
20 static void CalcEccDQSPos_D(struct MCTStatStruc *pMCTstat,
21                                 struct DCTStatStruc *pDCTstat, u16 like,
22                                 u8 scale, u8 ChipSel);
23 static void GetDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat,
24                                 struct DCTStatStruc *pDCTstat, u8 ChipSel);
25 static u8 MiddleDQS_D(u8 min, u8 max);
26 static void TrainReadDQS_D(struct MCTStatStruc *pMCTstat,
27                                 struct DCTStatStruc *pDCTstat,
28                                 u8 cs_start);
29 static void TrainWriteDQS_D(struct MCTStatStruc *pMCTstat,
30                                 struct DCTStatStruc *pDCTstat,
31                                 u8 cs_start);
32 static void WriteDQSTestPattern_D(struct MCTStatStruc *pMCTstat,
33                                         struct DCTStatStruc *pDCTstat,
34                                         u32 TestAddr_lo);
35 static void WriteL18TestPattern_D(struct DCTStatStruc *pDCTstat,
36                                         u32 TestAddr_lo);
37 static void WriteL9TestPattern_D(struct DCTStatStruc *pDCTstat,
38                                         u32 TestAddr_lo);
39 static u16 CompareDQSTestPattern_D(struct MCTStatStruc *pMCTstat,
40                                         struct DCTStatStruc *pDCTstat,
41                                         u32 addr_lo);
42 static void FlushDQSTestPattern_D(struct DCTStatStruc *pDCTstat,
43                                         u32 addr_lo);
44 static void SetTargetWTIO_D(u32 TestAddr);
45 static void ResetTargetWTIO_D(void);
46 static void ReadDQSTestPattern_D(struct MCTStatStruc *pMCTstat,
47                                         struct DCTStatStruc *pDCTstat,
48                                         u32 TestAddr_lo);
49 static void mctEngDQSwindow_Save_D(struct MCTStatStruc *pMCTstat,
50                                         struct DCTStatStruc *pDCTstat, u8 ChipSel,
51                                         u8 RnkDlyFilterMin, u8 RnkDlyFilterMax);
52 void ResetDCTWrPtr_D(u32 dev, u32 index_reg, u32 index);
53 u8 mct_DisableDimmEccEn_D(struct MCTStatStruc *pMCTstat,
54                                 struct DCTStatStruc *pDCTstat);
55 static void mct_SetDQSDelayCSR_D(struct MCTStatStruc *pMCTstat,
56                                         struct DCTStatStruc *pDCTstat,
57                                         u8 ChipSel);
58 static void mct_SetDQSDelayAllCSR_D(struct MCTStatStruc *pMCTstat,
59                                         struct DCTStatStruc *pDCTstat,
60                                         u8 cs_start);
61 u32 mct_GetMCTSysAddr_D(struct MCTStatStruc *pMCTstat,
62                                 struct DCTStatStruc *pDCTstat, u8 Channel,
63                                 u8 receiver, u8 *valid);
64 static void SetupDqsPattern_D(struct MCTStatStruc *pMCTstat,
65                                 struct DCTStatStruc *pDCTstat,
66                                 u32 *buffer);
67
68 static void StoreWrRdDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat,
69                                         struct DCTStatStruc *pDCTstat, u8 ChipSel,
70                                       u8 RnkDlyFilterMin, u8 RnkDlyFilterMax);
71
72 static void StoreDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 ChipSel);
73
74 #define DQS_TRAIN_DEBUG 0
75
76 static void print_debug_dqs(const char *str, u32 val, u8 level)
77 {
78 #if DQS_TRAIN_DEBUG > 0
79         if (DQS_TRAIN_DEBUG >= level) {
80                 printk(BIOS_DEBUG, "%s%x\n", str, val);
81         }
82 #endif
83 }
84
85 static void print_debug_dqs_pair(const char *str, u32 val, const char *str2, u32 val2, u8 level)
86 {
87 #if DQS_TRAIN_DEBUG > 0
88         if (DQS_TRAIN_DEBUG >= level) {
89                 printk(BIOS_DEBUG, "%s%08x%s%08x\n", str, val, str2, val2);
90         }
91 #endif
92 }
93
94 /*Warning:  These must be located so they do not cross a logical 16-bit segment boundary!*/
95 const static u32 TestPatternJD1a_D[] = {
96         0x00000000,0x00000000,0xFFFFFFFF,0xFFFFFFFF, /* QW0-1, ALL-EVEN */
97         0x00000000,0x00000000,0x00000000,0x00000000, /* QW2-3, ALL-EVEN */
98         0x00000000,0x00000000,0xFFFFFFFF,0xFFFFFFFF, /* QW4-5, ALL-EVEN */
99         0x00000000,0x00000000,0x00000000,0x00000000, /* QW6-7, ALL-EVEN */
100         0xFeFeFeFe,0xFeFeFeFe,0x01010101,0x01010101, /* QW0-1, DQ0-ODD */
101         0xFeFeFeFe,0xFeFeFeFe,0x01010101,0x01010101, /* QW2-3, DQ0-ODD */
102         0x01010101,0x01010101,0xFeFeFeFe,0xFeFeFeFe, /* QW4-5, DQ0-ODD */
103         0xFeFeFeFe,0xFeFeFeFe,0x01010101,0x01010101, /* QW6-7, DQ0-ODD */
104         0x02020202,0x02020202,0x02020202,0x02020202, /* QW0-1, DQ1-ODD */
105         0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, /* QW2-3, DQ1-ODD */
106         0xFdFdFdFd,0xFdFdFdFd,0x02020202,0x02020202, /* QW4-5, DQ1-ODD */
107         0x02020202,0x02020202,0x02020202,0x02020202, /* QW6-7, DQ1-ODD */
108         0x04040404,0x04040404,0xfBfBfBfB,0xfBfBfBfB, /* QW0-1, DQ2-ODD */
109         0x04040404,0x04040404,0x04040404,0x04040404, /* QW2-3, DQ2-ODD */
110         0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, /* QW4-5, DQ2-ODD */
111         0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, /* QW6-7, DQ2-ODD */
112         0x08080808,0x08080808,0xF7F7F7F7,0xF7F7F7F7, /* QW0-1, DQ3-ODD */
113         0x08080808,0x08080808,0x08080808,0x08080808, /* QW2-3, DQ3-ODD */
114         0xF7F7F7F7,0xF7F7F7F7,0x08080808,0x08080808, /* QW4-5, DQ3-ODD */
115         0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, /* QW6-7, DQ3-ODD */
116         0x10101010,0x10101010,0x10101010,0x10101010, /* QW0-1, DQ4-ODD */
117         0xeFeFeFeF,0xeFeFeFeF,0x10101010,0x10101010, /* QW2-3, DQ4-ODD */
118         0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, /* QW4-5, DQ4-ODD */
119         0xeFeFeFeF,0xeFeFeFeF,0x10101010,0x10101010, /* QW6-7, DQ4-ODD */
120         0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, /* QW0-1, DQ5-ODD */
121         0xdFdFdFdF,0xdFdFdFdF,0x20202020,0x20202020, /* QW2-3, DQ5-ODD */
122         0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, /* QW4-5, DQ5-ODD */
123         0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, /* QW6-7, DQ5-ODD */
124         0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, /* QW0-1, DQ6-ODD */
125         0x40404040,0x40404040,0xBfBfBfBf,0xBfBfBfBf, /* QW2-3, DQ6-ODD */
126         0x40404040,0x40404040,0xBfBfBfBf,0xBfBfBfBf, /* QW4-5, DQ6-ODD */
127         0x40404040,0x40404040,0xBfBfBfBf,0xBfBfBfBf, /* QW6-7, DQ6-ODD */
128         0x80808080,0x80808080,0x7F7F7F7F,0x7F7F7F7F, /* QW0-1, DQ7-ODD */
129         0x80808080,0x80808080,0x7F7F7F7F,0x7F7F7F7F, /* QW2-3, DQ7-ODD */
130         0x80808080,0x80808080,0x7F7F7F7F,0x7F7F7F7F, /* QW4-5, DQ7-ODD */
131         0x80808080,0x80808080,0x80808080,0x80808080  /* QW6-7, DQ7-ODD */
132 };
133 const static u32 TestPatternJD1b_D[] = {
134         0x00000000,0x00000000,0x00000000,0x00000000, /* QW0,CHA-B, ALL-EVEN */
135         0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, /* QW1,CHA-B, ALL-EVEN */
136         0x00000000,0x00000000,0x00000000,0x00000000, /* QW2,CHA-B, ALL-EVEN */
137         0x00000000,0x00000000,0x00000000,0x00000000, /* QW3,CHA-B, ALL-EVEN */
138         0x00000000,0x00000000,0x00000000,0x00000000, /* QW4,CHA-B, ALL-EVEN */
139         0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, /* QW5,CHA-B, ALL-EVEN */
140         0x00000000,0x00000000,0x00000000,0x00000000, /* QW6,CHA-B, ALL-EVEN */
141         0x00000000,0x00000000,0x00000000,0x00000000, /* QW7,CHA-B, ALL-EVEN */
142         0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, /* QW0,CHA-B, DQ0-ODD */
143         0x01010101,0x01010101,0x01010101,0x01010101, /* QW1,CHA-B, DQ0-ODD */
144         0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, /* QW2,CHA-B, DQ0-ODD */
145         0x01010101,0x01010101,0x01010101,0x01010101, /* QW3,CHA-B, DQ0-ODD */
146         0x01010101,0x01010101,0x01010101,0x01010101, /* QW4,CHA-B, DQ0-ODD */
147         0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, /* QW5,CHA-B, DQ0-ODD */
148         0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, /* QW6,CHA-B, DQ0-ODD */
149         0x01010101,0x01010101,0x01010101,0x01010101, /* QW7,CHA-B, DQ0-ODD */
150         0x02020202,0x02020202,0x02020202,0x02020202, /* QW0,CHA-B, DQ1-ODD */
151         0x02020202,0x02020202,0x02020202,0x02020202, /* QW1,CHA-B, DQ1-ODD */
152         0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, /* QW2,CHA-B, DQ1-ODD */
153         0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, /* QW3,CHA-B, DQ1-ODD */
154         0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, /* QW4,CHA-B, DQ1-ODD */
155         0x02020202,0x02020202,0x02020202,0x02020202, /* QW5,CHA-B, DQ1-ODD */
156         0x02020202,0x02020202,0x02020202,0x02020202, /* QW6,CHA-B, DQ1-ODD */
157         0x02020202,0x02020202,0x02020202,0x02020202, /* QW7,CHA-B, DQ1-ODD */
158         0x04040404,0x04040404,0x04040404,0x04040404, /* QW0,CHA-B, DQ2-ODD */
159         0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, /* QW1,CHA-B, DQ2-ODD */
160         0x04040404,0x04040404,0x04040404,0x04040404, /* QW2,CHA-B, DQ2-ODD */
161         0x04040404,0x04040404,0x04040404,0x04040404, /* QW3,CHA-B, DQ2-ODD */
162         0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, /* QW4,CHA-B, DQ2-ODD */
163         0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, /* QW5,CHA-B, DQ2-ODD */
164         0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, /* QW6,CHA-B, DQ2-ODD */
165         0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, /* QW7,CHA-B, DQ2-ODD */
166         0x08080808,0x08080808,0x08080808,0x08080808, /* QW0,CHA-B, DQ3-ODD */
167         0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, /* QW1,CHA-B, DQ3-ODD */
168         0x08080808,0x08080808,0x08080808,0x08080808, /* QW2,CHA-B, DQ3-ODD */
169         0x08080808,0x08080808,0x08080808,0x08080808, /* QW3,CHA-B, DQ3-ODD */
170         0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, /* QW4,CHA-B, DQ3-ODD */
171         0x08080808,0x08080808,0x08080808,0x08080808, /* QW5,CHA-B, DQ3-ODD */
172         0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, /* QW6,CHA-B, DQ3-ODD */
173         0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, /* QW7,CHA-B, DQ3-ODD */
174         0x10101010,0x10101010,0x10101010,0x10101010, /* QW0,CHA-B, DQ4-ODD */
175         0x10101010,0x10101010,0x10101010,0x10101010, /* QW1,CHA-B, DQ4-ODD */
176         0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, /* QW2,CHA-B, DQ4-ODD */
177         0x10101010,0x10101010,0x10101010,0x10101010, /* QW3,CHA-B, DQ4-ODD */
178         0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, /* QW4,CHA-B, DQ4-ODD */
179         0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, /* QW5,CHA-B, DQ4-ODD */
180         0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, /* QW6,CHA-B, DQ4-ODD */
181         0x10101010,0x10101010,0x10101010,0x10101010, /* QW7,CHA-B, DQ4-ODD */
182         0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, /* QW0,CHA-B, DQ5-ODD */
183         0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, /* QW1,CHA-B, DQ5-ODD */
184         0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, /* QW2,CHA-B, DQ5-ODD */
185         0x20202020,0x20202020,0x20202020,0x20202020, /* QW3,CHA-B, DQ5-ODD */
186         0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, /* QW4,CHA-B, DQ5-ODD */
187         0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, /* QW5,CHA-B, DQ5-ODD */
188         0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, /* QW6,CHA-B, DQ5-ODD */
189         0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, /* QW7,CHA-B, DQ5-ODD */
190         0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, /* QW0,CHA-B, DQ6-ODD */
191         0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, /* QW1,CHA-B, DQ6-ODD */
192         0x40404040,0x40404040,0x40404040,0x40404040, /* QW2,CHA-B, DQ6-ODD */
193         0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, /* QW3,CHA-B, DQ6-ODD */
194         0x40404040,0x40404040,0x40404040,0x40404040, /* QW4,CHA-B, DQ6-ODD */
195         0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, /* QW5,CHA-B, DQ6-ODD */
196         0x40404040,0x40404040,0x40404040,0x40404040, /* QW6,CHA-B, DQ6-ODD */
197         0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, /* QW7,CHA-B, DQ6-ODD */
198         0x80808080,0x80808080,0x80808080,0x80808080, /* QW0,CHA-B, DQ7-ODD */
199         0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F, /* QW1,CHA-B, DQ7-ODD */
200         0x80808080,0x80808080,0x80808080,0x80808080, /* QW2,CHA-B, DQ7-ODD */
201         0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F, /* QW3,CHA-B, DQ7-ODD */
202         0x80808080,0x80808080,0x80808080,0x80808080, /* QW4,CHA-B, DQ7-ODD */
203         0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F, /* QW5,CHA-B, DQ7-ODD */
204         0x80808080,0x80808080,0x80808080,0x80808080, /* QW6,CHA-B, DQ7-ODD */
205         0x80808080,0x80808080,0x80808080,0x80808080  /* QW7,CHA-B, DQ7-ODD */
206 };
207
208 void TrainReceiverEn_D(struct MCTStatStruc *pMCTstat,
209                         struct DCTStatStruc *pDCTstatA, u8 Pass)
210 {
211         u8 Node;
212         struct DCTStatStruc *pDCTstat;
213         u32 val;
214
215         for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
216                 pDCTstat = pDCTstatA + Node;
217
218                 if (pDCTstat->DCTSysLimit) {
219                         val = Get_NB32(pDCTstat->dev_dct, 0x78);
220                         val |= 1 <<DqsRcvEnTrain;
221                         Set_NB32(pDCTstat->dev_dct, 0x78, val);
222                         val = Get_NB32(pDCTstat->dev_dct, 0x78 + 0x100);
223                         val |= 1 <<DqsRcvEnTrain;
224                         Set_NB32(pDCTstat->dev_dct, 0x78 + 0x100, val);
225                         mct_TrainRcvrEn_D(pMCTstat, pDCTstat, Pass);
226                 }
227         }
228 }
229
230 static void SetEccDQSRdWrPos_D(struct MCTStatStruc *pMCTstat,
231                                 struct DCTStatStruc *pDCTstat, u8 ChipSel)
232 {
233         u8 channel;
234         u8 direction;
235
236         for (channel = 0; channel < 2; channel++){
237                 for (direction = 0; direction < 2; direction++) {
238                         pDCTstat->Channel = channel;    /* Channel A or B */
239                         pDCTstat->Direction = direction; /* Read or write */
240                         CalcEccDQSPos_D(pMCTstat, pDCTstat, pDCTstat->CH_EccDQSLike[channel], pDCTstat->CH_EccDQSScale[channel], ChipSel);
241                         print_debug_dqs_pair("\t\tSetEccDQSRdWrPos: channel ", channel, direction==DQS_READDIR? " R dqs_delay":" W dqs_delay",  pDCTstat->DQSDelay, 2);
242                         pDCTstat->ByteLane = 8;
243                         StoreDQSDatStrucVal_D(pMCTstat, pDCTstat, ChipSel);
244                         mct_SetDQSDelayCSR_D(pMCTstat, pDCTstat, ChipSel);
245                 }
246         }
247 }
248
249 static void CalcEccDQSPos_D(struct MCTStatStruc *pMCTstat,
250                                 struct DCTStatStruc *pDCTstat,
251                                 u16 like, u8 scale, u8 ChipSel)
252 {
253         u8 DQSDelay0, DQSDelay1;
254         u16 DQSDelay;
255
256         if (pDCTstat->Status & (1 << SB_Registered)) {
257                 return;
258         }
259
260         pDCTstat->ByteLane = like & 0xff;
261         GetDQSDatStrucVal_D(pMCTstat, pDCTstat, ChipSel);
262         DQSDelay0 = pDCTstat->DQSDelay;
263
264         pDCTstat->ByteLane = (like >> 8) & 0xff;
265         GetDQSDatStrucVal_D(pMCTstat, pDCTstat, ChipSel);
266         DQSDelay1 = pDCTstat->DQSDelay;
267
268         if (DQSDelay0>DQSDelay1) {
269                 DQSDelay = DQSDelay0 - DQSDelay1;
270         } else {
271                 DQSDelay = DQSDelay1 - DQSDelay0;
272         }
273
274         DQSDelay = DQSDelay * (~scale);
275
276         DQSDelay += 0x80;       /* round it */
277
278         DQSDelay >>= 8;         /* 256 */
279
280         if (DQSDelay0>DQSDelay1) {
281                 DQSDelay = DQSDelay1 - DQSDelay;
282         } else {
283                 DQSDelay += DQSDelay1;
284         }
285
286         pDCTstat->DQSDelay = (u8)DQSDelay;
287 }
288
289 static void TrainDQSRdWrPos_D(struct MCTStatStruc *pMCTstat,
290                                 struct DCTStatStruc *pDCTstat,
291                                 u8 cs_start)
292 {
293         u32 Errors;
294         u8 Channel, DQSWrDelay;
295         u8 _DisableDramECC = 0;
296         u32 PatternBuffer[292];
297         u8 _Wrap32Dis = 0, _SSE2 = 0;
298         u8 dqsWrDelay_end;
299
300         u32 addr;
301         u32 cr4;
302         u32 lo, hi;
303
304         print_debug_dqs("\nTrainDQSRdWrPos: Node_ID ", pDCTstat->Node_ID, 0);
305         cr4 = read_cr4();
306         if (cr4 & (1<<9)) {
307                 _SSE2 = 1;
308         }
309         cr4 |= (1<<9);          /* OSFXSR enable SSE2 */
310         write_cr4(cr4);
311
312         addr = HWCR;
313         _RDMSR(addr, &lo, &hi);
314         if (lo & (1<<17)) {
315                 _Wrap32Dis = 1;
316         }
317         lo |= (1<<17);          /* HWCR.wrap32dis */
318         _WRMSR(addr, lo, hi);   /* allow 64-bit memory references in real mode */
319
320         /* Disable ECC correction of reads on the dram bus. */
321         _DisableDramECC = mct_DisableDimmEccEn_D(pMCTstat, pDCTstat);
322
323         SetupDqsPattern_D(pMCTstat, pDCTstat, PatternBuffer);
324
325         /* mct_BeforeTrainDQSRdWrPos_D */
326         dqsWrDelay_end = 0x20;
327
328         Errors = 0;
329         for (Channel = 0; Channel < 2; Channel++) {
330                 print_debug_dqs("\tTrainDQSRdWrPos: 1 Channel ",Channel, 1);
331                 pDCTstat->Channel = Channel;
332
333                 if (pDCTstat->DIMMValidDCT[Channel] == 0)       /* mct_BeforeTrainDQSRdWrPos_D */
334                         continue;
335
336                 pDCTstat->DqsRdWrPos_Saved = 0;
337                 for ( DQSWrDelay = 0; DQSWrDelay < dqsWrDelay_end; DQSWrDelay++) {
338                         pDCTstat->DQSDelay = DQSWrDelay;
339                         pDCTstat->Direction = DQS_WRITEDIR;
340                         mct_SetDQSDelayAllCSR_D(pMCTstat, pDCTstat, cs_start);
341
342                         print_debug_dqs("\t\tTrainDQSRdWrPos: 21 DQSWrDelay ", DQSWrDelay, 2);
343                         TrainReadDQS_D(pMCTstat, pDCTstat, cs_start);
344                         print_debug_dqs("\t\tTrainDQSRdWrPos: 21 DqsRdWrPos_Saved ", pDCTstat->DqsRdWrPos_Saved, 2);
345                         if (pDCTstat->DqsRdWrPos_Saved == 0xFF)
346                                 break;
347
348                         print_debug_dqs("\t\tTrainDQSRdWrPos: 22 TrainErrors ",pDCTstat->TrainErrors, 2);
349                         if (pDCTstat->TrainErrors == 0) {
350                                         break;
351                         }
352                         Errors |= pDCTstat->TrainErrors;
353                 }
354
355                 pDCTstat->DqsRdWrPos_Saved = 0;
356                 if (DQSWrDelay < dqsWrDelay_end) {
357                         Errors = 0;
358
359                         print_debug_dqs("\tTrainDQSRdWrPos: 231 DQSWrDelay ", DQSWrDelay, 1);
360                         TrainWriteDQS_D(pMCTstat, pDCTstat, cs_start);
361                 }
362                 print_debug_dqs("\tTrainDQSRdWrPos: 232 Errors ", Errors, 1);
363                 pDCTstat->ErrStatus |= Errors;
364         }
365
366 #if DQS_TRAIN_DEBUG > 0
367         {
368                 u8 val;
369                 u8 i;
370                 u8 Channel, Receiver, Dir;
371                 u8 *p;
372
373                 for (Dir = 0; Dir < 2; Dir++) {
374                         if (Dir == 1) {
375                                 print_debug("TrainDQSRdWrPos: CH_D_DIR_B_DQS WR:\n");
376                         } else {
377                                 print_debug("TrainDQSRdWrPos: CH_D_DIR_B_DQS RD:\n");
378                         }
379                         for (Channel = 0; Channel < 2; Channel++) {
380                                 print_debug("Channel:"); print_debug_hex8(Channel); print_debug("\n");
381                                 for (Receiver = cs_start; Receiver < (cs_start + 2); Receiver += 2) {
382                                         print_debug("\t\tReceiver:"); print_debug_hex8(Receiver);
383                                         p = pDCTstat->CH_D_DIR_B_DQS[Channel][Receiver >> 1][Dir];
384                                         print_debug(": ");
385                                         for (i=0;i<8; i++) {
386                                                 val  = p[i];
387                                                 print_debug_hex8(val);
388                                                 print_debug(" ");
389                                         }
390                                         print_debug("\n");
391                                 }
392                         }
393                 }
394
395         }
396 #endif
397         if (_DisableDramECC) {
398                 mct_EnableDimmEccEn_D(pMCTstat, pDCTstat, _DisableDramECC);
399         }
400         if (!_Wrap32Dis) {
401                 addr = HWCR;
402                 _RDMSR(addr, &lo, &hi);
403                 lo &= ~(1<<17);         /* restore HWCR.wrap32dis */
404                 _WRMSR(addr, lo, hi);
405         }
406         if (!_SSE2){
407                 cr4 = read_cr4();
408                 cr4 &= ~(1<<9);         /* restore cr4.OSFXSR */
409                 write_cr4(cr4);
410         }
411
412         printk(BIOS_DEBUG, "TrainDQSRdWrPos: Status %x\n", pDCTstat->Status);
413         printk(BIOS_DEBUG, "TrainDQSRdWrPos: TrainErrors %x\n", pDCTstat->TrainErrors);
414         printk(BIOS_DEBUG, "TrainDQSRdWrPos: ErrStatus %x\n", pDCTstat->ErrStatus);
415         printk(BIOS_DEBUG, "TrainDQSRdWrPos: ErrCode %x\n", pDCTstat->ErrCode);
416         printk(BIOS_DEBUG, "TrainDQSRdWrPos: Done\n\n");
417 }
418
419 static void SetupDqsPattern_D(struct MCTStatStruc *pMCTstat,
420                                 struct DCTStatStruc *pDCTstat, u32 *buffer)
421 {
422         /* 1. Set the Pattern type (0 or 1) in DCTStatstruc.Pattern
423          * 2. Copy the pattern from ROM to Cache, aligning on 16 byte boundary
424          * 3. Set the ptr to Cacheable copy in DCTStatstruc.PtrPatternBufA
425          */
426
427         u32 *buf;
428         u16 i;
429
430         buf = (u32 *)(((u32)buffer + 0x10) & (0xfffffff0));
431         if (pDCTstat->Status & (1<<SB_128bitmode)) {
432                 pDCTstat->Pattern = 1;  /* 18 cache lines, alternating qwords */
433                 for (i=0; i<16*18; i++)
434                         buf[i] = TestPatternJD1b_D[i];
435         } else {
436                 pDCTstat->Pattern = 0;  /* 9 cache lines, sequential qwords */
437                 for (i=0; i<16*9; i++)
438                         buf[i] = TestPatternJD1a_D[i];
439         }
440         pDCTstat->PtrPatternBufA = (u32)buf;
441 }
442
443 static void TrainDQSPos_D(struct MCTStatStruc *pMCTstat,
444                                 struct DCTStatStruc *pDCTstat,
445                                 u8 cs_start)
446 {
447         u32 Errors;
448         u8 ChipSel, DQSDelay;
449         u8 RnkDlySeqPassMin=0, RnkDlySeqPassMax=0xFF, RnkDlyFilterMin=0, RnkDlyFilterMax=0xFF;
450         u8 RnkDlySeqPassMinTot=0, RnkDlySeqPassMaxTot=0xFF, RnkDlyFilterMinTot=0, RnkDlyFilterMaxTot=0xFF;
451         u8 LastTest ,LastTestTot;
452         u32 TestAddr;
453         u8 ByteLane;
454         u8 MutualCSPassW[128];
455         u8 BanksPresent;
456         u8 dqsDelay_end;
457         u8 tmp, valid, tmp1;
458         u16 word;
459
460         /* MutualCSPassW: each byte represents a bitmap of pass/fail per
461          * ByteLane.  The indext within MutualCSPassW is the delay value
462          * given the results.
463          */
464         print_debug_dqs("\t\t\tTrainDQSPos begin ", 0, 3);
465
466         Errors = 0;
467         BanksPresent = 0;
468
469         dqsDelay_end = 32;
470         /* Bitmapped status per delay setting, 0xff=All positions
471          * passing (1= PASS). Set the entire array.
472          */
473         for (DQSDelay=0; DQSDelay<128; DQSDelay++) {
474                 MutualCSPassW[DQSDelay] = 0xFF;
475         }
476
477         for (ChipSel = cs_start; ChipSel < (cs_start + 2); ChipSel++) { /* logical register chipselects 0..7 */
478                 print_debug_dqs("\t\t\t\tTrainDQSPos: 11 ChipSel ", ChipSel, 4);
479
480                 if (!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, pDCTstat->Channel, ChipSel)) {
481                         print_debug_dqs("\t\t\t\tmct_RcvrRankEnabled_D CS not enabled ", ChipSel, 4);
482                         continue;
483                 }
484
485                 BanksPresent = 1;       /* flag for atleast one bank is present */
486                 TestAddr = mct_GetMCTSysAddr_D(pMCTstat, pDCTstat, pDCTstat->Channel, ChipSel, &valid);
487                 if (!valid) {
488                         print_debug_dqs("\t\t\t\tAddress not supported on current CS ", TestAddr, 4);
489                         continue;
490                 }
491
492                 print_debug_dqs("\t\t\t\tTrainDQSPos: 12 TestAddr ", TestAddr, 4);
493                 SetUpperFSbase(TestAddr);       /* fs:eax=far ptr to target */
494
495                 if (pDCTstat->Direction==DQS_READDIR) {
496                         print_debug_dqs("\t\t\t\tTrainDQSPos: 13 for read ", 0, 4);
497                         WriteDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr<<8);
498                 }
499
500                 for (DQSDelay = 0; DQSDelay < dqsDelay_end; DQSDelay++) {
501                         print_debug_dqs("\t\t\t\t\tTrainDQSPos: 141 DQSDelay ", DQSDelay, 5);
502
503                         tmp = 0xFF;
504                         tmp1 = DQSDelay;
505                         if (pDCTstat->Direction == DQS_READDIR) {
506                                 tmp &= MutualCSPassW[DQSDelay];
507                                 tmp1 += dqsDelay_end;
508                         }
509                         tmp &= MutualCSPassW[tmp1];
510
511                         if (tmp == 0) {
512                                 continue;/* skip current delay value if other chipselects have failed all 8 bytelanes */
513                         }
514
515                         pDCTstat->DQSDelay = DQSDelay;
516                         mct_SetDQSDelayAllCSR_D(pMCTstat, pDCTstat, cs_start);
517                         print_debug_dqs("\t\t\t\t\tTrainDQSPos: 142 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);
518
519                         if (pDCTstat->Direction == DQS_WRITEDIR) {
520                                 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 143 for write", 0, 5);
521                                 WriteDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr<<8);
522                         }
523
524                         print_debug_dqs("\t\t\t\t\tTrainDQSPos: 144 Pattern ", pDCTstat->Pattern, 5);
525                         ReadDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr<<8);
526 /* print_debug_dqs("\t\t\t\t\tTrainDQSPos: 145 MutualCSPassW ", MutualCSPassW[DQSDelay], 5); */
527                         word = CompareDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8); /* 0=fail, 1=pass */
528                         print_debug_dqs("\t\t\t\t\tTrainDQSPos: 144 compare 1 ", word, 3);
529
530                         print_debug_dqs("\t\t\t\t\tTrainDQSPos: 144 DqsRdWrPos_Saved ", pDCTstat->DqsRdWrPos_Saved, 3);
531                         word &= ~(pDCTstat->DqsRdWrPos_Saved); /* mask out bytelanes that already passed */
532                         word &= ~(pDCTstat->DqsRdWrPos_Saved << 8);
533                         print_debug_dqs("\t\t\t\t\tTrainDQSPos: 144 compare 2 ", word, 3);
534
535                         tmp = DQSDelay;
536                         if (pDCTstat->Direction == DQS_READDIR) {
537                                 MutualCSPassW[tmp] &= word >> 8;
538                                 tmp += dqsDelay_end;
539                         }
540                         MutualCSPassW[tmp] &= word & 0xFF;
541
542                         print_debug_dqs("\t\t\t\t\tTrainDQSPos: 146 \tMutualCSPassW ", MutualCSPassW[DQSDelay], 5);
543
544                         SetTargetWTIO_D(TestAddr);
545                         FlushDQSTestPattern_D(pDCTstat, TestAddr<<8);
546                         ResetTargetWTIO_D();
547                 }
548
549         }
550
551         if (pDCTstat->Direction == DQS_READDIR) {
552                 dqsDelay_end <<= 1;
553         }
554
555         if (BanksPresent) {
556                 u8 mask_pass = 0;
557                 for (ByteLane = 0; ByteLane < 8; ByteLane++) {
558                         print_debug_dqs("\t\t\t\tTrainDQSPos: 31 ByteLane ",ByteLane, 4);
559                         if (!(pDCTstat->DqsRdWrPos_Saved &(1 << ByteLane))) {
560                                 pDCTstat->ByteLane = ByteLane;
561                                 LastTest = DQS_FAIL;            /* Analyze the results */
562                                 LastTestTot = DQS_FAIL;
563                                 /* RnkDlySeqPassMin = 0; */
564                                 /* RnkDlySeqPassMax = 0; */
565                                 RnkDlyFilterMax = 0;
566                                 RnkDlyFilterMin = 0;
567                                 RnkDlyFilterMaxTot = 0;
568                                 RnkDlyFilterMinTot = 0;
569                                 for (DQSDelay = 0; DQSDelay < dqsDelay_end; DQSDelay++) {
570                                         if (MutualCSPassW[DQSDelay] & (1 << ByteLane)) {
571                                                 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 321 DQSDelay ", DQSDelay, 5);
572                                                 print_debug_dqs("\t\t\t\t\tTrainDQSPos: 322 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);
573                                                 if (pDCTstat->Direction == DQS_READDIR)
574                                                         tmp = 0x20;
575                                                 else
576                                                         tmp = 0;
577                                                 if (DQSDelay >= tmp) {
578                                                         RnkDlySeqPassMax = DQSDelay;
579                                                         if (LastTest == DQS_FAIL) {
580                                                                 RnkDlySeqPassMin = DQSDelay; /* start sequential run */
581                                                         }
582                                                         if ((RnkDlySeqPassMax - RnkDlySeqPassMin)>(RnkDlyFilterMax-RnkDlyFilterMin)){
583                                                                 RnkDlyFilterMin = RnkDlySeqPassMin;
584                                                                 RnkDlyFilterMax = RnkDlySeqPassMax;
585                                                         }
586                                                         LastTest = DQS_PASS;
587                                                 }
588
589                                                 if (pDCTstat->Direction == DQS_READDIR) {
590                                                         RnkDlySeqPassMaxTot = DQSDelay;
591                                                         if (LastTestTot == DQS_FAIL)
592                                                                 RnkDlySeqPassMinTot = DQSDelay;
593                                                         if ((RnkDlySeqPassMaxTot - RnkDlySeqPassMinTot)>(RnkDlyFilterMaxTot-RnkDlyFilterMinTot)){
594                                                                 RnkDlyFilterMinTot = RnkDlySeqPassMinTot;
595                                                                 RnkDlyFilterMaxTot = RnkDlySeqPassMaxTot;
596                                                         }
597                                                         LastTestTot = DQS_PASS;
598                                                 }
599                                         } else {
600                                                 LastTest = DQS_FAIL;
601                                                 LastTestTot = DQS_FAIL;
602                                         }
603                                 }
604                                 print_debug_dqs("\t\t\t\tTrainDQSPos: 33 RnkDlySeqPassMax ", RnkDlySeqPassMax, 4);
605                                 if (RnkDlySeqPassMax == 0) {
606                                         Errors |= 1<<SB_NODQSPOS; /* no passing window */
607                                 } else {
608                                         print_debug_dqs_pair("\t\t\t\tTrainDQSPos: 34 RnkDlyFilter: ", RnkDlyFilterMin, " ",  RnkDlyFilterMax, 4);
609                                         if (((RnkDlyFilterMax - RnkDlyFilterMin) < MIN_DQS_WNDW)){
610                                                 Errors |= 1 << SB_SMALLDQS;
611                                         } else {
612                                                 u8 middle_dqs;
613                                                 /* mctEngDQSwindow_Save_D Not required for arrays */
614                                                 if (pDCTstat->Direction == DQS_READDIR)
615                                                         middle_dqs = MiddleDQS_D(RnkDlyFilterMinTot, RnkDlyFilterMaxTot);
616                                                 else
617                                                         middle_dqs = MiddleDQS_D(RnkDlyFilterMin, RnkDlyFilterMax);
618                                                 pDCTstat->DQSDelay = middle_dqs;
619                                                 mct_SetDQSDelayCSR_D(pMCTstat, pDCTstat, cs_start);  /* load the register with the value */
620                                                 if (pDCTstat->Direction == DQS_READDIR)
621                                                         StoreWrRdDQSDatStrucVal_D(pMCTstat, pDCTstat, cs_start, RnkDlyFilterMinTot, RnkDlyFilterMaxTot); /* store the value into the data structure */
622                                                 else
623                                                         StoreWrRdDQSDatStrucVal_D(pMCTstat, pDCTstat, cs_start, RnkDlyFilterMin, RnkDlyFilterMax); /* store the value into the data structure */
624                                                 print_debug_dqs("\t\t\t\tTrainDQSPos: 42 middle_dqs : ",middle_dqs, 4);
625                                                 pDCTstat->DqsRdWrPos_Saved |= 1 << ByteLane;
626                                         }
627                                 }
628                         } /* if (pDCTstat->DqsRdWrPos_Saved &(1 << ByteLane)) */
629                 }
630                 print_debug_dqs("\t\t\t\tTrainDQSPos: 41 mask_pass ",mask_pass, 3);
631         }
632 /* skipLocMiddle: */
633         pDCTstat->TrainErrors = Errors;
634
635         print_debug_dqs("\t\t\tTrainDQSPos: Errors ", Errors, 3);
636 }
637
638 static void mctEngDQSwindow_Save_D(struct MCTStatStruc *pMCTstat,
639                                         struct DCTStatStruc *pDCTstat, u8 ChipSel,
640                                         u8 RnkDlyFilterMin, u8 RnkDlyFilterMax)
641 {
642         pDCTstat->CH_D_DIR_MaxMin_B_Dly[pDCTstat->Channel]
643                 [pDCTstat->Direction]
644                 [0]
645                 [pDCTstat->ByteLane] = RnkDlyFilterMin;
646         pDCTstat->CH_D_DIR_MaxMin_B_Dly[pDCTstat->Channel]
647                 [pDCTstat->Direction]
648                 [1]
649                 [pDCTstat->ByteLane] = RnkDlyFilterMax;
650 }
651
652 static void StoreDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat,
653                                         struct DCTStatStruc *pDCTstat, u8 ChipSel)
654 {
655         /* Store the DQSDelay value, found during a training sweep, into the DCT
656          * status structure for this node
657          */
658
659         /* When 400, 533, 667, it will support dimm0/1/2/3,
660          * and set conf for dimm0, hw will copy to dimm1/2/3
661          * set for dimm1, hw will copy to dimm3
662          * Rev A/B only support DIMM0/1 when 800Mhz and above + 0x100 to next dimm
663          * Rev C support DIMM0/1/2/3 when 800Mhz and above  + 0x100 to next dimm
664          */
665
666         /* FindDQSDatDimmVal_D is not required since we use an array */
667         u8 dn = 0;
668
669         dn = ChipSel>>1; /* if odd or even logical DIMM */
670
671         pDCTstat->CH_D_DIR_B_DQS[pDCTstat->Channel][dn][pDCTstat->Direction][pDCTstat->ByteLane] =
672                                         pDCTstat->DQSDelay;
673 }
674
675 static void StoreWrRdDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat,
676                                         struct DCTStatStruc *pDCTstat, u8 ChipSel,
677                                         u8 RnkDlyFilterMin, u8 RnkDlyFilterMax)
678 {
679         u8 dn;
680
681         if (pDCTstat->Direction == DQS_WRITEDIR) {
682                 dn = ChipSel >> 1;
683                 RnkDlyFilterMin += pDCTstat->CH_D_B_TxDqs[pDCTstat->Channel][dn][pDCTstat->ByteLane];
684                 RnkDlyFilterMax += pDCTstat->CH_D_B_TxDqs[pDCTstat->Channel][dn][pDCTstat->ByteLane];
685                 pDCTstat->DQSDelay += pDCTstat->CH_D_B_TxDqs[pDCTstat->Channel][dn][pDCTstat->ByteLane];
686         } else {
687                 RnkDlyFilterMin <<= 1;
688                 RnkDlyFilterMax <<= 1;
689                 pDCTstat->DQSDelay <<= 1;
690         }
691         mctEngDQSwindow_Save_D(pMCTstat, pDCTstat, ChipSel, RnkDlyFilterMin, RnkDlyFilterMax);
692         StoreDQSDatStrucVal_D(pMCTstat, pDCTstat, ChipSel);
693 }
694
695 static void GetDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat,
696                                 struct DCTStatStruc *pDCTstat, u8 ChipSel)
697 {
698         u8 dn = 0;
699
700         /* When 400, 533, 667, it will support dimm0/1/2/3,
701          * and set conf for dimm0, hw will copy to dimm1/2/3
702          * set for dimm1, hw will copy to dimm3
703          * Rev A/B only support DIMM0/1 when 800Mhz and above + 0x100 to next dimm
704          * Rev C support DIMM0/1/2/3 when 800Mhz and above  + 0x100 to next dimm
705          */
706
707         /* FindDQSDatDimmVal_D is not required since we use an array */
708         dn = ChipSel >> 1; /*if odd or even logical DIMM */
709
710         pDCTstat->DQSDelay =
711                 pDCTstat->CH_D_DIR_B_DQS[pDCTstat->Channel][dn][pDCTstat->Direction][pDCTstat->ByteLane];
712 }
713
714 /* FindDQSDatDimmVal_D is not required since we use an array */
715
/* Return the midpoint of the window [min, max].
 * An odd window width is rounded up to the next even value before halving,
 * so the midpoint is biased towards max.
 * All arithmetic is done in u8 and wraps accordingly.
 */
static u8 MiddleDQS_D(u8 min, u8 max)
{
	u8 width = max - min;

	width += width & 1;	/* make the width even, rounding up */

	return min + (width >> 1);
}
724
725 static void TrainReadDQS_D(struct MCTStatStruc *pMCTstat,
726                                 struct DCTStatStruc *pDCTstat,
727                                 u8 cs_start)
728 {
729         print_debug_dqs("\t\tTrainReadPos ", 0, 2);
730         pDCTstat->Direction = DQS_READDIR;
731         TrainDQSPos_D(pMCTstat, pDCTstat, cs_start);
732 }
733
734 static void TrainWriteDQS_D(struct MCTStatStruc *pMCTstat,
735                                 struct DCTStatStruc *pDCTstat,
736                                 u8 cs_start)
737 {
738         pDCTstat->Direction = DQS_WRITEDIR;
739         print_debug_dqs("\t\tTrainWritePos", 0, 2);
740         TrainDQSPos_D(pMCTstat, pDCTstat, cs_start);
741 }
742
743 static void proc_IOCLFLUSH_D(u32 addr_hi)
744 {
745         SetTargetWTIO_D(addr_hi);
746         proc_CLFLUSH(addr_hi);
747         ResetTargetWTIO_D();
748 }
749
750 static u8 ChipSelPresent_D(struct MCTStatStruc *pMCTstat,
751                                 struct DCTStatStruc *pDCTstat,
752                                 u8 Channel, u8 ChipSel)
753 {
754         u32 val;
755         u32 reg;
756         u32 dev = pDCTstat->dev_dct;
757         u32 reg_off;
758         u8 ret = 0;
759
760         if (!pDCTstat->GangedMode) {
761                 reg_off = 0x100 * Channel;
762         } else {
763                 reg_off = 0;
764         }
765
766         if (ChipSel < MAX_CS_SUPPORTED){
767                 reg = 0x40 + (ChipSel << 2) + reg_off;
768                 val = Get_NB32(dev, reg);
769                 if (val & ( 1 << 0))
770                         ret = 1;
771         }
772
773         return ret;
774 }
775
776 /* proc_CLFLUSH_D located in mct_gcc.h */
777
778 static void WriteDQSTestPattern_D(struct MCTStatStruc *pMCTstat,
779                                         struct DCTStatStruc *pDCTstat,
780                                         u32 TestAddr_lo)
781 {
782         /* Write a pattern of 72 bit times (per DQ), to test dram functionality.
783          * The pattern is a stress pattern which exercises both ISI and
784          * crosstalk.  The number of cache lines to fill is dependent on DCT
785          * width mode and burstlength.
786          * Mode BL  Lines Pattern no.
787          * ----+---+-------------------
788          * 64   4         9     0
789          * 64   8         9     0
790          * 64M  4         9     0
791          * 64M  8         9     0
792          * 128  4         18    1
793          * 128  8         N/A   -
794          */
795         if (pDCTstat->Pattern == 0)
796                 WriteL9TestPattern_D(pDCTstat, TestAddr_lo);
797         else
798                 WriteL18TestPattern_D(pDCTstat, TestAddr_lo);
799 }
800
801 static void WriteL18TestPattern_D(struct DCTStatStruc *pDCTstat,
802                                         u32 TestAddr_lo)
803 {
804         u8 *buf;
805
806         buf = (u8 *)pDCTstat->PtrPatternBufA;
807         WriteLNTestPattern(TestAddr_lo, buf, 18);
808
809 }
810
811 static void WriteL9TestPattern_D(struct DCTStatStruc *pDCTstat,
812                                         u32 TestAddr_lo)
813 {
814         u8 *buf;
815
816         buf = (u8 *)pDCTstat->PtrPatternBufA;
817         WriteLNTestPattern(TestAddr_lo, buf, 9);
818 }
819
820 static u16 CompareDQSTestPattern_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u32 addr_lo)
821 {
822         /* Compare a pattern of 72 bit times (per DQ), to test dram functionality.
823          * The pattern is a stress pattern which exercises both ISI and
824          * crosstalk.  The number of cache lines to fill is dependent on DCT
825          * width mode and burstlength.
826          * Mode BL  Lines Pattern no.
827          * ----+---+-------------------
828          * 64   4         9     0
829          * 64   8         9     0
830          * 64M  4         9     0
831          * 64M  8         9     0
832          * 128  4         18    1
833          * 128  8         N/A   -
834          */
835
836         u32 *test_buf;
837         u16 MEn1Results, bitmap;
838         u8 bytelane;
839         u8 i;
840         u32 value;
841         u8 j;
842         u32 value_test;
843         u32 value_r, value_r_test;
844         u8 pattern, channel, BeatCnt;
845         struct DCTStatStruc *ptrAddr;
846
847         ptrAddr = pDCTstat;
848         pattern = pDCTstat->Pattern;
849         channel = pDCTstat->Channel;
850         test_buf = (u32 *)pDCTstat->PtrPatternBufA;
851
852         if (pattern && channel) {
853                 addr_lo += 8; /* second channel */
854                 test_buf+= 2;
855         }
856
857         bytelane = 0;
858         bitmap = 0xFFFF;
859         MEn1Results = 0xFFFF;
860         BeatCnt = 0;
861         for (i=0; i < (9 * 64 / 4); i++) { /* /4 due to next loop */
862                 value = read32_fs(addr_lo);
863                 value_test = *test_buf;
864
865                 print_debug_dqs_pair("\t\t\t\t\t\ttest_buf = ", (u32)test_buf, " value = ", value_test, 7);
866                 print_debug_dqs_pair("\t\t\t\t\t\ttaddr_lo = ", addr_lo, " value = ", value, 7);
867
868                 if (pDCTstat->Direction == DQS_READDIR) {
869                         if (BeatCnt != 0) {
870                                 value_r = *test_buf;
871                                 if (pattern)
872                                         value_r_test = read32_fs(addr_lo - 16);
873                                 else
874                                         value_r_test = read32_fs(addr_lo - 8);
875                         }
876                         print_debug_dqs_pair("\t\t\t\t\t\t\ttest_buf = ", (u32)test_buf, " value_r_test = ", value_r, 7);
877                         print_debug_dqs_pair("\t\t\t\t\t\t\ttaddr_lo = ", addr_lo, " value_r = ", value_r_test, 7);
878                 }
879
880                 for (j = 0; j < (4 * 8); j += 8) {
881                         if (((value >> j) & 0xff) != ((value_test >> j) & 0xff)) {
882                                 bitmap &= ~(1 << bytelane);
883                         }
884
885                         if (pDCTstat->Direction == DQS_READDIR) {
886                                 if (BeatCnt != 0) {
887                                         if  (((value_r >> j) & 0xff) != ((value_r_test >> j) & 0xff)) {
888                                                 MEn1Results &= ~(1 << bytelane);
889                                         }
890                                 }
891                         }
892                         bytelane++;
893                         bytelane &= 0x7;
894                 }
895
896                 print_debug_dqs("\t\t\t\t\t\tbitmap = ", bitmap, 7);
897                 print_debug_dqs("\t\t\t\t\t\tMEn1Results = ", MEn1Results, 7);
898
899                 if (!bitmap)
900                         break;
901
902                 if (bytelane == 0){
903                         BeatCnt += 4;
904                         if (!(pDCTstat->Status & (1 <<SB_128bitmode))) {
905                                 if (BeatCnt == 8) BeatCnt = 0; /* 8 beat burst */
906                         } else {
907                                 if (BeatCnt == 4) BeatCnt = 0; /* 4 beat burst */
908                         }
909                         if (pattern == 1) { /* dual channel */
910                                 addr_lo += 8; /* skip over other channel's data */
911                                 test_buf += 2;
912                         }
913                 }
914                 addr_lo += 4;
915                 test_buf += 1;
916         }
917
918         if (pDCTstat->Direction == DQS_READDIR) {
919                 bitmap &= 0xFF;
920                 bitmap |= MEn1Results << 8;
921         }
922
923         print_debug_dqs("\t\t\t\t\t\tbitmap = ", bitmap, 6);
924
925         return bitmap;
926 }
927
928 static void FlushDQSTestPattern_D(struct DCTStatStruc *pDCTstat,
929                                         u32 addr_lo)
930 {
931         /* Flush functions in mct_gcc.h */
932         if (pDCTstat->Pattern == 0){
933                 FlushDQSTestPattern_L9(addr_lo);
934         } else {
935                 FlushDQSTestPattern_L18(addr_lo);
936         }
937 }
938
939 static void SetTargetWTIO_D(u32 TestAddr)
940 {
941         u32 lo, hi;
942         hi = TestAddr >> 24;
943         lo = TestAddr << 8;
944         _WRMSR(0xC0010016, lo, hi);             /* IORR0 Base */
945         hi = 0xFF;
946         lo = 0xFC000800;                        /* 64MB Mask */
947         _WRMSR(0xC0010017, lo, hi);             /* IORR0 Mask */
948 }
949
950 static void ResetTargetWTIO_D(void)
951 {
952         u32 lo, hi;
953
954         hi = 0;
955         lo = 0;
956         _WRMSR(0xc0010017, lo, hi); /* IORR0 Mask */
957 }
958
959 static void ReadDQSTestPattern_D(struct MCTStatStruc *pMCTstat,
960                                 struct DCTStatStruc *pDCTstat,
961                                 u32 TestAddr_lo)
962 {
963         /* Read a pattern of 72 bit times (per DQ), to test dram functionality.
964          * The pattern is a stress pattern which exercises both ISI and
965          * crosstalk.  The number of cache lines to fill is dependent on DCT
966          * width mode and burstlength.
967          * Mode BL  Lines Pattern no.
968          * ----+---+-------------------
969          * 64   4         9     0
970          * 64   8         9     0
971          * 64M  4         9     0
972          * 64M  8         9     0
973          * 128  4         18    1
974          * 128  8         N/A   -
975          */
976         if (pDCTstat->Pattern == 0)
977                 ReadL9TestPattern(TestAddr_lo);
978         else
979                 ReadL18TestPattern(TestAddr_lo);
980         _MFENCE;
981 }
982
983 u32 SetUpperFSbase(u32 addr_hi)
984 {
985         /* Set the upper 32-bits of the Base address, 4GB aligned) for the
986          * FS selector.
987          */
988         u32 lo, hi;
989         u32 addr;
990         lo = 0;
991         hi = addr_hi>>24;
992         addr = FS_Base;
993         _WRMSR(addr, lo, hi);
994         return addr_hi<<8;
995 }
996
997 void ResetDCTWrPtr_D(u32 dev, u32 index_reg, u32 index)
998 {
999         u32 val;
1000
1001         val = Get_NB32_index_wait(dev, index_reg, index);
1002         Set_NB32_index_wait(dev, index_reg, index, val);
1003 }
1004
1005 /* mctEngDQSwindow_Save_D not required with arrays */
1006
1007 void mct_TrainDQSPos_D(struct MCTStatStruc *pMCTstat,
1008                         struct DCTStatStruc *pDCTstatA)
1009 {
1010         u8 Node;
1011         u8 ChipSel;
1012         struct DCTStatStruc *pDCTstat;
1013
1014         for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
1015                 pDCTstat = pDCTstatA + Node;
1016                 if (pDCTstat->DCTSysLimit) {
1017                         for (ChipSel = 0; ChipSel < MAX_CS_SUPPORTED; ChipSel += 2) {
1018                                 TrainDQSRdWrPos_D(pMCTstat, pDCTstat, ChipSel);
1019                                 SetEccDQSRdWrPos_D(pMCTstat, pDCTstat, ChipSel);
1020                         }
1021                 }
1022         }
1023 }
1024
1025 /* mct_BeforeTrainDQSRdWrPos_D
1026  * Function is inline.
1027  */
1028 u8 mct_DisableDimmEccEn_D(struct MCTStatStruc *pMCTstat,
1029                                 struct DCTStatStruc *pDCTstat)
1030 {
1031         u8 _DisableDramECC = 0;
1032         u32 val;
1033         u32 reg;
1034         u32 dev;
1035
1036         /*Disable ECC correction of reads on the dram bus. */
1037
1038         dev = pDCTstat->dev_dct;
1039         reg = 0x90;
1040         val = Get_NB32(dev, reg);
1041         if (val & (1<<DimmEcEn)) {
1042                 _DisableDramECC |= 0x01;
1043                 val &= ~(1<<DimmEcEn);
1044                 Set_NB32(dev, reg, val);
1045         }
1046         if (!pDCTstat->GangedMode) {
1047                 reg = 0x190;
1048                 val = Get_NB32(dev, reg);
1049                 if (val & (1<<DimmEcEn)) {
1050                         _DisableDramECC |= 0x02;
1051                         val &= ~(1<<DimmEcEn);
1052                         Set_NB32(dev, reg, val);
1053                 }
1054         }
1055         return _DisableDramECC;
1056 }
1057
1058 void mct_EnableDimmEccEn_D(struct MCTStatStruc *pMCTstat,
1059                                 struct DCTStatStruc *pDCTstat, u8 _DisableDramECC)
1060 {
1061         u32 val;
1062         u32 reg;
1063         u32 dev;
1064
1065         /* Enable ECC correction if it was previously disabled */
1066
1067         dev = pDCTstat->dev_dct;
1068
1069         if ((_DisableDramECC & 0x01) == 0x01) {
1070                 reg = 0x90;
1071                 val = Get_NB32(dev, reg);
1072                 val |= (1<<DimmEcEn);
1073                 Set_NB32(dev, reg, val);
1074         }
1075         if ((_DisableDramECC & 0x02) == 0x02) {
1076                 reg = 0x190;
1077                 val = Get_NB32(dev, reg);
1078                 val |= (1<<DimmEcEn);
1079                 Set_NB32(dev, reg, val);
1080         }
1081 }
1082
1083 static void mct_SetDQSDelayCSR_D(struct MCTStatStruc *pMCTstat,
1084                                         struct DCTStatStruc *pDCTstat, u8 ChipSel)
1085 {
1086         u8 ByteLane;
1087         u32 val;
1088         u32 index_reg = 0x98 + 0x100 * pDCTstat->Channel;
1089         u8 shift;
1090         u32 dqs_delay = (u32)pDCTstat->DQSDelay;
1091         u32 dev = pDCTstat->dev_dct;
1092         u32 index;
1093
1094         ByteLane = pDCTstat->ByteLane;
1095
1096         if (!(pDCTstat->DqsRdWrPos_Saved & (1 << ByteLane))) {
1097                 /* Channel is offset */
1098                 if (ByteLane < 4) {
1099                         index = 1;
1100                 } else if (ByteLane <8) {
1101                         index = 2;
1102                 } else {
1103                         index = 3;
1104                 }
1105
1106                 if (pDCTstat->Direction == DQS_READDIR) {
1107                         index += 4;
1108                 }
1109
1110                 /* get the proper register index */
1111                 shift = ByteLane%4;
1112                 shift <<= 3; /* get bit position of bytelane, 8 bit */
1113
1114                 index += (ChipSel>>1) << 8;
1115
1116                 val = Get_NB32_index_wait(dev, index_reg, index);
1117                 if (ByteLane < 8) {
1118                         if (pDCTstat->Direction == DQS_WRITEDIR) {
1119                                 dqs_delay += pDCTstat->CH_D_B_TxDqs[pDCTstat->Channel][ChipSel>>1][ByteLane];
1120                         } else {
1121                                 dqs_delay <<= 1;
1122                         }
1123                 }
1124                 val &= ~(0x7f << shift);
1125                 val |= (dqs_delay << shift);
1126                 Set_NB32_index_wait(dev, index_reg, index, val);
1127         }
1128 }
1129
1130 static void mct_SetDQSDelayAllCSR_D(struct MCTStatStruc *pMCTstat,
1131                                         struct DCTStatStruc *pDCTstat,
1132                                         u8 cs_start)
1133 {
1134         u8 ByteLane;
1135         u8 ChipSel = cs_start;
1136
1137         for (ChipSel = cs_start; ChipSel < (cs_start + 2); ChipSel++) {
1138                 if ( mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, pDCTstat->Channel, ChipSel)) {
1139                         for (ByteLane = 0; ByteLane < 8; ByteLane++) {
1140                                 pDCTstat->ByteLane = ByteLane;
1141                                 mct_SetDQSDelayCSR_D(pMCTstat, pDCTstat, ChipSel);
1142                         }
1143                 }
1144         }
1145 }
1146
1147 u8 mct_RcvrRankEnabled_D(struct MCTStatStruc *pMCTstat,
1148                                 struct DCTStatStruc *pDCTstat,
1149                                 u8 Channel, u8 ChipSel)
1150 {
1151         u8 ret;
1152
1153         ret = ChipSelPresent_D(pMCTstat, pDCTstat, Channel, ChipSel);
1154         return ret;
1155 }
1156
1157 u32 mct_GetRcvrSysAddr_D(struct MCTStatStruc *pMCTstat,
1158                                 struct DCTStatStruc *pDCTstat,
1159                                 u8 channel, u8 receiver, u8 *valid)
1160 {
1161         return mct_GetMCTSysAddr_D(pMCTstat, pDCTstat, channel, receiver, valid);
1162 }
1163
1164 u32 mct_GetMCTSysAddr_D(struct MCTStatStruc *pMCTstat,
1165                                 struct DCTStatStruc *pDCTstat,
1166                                 u8 Channel, u8 receiver, u8 *valid)
1167 {
1168         u32 val;
1169         u32 reg_off = 0;
1170         u32 reg;
1171         u32 dword;
1172         u32 dev = pDCTstat->dev_dct;
1173
1174         *valid = 0;
1175
1176
1177         if (!pDCTstat->GangedMode)  {   /* FIXME: not used. */
1178                 reg_off = 0x100 * Channel;
1179         }
1180
1181         /* get the local base addr of the chipselect */
1182         reg = 0x40 + (receiver << 2);
1183         val = Get_NB32(dev, reg);
1184
1185         val &= ~0x0F;
1186
1187         /* unganged mode DCT0+DCT1, sys addr of DCT1=node
1188          * base+DctSelBaseAddr+local ca base*/
1189         if ((Channel) && (pDCTstat->GangedMode == 0) && ( pDCTstat->DIMMValidDCT[0] > 0)) {
1190                 reg = 0x110;
1191                 dword = Get_NB32(dev, reg);
1192                 dword &= 0xfffff800;
1193                 dword <<= 8;    /* scale [47:27] of F2x110[31:11] to [39:8]*/
1194                 val += dword;
1195
1196                 /* if DCTSelBaseAddr < Hole, and eax > HoleBase, then add Hole size to test address */
1197                 if ((val >= pDCTstat->DCTHoleBase) && (pDCTstat->DCTHoleBase > dword)) {
1198                         dword = (~(pDCTstat->DCTHoleBase >> (24 - 8)) + 1) & 0xFF;
1199                         dword <<= (24 - 8);
1200                         val += dword;
1201                 }
1202         } else {
1203                 /* sys addr=node base+local cs base */
1204                 val += pDCTstat->DCTSysBase;
1205
1206                 /* New stuff */
1207                 if (pDCTstat->DCTHoleBase && (val >= pDCTstat->DCTHoleBase)) {
1208                         val -= pDCTstat->DCTSysBase;
1209                         dword = Get_NB32(pDCTstat->dev_map, 0xF0); /* get Hole Offset */
1210                         val += (dword & 0x0000ff00) << (24-8-8);
1211                 }
1212         }
1213
1214         /* New stuff */
1215         val += ((1 << 21) >> 8);        /* Add 2MB offset to avoid compat area */
1216         if (val >= MCT_TRNG_KEEPOUT_START) {
1217                 while(val < MCT_TRNG_KEEPOUT_END)
1218                         val += (1 << (15-8));   /* add 32K */
1219         }
1220
1221         /* Add a node seed */
1222         val += (((1 * pDCTstat->Node_ID) << 20) >> 8);  /* Add 1MB per node to avoid aliases */
1223
1224         /* HW remap disabled? */
1225         if (!(pDCTstat->Status & (1 << SB_HWHole))) {
1226                 if (!(pDCTstat->Status & (1 << SB_SWNodeHole))) {
1227                         /* SW memhole disabled */
1228                         u32 lo, hi;
1229                         _RDMSR(TOP_MEM, &lo, &hi);
1230                         lo >>= 8;
1231                         if ((val >= lo) && (val < _4GB_RJ8)) {
1232                                 val = 0;
1233                                 *valid = 0;
1234                                 goto exitGetAddr;
1235                         } else {
1236                                 *valid = 1;
1237                                 goto exitGetAddrWNoError;
1238                         }
1239                 } else {
1240                         *valid = 1;
1241                         goto exitGetAddrWNoError;
1242                 }
1243         } else {
1244                 *valid = 1;
1245                 goto exitGetAddrWNoError;
1246         }
1247
1248 exitGetAddrWNoError:
1249
1250         /* Skip if Address is in UMA region */
1251         dword = pMCTstat->Sub4GCacheTop;
1252         dword >>= 8;
1253         if (dword != 0) {
1254                 if ((val >= dword) && (val < _4GB_RJ8)) {
1255                         val = 0;
1256                         *valid = 0;
1257                 } else {
1258                         *valid = 1;
1259                 }
1260         }
1261         print_debug_dqs("mct_GetMCTSysAddr_D: receiver ", receiver, 2);
1262         print_debug_dqs("mct_GetMCTSysAddr_D: Channel ", Channel, 2);
1263         print_debug_dqs("mct_GetMCTSysAddr_D: base_addr ", val, 2);
1264         print_debug_dqs("mct_GetMCTSysAddr_D: valid ", *valid, 2);
1265         print_debug_dqs("mct_GetMCTSysAddr_D: status ", pDCTstat->Status, 2);
1266         print_debug_dqs("mct_GetMCTSysAddr_D: HoleBase ", pDCTstat->DCTHoleBase, 2);
1267         print_debug_dqs("mct_GetMCTSysAddr_D: Cachetop ", pMCTstat->Sub4GCacheTop, 2);
1268
1269 exitGetAddr:
1270         return val;
1271 }
1272
1273 static void mct_Write1LTestPattern_D(struct MCTStatStruc *pMCTstat,
1274                                 struct DCTStatStruc *pDCTstat,
1275                                 u32 TestAddr, u8 pattern)
1276 {
1277
1278         u8 *buf;
1279
1280         /* Issue the stream of writes. When F2x11C[MctWrLimit] is reached
1281          * (or when F2x11C[FlushWr] is set again), all the writes are written
1282          * to DRAM.
1283          */
1284
1285         SetUpperFSbase(TestAddr);
1286
1287         if (pattern)
1288                 buf = (u8 *)pDCTstat->PtrPatternBufB;
1289         else
1290                 buf = (u8 *)pDCTstat->PtrPatternBufA;
1291
1292         WriteLNTestPattern(TestAddr << 8, buf, 1);
1293 }
1294
1295 void mct_Read1LTestPattern_D(struct MCTStatStruc *pMCTstat,
1296                                 struct DCTStatStruc *pDCTstat, u32 addr)
1297 {
1298         u32 value;
1299
1300         /* BIOS issues the remaining (Ntrain - 2) reads after checking that
1301          * F2x11C[PrefDramTrainMode] is cleared. These reads must be to
1302          * consecutive cache lines (i.e., 64 bytes apart) and must not cross
1303          * a naturally aligned 4KB boundary. These reads hit the prefetches and
1304          * read the data from the prefetch buffer.
1305          */
1306
1307         /* get data from DIMM */
1308         SetUpperFSbase(addr);
1309
1310         /* 1st move causes read fill (to exclusive or shared)*/
1311         value = read32_fs(addr<<8);
1312 }