1 /* coherent hypertransport initialization for AMD64
3 * written by Stefan Reinauer <stepan@openbios.org>
4 * (c) 2003-2004 by SuSE Linux AG
6 * This code is licensed under GPL.
10 * This algorithm assumes a grid configuration as follows:
13 * org. : 1x1 2x1 2x2 2x3 2x4
17 #include <device/pci_def.h>
18 #include <device/pci_ids.h>
19 #include <device/hypertransport_def.h>
20 #include "arch/romcc_io.h"
24 * Until we have a completely dynamic setup we want
25 * to be able to map different cpu graphs.
33 * set some default values. These are used if they are not
34 * differently defined in the motherboard's auto.c file.
35 * See src/mainboard/amd/quartet/auto.c for an example.
/* Default link used between each pair of nodes.  Each value is only
 * defined here when nothing defined it earlier, so a mainboard can
 * override any of them before this point.
 * NOTE(review): the closing #endif of each conditional and the
 * definitions of ACROSS and UP are not visible in this listing;
 * confirm against the pristine file.
 */
38 #ifndef CONNECTION_0_1
39 #define CONNECTION_0_1 ACROSS
42 #ifndef CONNECTION_0_2
43 #define CONNECTION_0_2 UP
46 #ifndef CONNECTION_1_3
47 #define CONNECTION_1_3 UP
/* When generating a temporary row configuration we
 * don't want broadcast to be enabled for that node:
 * bits 19:16 of the generated row are replaced by 0x1.
 */
#define generate_temp_row(x...) ((generate_row(x) & (~0x0f0000)) | 0x010000)

/* Put the default entry back into row 7 (the temporary row) of node x. */
#define clear_temp_row(x)	fill_row((x), 7, DEFAULT)

/* Routing for the BSP is routing on node 0. */
#define enable_bsp_routing()	enable_routing(0)

/* PCI devices of node x: PCI_DEV(0, 24 + x, n) with n = 0, 1 and 3.
 * The argument is parenthesized so that expressions such as
 * NODE_HT(a & b) address the intended device.
 */
#define NODE_HT(x) PCI_DEV(0, 24 + (x), 0)
#define NODE_MP(x) PCI_DEV(0, 24 + (x), 1)
#define NODE_MC(x) PCI_DEV(0, 24 + (x), 3)

#define DEFAULT 0x00010101	/* default row entry */
71 static void disable_probes(void)
73 /* disable read/write/fill probes for uniprocessor setup
74 * they don't make sense if only one cpu is available
77 /* Hypetransport Transaction Control Register
79 * [ 0: 0] Disable read byte probe
81 * 1 = Probes not issued
82 * [ 1: 1] Disable Read Doubleword probe
84 * 1 = Probes not issued
85 * [ 2: 2] Disable write byte probes
87 * 1 = Probes not issued
88 * [ 3: 3] Disable Write Doubleword Probes
90 * 1 = Probes not issued.
91 * [10:10] Disable Fill Probe
92 * 0 = Probes issued for cache fills
93 * 1 = Probes not issued for cache fills.
98 print_spew("Disabling read/write/fill probes for UP... ");
100 val=pci_read_config32(NODE_HT(0), 0x68);
101 val |= (1<<10)|(1<<9)|(1<<8)|(1<<4)|(1<<3)|(1<<2)|(1<<1)|(1 << 0);
102 pci_write_config32(NODE_HT(0), 0x68, val);
104 print_spew("done.\r\n");
108 static void enable_routing(u8 node)
112 /* HT Initialization Control Register
114 * [ 0: 0] Routing Table Disable
115 * 0 = Packets are routed according to routing tables
116 * 1 = Packets are routed according to the default link field
117 * [ 1: 1] Request Disable (BSP should clear this)
118 * 0 = Request packets may be generated
119 * 1 = Request packets may not be generated.
120 * [ 3: 2] Default Link (Read-only)
124 * 11 = CPU on same node
126 * - Scratch bit cleared by a cold reset
127 * [ 5: 5] BIOS Reset Detect
128 * - Scratch bit cleared by a cold reset
129 * [ 6: 6] INIT Detect
130 * - Scratch bit cleared by a warm or cold reset not by an INIT
134 /* Enable routing table */
135 print_spew("Enabling routing table for node ");
136 print_spew_hex8(node);
138 val=pci_read_config32(NODE_HT(node), 0x6c);
139 val &= ~((1<<1)|(1<<0));
140 pci_write_config32(NODE_HT(node), 0x6c, val);
142 print_spew(" done.\r\n");
145 #if CONFIG_MAX_CPUS > 1
147 static void rename_temp_node(u8 node)
151 print_spew("Renaming current temporary node to ");
152 print_spew_hex8(node);
154 val=pci_read_config32(NODE_HT(7), 0x60);
155 val &= (~7); /* clear low bits. */
156 val |= node; /* new node */
157 pci_write_config32(NODE_HT(7), 0x60, val);
159 print_spew(" done.\r\n");
162 static bool check_connection(u8 src, u8 dest, u8 link)
164 /* See if we have a valid connection to dest */
167 /* Detect if the coherent HT link is connected. */
168 val = pci_read_config32(NODE_HT(src), 0x98+link);
169 if ( (val&0x17) != 0x03)
172 /* Verify that the coherent hypertransport link is
173 * established and actually working by reading the
174 * remode node's vendor/device id
176 val = pci_read_config32(NODE_HT(dest),0);
177 if(val != 0x11001022)
183 static unsigned read_freq_cap(device_t dev, unsigned pos)
185 /* Handle bugs in valid hypertransport frequency reporting */
189 freq_cap = pci_read_config16(dev, pos);
190 freq_cap &= ~(1 << HT_FREQ_VENDOR); /* Ignore Vendor HT frequencies */
192 id = pci_read_config32(dev, 0);
194 /* AMD 8131 Errata 48 */
195 if (id == (PCI_VENDOR_ID_AMD | (PCI_DEVICE_ID_AMD_8131_PCIX << 16))) {
196 freq_cap &= ~(1 << HT_FREQ_800Mhz);
198 /* AMD 8151 Errata 23 */
199 if (id == (PCI_VENDOR_ID_AMD | (PCI_DEVICE_ID_AMD_8151_SYSCTRL << 16))) {
200 freq_cap &= ~(1 << HT_FREQ_800Mhz);
202 /* AMD K8 Unsupported 1Ghz? */
203 if (id == (PCI_VENDOR_ID_AMD | (0x1100 << 16))) {
204 freq_cap &= ~(1 << HT_FREQ_1000Mhz);
209 static int optimize_connection(device_t node1, uint8_t link1, device_t node2, uint8_t link2)
211 static const uint8_t link_width_to_pow2[]= { 3, 4, 0, 5, 1, 2, 0, 0 };
212 static const uint8_t pow2_to_link_width[] = { 0x7, 4, 5, 0, 1, 3 };
213 uint16_t freq_cap1, freq_cap2, freq_cap, freq_mask;
214 uint8_t width_cap1, width_cap2, width_cap, width, old_width, ln_width1, ln_width2;
215 uint8_t freq, old_freq;
217 /* Set link width and frequency */
219 /* Initially assume everything is already optimized and I don't need a reset */
222 /* Get the frequency capabilities */
223 freq_cap1 = read_freq_cap(node1, link1 + PCI_HT_CAP_HOST_FREQ_CAP);
224 freq_cap2 = read_freq_cap(node2, link2 + PCI_HT_CAP_HOST_FREQ_CAP);
226 /* Calculate the highest possible frequency */
227 freq = log2(freq_cap1 & freq_cap2);
229 /* See if I am changing the link freqency */
230 old_freq = pci_read_config8(node1, link1 + PCI_HT_CAP_HOST_FREQ);
231 needs_reset |= old_freq != freq;
232 old_freq = pci_read_config8(node2, link2 + PCI_HT_CAP_HOST_FREQ);
233 needs_reset |= old_freq != freq;
235 /* Set the Calulcated link frequency */
236 pci_write_config8(node1, link1 + PCI_HT_CAP_HOST_FREQ, freq);
237 pci_write_config8(node2, link2 + PCI_HT_CAP_HOST_FREQ, freq);
239 /* Get the width capabilities */
240 width_cap1 = pci_read_config8(node1, link1 + PCI_HT_CAP_HOST_WIDTH);
241 width_cap2 = pci_read_config8(node2, link2 + PCI_HT_CAP_HOST_WIDTH);
243 /* Calculate node1's input width */
244 ln_width1 = link_width_to_pow2[width_cap1 & 7];
245 ln_width2 = link_width_to_pow2[(width_cap2 >> 4) & 7];
246 if (ln_width1 > ln_width2) {
247 ln_width1 = ln_width2;
249 width = pow2_to_link_width[ln_width1];
250 /* Calculate node1's output width */
251 ln_width1 = link_width_to_pow2[(width_cap1 >> 4) & 7];
252 ln_width2 = link_width_to_pow2[width_cap2 & 7];
253 if (ln_width1 > ln_width2) {
254 ln_width1 = ln_width2;
256 width |= pow2_to_link_width[ln_width1] << 4;
258 /* See if I am changing node1's width */
259 old_width = pci_read_config8(node1, link1 + PCI_HT_CAP_HOST_WIDTH + 1);
260 needs_reset |= old_width != width;
262 /* Set node1's widths */
263 pci_write_config8(node1, link1 + PCI_HT_CAP_HOST_WIDTH + 1, width);
265 /* Calculate node2's width */
266 width = ((width & 0x70) >> 4) | ((width & 0x7) << 4);
268 /* See if I am changing node2's width */
269 old_width = pci_read_config8(node2, link2 + PCI_HT_CAP_HOST_WIDTH + 1);
270 needs_reset |= old_width != width;
272 /* Set node2's widths */
273 pci_write_config8(node2, link2 + PCI_HT_CAP_HOST_WIDTH + 1, width);
278 static void fill_row(u8 node, u8 row, u32 value)
280 pci_write_config32(NODE_HT(node), 0x40+(row<<2), value);
283 static void setup_row(u8 source, u8 dest, u8 cpus)
285 fill_row(source,dest,generate_row(source,dest,cpus));
288 static void setup_temp_row(u8 source, u8 dest, u8 cpus)
290 fill_row(source,7,generate_temp_row(source,dest,cpus));
293 static void setup_node(u8 node, u8 cpus)
296 for(row=0; row<cpus; row++)
297 setup_row(node, row, cpus);
300 static void setup_remote_row(u8 source, u8 dest, u8 cpus)
302 fill_row(7, dest, generate_row(source, dest, cpus));
305 static void setup_remote_node(u8 node, u8 cpus)
307 static const uint8_t pci_reg[] = {
308 0x44, 0x4c, 0x54, 0x5c, 0x64, 0x6c, 0x74, 0x7c,
309 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78,
310 0x84, 0x8c, 0x94, 0x9c, 0xa4, 0xac, 0xb4, 0xbc,
311 0x80, 0x88, 0x90, 0x98, 0xa0, 0xa8, 0xb0, 0xb8,
312 0xc4, 0xcc, 0xd4, 0xdc,
313 0xc0, 0xc8, 0xd0, 0xd8,
314 0xe0, 0xe4, 0xe8, 0xec,
319 print_spew("setup_remote_node: ");
320 for(row=0; row<cpus; row++)
321 setup_remote_row(node, row, cpus);
323 /* copy the default resource map from node 0 */
324 for(i = 0; i < sizeof(pci_reg)/sizeof(pci_reg[0]); i++) {
328 value = pci_read_config32(NODE_MP(0), reg);
329 pci_write_config32(NODE_MP(7), reg, value);
332 print_spew("done\r\n");
337 #if CONFIG_MAX_CPUS > 2
338 static void setup_temp_node(u8 node, u8 cpus)
341 for(row=0; row<cpus; row++)
342 fill_row(7,row,generate_row(node,row,cpus));
/* Bring the system up in uniprocessor mode.
 * The only statement visible here logs "Enabling UP settings".
 * NOTE(review): the listing jumps from 348 to 352, so a statement may
 * be missing after the print; disable_probes() has no other visible
 * caller in this file and is presumably invoked here -- confirm.
 */
346 static void setup_uniprocessor(void)
348 print_spew("Enabling UP settings\r\n");
/* Result of the SMP setup.  Code in this file reads and writes the
 * members `cpus` and `needs_reset`.
 * NOTE(review): the member declarations are not visible in this
 * listing; confirm their types against the pristine file.
 */
352 struct setup_smp_result {
357 #if CONFIG_MAX_CPUS > 1
/* Discover and configure the other nodes, starting from node 0.
 *
 * Each candidate node is reached through a temporary route in row 7
 * and probed with check_connection() under node number 7.  When
 * node 1 answers, node 0 and the remote node are set up, the remote
 * node is renamed from 7 to 1 and routing is enabled on it.  With
 * CONFIG_MAX_CPUS > 2 the same probing is done for node 2 (directly
 * from node 0) and node 3 (via node 1).
 *
 * NOTE(review): this listing has gaps (for example 373->378,
 * 396->401, 412->416, 424->428 and 432->440), so the assignments to
 * result.cpus, the return statements and possibly further calls are
 * not visible here.
 * NOTE(review): on listing line 380 the value returned by
 * optimize_connection() is discarded although that function reports
 * whether a reset is needed; consider folding it into
 * result.needs_reset.
 * NOTE(review): listing line 380 uses 0x80 + CONNECTION_0_1 for both
 * ends of the link; confirm the remote node really uses the same link.
 */
358 static struct setup_smp_result setup_smp(void)
360 struct setup_smp_result result;
362 result.needs_reset = 0;
364 print_spew("Enabling SMP settings\r\n");
366 setup_row(0, 0, result.cpus);
367 /* Setup and check a temporary connection to node 1 */
368 setup_temp_row(0, 1, result.cpus);
370 if (!check_connection(0, 7, CONNECTION_0_1)) {
371 print_debug("No connection to Node 1.\r\n");
372 clear_temp_row(0); /* delete temp connection */
373 setup_uniprocessor(); /* and get up working */
378 /* We found 2 nodes so far */
380 optimize_connection(NODE_HT(0), 0x80 + CONNECTION_0_1, NODE_HT(7), 0x80 + CONNECTION_0_1);
381 setup_node(0, result.cpus); /* Node 1 is there. Setup Node 0 correctly */
382 setup_remote_node(1, result.cpus); /* Setup the routes on the remote node */
383 rename_temp_node(1); /* Rename Node 7 to Node 1 */
384 enable_routing(1); /* Enable routing on Node 1 */
386 clear_temp_row(0); /* delete temporary connection */
388 #if CONFIG_MAX_CPUS > 2
391 /* Setup and check temporary connection from Node 0 to Node 2 */
392 setup_temp_row(0,2, result.cpus);
394 if (!check_connection(0, 7, CONNECTION_0_2)) {
395 print_debug("No connection to Node 2.\r\n");
396 clear_temp_row(0); /* delete temp connection */
401 /* We found 3 nodes so far. Now setup a temporary
402 * connection from node 0 to node 3 via node 1
405 setup_temp_row(0,1, result.cpus); /* temp. link between nodes 0 and 1 */
406 setup_temp_row(1,3, result.cpus); /* temp. link between nodes 1 and 3 */
408 if (!check_connection(1, 7, CONNECTION_1_3)) {
409 print_debug("No connection to Node 3.\r\n");
410 clear_temp_row(0); /* delete temp connection */
411 clear_temp_row(1); /* delete temp connection */
416 #warning "FIXME optimize the physical connections"
418 /* We found 4 nodes so far. Now setup all nodes for 4p */
420 setup_node(0, result.cpus); /* The first 2 nodes are configured */
421 setup_node(1, result.cpus); /* already. Just configure them for 4p */
423 setup_temp_row(0,2, result.cpus);
424 setup_temp_node(2, result.cpus);
428 setup_temp_row(0,1, result.cpus);
429 setup_temp_row(1,3, result.cpus);
430 setup_temp_node(3, result.cpus);
432 enable_routing(3); /* enable routing on node 3 (temp.) */
440 print_debug_hex8(result.cpus);
441 print_debug(" nodes initialized.\r\n");
446 #if CONFIG_MAX_CPUS > 1
447 static unsigned verify_mp_capabilities(unsigned cpus)
449 unsigned node, row, mask;
453 mask=0x06; /* BigMPCap */
455 mask=0x02; /* MPCap */
458 for (node=0; node<cpus; node++) {
459 if ((pci_read_config32(NODE_MC(node), 0xe8) & mask) != mask) {
468 /* one of our cpus is not mp capable */
470 print_err("One of the CPUs is not MP capable. Going back to UP\r\n");
472 for (node = cpus; node > 0; node--) {
473 for (row = cpus; row > 0; row--) {
474 fill_row(NODE_HT(node-1), row-1, DEFAULT);
477 setup_uniprocessor();
/* Final per-node setup once the number of nodes is known.
 *
 * For every node below `cpus`, register 0x60 gets (cpus-1) written
 * into the two fields selected by mask 0x000F0070, and register 0x68
 * has its three priority fields cleared and reprogrammed together
 * with HTTC_LIMIT_CLDT_CFG.  Registers 0x94, 0xb4 and 0xd4 are also
 * written with 0; per the comment in the body this is meant for
 * Hammer A0 only.
 *
 * NOTE(review): the listing skips line 518, which sits inside the
 * `val |=` expression, so one term of that expression may be missing.
 * NOTE(review): the declaration and assignment of `dev` and the
 * condition guarding the writes to 0x94/0xb4/0xd4 are not visible in
 * this listing; confirm against the pristine file.
 */
483 static void coherent_ht_finalize(unsigned cpus)
488 /* set up cpu count and node count and enable Limit
489 * Config Space Range for all available CPUs.
490 * Also clear non coherent hypertransport bus range
491 * registers on Hammer A0 revision.
495 print_debug("coherent_ht_finalize\r\n");
497 rev_a0 = is_cpu_rev_a0();
498 for (node = 0; node < cpus; node++) {
503 /* Set the Total CPU and Node count in the system */
504 val = pci_read_config32(dev, 0x60);
505 val &= (~0x000F0070);
506 val |= ((cpus-1)<<16)|((cpus-1)<<4);
507 pci_write_config32(dev, 0x60, val);
509 /* Only respond to real cpu pci configuration cycles
510 * and optimize the HT settings
512 val=pci_read_config32(dev, 0x68);
513 val &= ~((HTTC_BUF_REL_PRI_MASK << HTTC_BUF_REL_PRI_SHIFT) |
514 (HTTC_MED_PRI_BYP_CNT_MASK << HTTC_MED_PRI_BYP_CNT_SHIFT) |
515 (HTTC_HI_PRI_BYP_CNT_MASK << HTTC_HI_PRI_BYP_CNT_SHIFT));
516 val |= HTTC_LIMIT_CLDT_CFG |
517 (HTTC_BUF_REL_PRI_8 << HTTC_BUF_REL_PRI_SHIFT) |
519 (3 << HTTC_MED_PRI_BYP_CNT_SHIFT) |
520 (3 << HTTC_HI_PRI_BYP_CNT_SHIFT);
521 pci_write_config32(dev, 0x68, val);
524 pci_write_config32(dev, 0x94, 0);
525 pci_write_config32(dev, 0xb4, 0);
526 pci_write_config32(dev, 0xd4, 0);
533 print_debug("done\r\n");
/* Apply per-node register workarounds.
 *
 * For every node below `cpus`: when is_cpu_pre_c0() is true, the low
 * two bits of register 0x70 are checked against 2, bits 5:4 of
 * register 0x7c against 0 and register 0xd4 against 0x000D0001, and
 * each register is written back when it differs.  Otherwise register
 * 0xd4 is written with 0x04e20707.
 *
 * needs_reset is passed in by the caller and is set to 1 where the
 * 0xd4 register is rewritten.
 *
 * NOTE(review): this listing has gaps (for example 550->553,
 * 557->560, 565->567 and 579->581): the statements that modify `cmd`
 * before each write, the assignment of `dev`, the comparison guarding
 * the last write and the return statement are not visible here.
 * Whether the 0x70 and 0x7c updates also request a reset cannot be
 * told from this listing.
 */
537 static int apply_cpu_errata_fixes(unsigned cpus, int needs_reset)
540 for(node = 0; node < cpus; node++) {
544 if (is_cpu_pre_c0()) {
547 * Limit the number of downstream posted requests to 1
549 cmd = pci_read_config32(dev, 0x70);
550 if ((cmd & (3 << 0)) != 2) {
553 pci_write_config32(dev, 0x70, cmd );
556 cmd = pci_read_config32(dev, 0x7c);
557 if ((cmd & (3 << 4)) != 0) {
560 pci_write_config32(dev, 0x7c, cmd );
563 /* Clock Power/Timing Low */
564 cmd = pci_read_config32(dev, 0xd4);
565 if (cmd != 0x000D0001) {
567 pci_write_config32(dev, 0xd4, cmd);
568 needs_reset = 1; /* Needed? */
575 * Set Clk Ramp Hystersis to 7
576 * Clock Power/Timing Low
578 cmd_ref = 0x04e20707; /* Registered */
579 cmd = pci_read_config32(dev, 0xd4);
581 pci_write_config32(dev, 0xd4, cmd_ref );
582 needs_reset = 1; /* Needed? */
589 static int optimize_link_read_pointers(unsigned cpus, int needs_reset)
592 for(node = 0; node < cpus; node = node + 1) {
593 device_t f0_dev, f3_dev;
594 uint32_t cmd_ref, cmd;
596 f0_dev = NODE_HT(node);
597 f3_dev = NODE_MC(node);
598 cmd_ref = cmd = pci_read_config32(f3_dev, 0xdc);
599 for(link = 0; link < 3; link = link + 1) {
602 reg = 0x98 + (link * 0x20);
603 link_type = pci_read_config32(f0_dev, reg);
604 if (link_type & LinkConnected) {
605 cmd &= 0xff << (link *8);
606 /* FIXME this assumes the device on the other side is an AMD device */
607 cmd |= 0x25 << (link *8);
610 if (cmd != cmd_ref) {
611 pci_write_config32(f3_dev, 0xdc, cmd);
618 static int setup_coherent_ht_domain(void)
620 struct setup_smp_result result;
622 result.needs_reset = 0;
624 enable_bsp_routing();
626 #if CONFIG_MAX_CPUS == 1
627 setup_uniprocessor();
629 result = setup_smp();
630 result.cpus = verify_mp_capabilities(result.cpus);
632 coherent_ht_finalize(result.cpus);
633 result.needs_reset = apply_cpu_errata_fixes(result.cpus, result.needs_reset);
634 #if CONFIG_MAX_CPUS > 1 /* Why doesn't this work on the solo? */
635 result.needs_reset = optimize_link_read_pointers(result.cpus, result.needs_reset);
638 return result.needs_reset;