1 /* coherent hypertransport initialization for AMD64
3 * written by Stefan Reinauer <stepan@openbios.org>
4 * (c) 2003-2004 by SuSE Linux AG
6 * This code is licensed under GPL.
10 * This algorithm assumes a grid configuration as follows:
13 * org. : 1x1 2x1 2x2 2x3 2x4
17 #include <device/pci_def.h>
18 #include <device/pci_ids.h>
19 #include <device/hypertransport_def.h>
20 #include "arch/romcc_io.h"
23 /* when generating a temporary row configuration we
24 * don't want broadcast to be enabled for that node.
27 #define generate_temp_row(...) ((generate_row(__VA_ARGS__)&(~0x0f0000))|0x010000)
28 #define enable_bsp_routing() enable_routing(0)
30 #define NODE_HT(x) PCI_DEV(0,24+x,0)
31 #define NODE_MP(x) PCI_DEV(0,24+x,1)
32 #define NODE_MC(x) PCI_DEV(0,24+x,3)
34 #define DEFAULT 0x00010101 /* default row entry */
43 static u8 link_to_register(int ldt)
46 * [ 0: 3] Request Route
47 * [0] Route to this node
53 if (ldt&0x08) return 0x40;
54 if (ldt&0x04) return 0x20;
55 if (ldt&0x02) return 0x00;
57 /* we should never get here */
58 print_spew("Unknown Link\n");
62 static int link_connection(int src, int dest)
64 /* we generate the needed link information from the rows
65 * by taking the Request Route of the corresponding row.
68 return generate_row(src, dest, CONFIG_MAX_CPUS) & 0x0f;
71 static void disable_probes(void)
73 /* disable read/write/fill probes for uniprocessor setup
74 * they don't make sense if only one cpu is available
77 /* HyperTransport Transaction Control Register
79 * [ 0: 0] Disable read byte probe
81 * 1 = Probes not issued
82 * [ 1: 1] Disable Read Doubleword probe
84 * 1 = Probes not issued
85 * [ 2: 2] Disable write byte probes
87 * 1 = Probes not issued
88 * [ 3: 3] Disable Write Doubleword Probes
90 * 1 = Probes not issued.
91 * [10:10] Disable Fill Probe
92 * 0 = Probes issued for cache fills
93 * 1 = Probes not issued for cache fills.
98 print_spew("Disabling read/write/fill probes for UP... ");
100 val=pci_read_config32(NODE_HT(0), 0x68);
101 val |= (1<<10)|(1<<9)|(1<<8)|(1<<4)|(1<<3)|(1<<2)|(1<<1)|(1 << 0);
102 pci_write_config32(NODE_HT(0), 0x68, val);
104 print_spew("done.\r\n");
108 static void enable_routing(u8 node)
112 /* HT Initialization Control Register
114 * [ 0: 0] Routing Table Disable
115 * 0 = Packets are routed according to routing tables
116 * 1 = Packets are routed according to the default link field
117 * [ 1: 1] Request Disable (BSP should clear this)
118 * 0 = Request packets may be generated
119 * 1 = Request packets may not be generated.
120 * [ 3: 2] Default Link (Read-only)
124 * 11 = CPU on same node
126 * - Scratch bit cleared by a cold reset
127 * [ 5: 5] BIOS Reset Detect
128 * - Scratch bit cleared by a cold reset
129 * [ 6: 6] INIT Detect
130 * - Scratch bit cleared by a warm or cold reset not by an INIT
134 /* Enable routing table */
135 print_spew("Enabling routing table for node ");
136 print_spew_hex8(node);
138 val=pci_read_config32(NODE_HT(node), 0x6c);
139 val &= ~((1<<1)|(1<<0));
140 pci_write_config32(NODE_HT(node), 0x6c, val);
142 print_spew(" done.\r\n");
145 static void fill_row(u8 node, u8 row, u32 value)
147 pci_write_config32(NODE_HT(node), 0x40+(row<<2), value);
151 #if CONFIG_MAX_CPUS > 1
153 static void rename_temp_node(u8 node)
157 print_spew("Renaming current temporary node to ");
158 print_spew_hex8(node);
160 val=pci_read_config32(NODE_HT(7), 0x60);
161 val &= (~7); /* clear low bits. */
162 val |= node; /* new node */
163 pci_write_config32(NODE_HT(7), 0x60, val);
165 print_spew(" done.\r\n");
168 static bool check_connection(u8 src, u8 dest, u8 link)
170 /* See if we have a valid connection to dest */
173 /* Detect if the coherent HT link is connected. */
174 val = pci_read_config32(NODE_HT(src), 0x98+link);
175 if ( (val&0x17) != 0x03)
178 /* Verify that the coherent hypertransport link is
179 * established and actually working by reading the
180 * remote node's vendor/device id
182 val = pci_read_config32(NODE_HT(dest),0);
183 if(val != 0x11001022)
189 static unsigned read_freq_cap(device_t dev, unsigned pos)
191 /* Handle bugs in valid hypertransport frequency reporting */
195 freq_cap = pci_read_config16(dev, pos);
196 freq_cap &= ~(1 << HT_FREQ_VENDOR); /* Ignore Vendor HT frequencies */
198 id = pci_read_config32(dev, 0);
200 /* AMD 8131 Errata 48 */
201 if (id == (PCI_VENDOR_ID_AMD | (PCI_DEVICE_ID_AMD_8131_PCIX << 16))) {
202 freq_cap &= ~(1 << HT_FREQ_800Mhz);
204 /* AMD 8151 Errata 23 */
205 if (id == (PCI_VENDOR_ID_AMD | (PCI_DEVICE_ID_AMD_8151_SYSCTRL << 16))) {
206 freq_cap &= ~(1 << HT_FREQ_800Mhz);
208 /* AMD K8 Unsupported 1Ghz? */
209 if (id == (PCI_VENDOR_ID_AMD | (0x1100 << 16))) {
210 freq_cap &= ~(1 << HT_FREQ_1000Mhz);
215 static int optimize_connection(device_t node1, uint8_t link1, device_t node2, uint8_t link2)
217 static const uint8_t link_width_to_pow2[]= { 3, 4, 0, 5, 1, 2, 0, 0 };
218 static const uint8_t pow2_to_link_width[] = { 0x7, 4, 5, 0, 1, 3 };
219 uint16_t freq_cap1, freq_cap2, freq_cap, freq_mask;
220 uint8_t width_cap1, width_cap2, width_cap, width, old_width, ln_width1, ln_width2;
221 uint8_t freq, old_freq;
223 /* Set link width and frequency */
225 /* Initially assume everything is already optimized and I don't need a reset */
228 /* Get the frequency capabilities */
229 freq_cap1 = read_freq_cap(node1, link1 + PCI_HT_CAP_HOST_FREQ_CAP);
230 freq_cap2 = read_freq_cap(node2, link2 + PCI_HT_CAP_HOST_FREQ_CAP);
232 /* Calculate the highest possible frequency */
233 freq = log2(freq_cap1 & freq_cap2);
235 /* See if I am changing the link frequency */
236 old_freq = pci_read_config8(node1, link1 + PCI_HT_CAP_HOST_FREQ);
237 needs_reset |= old_freq != freq;
238 old_freq = pci_read_config8(node2, link2 + PCI_HT_CAP_HOST_FREQ);
239 needs_reset |= old_freq != freq;
241 /* Set the calculated link frequency */
242 pci_write_config8(node1, link1 + PCI_HT_CAP_HOST_FREQ, freq);
243 pci_write_config8(node2, link2 + PCI_HT_CAP_HOST_FREQ, freq);
245 /* Get the width capabilities */
246 width_cap1 = pci_read_config8(node1, link1 + PCI_HT_CAP_HOST_WIDTH);
247 width_cap2 = pci_read_config8(node2, link2 + PCI_HT_CAP_HOST_WIDTH);
249 /* Calculate node1's input width */
250 ln_width1 = link_width_to_pow2[width_cap1 & 7];
251 ln_width2 = link_width_to_pow2[(width_cap2 >> 4) & 7];
252 if (ln_width1 > ln_width2) {
253 ln_width1 = ln_width2;
255 width = pow2_to_link_width[ln_width1];
256 /* Calculate node1's output width */
257 ln_width1 = link_width_to_pow2[(width_cap1 >> 4) & 7];
258 ln_width2 = link_width_to_pow2[width_cap2 & 7];
259 if (ln_width1 > ln_width2) {
260 ln_width1 = ln_width2;
262 width |= pow2_to_link_width[ln_width1] << 4;
264 /* See if I am changing node1's width */
265 old_width = pci_read_config8(node1, link1 + PCI_HT_CAP_HOST_WIDTH + 1);
266 needs_reset |= old_width != width;
268 /* Set node1's widths */
269 pci_write_config8(node1, link1 + PCI_HT_CAP_HOST_WIDTH + 1, width);
271 /* Calculate node2's width */
272 width = ((width & 0x70) >> 4) | ((width & 0x7) << 4);
274 /* See if I am changing node2's width */
275 old_width = pci_read_config8(node2, link2 + PCI_HT_CAP_HOST_WIDTH + 1);
276 needs_reset |= old_width != width;
278 /* Set node2's widths */
279 pci_write_config8(node2, link2 + PCI_HT_CAP_HOST_WIDTH + 1, width);
284 static void setup_row(u8 source, u8 dest, u8 nodes)
286 fill_row(source,dest,generate_row(source,dest,nodes));
289 static void setup_temp_row(u8 source, u8 dest, u8 nodes)
291 fill_row(source,7,generate_temp_row(source,dest,nodes));
294 static void setup_node(u8 node, u8 nodes)
297 for(row=0; row<nodes; row++)
298 setup_row(node, row, nodes);
301 static void setup_remote_row(u8 source, u8 dest, u8 nodes)
303 fill_row(7, dest, generate_row(source, dest, nodes));
306 static void setup_remote_node(u8 node, u8 nodes)
308 static const uint8_t pci_reg[] = {
309 0x44, 0x4c, 0x54, 0x5c, 0x64, 0x6c, 0x74, 0x7c,
310 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78,
311 0x84, 0x8c, 0x94, 0x9c, 0xa4, 0xac, 0xb4, 0xbc,
312 0x80, 0x88, 0x90, 0x98, 0xa0, 0xa8, 0xb0, 0xb8,
313 0xc4, 0xcc, 0xd4, 0xdc,
314 0xc0, 0xc8, 0xd0, 0xd8,
315 0xe0, 0xe4, 0xe8, 0xec,
320 print_spew("setup_remote_node: ");
321 for(row=0; row<nodes; row++)
322 setup_remote_row(node, row, nodes);
324 /* copy the default resource map from node 0 */
325 for(i = 0; i < sizeof(pci_reg)/sizeof(pci_reg[0]); i++) {
329 value = pci_read_config32(NODE_MP(0), reg);
330 pci_write_config32(NODE_MP(7), reg, value);
333 print_spew("done\r\n");
338 #if CONFIG_MAX_CPUS > 2
339 static void setup_temp_node(u8 node, u8 nodes)
342 for(row=0; row<nodes; row++)
343 fill_row(7,row,generate_row(node,row,nodes));
347 static void setup_uniprocessor(void)
349 print_spew("Enabling UP settings\r\n");
353 struct setup_smp_result {
358 #if CONFIG_MAX_CPUS > 1
359 static struct setup_smp_result setup_smp(void)
361 struct setup_smp_result result;
363 result.needs_reset = 0;
365 print_spew("Enabling SMP settings\r\n");
367 setup_row(0, 0, result.nodes);
368 /* Setup and check a temporary connection to node 1 */
369 setup_temp_row(0, 1, result.nodes);
371 if (!check_connection(0, 7, link_to_register(link_connection(0,1)))) {
372 print_spew("No connection to Node 1.\r\n");
373 setup_uniprocessor(); /* and get up working */
378 /* We found 2 nodes so far */
380 setup_node(0, result.nodes); /* Node 1 is there. Setup Node 0 correctly */
381 setup_remote_node(1, result.nodes); /* Setup the routes on the remote node */
382 rename_temp_node(1); /* Rename Node 7 to Node 1 */
383 enable_routing(1); /* Enable routing on Node 1 */
385 result.needs_reset = optimize_connection(
386 NODE_HT(0), 0x80 + link_to_register(link_connection(0,1)),
387 NODE_HT(1), 0x80 + link_to_register(link_connection(1,0)) );
389 #if CONFIG_MAX_CPUS > 2
392 /* Setup and check temporary connection from Node 0 to Node 2 */
393 setup_temp_row(0,2, result.nodes);
395 if (!check_connection(0, 7, link_to_register(link_connection(0,2))) ) {
396 print_spew("No connection to Node 2.\r\n");
401 /* We found 3 nodes so far. Now setup a temporary
402 * connection from node 0 to node 3 via node 1
405 setup_temp_row(0,1, result.nodes); /* temp. link between nodes 0 and 1 */
406 setup_temp_row(1,3, result.nodes); /* temp. link between nodes 1 and 3 */
408 if (!check_connection(1, 7, link_to_register(link_connection(1,3)))) {
409 print_spew("No connection to Node 3.\r\n");
414 /* We found 4 nodes so far. Now setup all nodes for 4p */
416 setup_node(0, result.nodes); /* The first 2 nodes are configured */
417 setup_node(1, result.nodes); /* already. Just configure them for 4p */
419 setup_temp_row(0,2, result.nodes);
420 setup_temp_node(2, result.nodes);
424 setup_temp_row(0,1, result.nodes);
425 setup_temp_row(1,3, result.nodes);
426 setup_temp_node(3, result.nodes);
428 enable_routing(3); /* enable routing on node 3 (temp.) */
430 /* optimize physical connections - by LYH */
431 result.needs_reset |= optimize_connection(
432 NODE_HT(0), 0x80 + link_to_register(link_connection(0,2)),
433 NODE_HT(2), 0x80 + link_to_register(link_connection(2,0)) ); /* OR in: keep a reset requested by an earlier link */
435 result.needs_reset |= optimize_connection(
436 NODE_HT(1), 0x80 + link_to_register(link_connection(1,3)),
437 NODE_HT(3), 0x80 + link_to_register(link_connection(3,1)) );
439 result.needs_reset |= optimize_connection(
440 NODE_HT(2), 0x80 + link_to_register(link_connection(2,3)),
441 NODE_HT(3), 0x80 + link_to_register(link_connection(3,2)) );
443 #endif /* CONFIG_MAX_CPUS > 2 */
445 print_debug_hex8(result.nodes);
446 print_debug(" nodes initialized.\r\n");
451 static unsigned verify_mp_capabilities(unsigned nodes)
453 unsigned node, row, mask;
457 mask=0x06; /* BigMPCap */
458 } else if (nodes == 2) {
459 mask=0x02; /* MPCap */
461 mask=0x00; /* Non SMP */
464 for (node=0; node<nodes; node++) {
465 if ((pci_read_config32(NODE_MC(node), 0xe8) & mask) != mask) {
474 /* one of our nodes is not mp capable */
476 print_err("One of the CPUs is not MP capable. Going back to UP\r\n");
480 static void clear_dead_routes(unsigned nodes)
488 for(node = 7; node >= 0; node--) {
489 for(row = 7; row >= last_row; row--) {
490 fill_row(node, row, DEFAULT);
495 static void coherent_ht_finalize(unsigned nodes)
500 /* set up cpu count and node count and enable Limit
501 * Config Space Range for all available CPUs.
502 * Also clear non coherent hypertransport bus range
503 * registers on Hammer A0 revision.
506 print_spew("coherent_ht_finalize\r\n");
507 rev_a0 = is_cpu_rev_a0();
508 for (node = 0; node < nodes; node++) {
513 /* Set the Total CPU and Node count in the system */
514 val = pci_read_config32(dev, 0x60);
515 val &= (~0x000F0070);
516 val |= ((nodes-1)<<16)|((nodes-1)<<4);
517 pci_write_config32(dev, 0x60, val);
519 /* Only respond to real cpu pci configuration cycles
520 * and optimize the HT settings
522 val=pci_read_config32(dev, 0x68);
523 val &= ~((HTTC_BUF_REL_PRI_MASK << HTTC_BUF_REL_PRI_SHIFT) |
524 (HTTC_MED_PRI_BYP_CNT_MASK << HTTC_MED_PRI_BYP_CNT_SHIFT) |
525 (HTTC_HI_PRI_BYP_CNT_MASK << HTTC_HI_PRI_BYP_CNT_SHIFT));
526 val |= HTTC_LIMIT_CLDT_CFG |
527 (HTTC_BUF_REL_PRI_8 << HTTC_BUF_REL_PRI_SHIFT) |
529 (3 << HTTC_MED_PRI_BYP_CNT_SHIFT) |
530 (3 << HTTC_HI_PRI_BYP_CNT_SHIFT);
531 pci_write_config32(dev, 0x68, val);
534 print_spew("Old CPU (rev A0) detected\r\n");
535 pci_write_config32(dev, 0x94, 0);
536 pci_write_config32(dev, 0xb4, 0);
537 pci_write_config32(dev, 0xd4, 0);
541 print_spew("done\r\n");
544 static int apply_cpu_errata_fixes(unsigned nodes, int needs_reset)
547 for(node = 0; node < nodes; node++) {
551 if (is_cpu_pre_c0()) {
554 * Limit the number of downstream posted requests to 1
556 cmd = pci_read_config32(dev, 0x70);
557 if ((cmd & (3 << 0)) != 2) {
560 pci_write_config32(dev, 0x70, cmd );
563 cmd = pci_read_config32(dev, 0x7c);
564 if ((cmd & (3 << 4)) != 0) {
567 pci_write_config32(dev, 0x7c, cmd );
570 /* Clock Power/Timing Low */
571 cmd = pci_read_config32(dev, 0xd4);
572 if (cmd != 0x000D0001) {
574 pci_write_config32(dev, 0xd4, cmd);
575 needs_reset = 1; /* Needed? */
582 * Set Clk Ramp Hysteresis to 7
583 * Clock Power/Timing Low
585 cmd_ref = 0x04e20707; /* Registered */
586 cmd = pci_read_config32(dev, 0xd4);
588 pci_write_config32(dev, 0xd4, cmd_ref );
589 needs_reset = 1; /* Needed? */
596 static int optimize_link_read_pointers(unsigned nodes, int needs_reset)
599 for(node = 0; node < nodes; node = node + 1) {
600 device_t f0_dev, f3_dev;
601 uint32_t cmd_ref, cmd;
603 f0_dev = NODE_HT(node);
604 f3_dev = NODE_MC(node);
605 cmd_ref = cmd = pci_read_config32(f3_dev, 0xdc);
606 for(link = 0; link < 3; link = link + 1) {
609 reg = 0x98 + (link * 0x20);
610 link_type = pci_read_config32(f0_dev, reg);
611 if (link_type & LinkConnected) {
612 cmd &= ~(0xff << (link *8)); /* clear only this link's byte; other links' bytes must survive */
613 /* FIXME this assumes the device on the other
614 * side is an AMD device
616 cmd |= 0x25 << (link *8);
619 if (cmd != cmd_ref) {
620 pci_write_config32(f3_dev, 0xdc, cmd);
627 static int setup_coherent_ht_domain(void)
629 struct setup_smp_result result;
631 result.needs_reset = 0;
633 enable_bsp_routing();
635 #if CONFIG_MAX_CPUS > 1
636 result = setup_smp();
638 result.nodes = verify_mp_capabilities(result.nodes);
639 clear_dead_routes(result.nodes);
640 if (result.nodes == 1) {
641 setup_uniprocessor();
643 coherent_ht_finalize(result.nodes);
644 result.needs_reset = apply_cpu_errata_fixes(result.nodes, result.needs_reset);
646 #if CONFIG_MAX_CPUS > 1 /* Why doesn't this work on the solo? */
647 result.needs_reset = optimize_link_read_pointers(result.nodes, result.needs_reset);
650 return result.needs_reset;