/* coherent hypertransport initialization for AMD64
 *
 * written by Stefan Reinauer <stepan@openbios.org>
 * (c) 2003-2004 by SuSE Linux AG
 *
 * This code is licensed under GPL.
 */

/*
 * This algorithm assumes a grid configuration as follows:
 *
 * org. : 1x1 2x1 2x2 2x3 2x4
 */
17 #include <device/pci_def.h>
18 #include <device/pci_ids.h>
19 #include <device/hypertransport_def.h>
20 #include "arch/romcc_io.h"
/* When generating a temporary row configuration we
 * don't want broadcast to be enabled for that node:
 * generate_temp_row() clears bits [19:16] of the generated
 * row and then sets bit 16 only.
 */

#define generate_temp_row(...) ((generate_row(__VA_ARGS__)&(~0x0f0000))|0x010000)
/* Row 7 of a node is used as the temporary row; reset it to DEFAULT. */
#define clear_temp_row(x) fill_row(x,7,DEFAULT)
#define enable_bsp_routing() enable_routing(0)

/* Config space handles of node x: PCI_DEV(0, 24 + x, fn) with fn 0, 1, 3.
 * The argument is parenthesized so that expressions such as
 * NODE_HT(a & b) expand to the intended device number.
 */
#define NODE_HT(x) PCI_DEV(0,24+(x),0)
#define NODE_MP(x) PCI_DEV(0,24+(x),1)
#define NODE_MC(x) PCI_DEV(0,24+(x),3)

#define DEFAULT 0x00010101 /* default row entry */
44 static u8 link_to_register(int ldt)
47 * [ 0: 3] Request Route
48 * [0] Route to this node
54 if (ldt&0x08) return 0x40;
55 if (ldt&0x04) return 0x20;
56 if (ldt&0x02) return 0x00;
58 /* we should never get here */
59 print_debug("Unknown Link\n");
63 static int link_connection(int src, int dest)
65 /* we generate the needed link information from the rows
66 * by taking the Request Route of the according row.
69 return generate_row(src, dest, CONFIG_MAX_CPUS) & 0x0f;
72 static void disable_probes(void)
74 /* disable read/write/fill probes for uniprocessor setup
75 * they don't make sense if only one cpu is available
78 /* Hypetransport Transaction Control Register
80 * [ 0: 0] Disable read byte probe
82 * 1 = Probes not issued
83 * [ 1: 1] Disable Read Doubleword probe
85 * 1 = Probes not issued
86 * [ 2: 2] Disable write byte probes
88 * 1 = Probes not issued
89 * [ 3: 3] Disable Write Doubleword Probes
91 * 1 = Probes not issued.
92 * [10:10] Disable Fill Probe
93 * 0 = Probes issued for cache fills
94 * 1 = Probes not issued for cache fills.
99 print_spew("Disabling read/write/fill probes for UP... ");
101 val=pci_read_config32(NODE_HT(0), 0x68);
102 val |= (1<<10)|(1<<9)|(1<<8)|(1<<4)|(1<<3)|(1<<2)|(1<<1)|(1 << 0);
103 pci_write_config32(NODE_HT(0), 0x68, val);
105 print_spew("done.\r\n");
109 static void enable_routing(u8 node)
113 /* HT Initialization Control Register
115 * [ 0: 0] Routing Table Disable
116 * 0 = Packets are routed according to routing tables
117 * 1 = Packets are routed according to the default link field
118 * [ 1: 1] Request Disable (BSP should clear this)
119 * 0 = Request packets may be generated
120 * 1 = Request packets may not be generated.
121 * [ 3: 2] Default Link (Read-only)
125 * 11 = CPU on same node
127 * - Scratch bit cleared by a cold reset
128 * [ 5: 5] BIOS Reset Detect
129 * - Scratch bit cleared by a cold reset
130 * [ 6: 6] INIT Detect
131 * - Scratch bit cleared by a warm or cold reset not by an INIT
135 /* Enable routing table */
136 print_spew("Enabling routing table for node ");
137 print_spew_hex8(node);
139 val=pci_read_config32(NODE_HT(node), 0x6c);
140 val &= ~((1<<1)|(1<<0));
141 pci_write_config32(NODE_HT(node), 0x6c, val);
143 print_spew(" done.\r\n");
146 #if CONFIG_MAX_CPUS > 1
148 static void rename_temp_node(u8 node)
152 print_spew("Renaming current temporary node to ");
153 print_spew_hex8(node);
155 val=pci_read_config32(NODE_HT(7), 0x60);
156 val &= (~7); /* clear low bits. */
157 val |= node; /* new node */
158 pci_write_config32(NODE_HT(7), 0x60, val);
160 print_spew(" done.\r\n");
163 static bool check_connection(u8 src, u8 dest, u8 link)
165 /* See if we have a valid connection to dest */
168 /* Detect if the coherent HT link is connected. */
169 val = pci_read_config32(NODE_HT(src), 0x98+link);
170 if ( (val&0x17) != 0x03)
173 /* Verify that the coherent hypertransport link is
174 * established and actually working by reading the
175 * remode node's vendor/device id
177 val = pci_read_config32(NODE_HT(dest),0);
178 if(val != 0x11001022)
184 static unsigned read_freq_cap(device_t dev, unsigned pos)
186 /* Handle bugs in valid hypertransport frequency reporting */
190 freq_cap = pci_read_config16(dev, pos);
191 freq_cap &= ~(1 << HT_FREQ_VENDOR); /* Ignore Vendor HT frequencies */
193 id = pci_read_config32(dev, 0);
195 /* AMD 8131 Errata 48 */
196 if (id == (PCI_VENDOR_ID_AMD | (PCI_DEVICE_ID_AMD_8131_PCIX << 16))) {
197 freq_cap &= ~(1 << HT_FREQ_800Mhz);
199 /* AMD 8151 Errata 23 */
200 if (id == (PCI_VENDOR_ID_AMD | (PCI_DEVICE_ID_AMD_8151_SYSCTRL << 16))) {
201 freq_cap &= ~(1 << HT_FREQ_800Mhz);
203 /* AMD K8 Unsupported 1Ghz? */
204 if (id == (PCI_VENDOR_ID_AMD | (0x1100 << 16))) {
205 freq_cap &= ~(1 << HT_FREQ_1000Mhz);
210 static int optimize_connection(device_t node1, uint8_t link1, device_t node2, uint8_t link2)
212 static const uint8_t link_width_to_pow2[]= { 3, 4, 0, 5, 1, 2, 0, 0 };
213 static const uint8_t pow2_to_link_width[] = { 0x7, 4, 5, 0, 1, 3 };
214 uint16_t freq_cap1, freq_cap2, freq_cap, freq_mask;
215 uint8_t width_cap1, width_cap2, width_cap, width, old_width, ln_width1, ln_width2;
216 uint8_t freq, old_freq;
218 /* Set link width and frequency */
220 /* Initially assume everything is already optimized and I don't need a reset */
223 /* Get the frequency capabilities */
224 freq_cap1 = read_freq_cap(node1, link1 + PCI_HT_CAP_HOST_FREQ_CAP);
225 freq_cap2 = read_freq_cap(node2, link2 + PCI_HT_CAP_HOST_FREQ_CAP);
227 /* Calculate the highest possible frequency */
228 freq = log2(freq_cap1 & freq_cap2);
230 /* See if I am changing the link freqency */
231 old_freq = pci_read_config8(node1, link1 + PCI_HT_CAP_HOST_FREQ);
232 needs_reset |= old_freq != freq;
233 old_freq = pci_read_config8(node2, link2 + PCI_HT_CAP_HOST_FREQ);
234 needs_reset |= old_freq != freq;
236 /* Set the Calulcated link frequency */
237 pci_write_config8(node1, link1 + PCI_HT_CAP_HOST_FREQ, freq);
238 pci_write_config8(node2, link2 + PCI_HT_CAP_HOST_FREQ, freq);
240 /* Get the width capabilities */
241 width_cap1 = pci_read_config8(node1, link1 + PCI_HT_CAP_HOST_WIDTH);
242 width_cap2 = pci_read_config8(node2, link2 + PCI_HT_CAP_HOST_WIDTH);
244 /* Calculate node1's input width */
245 ln_width1 = link_width_to_pow2[width_cap1 & 7];
246 ln_width2 = link_width_to_pow2[(width_cap2 >> 4) & 7];
247 if (ln_width1 > ln_width2) {
248 ln_width1 = ln_width2;
250 width = pow2_to_link_width[ln_width1];
251 /* Calculate node1's output width */
252 ln_width1 = link_width_to_pow2[(width_cap1 >> 4) & 7];
253 ln_width2 = link_width_to_pow2[width_cap2 & 7];
254 if (ln_width1 > ln_width2) {
255 ln_width1 = ln_width2;
257 width |= pow2_to_link_width[ln_width1] << 4;
259 /* See if I am changing node1's width */
260 old_width = pci_read_config8(node1, link1 + PCI_HT_CAP_HOST_WIDTH + 1);
261 needs_reset |= old_width != width;
263 /* Set node1's widths */
264 pci_write_config8(node1, link1 + PCI_HT_CAP_HOST_WIDTH + 1, width);
266 /* Calculate node2's width */
267 width = ((width & 0x70) >> 4) | ((width & 0x7) << 4);
269 /* See if I am changing node2's width */
270 old_width = pci_read_config8(node2, link2 + PCI_HT_CAP_HOST_WIDTH + 1);
271 needs_reset |= old_width != width;
273 /* Set node2's widths */
274 pci_write_config8(node2, link2 + PCI_HT_CAP_HOST_WIDTH + 1, width);
279 static void fill_row(u8 node, u8 row, u32 value)
281 pci_write_config32(NODE_HT(node), 0x40+(row<<2), value);
284 static void setup_row(u8 source, u8 dest, u8 cpus)
286 fill_row(source,dest,generate_row(source,dest,cpus));
289 static void setup_temp_row(u8 source, u8 dest, u8 cpus)
291 fill_row(source,7,generate_temp_row(source,dest,cpus));
294 static void setup_node(u8 node, u8 cpus)
297 for(row=0; row<cpus; row++)
298 setup_row(node, row, cpus);
301 static void setup_remote_row(u8 source, u8 dest, u8 cpus)
303 fill_row(7, dest, generate_row(source, dest, cpus));
306 static void setup_remote_node(u8 node, u8 cpus)
308 static const uint8_t pci_reg[] = {
309 0x44, 0x4c, 0x54, 0x5c, 0x64, 0x6c, 0x74, 0x7c,
310 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78,
311 0x84, 0x8c, 0x94, 0x9c, 0xa4, 0xac, 0xb4, 0xbc,
312 0x80, 0x88, 0x90, 0x98, 0xa0, 0xa8, 0xb0, 0xb8,
313 0xc4, 0xcc, 0xd4, 0xdc,
314 0xc0, 0xc8, 0xd0, 0xd8,
315 0xe0, 0xe4, 0xe8, 0xec,
320 print_spew("setup_remote_node: ");
321 for(row=0; row<cpus; row++)
322 setup_remote_row(node, row, cpus);
324 /* copy the default resource map from node 0 */
325 for(i = 0; i < sizeof(pci_reg)/sizeof(pci_reg[0]); i++) {
329 value = pci_read_config32(NODE_MP(0), reg);
330 pci_write_config32(NODE_MP(7), reg, value);
333 print_spew("done\r\n");
#if CONFIG_MAX_CPUS > 2
/* Program rows 0 .. cpus-1 of the node reachable as temporary node 7
 * with the routes that node `node` needs.
 */
static void setup_temp_node(u8 node, u8 cpus)
{
	u8 dest;

	dest = 0;
	while (dest < cpus) {
		fill_row(7, dest, generate_row(node, dest, cpus));
		dest++;
	}
}
#endif
/* Apply the settings for a system with a single CPU. */
static void setup_uniprocessor(void)
{
	print_spew("Enabling UP settings\r\n");

	/* NOTE(review): the probe disabling call is not visible in the
	 * listing; it is assumed here because disable_probes() documents
	 * itself as the uniprocessor setup step - confirm.
	 */
	disable_probes();
}
/* Result of bringing up the coherent HT fabric.
 *
 * cpus        - number of nodes that were found and initialized
 * needs_reset - non-zero if a setting was changed that requires a
 *               reset (see optimize_connection())
 *
 * NOTE(review): member types are inferred from how the fields are
 * used in this file - confirm.
 */
struct setup_smp_result {
	int cpus;
	int needs_reset;
};
#if CONFIG_MAX_CPUS > 1
/* Discover the other nodes, program the routing tables and optimize
 * the links between the nodes that were found.
 *
 * Nodes are probed through temporary routes: a candidate node answers
 * as node 7 until it has been set up and renamed.  If node 1 cannot be
 * reached the system falls back to uniprocessor mode; with more than
 * two CPUs configured, a missing node 2 or node 3 leaves a two node
 * system.
 *
 * Returns the number of initialized nodes and whether any link
 * optimization changed a setting, i.e. a reset is needed.  The reset
 * requirement of every optimized link is accumulated, so a change on
 * an earlier link is not lost when a later link is already optimal.
 */
static struct setup_smp_result setup_smp(void)
{
	struct setup_smp_result result;

	result.cpus = 2;
	result.needs_reset = 0;

	print_spew("Enabling SMP settings\r\n");

	setup_row(0, 0, result.cpus);
	/* Setup and check a temporary connection to node 1 */
	setup_temp_row(0, 1, result.cpus);

	if (!check_connection(0, 7, link_to_register(link_connection(0,1)))) {
		print_debug("No connection to Node 1.\r\n");
		clear_temp_row(0);	/* delete temp connection */
		setup_uniprocessor();	/* and get up working */
		result.cpus = 1;
		return result;
	}

	/* We found 2 nodes so far */

	setup_node(0, result.cpus);	/* Node 1 is there. Setup Node 0 correctly */
	setup_remote_node(1, result.cpus);	/* Setup the routes on the remote node */
	rename_temp_node(1);	/* Rename Node 7 to Node 1 */
	enable_routing(1);	/* Enable routing on Node 1 */

	clear_temp_row(0);	/* delete temporary connection */

	result.needs_reset |= optimize_connection(
		NODE_HT(0), 0x80 + link_to_register(link_connection(0,1)),
		NODE_HT(1), 0x80 + link_to_register(link_connection(1,0)) );

#if CONFIG_MAX_CPUS > 2
	result.cpus = 4;

	/* Setup and check temporary connection from Node 0 to Node 2 */
	setup_temp_row(0,2, result.cpus);

	if (!check_connection(0, 7, link_to_register(link_connection(0,2))) ) {
		print_debug("No connection to Node 2.\r\n");
		clear_temp_row(0);	/* delete temp connection */
		result.cpus = 2;
		return result;
	}

	/* We found 3 nodes so far. Now setup a temporary
	 * connection from node 0 to node 3 via node 1
	 */

	setup_temp_row(0,1, result.cpus); /* temp. link between nodes 0 and 1 */
	setup_temp_row(1,3, result.cpus); /* temp. link between nodes 1 and 3 */

	if (!check_connection(1, 7, link_to_register(link_connection(1,3)))) {
		print_debug("No connection to Node 3.\r\n");
		clear_temp_row(0);	/* delete temp connection */
		clear_temp_row(1);	/* delete temp connection */
		result.cpus = 2;
		return result;
	}

	/* We found 4 nodes so far. Now setup all nodes for 4p */

	setup_node(0, result.cpus);	/* The first 2 nodes are configured */
	setup_node(1, result.cpus);	/* already. Just configure them for 4p */

	setup_temp_row(0,2, result.cpus);
	setup_temp_node(2, result.cpus);
	rename_temp_node(2);
	enable_routing(2);

	setup_temp_row(0,1, result.cpus);
	setup_temp_row(1,3, result.cpus);
	setup_temp_node(3, result.cpus);
	rename_temp_node(3);
	enable_routing(3);	/* enable routing on node 3 (temp.) */

	clear_temp_row(0);
	clear_temp_row(1);
	clear_temp_row(2);
	clear_temp_row(3);

	/* optimize physical connections - by LYH */
	result.needs_reset |= optimize_connection(
		NODE_HT(0), 0x80 + link_to_register(link_connection(0,2)),
		NODE_HT(2), 0x80 + link_to_register(link_connection(2,0)) );

	result.needs_reset |= optimize_connection(
		NODE_HT(1), 0x80 + link_to_register(link_connection(1,3)),
		NODE_HT(3), 0x80 + link_to_register(link_connection(3,1)) );

	result.needs_reset |= optimize_connection(
		NODE_HT(2), 0x80 + link_to_register(link_connection(2,3)),
		NODE_HT(3), 0x80 + link_to_register(link_connection(3,2)) );

#endif /* CONFIG_MAX_CPUS > 2 */

	print_debug_hex8(result.cpus);
	print_debug(" nodes initialized.\r\n");

	return result;
}
#endif
#if CONFIG_MAX_CPUS > 1
/* Check that every node is allowed to run in a system of this size.
 *
 * Register 0xe8 of NODE_MC(node) must have all bits of the mask set:
 * 0x06 (BigMPCap) for more than two nodes, 0x02 (MPCap) otherwise.
 *
 * Returns cpus if all nodes qualify.  Otherwise the routing rows
 * 0 .. cpus-1 of every node are reset to DEFAULT, the uniprocessor
 * settings are applied and 1 is returned.
 */
static unsigned verify_mp_capabilities(unsigned cpus)
{
	unsigned node, row, mask;
	int mp_cap;

	mp_cap = 1;

	if (cpus > 2) {
		mask=0x06;	/* BigMPCap */
	} else {
		mask=0x02;	/* MPCap */
	}

	for (node=0; node<cpus; node++) {
		if ((pci_read_config32(NODE_MC(node), 0xe8) & mask) != mask) {
			mp_cap = 0;
		}
	}

	if (mp_cap) {
		return cpus;
	}

	/* one of our cpus is not mp capable */

	print_err("One of the CPUs is not MP capable. Going back to UP\r\n");

	/* Walk the nodes from the highest number down so that node 0,
	 * through which the others are reached, is reset last.
	 * fill_row() takes the node number and applies NODE_HT() itself.
	 */
	for (node = cpus; node > 0; node--) {
		for (row = cpus; row > 0; row--) {
			fill_row(node-1, row-1, DEFAULT);
		}
	}
	setup_uniprocessor();

	return 1;
}
#endif
499 static void coherent_ht_finalize(unsigned cpus)
504 /* set up cpu count and node count and enable Limit
505 * Config Space Range for all available CPUs.
506 * Also clear non coherent hypertransport bus range
507 * registers on Hammer A0 revision.
511 print_debug("coherent_ht_finalize\r\n");
513 rev_a0 = is_cpu_rev_a0();
514 for (node = 0; node < cpus; node++) {
519 /* Set the Total CPU and Node count in the system */
520 val = pci_read_config32(dev, 0x60);
521 val &= (~0x000F0070);
522 val |= ((cpus-1)<<16)|((cpus-1)<<4);
523 pci_write_config32(dev, 0x60, val);
525 /* Only respond to real cpu pci configuration cycles
526 * and optimize the HT settings
528 val=pci_read_config32(dev, 0x68);
529 val &= ~((HTTC_BUF_REL_PRI_MASK << HTTC_BUF_REL_PRI_SHIFT) |
530 (HTTC_MED_PRI_BYP_CNT_MASK << HTTC_MED_PRI_BYP_CNT_SHIFT) |
531 (HTTC_HI_PRI_BYP_CNT_MASK << HTTC_HI_PRI_BYP_CNT_SHIFT));
532 val |= HTTC_LIMIT_CLDT_CFG |
533 (HTTC_BUF_REL_PRI_8 << HTTC_BUF_REL_PRI_SHIFT) |
535 (3 << HTTC_MED_PRI_BYP_CNT_SHIFT) |
536 (3 << HTTC_HI_PRI_BYP_CNT_SHIFT);
537 pci_write_config32(dev, 0x68, val);
540 print_debug("shit it is an old cup\n");
541 pci_write_config32(dev, 0x94, 0);
542 pci_write_config32(dev, 0xb4, 0);
543 pci_write_config32(dev, 0xd4, 0);
548 print_debug("done\r\n");
552 static int apply_cpu_errata_fixes(unsigned cpus, int needs_reset)
555 for(node = 0; node < cpus; node++) {
559 if (is_cpu_pre_c0()) {
562 * Limit the number of downstream posted requests to 1
564 cmd = pci_read_config32(dev, 0x70);
565 if ((cmd & (3 << 0)) != 2) {
568 pci_write_config32(dev, 0x70, cmd );
571 cmd = pci_read_config32(dev, 0x7c);
572 if ((cmd & (3 << 4)) != 0) {
575 pci_write_config32(dev, 0x7c, cmd );
578 /* Clock Power/Timing Low */
579 cmd = pci_read_config32(dev, 0xd4);
580 if (cmd != 0x000D0001) {
582 pci_write_config32(dev, 0xd4, cmd);
583 needs_reset = 1; /* Needed? */
590 * Set Clk Ramp Hystersis to 7
591 * Clock Power/Timing Low
593 cmd_ref = 0x04e20707; /* Registered */
594 cmd = pci_read_config32(dev, 0xd4);
596 pci_write_config32(dev, 0xd4, cmd_ref );
597 needs_reset = 1; /* Needed? */
604 static int optimize_link_read_pointers(unsigned cpus, int needs_reset)
607 for(node = 0; node < cpus; node = node + 1) {
608 device_t f0_dev, f3_dev;
609 uint32_t cmd_ref, cmd;
611 f0_dev = NODE_HT(node);
612 f3_dev = NODE_MC(node);
613 cmd_ref = cmd = pci_read_config32(f3_dev, 0xdc);
614 for(link = 0; link < 3; link = link + 1) {
617 reg = 0x98 + (link * 0x20);
618 link_type = pci_read_config32(f0_dev, reg);
619 if (link_type & LinkConnected) {
620 cmd &= 0xff << (link *8);
621 /* FIXME this assumes the device on the other
622 * side is an AMD device */
623 cmd |= 0x25 << (link *8);
626 if (cmd != cmd_ref) {
627 pci_write_config32(f3_dev, 0xdc, cmd);
634 static int setup_coherent_ht_domain(void)
636 struct setup_smp_result result;
638 result.needs_reset = 0;
640 enable_bsp_routing();
642 #if CONFIG_MAX_CPUS == 1
643 setup_uniprocessor();
645 result = setup_smp();
646 result.cpus = verify_mp_capabilities(result.cpus);
649 coherent_ht_finalize(result.cpus);
650 result.needs_reset = apply_cpu_errata_fixes(result.cpus, result.needs_reset);
652 #if CONFIG_MAX_CPUS > 1 /* Why doesn't this work on the solo? */
653 result.needs_reset = optimize_link_read_pointers(result.cpus, result.needs_reset);
656 return result.needs_reset;