1 /* coherent hypertransport initialization for AMD64
3 * written by Stefan Reinauer <stepan@openbios.org>
4 * (c) 2003-2004 by SuSE Linux AG
6 * This code is licensed under GPL.
10 * This algorithm assumes a grid configuration as follows:
13 * org. : 1x1 2x1 2x2 2x3 2x4
17 #include <device/pci_def.h>
18 #include <device/pci_ids.h>
19 #include <device/hypertransport_def.h>
20 #include "arch/romcc_io.h"
23 /* when generating a temporary row configuration we
24 * don't want broadcast to be enabled for that node.
/* Helper macros for the routing code.
 *
 * generate_temp_row() takes the row produced by generate_row(), clears
 * bits 16-19 and sets bit 16, so that broadcast is not enabled for a
 * node that is only reached temporarily.
 * enable_bsp_routing() enables routing on node 0.
 * NODE_HT/NODE_MP/NODE_MC pass (0, 24+x, 0|1|3) to PCI_DEV; presumably
 * bus 0, device 24+x and function 0, 1 or 3 of node x (confirm against
 * the PCI_DEV definition).  The argument is parenthesized so that an
 * expression such as NODE_HT(a|b) is added to 24 as a whole.
 * DEFAULT is the row value written by clear_dead_routes().
 */
27 #define generate_temp_row(...) ((generate_row(__VA_ARGS__)&(~0x0f0000))|0x010000)
28 #define enable_bsp_routing() enable_routing(0)
30 #define NODE_HT(x) PCI_DEV(0,24+(x),0)
31 #define NODE_MP(x) PCI_DEV(0,24+(x),1)
32 #define NODE_MC(x) PCI_DEV(0,24+(x),3)
34 #define DEFAULT 0x00010101 /* default row entry */
/* Translate a request-route link mask into a per-link register offset.
 *
 * The mask is tested from the highest bit down: bit 3 gives 0x40,
 * bit 2 gives 0x20 and bit 1 gives 0x00.  Callers in this file add the
 * result to a base such as 0x80 or 0x98 to address the registers of
 * that link.  A mask with none of these bits set is not expected and is
 * reported with "Unknown Link".
 */
43 static u8 link_to_register(int ldt)
46 * [ 0: 3] Request Route
47 * [0] Route to this node
53 if (ldt&0x08) return 0x40;
54 if (ldt&0x04) return 0x20;
55 if (ldt&0x02) return 0x00;
57 /* we should never get here */
58 print_spew("Unknown Link\n");
/* Return the request-route field (the low four bits) of the routing row
 * that generate_row() yields for traffic from src to dest.  The row is
 * always generated for a CONFIG_MAX_CPUS node configuration, regardless
 * of how many nodes have been found so far.
 */
62 static int link_connection(int src, int dest)
64 /* we generate the needed link information from the rows
65 * by taking the Request Route of the according row.
68 return generate_row(src, dest, CONFIG_MAX_CPUS) & 0x0f;
/* Disable probe generation on node 0 for a uniprocessor system.
 *
 * Reads register 0x68 of NODE_HT(0), sets bits 0-4 and 8-10 and writes
 * the value back, logging before and after.
 * NOTE(review): the register description below only covers bits 0-3
 * and 10; the meaning of bits 4, 8 and 9, which are also set, is not
 * shown here - confirm against the processor documentation.
 */
71 static void disable_probes(void)
73 /* disable read/write/fill probes for uniprocessor setup
74 * they don't make sense if only one cpu is available
77 /* Hypertransport Transaction Control Register
79 * [ 0: 0] Disable read byte probe
81 * 1 = Probes not issued
82 * [ 1: 1] Disable Read Doubleword probe
84 * 1 = Probes not issued
85 * [ 2: 2] Disable write byte probes
87 * 1 = Probes not issued
88 * [ 3: 3] Disable Write Doubleword Probes
90 * 1 = Probes not issued.
91 * [10:10] Disable Fill Probe
92 * 0 = Probes issued for cache fills
93 * 1 = Probes not issued for cache fills.
98 print_spew("Disabling read/write/fill probes for UP... ");
100 val=pci_read_config32(NODE_HT(0), 0x68);
101 val |= (1<<10)|(1<<9)|(1<<8)|(1<<4)|(1<<3)|(1<<2)|(1<<1)|(1 << 0);
102 pci_write_config32(NODE_HT(0), 0x68, val);
104 print_spew("done.\r\n");
/* Switch the given node from default-link routing to its routing table.
 *
 * Reads register 0x6c of NODE_HT(node), clears bit 0 (Routing Table
 * Disable) and bit 1 (Request Disable) and writes the value back.  The
 * node number and completion are logged at spew level.
 */
108 static void enable_routing(u8 node)
112 /* HT Initialization Control Register
114 * [ 0: 0] Routing Table Disable
115 * 0 = Packets are routed according to routing tables
116 * 1 = Packets are routed according to the default link field
117 * [ 1: 1] Request Disable (BSP should clear this)
118 * 0 = Request packets may be generated
119 * 1 = Request packets may not be generated.
120 * [ 3: 2] Default Link (Read-only)
124 * 11 = CPU on same node
126 * - Scratch bit cleared by a cold reset
127 * [ 5: 5] BIOS Reset Detect
128 * - Scratch bit cleared by a cold reset
129 * [ 6: 6] INIT Detect
130 * - Scratch bit cleared by a warm or cold reset not by an INIT
134 /* Enable routing table */
135 print_spew("Enabling routing table for node ");
136 print_spew_hex8(node);
138 val=pci_read_config32(NODE_HT(node), 0x6c);
139 val &= ~((1<<1)|(1<<0));
140 pci_write_config32(NODE_HT(node), 0x6c, val);
142 print_spew(" done.\r\n");
145 #if CONFIG_MAX_CPUS > 1
/* Give the node that currently answers under the temporary id 7 its
 * final node id.
 *
 * Reads register 0x60 of NODE_HT(7), replaces the low three bits with
 * node and writes the value back through NODE_HT(7).  node is OR-ed in
 * without masking, so it has to fit into those three bits.
 */
147 static void rename_temp_node(u8 node)
151 print_spew("Renaming current temporary node to ");
152 print_spew_hex8(node);
154 val=pci_read_config32(NODE_HT(7), 0x60);
155 val &= (~7); /* clear low bits. */
156 val |= node; /* new node */
157 pci_write_config32(NODE_HT(7), 0x60, val);
159 print_spew(" done.\r\n");
/* Check that node dest can be reached from node src over the given link.
 *
 * link is a register offset as produced by link_to_register().  Two
 * tests are made: register 0x98+link of NODE_HT(src) must read 0x03 in
 * the bits selected by 0x17, and config offset 0 of NODE_HT(dest) must
 * read 0x11001022.  The return statements are not visible here;
 * callers treat a false result as "no connection".
 */
162 static bool check_connection(u8 src, u8 dest, u8 link)
164 /* See if we have a valid connection to dest */
167 /* Detect if the coherent HT link is connected. */
168 val = pci_read_config32(NODE_HT(src), 0x98+link);
169 if ( (val&0x17) != 0x03)
172 /* Verify that the coherent hypertransport link is
173 * established and actually working by reading the
174 * remote node's vendor/device id
176 val = pci_read_config32(NODE_HT(dest),0);
177 if(val != 0x11001022)
/* Read the HT frequency capability mask of a device and remove the
 * frequencies that must not be used.
 *
 * The 16-bit value at config offset pos is read, the vendor specific
 * frequency bit is always cleared, and depending on the vendor/device
 * id read from offset 0 further bits are cleared: 800MHz for the AMD
 * 8131 PCI-X and AMD 8151 devices, 1000MHz for AMD device id 0x1100.
 * Presumably the filtered mask is returned (return not visible here).
 */
183 static unsigned read_freq_cap(device_t dev, unsigned pos)
185 /* Handle bugs in valid hypertransport frequency reporting */
189 freq_cap = pci_read_config16(dev, pos);
190 freq_cap &= ~(1 << HT_FREQ_VENDOR); /* Ignore Vendor HT frequencies */
192 id = pci_read_config32(dev, 0);
194 /* AMD 8131 Errata 48 */
195 if (id == (PCI_VENDOR_ID_AMD | (PCI_DEVICE_ID_AMD_8131_PCIX << 16))) {
196 freq_cap &= ~(1 << HT_FREQ_800Mhz);
198 /* AMD 8151 Errata 23 */
199 if (id == (PCI_VENDOR_ID_AMD | (PCI_DEVICE_ID_AMD_8151_SYSCTRL << 16))) {
200 freq_cap &= ~(1 << HT_FREQ_800Mhz);
202 /* AMD K8 Unsupported 1Ghz? */
203 if (id == (PCI_VENDOR_ID_AMD | (0x1100 << 16))) {
204 freq_cap &= ~(1 << HT_FREQ_1000Mhz);
/* Program both ends of one HT link with the best frequency and width
 * that the two devices have in common.
 *
 * node1/node2 are the devices at the two ends, link1/link2 the config
 * offsets of their link capability blocks.
 * Frequency: the filtered capability masks of both ends are and-ed and
 * log2() of the result is written to the frequency register of each
 * end (presumably log2 yields the index of the highest set bit).
 * Width: the low three bits of a width capability byte are treated as
 * the input width and bits 4-6 as the output width.  Each direction
 * uses the smaller of "sender output" and "receiver input", compared as
 * powers of two via the two lookup tables.  node1 receives the combined
 * value at PCI_HT_CAP_HOST_WIDTH + 1, node2 the same value with the two
 * nibbles swapped.
 * needs_reset is or-ed with every "old value differs from new value"
 * comparison; callers store the function's result as their needs_reset
 * flag (the return statement is not visible here).
 */
209 static int optimize_connection(device_t node1, uint8_t link1, device_t node2, uint8_t link2)
211 static const uint8_t link_width_to_pow2[]= { 3, 4, 0, 5, 1, 2, 0, 0 };
212 static const uint8_t pow2_to_link_width[] = { 0x7, 4, 5, 0, 1, 3 };
213 uint16_t freq_cap1, freq_cap2, freq_cap, freq_mask;
214 uint8_t width_cap1, width_cap2, width_cap, width, old_width, ln_width1, ln_width2;
215 uint8_t freq, old_freq;
217 /* Set link width and frequency */
219 /* Initially assume everything is already optimized and I don't need a reset */
222 /* Get the frequency capabilities */
223 freq_cap1 = read_freq_cap(node1, link1 + PCI_HT_CAP_HOST_FREQ_CAP);
224 freq_cap2 = read_freq_cap(node2, link2 + PCI_HT_CAP_HOST_FREQ_CAP);
226 /* Calculate the highest possible frequency */
227 freq = log2(freq_cap1 & freq_cap2);
229 /* See if I am changing the link frequency */
230 old_freq = pci_read_config8(node1, link1 + PCI_HT_CAP_HOST_FREQ);
231 needs_reset |= old_freq != freq;
232 old_freq = pci_read_config8(node2, link2 + PCI_HT_CAP_HOST_FREQ);
233 needs_reset |= old_freq != freq;
235 /* Set the calculated link frequency */
236 pci_write_config8(node1, link1 + PCI_HT_CAP_HOST_FREQ, freq);
237 pci_write_config8(node2, link2 + PCI_HT_CAP_HOST_FREQ, freq);
239 /* Get the width capabilities */
240 width_cap1 = pci_read_config8(node1, link1 + PCI_HT_CAP_HOST_WIDTH);
241 width_cap2 = pci_read_config8(node2, link2 + PCI_HT_CAP_HOST_WIDTH);
243 /* Calculate node1's input width */
244 ln_width1 = link_width_to_pow2[width_cap1 & 7];
245 ln_width2 = link_width_to_pow2[(width_cap2 >> 4) & 7];
246 if (ln_width1 > ln_width2) {
247 ln_width1 = ln_width2;
249 width = pow2_to_link_width[ln_width1];
250 /* Calculate node1's output width */
251 ln_width1 = link_width_to_pow2[(width_cap1 >> 4) & 7];
252 ln_width2 = link_width_to_pow2[width_cap2 & 7];
253 if (ln_width1 > ln_width2) {
254 ln_width1 = ln_width2;
256 width |= pow2_to_link_width[ln_width1] << 4;
258 /* See if I am changing node1's width */
259 old_width = pci_read_config8(node1, link1 + PCI_HT_CAP_HOST_WIDTH + 1);
260 needs_reset |= old_width != width;
262 /* Set node1's widths */
263 pci_write_config8(node1, link1 + PCI_HT_CAP_HOST_WIDTH + 1, width);
265 /* Calculate node2's width */
266 width = ((width & 0x70) >> 4) | ((width & 0x7) << 4);
268 /* See if I am changing node2's width */
269 old_width = pci_read_config8(node2, link2 + PCI_HT_CAP_HOST_WIDTH + 1);
270 needs_reset |= old_width != width;
272 /* Set node2's widths */
273 pci_write_config8(node2, link2 + PCI_HT_CAP_HOST_WIDTH + 1, width);
/* Write value into routing table row `row` of the given node.  Rows are
 * 32-bit registers starting at config offset 0x40 of NODE_HT(node),
 * four bytes apart.
 */
278 static void fill_row(u8 node, u8 row, u32 value)
280 pci_write_config32(NODE_HT(node), 0x40+(row<<2), value);
/* Program row dest of node source with the route that generate_row()
 * produces for a system of `nodes` nodes.
 */
283 static void setup_row(u8 source, u8 dest, u8 nodes)
285 fill_row(source,dest,generate_row(source,dest,nodes));
/* Program row 7 of node source with a temporary route towards dest, so
 * that the not yet renamed node can be addressed as node 7.  The row
 * comes from generate_temp_row(), i.e. without broadcast enabled.
 */
288 static void setup_temp_row(u8 source, u8 dest, u8 nodes)
290 fill_row(source,7,generate_temp_row(source,dest,nodes));
/* Program rows 0 .. nodes-1 of the given node, one setup_row() call per
 * destination.
 */
293 static void setup_node(u8 node, u8 nodes)
296 for(row=0; row<nodes; row++)
297 setup_row(node, row, nodes);
/* Program row dest on the node currently reachable as node 7, using the
 * route that generate_row() produces for node id `source`, i.e. the id
 * under which the row contents are computed.
 */
300 static void setup_remote_row(u8 source, u8 dest, u8 nodes)
302 fill_row(7, dest, generate_row(source, dest, nodes));
/* Prepare the node that is currently reachable as node 7 to become
 * node `node`.
 *
 * First all rows 0 .. nodes-1 are written through setup_remote_row().
 * Then, for every entry of pci_reg, a 32-bit register is read from
 * NODE_MP(0) and written to NODE_MP(7), which copies node 0's default
 * resource map.  Presumably reg is loaded from pci_reg[i] on a line not
 * visible here - confirm.
 */
305 static void setup_remote_node(u8 node, u8 nodes)
307 static const uint8_t pci_reg[] = {
308 0x44, 0x4c, 0x54, 0x5c, 0x64, 0x6c, 0x74, 0x7c,
309 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78,
310 0x84, 0x8c, 0x94, 0x9c, 0xa4, 0xac, 0xb4, 0xbc,
311 0x80, 0x88, 0x90, 0x98, 0xa0, 0xa8, 0xb0, 0xb8,
312 0xc4, 0xcc, 0xd4, 0xdc,
313 0xc0, 0xc8, 0xd0, 0xd8,
314 0xe0, 0xe4, 0xe8, 0xec,
319 print_spew("setup_remote_node: ");
320 for(row=0; row<nodes; row++)
321 setup_remote_row(node, row, nodes);
323 /* copy the default resource map from node 0 */
324 for(i = 0; i < sizeof(pci_reg)/sizeof(pci_reg[0]); i++) {
328 value = pci_read_config32(NODE_MP(0), reg);
329 pci_write_config32(NODE_MP(7), reg, value);
332 print_spew("done\r\n");
337 #if CONFIG_MAX_CPUS > 2
/* Write rows 0 .. nodes-1 of the node currently reachable as node 7,
 * using the routes generate_row() produces for node id `node`.
 */
338 static void setup_temp_node(u8 node, u8 nodes)
341 for(row=0; row<nodes; row++)
342 fill_row(7,row,generate_row(node,row,nodes));
/* Apply the settings for a single-node system.
 * NOTE(review): only the log call is visible here; presumably the probes
 * are disabled as well (see disable_probes()) - confirm.
 */
346 static void setup_uniprocessor(void)
348 print_spew("Enabling UP settings\r\n");
/* Result of the node discovery.  Code in this file accesses the members
 * `nodes` (number of nodes found) and `needs_reset` (non-zero when a
 * link setting was changed).
 */
352 struct setup_smp_result {
357 #if CONFIG_MAX_CPUS > 1
/* Probe for additional nodes and build the coherent HT routing tables.
 *
 * Starting from node 0, a temporary route (row 7) is used to reach each
 * candidate node under the temporary id 7, and check_connection()
 * decides whether the node exists.  Nodes that are found get their
 * routes programmed, are renamed to their final id and have routing
 * enabled; the links between them are tuned with optimize_connection().
 * If node 1 cannot be reached the uniprocessor setup is applied.
 *
 * Returns a setup_smp_result.  needs_reset accumulates (|=) the outcome
 * of every optimize_connection() call, so a change reported for one
 * link is not lost when a link handled later needs no change.
 * result.nodes is printed at the end as the number of initialized nodes.
 */
358 static struct setup_smp_result setup_smp(void)
360 struct setup_smp_result result;
362 result.needs_reset = 0;
364 print_spew("Enabling SMP settings\r\n");
366 setup_row(0, 0, result.nodes);
367 /* Setup and check a temporary connection to node 1 */
368 setup_temp_row(0, 1, result.nodes);
370 if (!check_connection(0, 7, link_to_register(link_connection(0,1)))) {
371 print_spew("No connection to Node 1.\r\n");
372 setup_uniprocessor(); /* and get up working */
377 /* We found 2 nodes so far */
379 setup_node(0, result.nodes); /* Node 1 is there. Setup Node 0 correctly */
380 setup_remote_node(1, result.nodes); /* Setup the routes on the remote node */
381 rename_temp_node(1); /* Rename Node 7 to Node 1 */
382 enable_routing(1); /* Enable routing on Node 1 */
384 result.needs_reset |= optimize_connection(
385 NODE_HT(0), 0x80 + link_to_register(link_connection(0,1)),
386 NODE_HT(1), 0x80 + link_to_register(link_connection(1,0)) );
388 #if CONFIG_MAX_CPUS > 2
391 /* Setup and check temporary connection from Node 0 to Node 2 */
392 setup_temp_row(0,2, result.nodes);
394 if (!check_connection(0, 7, link_to_register(link_connection(0,2))) ) {
395 print_spew("No connection to Node 2.\r\n");
400 /* We found 3 nodes so far. Now setup a temporary
401 * connection from node 0 to node 3 via node 1
404 setup_temp_row(0,1, result.nodes); /* temp. link between nodes 0 and 1 */
405 setup_temp_row(1,3, result.nodes); /* temp. link between nodes 1 and 3 */
407 if (!check_connection(1, 7, link_to_register(link_connection(1,3)))) {
408 print_spew("No connection to Node 3.\r\n");
413 /* We found 4 nodes so far. Now setup all nodes for 4p */
415 setup_node(0, result.nodes); /* The first 2 nodes are configured */
416 setup_node(1, result.nodes); /* already. Just configure them for 4p */
418 setup_temp_row(0,2, result.nodes);
419 setup_temp_node(2, result.nodes);
423 setup_temp_row(0,1, result.nodes);
424 setup_temp_row(1,3, result.nodes);
425 setup_temp_node(3, result.nodes);
427 enable_routing(3); /* enable routing on node 3 (temp.) */
429 /* optimize physical connections - by LYH */
430 result.needs_reset |= optimize_connection(
431 NODE_HT(0), 0x80 + link_to_register(link_connection(0,2)),
432 NODE_HT(2), 0x80 + link_to_register(link_connection(2,0)) );
434 result.needs_reset |= optimize_connection(
435 NODE_HT(1), 0x80 + link_to_register(link_connection(1,3)),
436 NODE_HT(3), 0x80 + link_to_register(link_connection(3,1)) );
438 result.needs_reset |= optimize_connection(
439 NODE_HT(2), 0x80 + link_to_register(link_connection(2,3)),
440 NODE_HT(3), 0x80 + link_to_register(link_connection(3,2)) );
442 #endif /* CONFIG_MAX_CPUS > 2 */
444 print_debug_hex8(result.nodes);
445 print_debug(" nodes initialized.\r\n");
/* Check that every discovered node is allowed to run in a system of
 * this size.
 *
 * A capability mask is chosen from the node count: 0x06 (BigMPCap),
 * 0x02 (MPCap) for exactly two nodes, 0x00 otherwise; the condition
 * selecting 0x06 is not visible here.  Register 0xe8 of NODE_MC(node)
 * must have all mask bits set on every node.  If one node fails, an
 * error is logged and the system goes back to uniprocessor operation.
 * The caller uses the return value as the node count.
 */
450 static unsigned verify_mp_capabilities(unsigned nodes)
452 unsigned node, row, mask;
456 mask=0x06; /* BigMPCap */
457 } else if (nodes == 2) {
458 mask=0x02; /* MPCap */
460 mask=0x00; /* Non SMP */
463 for (node=0; node<nodes; node++) {
464 if ((pci_read_config32(NODE_MC(node), 0xe8) & mask) != mask) {
473 /* one of our nodes is not mp capable */
475 print_err("One of the CPUs is not MP capable. Going back to UP\r\n");
/* Reset unused routing rows to the DEFAULT entry.
 *
 * For every node id from 7 down to 0, rows 7 down to last_row are
 * overwritten with DEFAULT; last_row is set on lines not visible here.
 * NOTE(review): both loops count down with a ">=" test, so they only
 * terminate if node and row are signed; the declarations are not
 * visible here - confirm.
 */
479 static void clear_dead_routes(unsigned nodes)
487 for(node = 7; node >= 0; node--) {
488 for(row = 7; row >= last_row; row--) {
489 fill_row(node, row, DEFAULT);
/* Final per-node configuration once the number of nodes is known.
 *
 * For each node 0 .. nodes-1:
 *  - register 0x60: the fields selected by 0x000F0070 are cleared and
 *    nodes-1 is stored at bit 16 and at bit 4 (CPU and node count);
 *  - register 0x68: the buffer release priority and the medium/high
 *    priority bypass count fields are replaced (release priority 8,
 *    both bypass counts 3) and HTTC_LIMIT_CLDT_CFG is set;
 *  - registers 0x94, 0xb4 and 0xd4 are written with 0, presumably only
 *    when is_cpu_rev_a0() reported a rev A0 part (the condition is not
 *    visible here).
 * NOTE(review): the spew message logged before the 0x94/0xb4/0xd4
 * writes reads "old cup" (presumably "old cpu") and ends in "\n" where
 * the neighbouring messages use "\r\n".
 */
494 static void coherent_ht_finalize(unsigned nodes)
499 /* set up cpu count and node count and enable Limit
500 * Config Space Range for all available CPUs.
501 * Also clear non coherent hypertransport bus range
502 * registers on Hammer A0 revision.
505 print_spew("coherent_ht_finalize\r\n");
506 rev_a0 = is_cpu_rev_a0();
507 for (node = 0; node < nodes; node++) {
512 /* Set the Total CPU and Node count in the system */
513 val = pci_read_config32(dev, 0x60);
514 val &= (~0x000F0070);
515 val |= ((nodes-1)<<16)|((nodes-1)<<4);
516 pci_write_config32(dev, 0x60, val);
518 /* Only respond to real cpu pci configuration cycles
519 * and optimize the HT settings
521 val=pci_read_config32(dev, 0x68);
522 val &= ~((HTTC_BUF_REL_PRI_MASK << HTTC_BUF_REL_PRI_SHIFT) |
523 (HTTC_MED_PRI_BYP_CNT_MASK << HTTC_MED_PRI_BYP_CNT_SHIFT) |
524 (HTTC_HI_PRI_BYP_CNT_MASK << HTTC_HI_PRI_BYP_CNT_SHIFT));
525 val |= HTTC_LIMIT_CLDT_CFG |
526 (HTTC_BUF_REL_PRI_8 << HTTC_BUF_REL_PRI_SHIFT) |
528 (3 << HTTC_MED_PRI_BYP_CNT_SHIFT) |
529 (3 << HTTC_HI_PRI_BYP_CNT_SHIFT);
530 pci_write_config32(dev, 0x68, val);
533 print_spew("shit it is an old cup\n");
534 pci_write_config32(dev, 0x94, 0);
535 pci_write_config32(dev, 0xb4, 0);
536 pci_write_config32(dev, 0xd4, 0);
540 print_spew("done\r\n");
/* Apply CPU revision dependent register fixes on every node.
 *
 * On parts for which is_cpu_pre_c0() is true:
 *  - register 0x70 is rewritten unless its low two bits already read 2;
 *  - register 0x7c is rewritten unless bits 4-5 are already 0;
 *  - register 0xd4 is rewritten unless it already reads 0x000D0001, and
 *    needs_reset is set.
 * In the remaining path register 0xd4 is written with 0x04e20707 and
 * needs_reset is set; presumably only if the value read differs (the
 * comparison is not visible here).
 * The incoming needs_reset is passed through and only ever raised; the
 * caller stores the result as its new needs_reset flag.
 */
543 static int apply_cpu_errata_fixes(unsigned nodes, int needs_reset)
546 for(node = 0; node < nodes; node++) {
550 if (is_cpu_pre_c0()) {
553 * Limit the number of downstream posted requests to 1
555 cmd = pci_read_config32(dev, 0x70);
556 if ((cmd & (3 << 0)) != 2) {
559 pci_write_config32(dev, 0x70, cmd );
562 cmd = pci_read_config32(dev, 0x7c);
563 if ((cmd & (3 << 4)) != 0) {
566 pci_write_config32(dev, 0x7c, cmd );
569 /* Clock Power/Timing Low */
570 cmd = pci_read_config32(dev, 0xd4);
571 if (cmd != 0x000D0001) {
573 pci_write_config32(dev, 0xd4, cmd);
574 needs_reset = 1; /* Needed? */
581 * Set Clk Ramp Hystersis to 7
582 * Clock Power/Timing Low
584 cmd_ref = 0x04e20707; /* Registered */
585 cmd = pci_read_config32(dev, 0xd4);
587 pci_write_config32(dev, 0xd4, cmd_ref );
588 needs_reset = 1; /* Needed? */
/* Program the per-link byte of register 0xdc for every connected link.
 *
 * For each node 0 .. nodes-1 register 0xdc of NODE_MC(node) is read.
 * Each of the three links owns one byte of that register (link * 8 bits
 * up).  When the link's register at 0x98 + link * 0x20 on NODE_HT(node)
 * has LinkConnected set, that byte is cleared and then set to 0x25.
 * The byte is cleared with the inverted mask ~(0xff << (link * 8)):
 * and-ing with the plain mask would instead wipe the bytes of all other
 * links (including values set in an earlier iteration) and keep the old
 * bits of this one.
 * The register is only written back when its value changed.  The
 * incoming needs_reset is passed through; the caller stores the result
 * as its new needs_reset flag.
 */
595 static int optimize_link_read_pointers(unsigned nodes, int needs_reset)
598 for(node = 0; node < nodes; node = node + 1) {
599 device_t f0_dev, f3_dev;
600 uint32_t cmd_ref, cmd;
602 f0_dev = NODE_HT(node);
603 f3_dev = NODE_MC(node);
604 cmd_ref = cmd = pci_read_config32(f3_dev, 0xdc);
605 for(link = 0; link < 3; link = link + 1) {
608 reg = 0x98 + (link * 0x20);
609 link_type = pci_read_config32(f0_dev, reg);
610 if (link_type & LinkConnected) {
611 cmd &= ~(0xff << (link *8));
612 /* FIXME this assumes the device on the other
613 * side is an AMD device
615 cmd |= 0x25 << (link *8);
618 if (cmd != cmd_ref) {
619 pci_write_config32(f3_dev, 0xdc, cmd);
/* Entry point: bring up the coherent HyperTransport domain.
 *
 * Order of operations visible here: enable routing on the boot node,
 * discover the nodes with setup_smp() when CONFIG_MAX_CPUS > 1, reduce
 * the node count if verify_mp_capabilities() rejects it, reset unused
 * routes, apply the uniprocessor settings when only one node remains,
 * finalize the per-node registers, apply the CPU errata fixes and, when
 * CONFIG_MAX_CPUS > 1, optimize the link read pointers.  Where the
 * conditional sections end is not visible here.
 *
 * Returns result.needs_reset, which is non-zero when one of the steps
 * changed a setting that requires a reset.
 */
626 static int setup_coherent_ht_domain(void)
628 struct setup_smp_result result;
630 result.needs_reset = 0;
632 enable_bsp_routing();
634 #if CONFIG_MAX_CPUS > 1
635 result = setup_smp();
637 result.nodes = verify_mp_capabilities(result.nodes);
638 clear_dead_routes(result.nodes);
639 if (result.nodes == 1) {
640 setup_uniprocessor();
642 coherent_ht_finalize(result.nodes);
643 result.needs_reset = apply_cpu_errata_fixes(result.nodes, result.needs_reset);
645 #if CONFIG_MAX_CPUS > 1 /* Why doesn't this work on the solo? */
646 result.needs_reset = optimize_link_read_pointers(result.nodes, result.needs_reset);
649 return result.needs_reset;