1 /* coherent hypertransport initialization for AMD64
2 * written by Stefan Reinauer <stepan@openbios.org>
3 * (c) 2003 by SuSE Linux AG
5 * This code is licensed under GPL.
9 * This algorithm assumes a grid configuration as follows:
12 * org. : 1x1 2x1 2x2 2x3 2x4
16 #include <device/pci_def.h>
17 #include <device/pci_ids.h>
18 #include <device/hypertransport_def.h>
19 #include "arch/romcc_io.h"
23 * Until we have a completely dynamic setup we want
24 * to be able to map different cpu graphs.
32 * set some default values. These are used if they are not
33 * differently defined in the motherboard's auto.c file.
34 * See src/mainboard/amd/quartet/auto.c for an example.
/* Default link assignments between neighbouring nodes. Each value is
 * only used when the mainboard has not already defined the macro.
 * CONNECTION_0_1 is the link probed from node 0 towards node 1.
 */
37 #ifndef CONNECTION_0_1
38 #define CONNECTION_0_1 ACROSS
/* CONNECTION_0_2 is the link probed from node 0 towards node 2. */
41 #ifndef CONNECTION_0_2
42 #define CONNECTION_0_2 UP
/* CONNECTION_1_3 is the link probed from node 1 towards node 3. */
45 #ifndef CONNECTION_1_3
46 #define CONNECTION_1_3 UP
/* When generating a temporary row configuration we don't want
 * broadcast to be enabled for that node: bits 19:16 of the generated
 * row are cleared and only bit 16 is set again.
 */
#define generate_temp_row(x...) ((generate_row(x) & (~0x0f0000)) | 0x010000)

/* Reset the temporary routing row (row 7) of node x to the default entry. */
#define clear_temp_row(x) fill_row((x), 7, DEFAULT)

/* Switch node 0 over to routing-table based routing. */
#define enable_bsp_routing() enable_routing(0)

/* PCI handles for node x: functions 0, 1 and 3 of device 24+x on bus 0.
 * The argument is parenthesized so that expressions such as
 * NODE_HT(n << 1) or NODE_HT(a | b) expand as intended.
 */
#define NODE_HT(x) PCI_DEV(0, 24 + (x), 0)
#define NODE_MP(x) PCI_DEV(0, 24 + (x), 1)
#define NODE_MC(x) PCI_DEV(0, 24 + (x), 3)

#define DEFAULT 0x00010101 /* default row entry */
70 static void disable_probes(void)
72 /* disable read/write/fill probes for uniprocessor setup
73 * they don't make sense if only one cpu is available
76 /* Hypetransport Transaction Control Register
78 * [ 0: 0] Disable read byte probe
80 * 1 = Probes not issued
81 * [ 1: 1] Disable Read Doubleword probe
83 * 1 = Probes not issued
84 * [ 2: 2] Disable write byte probes
86 * 1 = Probes not issued
87 * [ 3: 3] Disable Write Doubleword Probes
89 * 1 = Probes not issued.
90 * [10:10] Disable Fill Probe
91 * 0 = Probes issued for cache fills
92 * 1 = Probes not issued for cache fills.
97 print_spew("Disabling read/write/fill probes for UP... ");
99 val=pci_read_config32(NODE_HT(0), 0x68);
100 val |= (1<<10)|(1<<9)|(1<<8)|(1<<4)|(1<<3)|(1<<2)|(1<<1)|(1 << 0);
101 pci_write_config32(NODE_HT(0), 0x68, val);
103 print_spew("done.\r\n");
107 static void enable_routing(u8 node)
111 /* HT Initialization Control Register
113 * [ 0: 0] Routing Table Disable
114 * 0 = Packets are routed according to routing tables
115 * 1 = Packets are routed according to the default link field
116 * [ 1: 1] Request Disable (BSP should clear this)
117 * 0 = Request packets may be generated
118 * 1 = Request packets may not be generated.
119 * [ 3: 2] Default Link (Read-only)
123 * 11 = CPU on same node
125 * - Scratch bit cleared by a cold reset
126 * [ 5: 5] BIOS Reset Detect
127 * - Scratch bit cleared by a cold reset
128 * [ 6: 6] INIT Detect
129 * - Scratch bit cleared by a warm or cold reset not by an INIT
133 /* Enable routing table */
134 print_spew("Enabling routing table for node ");
135 print_spew_hex32(node);
137 val=pci_read_config32(NODE_HT(node), 0x6c);
138 val &= ~((1<<1)|(1<<0));
139 pci_write_config32(NODE_HT(node), 0x6c, val);
141 print_spew(" done.\r\n");
144 #if CONFIG_MAX_CPUS > 1
146 static void rename_temp_node(u8 node)
150 print_spew("Renaming current temp node to ");
151 print_spew_hex32(node);
153 val=pci_read_config32(NODE_HT(7), 0x60);
154 val &= (~7); /* clear low bits. */
155 val |= node; /* new node */
156 pci_write_config32(NODE_HT(7), 0x60, val);
158 print_spew(" done.\r\n");
161 static bool check_connection(u8 src, u8 dest, u8 link)
163 /* See if we have a valid connection to dest */
166 /* Detect if the coherent HT link is connected. */
167 val = pci_read_config32(NODE_HT(src), 0x98+link);
168 if ( (val&0x17) != 0x03)
171 /* Verify that the coherent hypertransport link is
172 * established and actually working by reading the
173 * remode node's vendor/device id
175 val = pci_read_config32(NODE_HT(dest),0);
176 if(val != 0x11001022)
182 static unsigned read_freq_cap(device_t dev, unsigned pos)
184 /* Handle bugs in valid hypertransport frequency reporting */
188 freq_cap = pci_read_config16(dev, pos);
189 freq_cap &= ~(1 << HT_FREQ_VENDOR); /* Ignore Vendor HT frequencies */
191 id = pci_read_config32(dev, 0);
193 /* AMD 8131 Errata 48 */
194 if (id == (PCI_VENDOR_ID_AMD | (PCI_DEVICE_ID_AMD_8131_PCIX << 16))) {
195 freq_cap &= ~(1 << HT_FREQ_800Mhz);
197 /* AMD 8151 Errata 23 */
198 if (id == (PCI_VENDOR_ID_AMD | (PCI_DEVICE_ID_AMD_8151_SYSCTRL << 16))) {
199 freq_cap &= ~(1 << HT_FREQ_800Mhz);
201 /* AMD K8 Unsupported 1Ghz? */
202 if (id == (PCI_VENDOR_ID_AMD | (0x1100 << 16))) {
203 freq_cap &= ~(1 << HT_FREQ_1000Mhz);
208 static int optimize_connection(device_t node1, uint8_t link1, device_t node2, uint8_t link2)
210 static const uint8_t link_width_to_pow2[]= { 3, 4, 0, 5, 1, 2, 0, 0 };
211 static const uint8_t pow2_to_link_width[] = { 0x7, 4, 5, 0, 1, 3 };
212 uint16_t freq_cap1, freq_cap2, freq_cap, freq_mask;
213 uint8_t width_cap1, width_cap2, width_cap, width, old_width, ln_width1, ln_width2;
214 uint8_t freq, old_freq;
216 /* Set link width and frequency */
218 /* Initially assume everything is already optimized and I don't need a reset */
221 /* Get the frequency capabilities */
222 freq_cap1 = read_freq_cap(node1, link1 + PCI_HT_CAP_HOST_FREQ_CAP);
223 freq_cap2 = read_freq_cap(node2, link2 + PCI_HT_CAP_HOST_FREQ_CAP);
225 /* Calculate the highest possible frequency */
226 freq = log2(freq_cap1 & freq_cap2);
228 /* See if I am changing the link freqency */
229 old_freq = pci_read_config8(node1, link1 + PCI_HT_CAP_HOST_FREQ);
230 needs_reset |= old_freq != freq;
231 old_freq = pci_read_config8(node2, link2 + PCI_HT_CAP_HOST_FREQ);
232 needs_reset |= old_freq != freq;
234 /* Set the Calulcated link frequency */
235 pci_write_config8(node1, link1 + PCI_HT_CAP_HOST_FREQ, freq);
236 pci_write_config8(node2, link2 + PCI_HT_CAP_HOST_FREQ, freq);
238 /* Get the width capabilities */
239 width_cap1 = pci_read_config8(node1, link1 + PCI_HT_CAP_HOST_WIDTH);
240 width_cap2 = pci_read_config8(node2, link2 + PCI_HT_CAP_HOST_WIDTH);
242 /* Calculate node1's input width */
243 ln_width1 = link_width_to_pow2[width_cap1 & 7];
244 ln_width2 = link_width_to_pow2[(width_cap2 >> 4) & 7];
245 if (ln_width1 > ln_width2) {
246 ln_width1 = ln_width2;
248 width = pow2_to_link_width[ln_width1];
249 /* Calculate node1's output width */
250 ln_width1 = link_width_to_pow2[(width_cap1 >> 4) & 7];
251 ln_width2 = link_width_to_pow2[width_cap2 & 7];
252 if (ln_width1 > ln_width2) {
253 ln_width1 = ln_width2;
255 width |= pow2_to_link_width[ln_width1] << 4;
257 /* See if I am changing node1's width */
258 old_width = pci_read_config8(node1, link1 + PCI_HT_CAP_HOST_WIDTH + 1);
259 needs_reset |= old_width != width;
261 /* Set node1's widths */
262 pci_write_config8(node1, link1 + PCI_HT_CAP_HOST_WIDTH + 1, width);
264 /* Calculate node2's width */
265 width = ((width & 0x70) >> 4) | ((width & 0x7) << 4);
267 /* See if I am changing node2's width */
268 old_width = pci_read_config8(node2, link2 + PCI_HT_CAP_HOST_WIDTH + 1);
269 needs_reset |= old_width != width;
271 /* Set node2's widths */
272 pci_write_config8(node2, link2 + PCI_HT_CAP_HOST_WIDTH + 1, width);
277 static void fill_row(u8 node, u8 row, u32 value)
279 pci_write_config32(NODE_HT(node), 0x40+(row<<2), value);
282 static void setup_row(u8 source, u8 dest, u8 cpus)
284 fill_row(source,dest,generate_row(source,dest,cpus));
287 static void setup_temp_row(u8 source, u8 dest, u8 cpus)
289 fill_row(source,7,generate_temp_row(source,dest,cpus));
292 static void setup_node(u8 node, u8 cpus)
295 for(row=0; row<cpus; row++)
296 setup_row(node, row, cpus);
299 static void setup_remote_row(u8 source, u8 dest, u8 cpus)
301 fill_row(7, dest, generate_row(source, dest, cpus));
304 static void setup_remote_node(u8 node, u8 cpus)
306 static const uint8_t pci_reg[] = {
307 0x44, 0x4c, 0x54, 0x5c, 0x64, 0x6c, 0x74, 0x7c,
308 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78,
309 0x84, 0x8c, 0x94, 0x9c, 0xa4, 0xac, 0xb4, 0xbc,
310 0x80, 0x88, 0x90, 0x98, 0xa0, 0xa8, 0xb0, 0xb8,
311 0xc4, 0xcc, 0xd4, 0xdc,
312 0xc0, 0xc8, 0xd0, 0xd8,
313 0xe0, 0xe4, 0xe8, 0xec,
318 print_spew("setup_remote_node\r\n");
319 for(row=0; row<cpus; row++)
320 setup_remote_row(node, row, cpus);
322 /* copy the default resource map from node 0 */
323 for(i = 0; i < sizeof(pci_reg)/sizeof(pci_reg[0]); i++) {
327 value = pci_read_config32(NODE_MP(0), reg);
328 pci_write_config32(NODE_MP(7), reg, value);
331 print_spew("setup_remote_done\r\n");
336 #if CONFIG_MAX_CPUS > 2
337 static void setup_temp_node(u8 node, u8 cpus)
340 for(row=0; row<cpus; row++)
341 fill_row(7,row,generate_row(node,row,cpus));
/* Switch the system to its uniprocessor configuration and log that
 * fact at spew level.
 * NOTE(review): only the log statement is visible in this listing;
 * presumably the body also calls disable_probes() - confirm.
 */
345 static void setup_uniprocessor(void)
347 print_spew("Enabling UP settings\r\n");
/* Outcome of the node discovery: users of this struct access a `cpus`
 * member (number of nodes that were set up) and a `needs_reset` member
 * (non-zero when a reset is required for settings to take effect).
 */
351 struct setup_smp_result {
356 #if CONFIG_MAX_CPUS > 1
/* Discover the other nodes starting from node 0 and set up routing.
 *
 * Each candidate node is first reached through a temporary route
 * (row 7) and addressed as node 7. check_connection() verifies the
 * link; on success the node's routing rows are written, it is renamed
 * to its final number and routing is enabled on it. When node 1 cannot
 * be reached the uniprocessor setup is used instead.
 *
 * Returns a struct setup_smp_result; the node count is printed at
 * debug level before returning.
 */
357 static struct setup_smp_result setup_smp(void)
359 struct setup_smp_result result;
361 result.needs_reset = 0;
363 print_spew("Enabling SMP settings\r\n");
365 setup_row(0, 0, result.cpus);
366 /* Setup and check a temporary connection to node 1 */
367 setup_temp_row(0, 1, result.cpus);
369 if (!check_connection(0, 7, CONNECTION_0_1)) {
370 print_spew("No connection to Node 1.\r\n");
371 clear_temp_row(0); /* delete temp connection */
372 setup_uniprocessor(); /* and get up working */
377 /* We found 2 nodes so far */
/* NOTE(review): as shown, the return value of optimize_connection()
 * (its "needs reset" indication) is discarded here - confirm whether
 * it should be stored in result.needs_reset.
 */
379 optimize_connection(NODE_HT(0), 0x80 + CONNECTION_0_1, NODE_HT(7), 0x80 + CONNECTION_0_1);
380 setup_node(0, result.cpus); /* Node 1 is there. Setup Node 0 correctly */
381 setup_remote_node(1, result.cpus); /* Setup the routes on the remote node */
382 rename_temp_node(1); /* Rename Node 7 to Node 1 */
383 enable_routing(1); /* Enable routing on Node 1 */
385 clear_temp_row(0); /* delete temporary connection */
/* Everything below is only built for configurations with more than
 * two CPUs: nodes 2 and 3 are probed the same way as node 1.
 */
387 #if CONFIG_MAX_CPUS > 2
390 /* Setup and check temporary connection from Node 0 to Node 2 */
391 setup_temp_row(0,2, result.cpus);
393 if (!check_connection(0, 7, CONNECTION_0_2)) {
394 print_spew("No connection to Node 2.\r\n");
395 clear_temp_row(0); /* delete temp connection */
400 /* We found 3 nodes so far. Now setup a temporary
401 * connection from node 0 to node 3 via node 1
404 setup_temp_row(0,1, result.cpus); /* temp. link between nodes 0 and 1 */
405 setup_temp_row(1,3, result.cpus); /* temp. link between nodes 1 and 3 */
407 if (!check_connection(1, 7, CONNECTION_1_3)) {
408 print_spew("No connection to Node 3.\r\n");
409 clear_temp_row(0); /* delete temp connection */
410 clear_temp_row(1); /* delete temp connection */
415 #warning "FIXME optimize the physical connections"
417 /* We found 4 nodes so far. Now setup all nodes for 4p */
419 setup_node(0, result.cpus); /* The first 2 nodes are configured */
420 setup_node(1, result.cpus); /* already. Just configure them for 4p */
/* Node 2 is reached directly from node 0. */
422 setup_temp_row(0,2, result.cpus);
423 setup_temp_node(2, result.cpus);
/* Node 3 is reached from node 0 through node 1. */
427 setup_temp_row(0,1, result.cpus);
428 setup_temp_row(1,3, result.cpus);
429 setup_temp_node(3, result.cpus);
431 enable_routing(3); /* enable routing on node 3 (temp.) */
439 print_debug_hex32(result.cpus);
440 print_debug(" nodes initialized.\r\n");
445 #if CONFIG_MAX_CPUS > 1
446 static unsigned verify_mp_capabilities(unsigned cpus)
448 unsigned node, row, mask;
452 mask=0x06; /* BigMPCap */
454 mask=0x02; /* MPCap */
457 for (node=0; node<cpus; node++) {
458 if ((pci_read_config32(NODE_MC(node), 0xe8) & mask) != mask) {
467 /* one of our cpus is not mp capable */
469 print_err("One of the CPUs is not MP capable. Going back to UP\r\n");
471 for (node = cpus; node > 0; node--) {
472 for (row = cpus; row > 0; row--) {
473 fill_row(NODE_HT(node-1), row-1, DEFAULT);
476 setup_uniprocessor();
/* Final per-node HT configuration for nodes 0 .. cpus-1.
 *
 * For each node this rewrites bits 6:4 and 19:16 of register 0x60 with
 * cpus-1, reprograms the priority/bypass fields of register 0x68 and
 * sets HTTC_LIMIT_CLDT_CFG there, and writes 0 to registers 0x94, 0xb4
 * and 0xd4.
 * NOTE(review): the declaration of `dev` and the condition guarding the
 * three zero writes are not visible in this listing; presumably `dev`
 * is NODE_HT(node) and the writes depend on `rev_a0` - confirm.
 */
482 static void coherent_ht_finalize(unsigned cpus)
487 /* set up cpu count and node count and enable Limit
488 * Config Space Range for all available CPUs.
489 * Also clear non coherent hypertransport bus range
490 * registers on Hammer A0 revision.
494 print_debug("coherent_ht_finalize\r\n");
496 rev_a0 = is_cpu_rev_a0();
497 for (node = 0; node < cpus; node++) {
502 /* Set the Total CPU and Node count in the system */
503 val = pci_read_config32(dev, 0x60);
504 val &= (~0x000F0070);
/* both count fields are stored as (count - 1) */
505 val |= ((cpus-1)<<16)|((cpus-1)<<4);
506 pci_write_config32(dev, 0x60, val);
508 /* Only respond to real cpu pci configuration cycles
509 * and optimize the HT settings
511 val=pci_read_config32(dev, 0x68);
512 val &= ~((HTTC_BUF_REL_PRI_MASK << HTTC_BUF_REL_PRI_SHIFT) |
513 (HTTC_MED_PRI_BYP_CNT_MASK << HTTC_MED_PRI_BYP_CNT_SHIFT) |
514 (HTTC_HI_PRI_BYP_CNT_MASK << HTTC_HI_PRI_BYP_CNT_SHIFT));
515 val |= HTTC_LIMIT_CLDT_CFG |
516 (HTTC_BUF_REL_PRI_8 << HTTC_BUF_REL_PRI_SHIFT) |
518 (3 << HTTC_MED_PRI_BYP_CNT_SHIFT) |
519 (3 << HTTC_HI_PRI_BYP_CNT_SHIFT);
520 pci_write_config32(dev, 0x68, val);
523 pci_write_config32(dev, 0x94, 0);
524 pci_write_config32(dev, 0xb4, 0);
525 pci_write_config32(dev, 0xd4, 0);
532 print_debug("done\r\n");
/* Apply CPU-revision dependent register fixes on nodes 0 .. cpus-1.
 *
 * cpus        - number of nodes to process
 * needs_reset - reset indication accumulated so far; set to 1 here
 *               whenever register 0xd4 has to be rewritten
 *
 * When is_cpu_pre_c0() is true, registers 0x70 and 0x7c are checked and
 * rewritten if a field does not hold the wanted value, and register
 * 0xd4 is forced to 0x000D0001. Otherwise register 0xd4 is set to
 * 0x04e20707.
 * NOTE(review): the statements that modify `cmd` and the final return
 * are not visible in this listing; presumably the updated needs_reset
 * is returned - confirm.
 */
536 static int apply_cpu_errata_fixes(unsigned cpus, int needs_reset)
539 for(node = 0; node < cpus; node++) {
543 if (is_cpu_pre_c0()) {
546 * Limit the number of downstream posted requests to 1
548 cmd = pci_read_config32(dev, 0x70);
/* bits 1:0 of register 0x70 are expected to hold 2 */
549 if ((cmd & (3 << 0)) != 2) {
552 pci_write_config32(dev, 0x70, cmd );
555 cmd = pci_read_config32(dev, 0x7c);
/* bits 5:4 of register 0x7c are expected to be 0 */
556 if ((cmd & (3 << 4)) != 0) {
559 pci_write_config32(dev, 0x7c, cmd );
562 /* Clock Power/Timing Low */
563 cmd = pci_read_config32(dev, 0xd4);
564 if (cmd != 0x000D0001) {
566 pci_write_config32(dev, 0xd4, cmd);
567 needs_reset = 1; /* Needed? */
574 * Set Clk Ramp Hystersis to 7
575 * Clock Power/Timing Low
577 cmd_ref = 0x04e20707; /* Registered */
578 cmd = pci_read_config32(dev, 0xd4);
580 pci_write_config32(dev, 0xd4, cmd_ref );
581 needs_reset = 1; /* Needed? */
588 static int optimize_link_read_pointers(unsigned cpus, int needs_reset)
591 for(node = 0; node < cpus; node = node + 1) {
592 device_t f0_dev, f3_dev;
593 uint32_t cmd_ref, cmd;
595 f0_dev = NODE_HT(node);
596 f3_dev = NODE_MC(node);
597 cmd_ref = cmd = pci_read_config32(f3_dev, 0xdc);
598 for(link = 0; link < 3; link = link + 1) {
601 reg = 0x98 + (link * 0x20);
602 link_type = pci_read_config32(f0_dev, reg);
603 if (link_type & LinkConnected) {
604 cmd &= 0xff << (link *8);
605 /* FIXME this assumes the device on the other side is an AMD device */
606 cmd |= 0x25 << (link *8);
609 if (cmd != cmd_ref) {
610 pci_write_config32(f3_dev, 0xdc, cmd);
617 static int setup_coherent_ht_domain(void)
619 struct setup_smp_result result;
621 result.needs_reset = 0;
623 enable_bsp_routing();
625 #if CONFIG_MAX_CPUS == 1
626 setup_uniprocessor();
628 result = setup_smp();
629 result.cpus = verify_mp_capabilities(result.cpus);
631 coherent_ht_finalize(result.cpus);
632 result.needs_reset = apply_cpu_errata_fixes(result.cpus, result.needs_reset);
633 #if CONFIG_MAX_CPUS > 1 /* Why doesn't this work on the solo? */
634 result.needs_reset = optimize_link_read_pointers(result.cpus, result.needs_reset);
637 return result.needs_reset;