1 /* coherent hypertransport initialization for AMD64
3 * written by Stefan Reinauer <stepan@openbios.org>
4 * (c) 2003-2004 by SuSE Linux AG
6 * (c) 2004 Tyan Computer
7 * 2004.12 yhlu added support to create the routing table dynamically.
8 * It also supports 8-way systems (8-way ladder or 8-way crossbar).
9 * This code is licensed under GPL.
13 * This algorithm assumes a grid configuration as follows:
16 * org. : 1x1 2x1 2x2 2x3 2x4
20 #include <device/pci_def.h>
21 #include <device/pci_ids.h>
22 #include <device/hypertransport_def.h>
23 #include "arch/romcc_io.h"
26 /* when generating a temporary row configuration we
27 * don't want broadcast to be enabled for that node.
30 #define enable_bsp_routing() enable_routing(0)
32 #define NODE_HT(x) PCI_DEV(0,24+x,0)
33 #define NODE_MP(x) PCI_DEV(0,24+x,1)
34 #define NODE_MC(x) PCI_DEV(0,24+x,3)
36 #define DEFAULT 0x00010101 /* default row entry */
45 static u8 link_to_register(int ldt)
48 * [ 0: 3] Request Route
49 * [0] Route to this node
55 if (ldt&0x08) return 0x40;
56 if (ldt&0x04) return 0x20;
57 if (ldt&0x02) return 0x00;
59 /* we should never get here */
60 print_spew("Unknown Link\n");
64 static void disable_probes(void)
66 /* disable read/write/fill probes for uniprocessor setup
67 * they don't make sense if only one cpu is available
70 /* Hypetransport Transaction Control Register
72 * [ 0: 0] Disable read byte probe
74 * 1 = Probes not issued
75 * [ 1: 1] Disable Read Doubleword probe
77 * 1 = Probes not issued
78 * [ 2: 2] Disable write byte probes
80 * 1 = Probes not issued
81 * [ 3: 3] Disable Write Doubleword Probes
83 * 1 = Probes not issued.
84 * [10:10] Disable Fill Probe
85 * 0 = Probes issued for cache fills
86 * 1 = Probes not issued for cache fills.
91 print_spew("Disabling read/write/fill probes for UP... ");
93 val=pci_read_config32(NODE_HT(0), 0x68);
94 val |= (1<<10)|(1<<9)|(1<<8)|(1<<4)|(1<<3)|(1<<2)|(1<<1)|(1 << 0);
95 pci_write_config32(NODE_HT(0), 0x68, val);
97 print_spew("done.\r\n");
101 static void enable_routing(u8 node)
105 /* HT Initialization Control Register
107 * [ 0: 0] Routing Table Disable
108 * 0 = Packets are routed according to routing tables
109 * 1 = Packets are routed according to the default link field
110 * [ 1: 1] Request Disable (BSP should clear this)
111 * 0 = Request packets may be generated
112 * 1 = Request packets may not be generated.
113 * [ 3: 2] Default Link (Read-only)
117 * 11 = CPU on same node
119 * - Scratch bit cleared by a cold reset
120 * [ 5: 5] BIOS Reset Detect
121 * - Scratch bit cleared by a cold reset
122 * [ 6: 6] INIT Detect
123 * - Scratch bit cleared by a warm or cold reset not by an INIT
127 /* Enable routing table */
128 print_spew("Enabling routing table for node ");
129 print_spew_hex8(node);
131 val=pci_read_config32(NODE_HT(node), 0x6c);
132 val &= ~((1<<1)|(1<<0));
133 pci_write_config32(NODE_HT(node), 0x6c, val);
135 print_spew(" done.\r\n");
138 static void fill_row(u8 node, u8 row, u32 value)
140 pci_write_config32(NODE_HT(node), 0x40+(row<<2), value);
142 static u32 get_row(u8 node, u8 row)
144 return pci_read_config32(NODE_HT(node), 0x40+(row<<2));
147 static int link_connection(u8 src, u8 dest)
149 /* we generate the needed link information from the rows
150 * by taking the Request Route of the according row.
153 return get_row(src, dest) & 0x0f;
157 #if CONFIG_MAX_CPUS > 1
159 static void rename_temp_node(u8 node)
163 print_spew("Renaming current temporary node to ");
164 print_spew_hex8(node);
166 val=pci_read_config32(NODE_HT(7), 0x60);
167 val &= (~7); /* clear low bits. */
168 val |= node; /* new node */
169 pci_write_config32(NODE_HT(7), 0x60, val);
171 print_spew(" done.\r\n");
174 static bool check_connection(u8 dest)
176 /* See if we have a valid connection to dest */
179 /* Verify that the coherent hypertransport link is
180 * established and actually working by reading the
181 * remode node's vendor/device id
183 val = pci_read_config32(NODE_HT(dest),0);
184 if(val != 0x11001022)
190 static unsigned read_freq_cap(device_t dev, unsigned pos)
192 /* Handle bugs in valid hypertransport frequency reporting */
196 freq_cap = pci_read_config16(dev, pos);
197 freq_cap &= ~(1 << HT_FREQ_VENDOR); /* Ignore Vendor HT frequencies */
199 id = pci_read_config32(dev, 0);
201 /* AMD 8131 Errata 48 */
202 if (id == (PCI_VENDOR_ID_AMD | (PCI_DEVICE_ID_AMD_8131_PCIX << 16))) {
203 freq_cap &= ~(1 << HT_FREQ_800Mhz);
205 /* AMD 8151 Errata 23 */
206 if (id == (PCI_VENDOR_ID_AMD | (PCI_DEVICE_ID_AMD_8151_SYSCTRL << 16))) {
207 freq_cap &= ~(1 << HT_FREQ_800Mhz);
209 /* AMD K8 Unsupported 1Ghz? */
210 if (id == (PCI_VENDOR_ID_AMD | (0x1100 << 16))) {
211 freq_cap &= ~(1 << HT_FREQ_1000Mhz);
216 static int optimize_connection(device_t node1, uint8_t link1, device_t node2, uint8_t link2)
218 static const uint8_t link_width_to_pow2[]= { 3, 4, 0, 5, 1, 2, 0, 0 };
219 static const uint8_t pow2_to_link_width[] = { 0x7, 4, 5, 0, 1, 3 };
220 uint16_t freq_cap1, freq_cap2, freq_cap, freq_mask;
221 uint8_t width_cap1, width_cap2, width_cap, width, old_width, ln_width1, ln_width2;
222 uint8_t freq, old_freq;
224 /* Set link width and frequency */
226 /* Initially assume everything is already optimized and I don't need a reset */
229 /* Get the frequency capabilities */
230 freq_cap1 = read_freq_cap(node1, link1 + PCI_HT_CAP_HOST_FREQ_CAP);
231 freq_cap2 = read_freq_cap(node2, link2 + PCI_HT_CAP_HOST_FREQ_CAP);
233 /* Calculate the highest possible frequency */
234 freq = log2(freq_cap1 & freq_cap2);
236 /* See if I am changing the link freqency */
237 old_freq = pci_read_config8(node1, link1 + PCI_HT_CAP_HOST_FREQ);
238 needs_reset |= old_freq != freq;
239 old_freq = pci_read_config8(node2, link2 + PCI_HT_CAP_HOST_FREQ);
240 needs_reset |= old_freq != freq;
242 /* Set the Calulcated link frequency */
243 pci_write_config8(node1, link1 + PCI_HT_CAP_HOST_FREQ, freq);
244 pci_write_config8(node2, link2 + PCI_HT_CAP_HOST_FREQ, freq);
246 /* Get the width capabilities */
247 width_cap1 = pci_read_config8(node1, link1 + PCI_HT_CAP_HOST_WIDTH);
248 width_cap2 = pci_read_config8(node2, link2 + PCI_HT_CAP_HOST_WIDTH);
250 /* Calculate node1's input width */
251 ln_width1 = link_width_to_pow2[width_cap1 & 7];
252 ln_width2 = link_width_to_pow2[(width_cap2 >> 4) & 7];
253 if (ln_width1 > ln_width2) {
254 ln_width1 = ln_width2;
256 width = pow2_to_link_width[ln_width1];
257 /* Calculate node1's output width */
258 ln_width1 = link_width_to_pow2[(width_cap1 >> 4) & 7];
259 ln_width2 = link_width_to_pow2[width_cap2 & 7];
260 if (ln_width1 > ln_width2) {
261 ln_width1 = ln_width2;
263 width |= pow2_to_link_width[ln_width1] << 4;
265 /* See if I am changing node1's width */
266 old_width = pci_read_config8(node1, link1 + PCI_HT_CAP_HOST_WIDTH + 1);
267 needs_reset |= old_width != width;
269 /* Set node1's widths */
270 pci_write_config8(node1, link1 + PCI_HT_CAP_HOST_WIDTH + 1, width);
272 /* Calculate node2's width */
273 width = ((width & 0x70) >> 4) | ((width & 0x7) << 4);
275 /* See if I am changing node2's width */
276 old_width = pci_read_config8(node2, link2 + PCI_HT_CAP_HOST_WIDTH + 1);
277 needs_reset |= old_width != width;
279 /* Set node2's widths */
280 pci_write_config8(node2, link2 + PCI_HT_CAP_HOST_WIDTH + 1, width);
285 static void setup_row_local(u8 source, u8 row) // source will be 7 when it is for temp use
290 for(linkn = 0; linkn<3; linkn++) {
293 regpos = 0x98 + 0x20 * linkn;
294 reg = pci_read_config32(NODE_HT(source), regpos);
295 if ((reg & 0x17) != 3) continue; // it is not conherent or not connected
300 fill_row(source,row, val);
303 static void setup_row_direct(u8 source, u8 dest, u8 linkn)
308 val |= 1<<(linkn+1+8); //for direct connect response route should equal to request table
309 fill_row(source,dest, val);
311 #if CONFIG_MAX_CPUS>2
312 static void setup_row_indirect(u8 source, u8 dest, u8 gateway)
314 //for indirect connection, we need to compute the val from val_s(source, source), and val_g(source, gateway)
318 #warning "FIXME is it the way to set the RESPONSE TABLE for indirect?"
319 val_s = get_row(source, source);
320 val_g = get_row(source, gateway);
325 if(val_s!=val) { // use another connect as response
327 #warning "FIXME I don't know how to set BROADCAST TABLE for indirect, 1?"
328 val |= (1<<16) | (val_s<<8);
330 val = val_g; // all the same to gateway
333 fill_row(source, dest, val);
336 static void setup_row_indirect_group(const u8 *conn, int num)
339 for(i=0; i<num; i+=3) {
340 setup_row_indirect(conn[i*3], conn[i*3+1],conn[i*3+2]);
345 static void setup_temp_row(u8 source, u8 dest)
347 // copy val from (source, dest) to (source,7)
348 fill_row(source,7,get_row(source,dest));
351 static void clear_temp_row(u8 source)
353 fill_row(source, 7, DEFAULT);
356 static void setup_remote_row_direct(u8 source, u8 dest, u8 linkn)
361 val |= 1<<(linkn+1+8); //for direct connect response route should equal to request table
362 fill_row(7,dest, val );
364 #if CONFIG_MAX_CPUS>2
365 static void setup_remote_row_indirect(u8 source, u8 dest, u8 gateway)
367 //for indirect connection, we need to compute the val from val_s(source, source), and val_g(source, gateway)
372 val_s = get_row(7, source);
373 val_g = get_row(7, gateway);
378 if(val_s!=val) { // use another connect as response
380 val |= 1 | (val_s<<8);
382 val = val_g; // all the same to gateway
385 fill_row(7, dest, val);
390 static void setup_remote_node(u8 node)
392 static const uint8_t pci_reg[] = {
393 0x44, 0x4c, 0x54, 0x5c, 0x64, 0x6c, 0x74, 0x7c,
394 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78,
395 0x84, 0x8c, 0x94, 0x9c, 0xa4, 0xac, 0xb4, 0xbc,
396 0x80, 0x88, 0x90, 0x98, 0xa0, 0xa8, 0xb0, 0xb8,
397 0xc4, 0xcc, 0xd4, 0xdc,
398 0xc0, 0xc8, 0xd0, 0xd8,
399 0xe0, 0xe4, 0xe8, 0xec,
403 print_spew("setup_remote_node: ");
405 /* copy the default resource map from node 0 */
406 for(i = 0; i < sizeof(pci_reg)/sizeof(pci_reg[0]); i++) {
410 value = pci_read_config32(NODE_MP(0), reg);
411 pci_write_config32(NODE_MP(7), reg, value);
414 print_spew("done\r\n");
419 static void setup_uniprocessor(void)
421 print_spew("Enabling UP settings\r\n");
425 struct setup_smp_result {
430 #if CONFIG_MAX_CPUS > 2
431 static int optimize_connection_group(const u8 *opt_conn, int num) {
434 for(i=0; i<num; i+=2) {
435 needs_reset = optimize_connection(
436 NODE_HT(opt_conn[i*2]), 0x80 + link_to_register(link_connection(opt_conn[i*2],opt_conn[i*2+1])),
437 NODE_HT(opt_conn[i*2+1]), 0x80 + link_to_register(link_connection(opt_conn[i*2+1],opt_conn[i*2])) );
443 #if CONFIG_MAX_CPUS > 1
/*
 * Return the number (0..2) of the lowest link whose bit is set in a
 * link mask, where bit 1 = link 0, bit 2 = link 1 and bit 3 = link 2.
 * If none of those bits is set, the input is returned unchanged.
 */
static uint8_t get_linkn_first(uint8_t byte)
{
	uint8_t linkn;

	for (linkn = 0; linkn < 3; linkn++) {
		if (byte & (0x02 << linkn)) {
			return linkn;
		}
	}

	return byte;
}
451 static uint8_t get_linkn_last(uint8_t byte)
453 if(byte & 0x02) { byte &= 0x0f; byte |= 0x00; }
454 if(byte & 0x04) { byte &= 0x0f; byte |= 0x10; }
455 if(byte & 0x08) { byte &= 0x0f; byte |= 0x20; }
458 static uint8_t get_linkn_last_count(uint8_t byte)
461 if(byte & 0x02) { byte &= 0xcf; byte |= 0x00; byte+=0x40; }
462 if(byte & 0x04) { byte &= 0xcf; byte |= 0x10; byte+=0x40; }
463 if(byte & 0x08) { byte &= 0xcf; byte |= 0x20; byte+=0x40; }
467 static struct setup_smp_result setup_smp(void)
469 struct setup_smp_result result;
473 result.needs_reset = 0;
475 print_spew("Enabling SMP settings\r\n");
477 setup_row_local(0, 0); // it will update the broadcast RT
480 byte = (val>>16) & 0xfe;
481 if(byte<0x2) { // no coherent connection so get out.
486 /* Setup and check a temporary connection to node 1 */
489 byte = get_linkn_first(byte);
490 setup_row_direct(0,1, byte);
491 setup_temp_row(0, 1);
493 if (!check_connection(7)) {
494 print_spew("No connection to Node 1.\r\n");
495 setup_uniprocessor(); /* and get up working */
500 /* We found 2 nodes so far */
501 val = pci_read_config32(NODE_HT(7), 0x6c);
502 byte = (val>2) & 0x3; // get default link on 7 to 0
503 setup_row_local(7,1);
504 setup_remote_row_direct(1, 0, byte);
506 #if CONFIG_MAX_CPUS>4
508 byte = (val>>16) & 0xfe;
509 byte = get_linkn_last_count(byte);
510 if((byte>>2)==3) { // Oh! we need to treat it as cpu2.
512 byte = (val>>16) & 0xfe;
513 byte = get_linkn_last(byte);
514 setup_row_direct(0,1, byte);
515 setup_temp_row(0, 1);
517 if (!check_connection(7)) {
518 print_spew("No connection to Node 1.\r\n");
519 setup_uniprocessor(); /* and get up working */
524 /* We found 2 nodes so far */
525 val = pci_read_config32(NODE_HT(7), 0x6c);
526 byte = (val>2) & 0x3; // get default link on 7 to 0
527 setup_row_local(7,1);
528 setup_remote_row_direct(1, 0, byte);
532 setup_remote_node(1); /* Setup the regs on the remote node */
533 rename_temp_node(1); /* Rename Node 7 to Node 1 */
534 enable_routing(1); /* Enable routing on Node 1 */
536 // don't need and it is done by clear_dead_links
540 result.needs_reset = optimize_connection(
541 NODE_HT(0), 0x80 + link_to_register(link_connection(0,1)),
542 NODE_HT(1), 0x80 + link_to_register(link_connection(1,0)) );
544 #if CONFIG_MAX_CPUS > 2
547 /* Setup and check temporary connection from Node 0 to Node 2 */
549 byte = ((val>>16) & 0xfe) - link_connection(0,1);
550 byte = get_linkn_last_count(byte);
553 if((byte>>2)==0) { // We should have two coherent for 4p and above
557 byte &= 3; // bit [3,2] is count-1
558 setup_row_direct(0, 2, byte);
559 setup_temp_row(0, 2);
561 if (!check_connection(7) ) {
562 print_spew("No connection to Node 2.\r\n");
567 /* We found 3 nodes so far. Now setup a temporary
568 * connection from node 0 to node 3 via node 1
570 setup_temp_row(0,1); /* temp. link between nodes 0 and 1 */
571 /* here should setup_row_direct(1,3) at first, before that we should find the link in cpu 1 to 3*/
573 byte = ((val>>16) & 0xfe) - link_connection(1,0);
574 byte = get_linkn_first(byte);
575 setup_row_direct(1,3,byte);
576 setup_temp_row(1,3); /* temp. link between nodes 1 and 3 */
578 if (!check_connection(7)) {
579 print_spew("No connection to Node 3.\r\n");
584 /* We found 4 nodes so far. Now setup all nodes for 4p */
585 /* for indirect we will use clockwise routing */
586 static const u8 conn4_1[] = {
591 setup_row_indirect_group(conn4_1, sizeof(conn4_1)/sizeof(conn4_1[0]));
595 val = pci_read_config32(NODE_HT(7), 0x6c);
596 byte = (val>2) & 0x3; // get default link on 7 to 0
598 setup_row_local(7,2);
599 setup_remote_row_direct(2, 0, byte);
600 setup_remote_node(2); /* Setup the regs on the remote node */
602 rename_temp_node(2); /* Rename Node 7 to Node 2 */
603 enable_routing(2); /* Enable routing on Node 2 */
609 val = pci_read_config32(NODE_HT(7), 0x6c);
610 byte = (val>2) & 0x3; // get default link on 7 to 0
612 setup_row_local(7,3);
613 setup_remote_row_direct(3, 1, byte);
614 setup_remote_node(3); /* Setup the regs on the remote node */
617 enable_routing(3); /* enable routing on node 3 (temp.) */
619 /* We need to init link between 2, and 3 direct link */
621 byte = ((val>>16) & 0xfe) - link_connection(2,0);
622 byte = get_linkn_last_count(byte);
623 #if CONFIG_MAX_CPUS>4
624 // We need to find out which link it so CPU3
625 // methods is try to access another 7 actully it is cpu4
626 if((byte>>2)==2) { // one to CPU3, one to cpu0, one to CPU4
628 setup_row_direct(2, 4, byte);
629 setup_temp_row(2, 4);
631 if (check_connection(7)) { // so the link is to CPU4
632 //We need to re compute it
634 byte = (val>>16) & 0xfe;
635 byte = get_linkn_first(byte);
639 setup_row_direct(2,3, byte & 0x3);
642 byte = ((val>>16) & 0xfe) - link_connection(3,1);
643 byte = get_linkn_last_count(byte);
644 #if CONFIG_MAX_CPUS>4
645 // We need to find out which link it so CPU2
646 // methods is try to access another 7 actully it is cpu5
647 if((byte>>2)==2) { // one to CPU2, one to cpu1, one to CPU5
650 setup_row_direct(3, 5, byte);
651 setup_temp_row(3, 5);
653 if (check_connection(7)) { // so the link is to CPU5
654 //We need to re compute it
656 byte = (val>>16) & 0xfe;
657 byte = get_linkn_first(byte);
661 setup_row_direct(3,2, byte & 0x3);
663 /* Set indirect connection to 0, and 1 for indirect we will use clockwise routing */
664 static const u8 conn4_2[] = {
669 setup_row_indirect_group(conn4_2, sizeof(conn4_2)/sizeof(conn4_2[0]));
671 // We need to do sth to reverse work for setup_temp_row (0,1) (1,3)
673 // it will be done by clear_dead_links
678 /* optimize physical connections - by LYH */
679 static const u8 opt_conn4[] = {
685 result.needs_reset = optimize_connection_group(opt_conn4, sizeof(opt_conn4)/sizeof(opt_conn4[0]));
687 #endif /* CONFIG_MAX_CPUS > 2 */
689 #if CONFIG_MAX_CPUS > 4
692 /* Setup and check temporary connection from Node 0 to Node 4 via 2 */
694 byte = ((val>>16) & 0xfe) - link_connection(2,3) - link_connection(2,0);
695 byte = get_linkn_last_count(byte);
698 if((byte>>2)==0) { // We should have two coherent for 4p and above
702 byte &= 3; // bit [3,2] is count-1
703 setup_row_direct(2, 4, byte);
705 /* Setup and check temporary connection from Node 0 to Node 4 through 2*/
706 for(byte=0; byte<4; byte+=2) {
707 setup_temp_row(byte,byte+2); /* temp. link between nodes 0 and 2 */
710 if (!check_connection(7) ) {
711 print_spew("No connection to Node 4.\r\n");
716 /* Setup and check temporary connection from Node 0 to Node 5 through 1, 3*/
719 byte = ((val>>16) & 0xfe) - link_connection(3,2) - link_connection(3,1);
720 byte = get_linkn_last_count(byte);
722 if((byte>>2)==0) { // We should have two coherent for 4p and above
727 byte &= 3; // bit [3,2] is count-1
728 setup_row_direct(3, 5, byte);
730 setup_temp_row(0,1); /* temp. link between nodes 0 and 1 */
731 for(byte=0; byte<4; byte+=2) {
732 setup_temp_row(byte+1,byte+3); /* temp. link between nodes 1 and 3 */
735 if (!check_connection(7)) {
736 print_spew("No connection to Node 5.\r\n");
741 /* We found 6 nodes so far. Now setup all nodes for 6p */
742 static const u8 conn6_1[] = {
755 setup_row_indirect_group(conn6_1, sizeof(conn6_1)/sizeof(conn6_1[0]));
758 for(byte=0; byte<4; byte+=2) {
759 setup_temp_row(byte,byte+2);
761 val = pci_read_config32(NODE_HT(7), 0x6c);
762 byte = (val>2) & 0x3; // get default link on 7 to 0
764 setup_row_local(7,4);
765 setup_remote_row_direct(4, 2, byte);
766 setup_remote_node(4); /* Setup the regs on the remote node */
771 for(byte=0; byte<4; byte+=2) {
772 setup_temp_row(byte+1,byte+3);
775 val = pci_read_config32(NODE_HT(7), 0x6c);
776 byte = (val>2) & 0x3; // get default link on 7 to 0
777 setup_row_local(7,5);
778 setup_remote_row_direct(5, 3, byte);
779 setup_remote_node(5); /* Setup the regs on the remote node */
782 enable_routing(5); /* enable routing on node 5 (temp.) */
785 /* We need to init link between 4, and 5 direct link */
787 byte = ((val>>16) & 0xfe) - link_connection(4,2);
788 byte = get_linkn_last_count(byte);
789 #if CONFIG_MAX_CPUS>4
790 // We need to find out which link it so CPU5
791 // methods is try to access another 7 actully it is cpu6
792 if((byte>>2)==2) { // one to CPU5, one to cpu2, one to CPU6
795 setup_row_direct(4, 6, byte);
796 setup_temp_row(4, 6);
798 if (check_connection(7)) { // so the link is to CPU4
799 //We need to re compute it
801 byte = (val>>16) & 0xfe;
802 byte = get_linkn_first(byte);
806 setup_row_direct(4,5, byte & 0x3);
809 byte = ((val>>16) & 0xfe) - link_connection(5,3);
810 byte = get_linkn_last_count(byte);
811 #if CONFIG_MAX_CPUS>4
812 // We need to find out which link it so CPU4
813 // methods is try to access another 7 actully it is cpu7
814 if((byte>>2)==2) { // one to CPU4, one to cpu3, one to CPU7
818 setup_row_direct(5, 7, byte);
819 setup_temp_row(5, 7);
821 if (check_connection(7)) { // so the link is to CPU5
822 //We need to re compute it
824 byte = (val>>16) & 0xfe;
825 byte = get_linkn_first(byte);
829 setup_row_direct(5,4, byte & 0x3);
830 #endif // !CROSS_BAR_47_56
832 /* Set indirect connection to 0, to 3 for indirect we will use clockwise routing */
833 static const u8 conn6_2[] = {
854 setup_row_indirect_group(conn6_2, sizeof(conn6_2)/sizeof(conn6_2[0]));
856 // We need to do sth about reverse about setup_temp_row (0,1), (2,4), (1, 3), (3,5)
857 // It will be done by clear_dead_links
858 for(byte=0; byte<4; byte++) {
859 clear_temp_row(byte);
863 /* optimize physical connections - by LYH */
864 static const uint8_t opt_conn6[] ={
871 result.needs_reset = optimize_connection_group(opt_conn6, sizeof(opt_conn6)/sizeof(opt_conn6[0]));
874 #endif /* CONFIG_MAX_CPUS > 4 */
876 #if CONFIG_MAX_CPUS >6
879 /* Setup and check temporary connection from Node 0 to Node 6 via 2 and 4 to 7 */
882 byte = ((val>>16) & 0xfe) - link_connection(4,5) - link_connection(4,2);
884 byte = ((val>>16) & 0xfe) - link_connection(4,2);
886 byte = get_linkn_last_count(byte); // Max link to 6
887 if((byte>>2)==0) { // We should have two coherent for 8p and above
891 byte &= 3; // bit [3,2] is count-1
892 setup_row_direct(4, 6, byte);
894 /* Setup and check temporary connection from Node 0 to Node 6 through 2, and 4*/
895 for(byte=0; byte<6; byte+=2) {
896 setup_temp_row(byte,byte+2); /* temp. link between nodes 0 and 2 */
899 if (!check_connection(7) ) {
900 print_spew("No connection to Node 6.\r\n");
905 /* Setup and check temporary connection from Node 0 to Node 5 through 1, 3, 5*/
908 byte = ((val>>16) & 0xfe) - link_connection(5,4) - link_connection(5,3);
909 byte = get_linkn_first(byte);
910 setup_row_direct(5, 7, byte);
912 setup_temp_row(0,1); /* temp. link between nodes 0 and 1 */
913 for(byte=0; byte<6; byte+=2) {
914 setup_temp_row(byte+1,byte+3); /* temp. link between nodes 1 and 3 */
918 byte = ((val>>16) & 0xfe) - link_connection(4,2) ;
919 byte = get_linkn_first(byte); // min link to 7
920 setup_row_direct(4, 7, byte);
922 /* Setup and check temporary connection from Node 0 to Node 7 through 2, and 4*/
923 for(byte=0; byte<4; byte+=2) {
924 setup_temp_row(byte,byte+2); /* temp. link between nodes 0 and 2 */
926 setup_temp_row(4, 7);
930 if (!check_connection(7)) {
931 print_spew("No connection to Node 7.\r\n");
937 /* We found 8 nodes so far. Now setup all nodes for 8p */
938 static const u8 conn8_1[] = {
961 setup_row_indirect_group(conn8_1,sizeof(conn8_1)/sizeof(conn8_1[0]));
963 for(byte=0; byte<6; byte+=2) {
964 setup_temp_row(byte,byte+2);
966 val = pci_read_config32(NODE_HT(7), 0x6c);
967 byte = (val>2) & 0x3; // get default link on 7 to 0
969 setup_row_local(7,6);
970 setup_remote_row_direct(6, 4, byte);
971 setup_remote_node(6); /* Setup the regs on the remote node */
977 for(byte=0; byte<6; byte+=2) {
978 setup_temp_row(byte+1,byte+3);
981 val = pci_read_config32(NODE_HT(7), 0x6c);
982 byte = (val>2) & 0x3; // get default link on 7 to 0
983 setup_row_local(7,7);
984 setup_remote_row_direct(7, 5, byte);
987 for(byte=0; byte<4; byte+=2) {
988 setup_temp_row(byte,byte+2);
991 val = pci_read_config32(NODE_HT(7), 0x6c);
992 byte = (val>2) & 0x3; // get default link on 7 to 0
994 setup_row_local(7,7);
995 setup_remote_row_direct(7, 4, byte);
996 // till now 4-7, 7-4 done.
998 setup_remote_node(7); /* Setup the regs on the remote node */
999 // rename_temp_node(7);
1000 enable_routing(7); /* enable routing on node 5 (temp.) */
1003 //here init 5, 6 and 5, 7
1004 /* Setup and check temporary connection from Node 0 to Node 5 through 1, 3, 5*/
1007 byte = ((val>>16) & 0xfe) - link_connection(5,3);
1008 byte = get_linkn_last(byte);
1009 setup_row_direct(5, 7, byte);
1011 setup_temp_row(0,1); /* temp. link between nodes 0 and 1 */
1012 for(byte=0; byte<6; byte+=2) {
1013 setup_temp_row(byte+1,byte+3); /* temp. link between nodes 1 and 3 */
1016 if (!check_connection(7)) {
1017 // We need to recompute link to 7
1019 byte = ((val>>16) & 0xfe) - link_connection(5,3);
1020 byte = get_linkn_first(byte);
1022 byte &= 3; // bit [3,2] is count-1
1023 setup_row_direct(5, 7, byte);
1025 setup_temp_row(0,1); /* temp. link between nodes 0 and 1 */
1026 for(byte=0; byte<6; byte+=2) {
1027 setup_temp_row(byte+1,byte+3); /* temp. link between nodes 1 and 3 */
1030 setup_temp_row(5,7);
1032 check_connection(7);
1034 val = pci_read_config32(NODE_HT(7), 0x6c);
1035 byte = (val>2) & 0x3; // get default link on 7 to 0
1036 // setup_row_local(7,7);
1037 setup_remote_row_direct(7, 5, byte);
1038 //Till now 57, 75 done
1042 byte = ((val>>16) & 0xfe) - link_connection(5,3) - link_connection(5,7);
1043 byte = get_linkn_first(byte);
1044 setup_row_direct(5, 6, byte);
1048 byte = ((val>>16) & 0xfe) - link_connection(6,4);
1049 byte = get_linkn_last(byte);
1050 setup_row_direct(6, 7, byte);
1052 for(byte=0; byte<6; byte+=2) {
1053 setup_temp_row(byte,byte+2); /* temp. link between nodes 0 and 2 */
1055 setup_temp_row(6,7);
1057 if (!check_connection(7)) {
1058 // We need to recompute link to 7
1060 byte = ((val>>16) & 0xfe) - link_connection(6,4);
1061 byte = get_linkn_first(byte);
1063 setup_row_direct(6, 7, byte);
1065 for(byte=0; byte<6; byte+=2) {
1066 setup_temp_row(byte,byte+2); /* temp. link between nodes 0 and 2 */
1069 setup_temp_row(6,7);
1070 check_connection(7);
1072 val = pci_read_config32(NODE_HT(7), 0x6c);
1073 byte = (val>2) & 0x3; // get default link on 7 to 0
1074 // setup_row_local(7,7);
1075 setup_remote_row_direct(7, 6, byte);
1076 //Till now 67, 76 done
1080 byte = ((val>>16) & 0xfe) - link_connection(6,4) - link_connection(6,7);
1081 byte = get_linkn_first(byte);
1082 setup_row_direct(6, 5, byte);
1086 #if !CROSS_BAR_47_56
1087 /* We need to init link between 6, and 7 direct link */
1089 byte = ((val>>16) & 0xfe) - link_connection(6,4);
1090 byte = get_linkn_first(byte);
1091 setup_row_direct(6,7, byte & 0x3);
1094 byte = ((val>>16) & 0xfe) - link_connection(7,5);
1095 byte = get_linkn_first(byte);
1096 setup_row_direct(7,6, byte & 0x3);
1099 /* Set indirect connection to 0, to 3 for indirect we will use clockwise routing */
1100 static const u8 conn8_2[] = {
1101 #if !CROSS_BAR_47_56
1102 0, 7, 1, // restore it
1119 0, 7, 2, // restore it
1137 setup_row_indirect_group(conn8_2, sizeof(conn8_2)/sizeof(conn8_2[0]));
1139 static const uint8_t opt_conn8[] ={
1148 /* optimize physical connections - by LYH */
1149 result.needs_reset = optimize_connection_group(opt_conn8, sizeof(opt_conn6)/sizeof(opt_conn8[0]));
1151 #endif /* CONFIG_MAX_CPUS > 6 */
1153 print_debug_hex8(result.nodes);
1154 print_debug(" nodes initialized.\r\n");
1159 static unsigned verify_mp_capabilities(unsigned nodes)
1161 unsigned node, row, mask;
1165 mask=0x06; /* BigMPCap */
1166 } else if (nodes == 2) {
1167 mask=0x02; /* MPCap */
1169 mask=0x00; /* Non SMP */
1172 for (node=0; node<nodes; node++) {
1173 if ((pci_read_config32(NODE_MC(node), 0xe8) & mask) != mask) {
1182 /* one of our nodes is not mp capable */
1184 print_err("One of the CPUs is not MP capable. Going back to UP\r\n");
1188 static void clear_dead_routes(unsigned nodes)
1192 #if CONFIG_MAX_CPUS>6
1193 if(nodes==8) return;// don't touch (7,7)
1199 for(node = 7; node >= 0; node--) {
1200 for(row = 7; row >= last_row; row--) {
1201 fill_row(node, row, DEFAULT);
1206 static void coherent_ht_finalize(unsigned nodes)
1211 /* set up cpu count and node count and enable Limit
1212 * Config Space Range for all available CPUs.
1213 * Also clear non coherent hypertransport bus range
1214 * registers on Hammer A0 revision.
1217 print_spew("coherent_ht_finalize\r\n");
1218 rev_a0 = is_cpu_rev_a0();
1219 for (node = 0; node < nodes; node++) {
1222 dev = NODE_HT(node);
1224 /* Set the Total CPU and Node count in the system */
1225 val = pci_read_config32(dev, 0x60);
1226 val &= (~0x000F0070);
1227 val |= ((nodes-1)<<16)|((nodes-1)<<4);
1228 pci_write_config32(dev, 0x60, val);
1230 /* Only respond to real cpu pci configuration cycles
1231 * and optimize the HT settings
1233 val=pci_read_config32(dev, 0x68);
1234 val &= ~((HTTC_BUF_REL_PRI_MASK << HTTC_BUF_REL_PRI_SHIFT) |
1235 (HTTC_MED_PRI_BYP_CNT_MASK << HTTC_MED_PRI_BYP_CNT_SHIFT) |
1236 (HTTC_HI_PRI_BYP_CNT_MASK << HTTC_HI_PRI_BYP_CNT_SHIFT));
1237 val |= HTTC_LIMIT_CLDT_CFG |
1238 (HTTC_BUF_REL_PRI_8 << HTTC_BUF_REL_PRI_SHIFT) |
1240 (3 << HTTC_MED_PRI_BYP_CNT_SHIFT) |
1241 (3 << HTTC_HI_PRI_BYP_CNT_SHIFT);
1242 pci_write_config32(dev, 0x68, val);
1245 print_spew("shit it is an old cup\n");
1246 pci_write_config32(dev, 0x94, 0);
1247 pci_write_config32(dev, 0xb4, 0);
1248 pci_write_config32(dev, 0xd4, 0);
1252 print_spew("done\r\n");
1255 static int apply_cpu_errata_fixes(unsigned nodes, int needs_reset)
1258 for(node = 0; node < nodes; node++) {
1261 dev = NODE_MC(node);
1262 if (is_cpu_pre_c0()) {
1265 * Limit the number of downstream posted requests to 1
1267 cmd = pci_read_config32(dev, 0x70);
1268 if ((cmd & (3 << 0)) != 2) {
1271 pci_write_config32(dev, 0x70, cmd );
1274 cmd = pci_read_config32(dev, 0x7c);
1275 if ((cmd & (3 << 4)) != 0) {
1278 pci_write_config32(dev, 0x7c, cmd );
1281 /* Clock Power/Timing Low */
1282 cmd = pci_read_config32(dev, 0xd4);
1283 if (cmd != 0x000D0001) {
1285 pci_write_config32(dev, 0xd4, cmd);
1286 needs_reset = 1; /* Needed? */
1293 * Set Clk Ramp Hystersis to 7
1294 * Clock Power/Timing Low
1296 cmd_ref = 0x04e20707; /* Registered */
1297 cmd = pci_read_config32(dev, 0xd4);
1298 if(cmd != cmd_ref) {
1299 pci_write_config32(dev, 0xd4, cmd_ref );
1300 needs_reset = 1; /* Needed? */
1307 static int optimize_link_read_pointers(unsigned nodes, int needs_reset)
1310 for(node = 0; node < nodes; node = node + 1) {
1311 device_t f0_dev, f3_dev;
1312 uint32_t cmd_ref, cmd;
1314 f0_dev = NODE_HT(node);
1315 f3_dev = NODE_MC(node);
1316 cmd_ref = cmd = pci_read_config32(f3_dev, 0xdc);
1317 for(link = 0; link < 3; link = link + 1) {
1320 /* This works on an Athlon64 because unimplemented links return 0 */
1321 reg = 0x98 + (link * 0x20);
1322 link_type = pci_read_config32(f0_dev, reg);
1323 if (link_type & LinkConnected) {
1324 cmd &= 0xff << (link *8);
1325 /* FIXME this assumes the device on the other
1326 * side is an AMD device
1328 cmd |= 0x25 << (link *8);
1331 if (cmd != cmd_ref) {
1332 pci_write_config32(f3_dev, 0xdc, cmd);
1339 static int setup_coherent_ht_domain(void)
1341 struct setup_smp_result result;
1343 result.needs_reset = 0;
1345 enable_bsp_routing();
1347 #if CONFIG_MAX_CPUS > 1
1348 result = setup_smp();
1350 result.nodes = verify_mp_capabilities(result.nodes);
1351 clear_dead_routes(result.nodes);
1352 if (result.nodes == 1) {
1353 setup_uniprocessor();
1355 coherent_ht_finalize(result.nodes);
1356 result.needs_reset = apply_cpu_errata_fixes(result.nodes, result.needs_reset);
1357 result.needs_reset = optimize_link_read_pointers(result.nodes, result.needs_reset);
1358 return result.needs_reset;