1 /* coherent hypertransport initialization for AMD64
3 * written by Stefan Reinauer <stepan@openbios.org>
4 * (c) 2003-2004 by SuSE Linux AG
6 * (c) 2004 Tyan Computer
7 * 2004.12 yhlu added support for creating the routing table dynamically.
8 * It also supports 8-way systems (8-way ladder or 8-way crossbar).
9 * This code is licensed under GPL.
13 * This algorithm assumes a grid configuration as follows:
16 * org. : 1x1 2x1 2x2 2x3 2x4
20 #include <device/pci_def.h>
21 #include <device/pci_ids.h>
22 #include <device/hypertransport_def.h>
23 #include "arch/romcc_io.h"
26 /* when generating a temporary row configuration we
27 * don't want broadcast to be enabled for that node.
30 #define enable_bsp_routing() enable_routing(0)
32 #define NODE_HT(x) PCI_DEV(0,24+x,0)
33 #define NODE_MP(x) PCI_DEV(0,24+x,1)
34 #define NODE_MC(x) PCI_DEV(0,24+x,3)
36 #define DEFAULT 0x00010101 /* default row entry */
45 static u8 link_to_register(int ldt)
48 * [ 0: 3] Request Route
49 * [0] Route to this node
55 if (ldt&0x08) return 0x40;
56 if (ldt&0x04) return 0x20;
57 if (ldt&0x02) return 0x00;
59 /* we should never get here */
60 print_spew("Unknown Link\n");
64 static void disable_probes(void)
66 /* disable read/write/fill probes for uniprocessor setup
67 * they don't make sense if only one cpu is available
70 /* Hypetransport Transaction Control Register
72 * [ 0: 0] Disable read byte probe
74 * 1 = Probes not issued
75 * [ 1: 1] Disable Read Doubleword probe
77 * 1 = Probes not issued
78 * [ 2: 2] Disable write byte probes
80 * 1 = Probes not issued
81 * [ 3: 3] Disable Write Doubleword Probes
83 * 1 = Probes not issued.
84 * [10:10] Disable Fill Probe
85 * 0 = Probes issued for cache fills
86 * 1 = Probes not issued for cache fills.
91 print_spew("Disabling read/write/fill probes for UP... ");
93 val=pci_read_config32(NODE_HT(0), 0x68);
94 val |= (1<<10)|(1<<9)|(1<<8)|(1<<4)|(1<<3)|(1<<2)|(1<<1)|(1 << 0);
95 pci_write_config32(NODE_HT(0), 0x68, val);
97 print_spew("done.\r\n");
101 static void enable_routing(u8 node)
105 /* HT Initialization Control Register
107 * [ 0: 0] Routing Table Disable
108 * 0 = Packets are routed according to routing tables
109 * 1 = Packets are routed according to the default link field
110 * [ 1: 1] Request Disable (BSP should clear this)
111 * 0 = Request packets may be generated
112 * 1 = Request packets may not be generated.
113 * [ 3: 2] Default Link (Read-only)
117 * 11 = CPU on same node
119 * - Scratch bit cleared by a cold reset
120 * [ 5: 5] BIOS Reset Detect
121 * - Scratch bit cleared by a cold reset
122 * [ 6: 6] INIT Detect
123 * - Scratch bit cleared by a warm or cold reset not by an INIT
127 /* Enable routing table */
128 print_spew("Enabling routing table for node ");
129 print_spew_hex8(node);
131 val=pci_read_config32(NODE_HT(node), 0x6c);
132 val &= ~((1<<1)|(1<<0));
133 pci_write_config32(NODE_HT(node), 0x6c, val);
135 print_spew(" done.\r\n");
138 static void fill_row(u8 node, u8 row, u32 value)
140 pci_write_config32(NODE_HT(node), 0x40+(row<<2), value);
142 static u32 get_row(u8 node, u8 row)
144 return pci_read_config32(NODE_HT(node), 0x40+(row<<2));
147 static int link_connection(u8 src, u8 dest)
149 /* we generate the needed link information from the rows
150 * by taking the Request Route of the according row.
153 return get_row(src, dest) & 0x0f;
157 #if CONFIG_MAX_CPUS > 1
159 static void rename_temp_node(u8 node)
163 print_spew("Renaming current temporary node to ");
164 print_spew_hex8(node);
166 val=pci_read_config32(NODE_HT(7), 0x60);
167 val &= (~7); /* clear low bits. */
168 val |= node; /* new node */
169 pci_write_config32(NODE_HT(7), 0x60, val);
171 print_spew(" done.\r\n");
174 static bool check_connection(u8 dest)
176 /* See if we have a valid connection to dest */
179 /* Verify that the coherent hypertransport link is
180 * established and actually working by reading the
181 * remote node's vendor/device id
183 val = pci_read_config32(NODE_HT(dest),0);
184 if(val != 0x11001022)
190 static unsigned read_freq_cap(device_t dev, unsigned pos)
192 /* Handle bugs in valid hypertransport frequency reporting */
196 freq_cap = pci_read_config16(dev, pos);
197 freq_cap &= ~(1 << HT_FREQ_VENDOR); /* Ignore Vendor HT frequencies */
199 id = pci_read_config32(dev, 0);
201 /* AMD 8131 Errata 48 */
202 if (id == (PCI_VENDOR_ID_AMD | (PCI_DEVICE_ID_AMD_8131_PCIX << 16))) {
203 freq_cap &= ~(1 << HT_FREQ_800Mhz);
205 /* AMD 8151 Errata 23 */
206 if (id == (PCI_VENDOR_ID_AMD | (PCI_DEVICE_ID_AMD_8151_SYSCTRL << 16))) {
207 freq_cap &= ~(1 << HT_FREQ_800Mhz);
209 /* AMD K8 Unsupported 1Ghz? */
210 if (id == (PCI_VENDOR_ID_AMD | (0x1100 << 16))) {
211 freq_cap &= ~(1 << HT_FREQ_1000Mhz);
216 static int optimize_connection(device_t node1, uint8_t link1, device_t node2, uint8_t link2)
218 static const uint8_t link_width_to_pow2[]= { 3, 4, 0, 5, 1, 2, 0, 0 };
219 static const uint8_t pow2_to_link_width[] = { 0x7, 4, 5, 0, 1, 3 };
220 uint16_t freq_cap1, freq_cap2, freq_cap, freq_mask;
221 uint8_t width_cap1, width_cap2, width_cap, width, old_width, ln_width1, ln_width2;
222 uint8_t freq, old_freq;
224 /* Set link width and frequency */
226 /* Initially assume everything is already optimized and I don't need a reset */
229 /* Get the frequency capabilities */
230 freq_cap1 = read_freq_cap(node1, link1 + PCI_HT_CAP_HOST_FREQ_CAP);
231 freq_cap2 = read_freq_cap(node2, link2 + PCI_HT_CAP_HOST_FREQ_CAP);
233 /* Calculate the highest possible frequency */
234 freq = log2(freq_cap1 & freq_cap2);
236 /* See if I am changing the link frequency */
237 old_freq = pci_read_config8(node1, link1 + PCI_HT_CAP_HOST_FREQ);
238 needs_reset |= old_freq != freq;
239 old_freq = pci_read_config8(node2, link2 + PCI_HT_CAP_HOST_FREQ);
240 needs_reset |= old_freq != freq;
242 /* Set the calculated link frequency */
243 pci_write_config8(node1, link1 + PCI_HT_CAP_HOST_FREQ, freq);
244 pci_write_config8(node2, link2 + PCI_HT_CAP_HOST_FREQ, freq);
246 /* Get the width capabilities */
247 width_cap1 = pci_read_config8(node1, link1 + PCI_HT_CAP_HOST_WIDTH);
248 width_cap2 = pci_read_config8(node2, link2 + PCI_HT_CAP_HOST_WIDTH);
250 /* Calculate node1's input width */
251 ln_width1 = link_width_to_pow2[width_cap1 & 7];
252 ln_width2 = link_width_to_pow2[(width_cap2 >> 4) & 7];
253 if (ln_width1 > ln_width2) {
254 ln_width1 = ln_width2;
256 width = pow2_to_link_width[ln_width1];
257 /* Calculate node1's output width */
258 ln_width1 = link_width_to_pow2[(width_cap1 >> 4) & 7];
259 ln_width2 = link_width_to_pow2[width_cap2 & 7];
260 if (ln_width1 > ln_width2) {
261 ln_width1 = ln_width2;
263 width |= pow2_to_link_width[ln_width1] << 4;
265 /* See if I am changing node1's width */
266 old_width = pci_read_config8(node1, link1 + PCI_HT_CAP_HOST_WIDTH + 1);
267 needs_reset |= old_width != width;
269 /* Set node1's widths */
270 pci_write_config8(node1, link1 + PCI_HT_CAP_HOST_WIDTH + 1, width);
272 /* Calculate node2's width */
273 width = ((width & 0x70) >> 4) | ((width & 0x7) << 4);
275 /* See if I am changing node2's width */
276 old_width = pci_read_config8(node2, link2 + PCI_HT_CAP_HOST_WIDTH + 1);
277 needs_reset |= old_width != width;
279 /* Set node2's widths */
280 pci_write_config8(node2, link2 + PCI_HT_CAP_HOST_WIDTH + 1, width);
285 static void setup_row_local(u8 source, u8 row) // source will be 7 when it is for temp use
290 for(linkn = 0; linkn<3; linkn++) {
293 regpos = 0x98 + 0x20 * linkn;
294 reg = pci_read_config32(NODE_HT(source), regpos);
295 if ((reg & 0x17) != 3) continue; // it is not conherent or not connected
300 fill_row(source,row, val);
303 static void setup_row_direct(u8 source, u8 dest, u8 linkn)
308 val |= 1<<(linkn+1+8); //for direct connect response route should equal to request table
311 if((source &1)!=(dest &1)){
314 val_s = get_row(source, source);
315 val |= ((val_s>>16) - (1<<(linkn+1)))<<16;
318 if(((source &1)!=(dest &1)) && (source<4) &&(dest<<4)){
321 //for CROSS_BAR_47_56 47, 74, 56, 65 should be here too
322 val_s = get_row(source, source);
323 val |= ((val_s>>16) - (1<<(linkn+1)))<<16;
328 fill_row(source,dest, val);
330 static uint8_t get_linkn_first(uint8_t byte)
332 if(byte & 0x02) { byte = 0; }
333 else if(byte & 0x04) { byte = 1; }
334 else if(byte & 0x08) { byte = 2; }
337 static uint8_t get_linkn_last(uint8_t byte)
339 if(byte & 0x02) { byte &= 0x0f; byte |= 0x00; }
340 if(byte & 0x04) { byte &= 0x0f; byte |= 0x10; }
341 if(byte & 0x08) { byte &= 0x0f; byte |= 0x20; }
344 static uint8_t get_linkn_last_count(uint8_t byte)
347 if(byte & 0x02) { byte &= 0xcf; byte |= 0x00; byte+=0x40; }
348 if(byte & 0x04) { byte &= 0xcf; byte |= 0x10; byte+=0x40; }
349 if(byte & 0x08) { byte &= 0xcf; byte |= 0x20; byte+=0x40; }
353 #if CONFIG_MAX_CPUS>2
355 static void setup_row_indirect(u8 source, u8 dest, u8 gateway)
357 static void setup_row_indirect(u8 source, u8 dest, u8 gateway, u8 diff)
360 //for indirect connection, we need to compute the val from val_s(source, source), and val_g(source, gateway)
363 val_s = get_row(source, source);
364 val = get_row(source, gateway);
370 if(((source&1)!=(dest &1)) && (val_s!=val) ) { // use another connect as response
372 #if CONFIG_MAX_CPUS>4
374 // Some node have two links left
376 byte = get_linkn_last_count(byte);
377 if((byte>>2)>1) { // make sure not the corner
379 val_s-=link_connection(source, source-2); // - down
381 val_s-=link_connection(source, source+2); // - up
389 if((source&1)!=(dest &1)) { // different rungs
393 val_s = get_row(source, source);
394 val |= ((val_s>>16) - link_connection(source, gateway))<<16;
399 if(diff && (val_s!=val) ) { // use another connect as response
401 #if CONFIG_MAX_CPUS>4
403 // Some node have two links left
404 // don't worry we only have (2, (3 as source need to handle
406 byte = get_linkn_last_count(byte);
407 if((byte>>2)>1) { // make sure not the corner
409 val_s-=link_connection(source, source-2); // -down
411 val_s-=link_connection(source, source+2); // -up
419 if(diff) { // cross rung?
423 val_s = get_row(source, source);
424 val |= ((val_s>>16) - link_connection(source, gateway))<<16;
429 fill_row(source, dest, val);
432 static void setup_row_indirect_group(const u8 *conn, int num)
435 for(i=0; i<num; i+=4) {
437 setup_row_indirect(conn[i*3], conn[i*3+1],conn[i*3+2]);
439 setup_row_indirect(conn[i*4], conn[i*4+1],conn[i*4+2], conn[i*4+3]);
446 static void setup_temp_row(u8 source, u8 dest)
448 // copy val from (source, dest) to (source,7)
449 fill_row(source,7,get_row(source,dest));
452 static void clear_temp_row(u8 source)
454 fill_row(source, 7, DEFAULT);
457 static void setup_remote_row_direct(u8 source, u8 dest, u8 linkn)
462 val |= 1<<(linkn+1+8); //for direct connect response route should equal to request table
465 if((source &1)!=(dest &1)){
468 //for CROSS_BAR_47_56 47, 74, 56, 65 should be here too
469 val_s = get_row(7, source);
470 val |= ((val_s>>16) - (1<<(linkn+1)))<<16;
473 if(((source &1)!=(dest &1)) && (source<4) &&(dest<<4)){
476 //for CROSS_BAR_47_56 47, 74, 56, 65 should be here too
477 val_s = get_row(7, source);
478 val |= ((val_s>>16) - (1<<(linkn+1)))<<16;
482 fill_row(7,dest, val );
485 static void setup_remote_node(u8 node)
487 static const uint8_t pci_reg[] = {
488 0x44, 0x4c, 0x54, 0x5c, 0x64, 0x6c, 0x74, 0x7c,
489 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78,
490 0x84, 0x8c, 0x94, 0x9c, 0xa4, 0xac, 0xb4, 0xbc,
491 0x80, 0x88, 0x90, 0x98, 0xa0, 0xa8, 0xb0, 0xb8,
492 0xc4, 0xcc, 0xd4, 0xdc,
493 0xc0, 0xc8, 0xd0, 0xd8,
494 0xe0, 0xe4, 0xe8, 0xec,
498 print_spew("setup_remote_node: ");
500 /* copy the default resource map from node 0 */
501 for(i = 0; i < sizeof(pci_reg)/sizeof(pci_reg[0]); i++) {
505 value = pci_read_config32(NODE_MP(0), reg);
506 pci_write_config32(NODE_MP(7), reg, value);
509 print_spew("done\r\n");
514 static void setup_uniprocessor(void)
516 print_spew("Enabling UP settings\r\n");
517 #if CONFIG_LOGICAL_CPUS==1
518 unsigned tmp = (pci_read_config32(NODE_MC(0), 0xe8) >> 12) & 3;
524 struct setup_smp_result {
529 #if CONFIG_MAX_CPUS > 2
530 static int optimize_connection_group(const u8 *opt_conn, int num) {
533 for(i=0; i<num; i+=2) {
534 needs_reset = optimize_connection(
535 NODE_HT(opt_conn[i*2]), 0x80 + link_to_register(link_connection(opt_conn[i*2],opt_conn[i*2+1])),
536 NODE_HT(opt_conn[i*2+1]), 0x80 + link_to_register(link_connection(opt_conn[i*2+1],opt_conn[i*2])) );
542 #if CONFIG_MAX_CPUS > 1
543 static struct setup_smp_result setup_smp(void)
545 struct setup_smp_result result;
549 result.needs_reset = 0;
551 print_spew("Enabling SMP settings\r\n");
553 setup_row_local(0, 0); // it will update the broadcast RT
556 byte = (val>>16) & 0xfe;
557 if(byte<0x2) { // no coherent connection so get out.
562 /* Setup and check a temporary connection to node 1 */
565 byte = get_linkn_first(byte);
566 setup_row_direct(0,1, byte);
567 setup_temp_row(0, 1);
569 if (!check_connection(7)) {
570 print_spew("No connection to Node 1.\r\n");
571 setup_uniprocessor(); /* and get up working */
576 /* We found 2 nodes so far */
577 val = pci_read_config32(NODE_HT(7), 0x6c);
578 byte = (val>2) & 0x3; // get default link on 7 to 0
579 setup_row_local(7,1);
580 setup_remote_row_direct(1, 0, byte);
582 #if CONFIG_MAX_CPUS>4
584 byte = (val>>16) & 0xfe;
585 byte = get_linkn_last_count(byte);
586 if((byte>>2)==3) { // Oh! we need to treat it as cpu2.
588 byte = (val>>16) & 0xfe;
589 byte = get_linkn_last(byte);
590 setup_row_direct(0,1, byte);
591 setup_temp_row(0, 1);
593 if (!check_connection(7)) {
594 print_spew("No connection to Node 1.\r\n");
595 setup_uniprocessor(); /* and get up working */
600 /* We found 2 nodes so far */
601 val = pci_read_config32(NODE_HT(7), 0x6c);
602 byte = (val>2) & 0x3; // get default link on 7 to 0
603 setup_row_local(7,1);
604 setup_remote_row_direct(1, 0, byte);
608 setup_remote_node(1); /* Setup the regs on the remote node */
609 rename_temp_node(1); /* Rename Node 7 to Node 1 */
610 enable_routing(1); /* Enable routing on Node 1 */
612 // don't need and it is done by clear_dead_links
616 result.needs_reset = optimize_connection(
617 NODE_HT(0), 0x80 + link_to_register(link_connection(0,1)),
618 NODE_HT(1), 0x80 + link_to_register(link_connection(1,0)) );
620 #if CONFIG_MAX_CPUS > 2
623 /* Setup and check temporary connection from Node 0 to Node 2 */
625 byte = ((val>>16) & 0xfe) - link_connection(0,1);
626 byte = get_linkn_last_count(byte);
629 if((byte>>2)==0) { // We should have two coherent for 4p and above
633 byte &= 3; // bit [3,2] is count-1
634 setup_row_direct(0, 2, byte);
635 setup_temp_row(0, 2);
637 if (!check_connection(7) ) {
638 print_spew("No connection to Node 2.\r\n");
643 /* We found 3 nodes so far. Now setup a temporary
644 * connection from node 0 to node 3 via node 1
646 setup_temp_row(0,1); /* temp. link between nodes 0 and 1 */
647 /* here should setup_row_direct(1,3) at first, before that we should find the link in cpu 1 to 3*/
649 byte = ((val>>16) & 0xfe) - link_connection(1,0);
650 byte = get_linkn_first(byte);
651 setup_row_direct(1,3,byte);
652 setup_temp_row(1,3); /* temp. link between nodes 1 and 3 */
654 if (!check_connection(7)) {
655 print_spew("No connection to Node 3.\r\n");
660 /* We found 4 nodes so far. Now setup all nodes for 4p */
661 /* for indirect we will use clockwise routing */
663 static const u8 conn4_1[] = {
668 static const u8 conn4_1[] = {
674 setup_row_indirect_group(conn4_1, sizeof(conn4_1)/sizeof(conn4_1[0]));
678 val = pci_read_config32(NODE_HT(7), 0x6c);
679 byte = (val>2) & 0x3; // get default link on 7 to 0
681 setup_row_local(7,2);
682 setup_remote_row_direct(2, 0, byte);
683 setup_remote_node(2); /* Setup the regs on the remote node */
685 rename_temp_node(2); /* Rename Node 7 to Node 2 */
686 enable_routing(2); /* Enable routing on Node 2 */
692 val = pci_read_config32(NODE_HT(7), 0x6c);
693 byte = (val>2) & 0x3; // get default link on 7 to 0
695 setup_row_local(7,3);
696 setup_remote_row_direct(3, 1, byte);
697 setup_remote_node(3); /* Setup the regs on the remote node */
700 enable_routing(3); /* enable routing on node 3 (temp.) */
702 /* We need to init link between 2, and 3 direct link */
704 byte = ((val>>16) & 0xfe) - link_connection(2,0);
705 byte = get_linkn_last_count(byte);
706 #if CONFIG_MAX_CPUS>4
707 // We need to find out which link goes to CPU3.
708 // The method is to try to access another node 7; actually it is CPU4.
709 if((byte>>2)==2) { // one to CPU3, one to cpu0, one to CPU4
711 setup_row_direct(2, 4, byte);
712 setup_temp_row(2, 4);
714 if (check_connection(7)) { // so the link is to CPU4
715 //We need to re compute it
717 byte = (val>>16) & 0xfe;
718 byte = get_linkn_first(byte);
722 setup_row_direct(2,3, byte & 0x3);
725 byte = ((val>>16) & 0xfe) - link_connection(3,1);
726 byte = get_linkn_last_count(byte);
727 #if CONFIG_MAX_CPUS>4
728 // We need to find out which link goes to CPU2.
729 // The method is to try to access another node 7; actually it is CPU5.
730 if((byte>>2)==2) { // one to CPU2, one to cpu1, one to CPU5
733 setup_row_direct(3, 5, byte);
734 setup_temp_row(3, 5);
736 if (check_connection(7)) { // so the link is to CPU5
737 //We need to re compute it
739 byte = (val>>16) & 0xfe;
740 byte = get_linkn_first(byte);
744 setup_row_direct(3,2, byte & 0x3);
746 /* Set indirect connection to 0, and 1 for indirect we will use clockwise routing */
748 static const u8 conn4_2[] = {
753 static const u8 conn4_2[] = {
759 setup_row_indirect_group(conn4_2, sizeof(conn4_2)/sizeof(conn4_2[0]));
761 // We need to do sth to reverse work for setup_temp_row (0,1) (1,3)
763 // it will be done by clear_dead_links
768 /* optimize physical connections - by LYH */
769 static const u8 opt_conn4[] = {
775 result.needs_reset = optimize_connection_group(opt_conn4, sizeof(opt_conn4)/sizeof(opt_conn4[0]));
777 #endif /* CONFIG_MAX_CPUS > 2 */
779 #if CONFIG_MAX_CPUS > 4
782 /* Setup and check temporary connection from Node 0 to Node 4 via 2 */
784 byte = ((val>>16) & 0xfe) - link_connection(2,3) - link_connection(2,0);
785 byte = get_linkn_last_count(byte);
788 if((byte>>2)==0) { // We should have two coherent for 4p and above
792 byte &= 3; // bit [3,2] is count-1
793 setup_row_direct(2, 4, byte);
795 /* Setup and check temporary connection from Node 0 to Node 4 through 2*/
796 for(byte=0; byte<4; byte+=2) {
797 setup_temp_row(byte,byte+2); /* temp. link between nodes 0 and 2 */
800 if (!check_connection(7) ) {
801 print_spew("No connection to Node 4.\r\n");
806 /* Setup and check temporary connection from Node 0 to Node 5 through 1, 3*/
809 byte = ((val>>16) & 0xfe) - link_connection(3,2) - link_connection(3,1);
810 byte = get_linkn_last_count(byte);
812 if((byte>>2)==0) { // We should have two coherent for 4p and above
817 byte &= 3; // bit [3,2] is count-1
818 setup_row_direct(3, 5, byte);
820 setup_temp_row(0,1); /* temp. link between nodes 0 and 1 */
821 for(byte=0; byte<4; byte+=2) {
822 setup_temp_row(byte+1,byte+3); /* temp. link between nodes 1 and 3 */
825 if (!check_connection(7)) {
826 print_spew("No connection to Node 5.\r\n");
831 /* We found 6 nodes so far. Now setup all nodes for 6p */
832 static const u8 conn6_1[] = {
850 setup_row_indirect_group(conn6_1, sizeof(conn6_1)/sizeof(conn6_1[0]));
853 for(byte=0; byte<4; byte+=2) {
854 setup_temp_row(byte,byte+2);
856 val = pci_read_config32(NODE_HT(7), 0x6c);
857 byte = (val>2) & 0x3; // get default link on 7 to 0
859 setup_row_local(7,4);
860 setup_remote_row_direct(4, 2, byte);
861 setup_remote_node(4); /* Setup the regs on the remote node */
866 for(byte=0; byte<4; byte+=2) {
867 setup_temp_row(byte+1,byte+3);
870 val = pci_read_config32(NODE_HT(7), 0x6c);
871 byte = (val>2) & 0x3; // get default link on 7 to 0
872 setup_row_local(7,5);
873 setup_remote_row_direct(5, 3, byte);
874 setup_remote_node(5); /* Setup the regs on the remote node */
877 enable_routing(5); /* enable routing on node 5 (temp.) */
880 /* We need to init link between 4, and 5 direct link */
882 byte = ((val>>16) & 0xfe) - link_connection(4,2);
883 byte = get_linkn_last_count(byte);
884 #if CONFIG_MAX_CPUS>4
885 // We need to find out which link goes to CPU5.
886 // The method is to try to access another node 7; actually it is CPU6.
887 if((byte>>2)==2) { // one to CPU5, one to cpu2, one to CPU6
890 setup_row_direct(4, 6, byte);
891 setup_temp_row(4, 6);
893 if (check_connection(7)) { // so the link is to CPU4
894 //We need to re compute it
896 byte = (val>>16) & 0xfe;
897 byte = get_linkn_first(byte);
901 setup_row_direct(4,5, byte & 0x3);
904 byte = ((val>>16) & 0xfe) - link_connection(5,3);
905 byte = get_linkn_last_count(byte);
906 #if CONFIG_MAX_CPUS>4
907 // We need to find out which link goes to CPU4.
908 // The method is to try to access another node 7; actually it is CPU7.
909 if((byte>>2)==2) { // one to CPU4, one to cpu3, one to CPU7
913 setup_row_direct(5, 7, byte);
914 setup_temp_row(5, 7);
916 if (check_connection(7)) { // so the link is to CPU5
917 //We need to re compute it
919 byte = (val>>16) & 0xfe;
920 byte = get_linkn_first(byte);
924 setup_row_direct(5,4, byte & 0x3);
925 #endif // !CROSS_BAR_47_56
927 /* Set indirect connection to 0, to 3 for indirect we will use clockwise routing */
928 static const u8 conn6_2[] = {
948 setup_row_indirect_group(conn6_2, sizeof(conn6_2)/sizeof(conn6_2[0]));
950 // We need to do sth about reverse about setup_temp_row (0,1), (2,4), (1, 3), (3,5)
951 // It will be done by clear_dead_links
952 for(byte=0; byte<4; byte++) {
953 clear_temp_row(byte);
957 /* optimize physical connections - by LYH */
958 static const uint8_t opt_conn6[] ={
965 result.needs_reset = optimize_connection_group(opt_conn6, sizeof(opt_conn6)/sizeof(opt_conn6[0]));
968 #endif /* CONFIG_MAX_CPUS > 4 */
970 #if CONFIG_MAX_CPUS >6
973 /* Setup and check temporary connection from Node 0 to Node 6 via 2 and 4 to 7 */
976 byte = ((val>>16) & 0xfe) - link_connection(4,5) - link_connection(4,2);
978 byte = ((val>>16) & 0xfe) - link_connection(4,2);
980 byte = get_linkn_last_count(byte); // Max link to 6
981 if((byte>>2)==0) { // We should have two coherent for 8p and above
985 byte &= 3; // bit [3,2] is count-1
986 setup_row_direct(4, 6, byte);
988 /* Setup and check temporary connection from Node 0 to Node 6 through 2, and 4*/
989 for(byte=0; byte<6; byte+=2) {
990 setup_temp_row(byte,byte+2); /* temp. link between nodes 0 and 2 */
993 if (!check_connection(7) ) {
994 print_spew("No connection to Node 6.\r\n");
999 /* Setup and check temporary connection from Node 0 to Node 5 through 1, 3, 5*/
1002 byte = ((val>>16) & 0xfe) - link_connection(5,4) - link_connection(5,3);
1003 byte = get_linkn_first(byte);
1004 setup_row_direct(5, 7, byte);
1006 setup_temp_row(0,1); /* temp. link between nodes 0 and 1 */
1007 for(byte=0; byte<6; byte+=2) {
1008 setup_temp_row(byte+1,byte+3); /* temp. link between nodes 1 and 3 */
1012 byte = ((val>>16) & 0xfe) - link_connection(4,2) ;
1013 byte = get_linkn_first(byte); // min link to 7
1014 setup_row_direct(4, 7, byte);
1016 /* Setup and check temporary connection from Node 0 to Node 7 through 2, and 4*/
1017 for(byte=0; byte<4; byte+=2) {
1018 setup_temp_row(byte,byte+2); /* temp. link between nodes 0 and 2 */
1020 setup_temp_row(4, 7);
1024 if (!check_connection(7)) {
1025 print_spew("No connection to Node 7.\r\n");
1031 /* We found 8 nodes so far. Now setup all nodes for 8p */
1032 static const u8 conn8_1[] = {
1033 #if !CROSS_BAR_47_56
1055 setup_row_indirect_group(conn8_1,sizeof(conn8_1)/sizeof(conn8_1[0]));
1057 for(byte=0; byte<6; byte+=2) {
1058 setup_temp_row(byte,byte+2);
1060 val = pci_read_config32(NODE_HT(7), 0x6c);
1061 byte = (val>2) & 0x3; // get default link on 7 to 0
1063 setup_row_local(7,6);
1064 setup_remote_row_direct(6, 4, byte);
1065 setup_remote_node(6); /* Setup the regs on the remote node */
1066 rename_temp_node(6);
1069 #if !CROSS_BAR_47_56
1070 setup_temp_row(0,1);
1071 for(byte=0; byte<6; byte+=2) {
1072 setup_temp_row(byte+1,byte+3);
1075 val = pci_read_config32(NODE_HT(7), 0x6c);
1076 byte = (val>2) & 0x3; // get default link on 7 to 0
1077 setup_row_local(7,7);
1078 setup_remote_row_direct(7, 5, byte);
1081 for(byte=0; byte<4; byte+=2) {
1082 setup_temp_row(byte,byte+2);
1084 setup_temp_row(4,7);
1085 val = pci_read_config32(NODE_HT(7), 0x6c);
1086 byte = (val>2) & 0x3; // get default link on 7 to 0
1088 setup_row_local(7,7);
1089 setup_remote_row_direct(7, 4, byte);
1090 // till now 4-7, 7-4 done.
1092 setup_remote_node(7); /* Setup the regs on the remote node */
1093 // rename_temp_node(7);
1094 enable_routing(7); /* enable routing on node 5 (temp.) */
1097 //here init 5, 6 and 5, 7
1098 /* Setup and check temporary connection from Node 0 to Node 5 through 1, 3, 5*/
1101 byte = ((val>>16) & 0xfe) - link_connection(5,3);
1102 byte = get_linkn_last(byte);
1103 setup_row_direct(5, 7, byte);
1105 setup_temp_row(0,1); /* temp. link between nodes 0 and 1 */
1106 for(byte=0; byte<6; byte+=2) {
1107 setup_temp_row(byte+1,byte+3); /* temp. link between nodes 1 and 3 */
1110 if (!check_connection(7)) {
1111 // We need to recompute link to 7
1113 byte = ((val>>16) & 0xfe) - link_connection(5,3);
1114 byte = get_linkn_first(byte);
1116 byte &= 3; // bit [3,2] is count-1
1117 setup_row_direct(5, 7, byte);
1119 setup_temp_row(0,1); /* temp. link between nodes 0 and 1 */
1120 for(byte=0; byte<6; byte+=2) {
1121 setup_temp_row(byte+1,byte+3); /* temp. link between nodes 1 and 3 */
1124 setup_temp_row(5,7);
1126 check_connection(7);
1128 val = pci_read_config32(NODE_HT(7), 0x6c);
1129 byte = (val>2) & 0x3; // get default link on 7 to 0
1130 // setup_row_local(7,7);
1131 setup_remote_row_direct(7, 5, byte);
1132 //Till now 57, 75 done
1136 byte = ((val>>16) & 0xfe) - link_connection(5,3) - link_connection(5,7);
1137 byte = get_linkn_first(byte);
1138 setup_row_direct(5, 6, byte);
1142 byte = ((val>>16) & 0xfe) - link_connection(6,4);
1143 byte = get_linkn_last(byte);
1144 setup_row_direct(6, 7, byte);
1146 for(byte=0; byte<6; byte+=2) {
1147 setup_temp_row(byte,byte+2); /* temp. link between nodes 0 and 2 */
1149 setup_temp_row(6,7);
1151 if (!check_connection(7)) {
1152 // We need to recompute link to 7
1154 byte = ((val>>16) & 0xfe) - link_connection(6,4);
1155 byte = get_linkn_first(byte);
1157 setup_row_direct(6, 7, byte);
1159 for(byte=0; byte<6; byte+=2) {
1160 setup_temp_row(byte,byte+2); /* temp. link between nodes 0 and 2 */
1163 setup_temp_row(6,7);
1164 check_connection(7);
1166 val = pci_read_config32(NODE_HT(7), 0x6c);
1167 byte = (val>2) & 0x3; // get default link on 7 to 0
1168 // setup_row_local(7,7);
1169 setup_remote_row_direct(7, 6, byte);
1170 //Till now 67, 76 done
1174 byte = ((val>>16) & 0xfe) - link_connection(6,4) - link_connection(6,7);
1175 byte = get_linkn_first(byte);
1176 setup_row_direct(6, 5, byte);
1180 #if !CROSS_BAR_47_56
1181 /* We need to init link between 6, and 7 direct link */
1183 byte = ((val>>16) & 0xfe) - link_connection(6,4);
1184 byte = get_linkn_first(byte);
1185 setup_row_direct(6,7, byte & 0x3);
1188 byte = ((val>>16) & 0xfe) - link_connection(7,5);
1189 byte = get_linkn_first(byte);
1190 setup_row_direct(7,6, byte & 0x3);
1193 /* Set indirect connection to 0, to 3 for indirect we will use clockwise routing */
1194 static const u8 conn8_2[] = {
1195 #if !CROSS_BAR_47_56
1196 0, 7, 1, // restore it
1213 0, 7, 2, 0, // restore it
1231 setup_row_indirect_group(conn8_2, sizeof(conn8_2)/sizeof(conn8_2[0]));
1233 static const uint8_t opt_conn8[] ={
1242 /* optimize physical connections - by LYH */
1243 result.needs_reset = optimize_connection_group(opt_conn8, sizeof(opt_conn6)/sizeof(opt_conn8[0]));
1245 #endif /* CONFIG_MAX_CPUS > 6 */
1247 print_debug_hex8(result.nodes);
1248 print_debug(" nodes initialized.\r\n");
1253 static unsigned verify_mp_capabilities(unsigned nodes)
1255 unsigned node, row, mask;
1259 mask=0x06; /* BigMPCap */
1260 } else if (nodes == 2) {
1261 mask=0x02; /* MPCap */
1263 mask=0x00; /* Non SMP */
1266 for (node=0; node<nodes; node++) {
1267 if ((pci_read_config32(NODE_MC(node), 0xe8) & mask) != mask) {
1276 /* one of our nodes is not mp capable */
1278 print_err("One of the CPUs is not MP capable. Going back to UP\r\n");
1283 static void clear_dead_routes(unsigned nodes)
1287 #if CONFIG_MAX_CPUS>6
1288 if(nodes==8) return;// don't touch (7,7)
1294 for(node = 7; node >= 0; node--) {
1295 for(row = 7; row >= last_row; row--) {
1296 fill_row(node, row, DEFAULT);
1301 static void coherent_ht_finalize(unsigned nodes)
1306 /* set up cpu count and node count and enable Limit
1307 * Config Space Range for all available CPUs.
1308 * Also clear non coherent hypertransport bus range
1309 * registers on Hammer A0 revision.
1312 print_spew("coherent_ht_finalize\r\n");
1313 rev_a0 = is_cpu_rev_a0();
1314 for (node = 0; node < nodes; node++) {
1317 dev = NODE_HT(node);
1319 /* Set the Total CPU and Node count in the system */
1320 val = pci_read_config32(dev, 0x60);
1321 val &= (~0x000F0070);
1322 val |= ((nodes-1)<<16)|((nodes-1)<<4);
1323 pci_write_config32(dev, 0x60, val);
1325 /* Only respond to real cpu pci configuration cycles
1326 * and optimize the HT settings
1328 val=pci_read_config32(dev, 0x68);
1329 val &= ~((HTTC_BUF_REL_PRI_MASK << HTTC_BUF_REL_PRI_SHIFT) |
1330 (HTTC_MED_PRI_BYP_CNT_MASK << HTTC_MED_PRI_BYP_CNT_SHIFT) |
1331 (HTTC_HI_PRI_BYP_CNT_MASK << HTTC_HI_PRI_BYP_CNT_SHIFT));
1332 val |= HTTC_LIMIT_CLDT_CFG |
1333 (HTTC_BUF_REL_PRI_8 << HTTC_BUF_REL_PRI_SHIFT) |
1335 (3 << HTTC_MED_PRI_BYP_CNT_SHIFT) |
1336 (3 << HTTC_HI_PRI_BYP_CNT_SHIFT);
1337 pci_write_config32(dev, 0x68, val);
1340 print_spew("shit it is an old cup\n");
1341 pci_write_config32(dev, 0x94, 0);
1342 pci_write_config32(dev, 0xb4, 0);
1343 pci_write_config32(dev, 0xd4, 0);
1347 print_spew("done\r\n");
1350 static int apply_cpu_errata_fixes(unsigned nodes, int needs_reset)
1353 for(node = 0; node < nodes; node++) {
1356 dev = NODE_MC(node);
1357 if (is_cpu_pre_c0()) {
1360 * Limit the number of downstream posted requests to 1
1362 cmd = pci_read_config32(dev, 0x70);
1363 if ((cmd & (3 << 0)) != 2) {
1366 pci_write_config32(dev, 0x70, cmd );
1369 cmd = pci_read_config32(dev, 0x7c);
1370 if ((cmd & (3 << 4)) != 0) {
1373 pci_write_config32(dev, 0x7c, cmd );
1376 /* Clock Power/Timing Low */
1377 cmd = pci_read_config32(dev, 0xd4);
1378 if (cmd != 0x000D0001) {
1380 pci_write_config32(dev, 0xd4, cmd);
1381 needs_reset = 1; /* Needed? */
1388 * Set Clk Ramp Hystersis to 7
1389 * Clock Power/Timing Low
1391 cmd_ref = 0x04e20707; /* Registered */
1392 cmd = pci_read_config32(dev, 0xd4);
1393 if(cmd != cmd_ref) {
1394 pci_write_config32(dev, 0xd4, cmd_ref );
1395 needs_reset = 1; /* Needed? */
1402 static int optimize_link_read_pointers(unsigned nodes, int needs_reset)
1405 for(node = 0; node < nodes; node = node + 1) {
1406 device_t f0_dev, f3_dev;
1407 uint32_t cmd_ref, cmd;
1409 f0_dev = NODE_HT(node);
1410 f3_dev = NODE_MC(node);
1411 cmd_ref = cmd = pci_read_config32(f3_dev, 0xdc);
1412 for(link = 0; link < 3; link = link + 1) {
1415 /* This works on an Athlon64 because unimplemented links return 0 */
1416 reg = 0x98 + (link * 0x20);
1417 link_type = pci_read_config32(f0_dev, reg);
1418 if (link_type & LinkConnected) {
1419 cmd &= 0xff << (link *8);
1420 /* FIXME this assumes the device on the other
1421 * side is an AMD device
1423 cmd |= 0x25 << (link *8);
1426 if (cmd != cmd_ref) {
1427 pci_write_config32(f3_dev, 0xdc, cmd);
1434 static int setup_coherent_ht_domain(void)
1436 struct setup_smp_result result;
1438 result.needs_reset = 0;
1440 enable_bsp_routing();
1442 #if CONFIG_MAX_CPUS > 1
1443 result = setup_smp();
1445 result.nodes = verify_mp_capabilities(result.nodes);
1446 clear_dead_routes(result.nodes);
1447 if (result.nodes == 1) {
1448 setup_uniprocessor();
1450 coherent_ht_finalize(result.nodes);
1451 result.needs_reset = apply_cpu_errata_fixes(result.nodes, result.needs_reset);
1452 result.needs_reset = optimize_link_read_pointers(result.nodes, result.needs_reset);
1453 return result.needs_reset;