1 /* coherent hypertransport initialization for AMD64
3 * written by Stefan Reinauer <stepan@openbios.org>
4 * (c) 2003-2004 by SuSE Linux AG
6 * (c) 2004 Tyan Computer
7 * 2004.12 yhlu added support to create the routing table dynamically.
8 * It also supports 8-way systems (8-way ladder or 8-way crossbar).
9 * This code is licensed under GPL.
13 * This algorithm assumes a grid configuration as follows:
16 * org. : 1x1 2x1 2x2 2x3 2x4
20 #include <device/pci_def.h>
21 #include <device/pci_ids.h>
22 #include <device/hypertransport_def.h>
23 #include "arch/romcc_io.h"
26 /* when generating a temporary row configuration we
27 * don't want broadcast to be enabled for that node.
/* Enable routing on node 0; the macro name refers to it as the BSP. */
#define enable_bsp_routing()	enable_routing(0)

/*
 * PCI config-space handles for the per-node functions used in this file.
 * Node x is addressed as bus 0, device 24 + x; the last argument selects
 * function 0, 1 or 3.  The argument is parenthesized so that an
 * expression argument such as NODE_HT(n << 1) expands as intended.
 */
#define NODE_HT(x) PCI_DEV(0, 24 + (x), 0)
#define NODE_MP(x) PCI_DEV(0, 24 + (x), 1)
#define NODE_MC(x) PCI_DEV(0, 24 + (x), 3)

#define DEFAULT 0x00010101 /* default row entry */
45 static u8 link_to_register(int ldt)
48 * [ 0: 3] Request Route
49 * [0] Route to this node
55 if (ldt&0x08) return 0x40;
56 if (ldt&0x04) return 0x20;
57 if (ldt&0x02) return 0x00;
59 /* we should never get here */
60 print_spew("Unknown Link\n");
64 static void disable_probes(void)
66 /* disable read/write/fill probes for uniprocessor setup
67 * they don't make sense if only one cpu is available
70 /* Hypetransport Transaction Control Register
72 * [ 0: 0] Disable read byte probe
74 * 1 = Probes not issued
75 * [ 1: 1] Disable Read Doubleword probe
77 * 1 = Probes not issued
78 * [ 2: 2] Disable write byte probes
80 * 1 = Probes not issued
81 * [ 3: 3] Disable Write Doubleword Probes
83 * 1 = Probes not issued.
84 * [10:10] Disable Fill Probe
85 * 0 = Probes issued for cache fills
86 * 1 = Probes not issued for cache fills.
91 print_spew("Disabling read/write/fill probes for UP... ");
93 val=pci_read_config32(NODE_HT(0), 0x68);
94 val |= (1<<10)|(1<<9)|(1<<8)|(1<<4)|(1<<3)|(1<<2)|(1<<1)|(1 << 0);
95 pci_write_config32(NODE_HT(0), 0x68, val);
97 print_spew("done.\r\n");
101 static void enable_routing(u8 node)
105 /* HT Initialization Control Register
107 * [ 0: 0] Routing Table Disable
108 * 0 = Packets are routed according to routing tables
109 * 1 = Packets are routed according to the default link field
110 * [ 1: 1] Request Disable (BSP should clear this)
111 * 0 = Request packets may be generated
112 * 1 = Request packets may not be generated.
113 * [ 3: 2] Default Link (Read-only)
117 * 11 = CPU on same node
119 * - Scratch bit cleared by a cold reset
120 * [ 5: 5] BIOS Reset Detect
121 * - Scratch bit cleared by a cold reset
122 * [ 6: 6] INIT Detect
123 * - Scratch bit cleared by a warm or cold reset not by an INIT
127 /* Enable routing table */
128 print_spew("Enabling routing table for node ");
129 print_spew_hex8(node);
131 val=pci_read_config32(NODE_HT(node), 0x6c);
132 val &= ~((1<<1)|(1<<0));
133 pci_write_config32(NODE_HT(node), 0x6c, val);
135 print_spew(" done.\r\n");
138 static void fill_row(u8 node, u8 row, u32 value)
140 pci_write_config32(NODE_HT(node), 0x40+(row<<2), value);
142 static u32 get_row(u8 node, u8 row)
144 return pci_read_config32(NODE_HT(node), 0x40+(row<<2));
147 static int link_connection(u8 src, u8 dest)
149 /* we generate the needed link information from the rows
150 * by taking the Request Route of the according row.
153 return get_row(src, dest) & 0x0f;
157 #if CONFIG_MAX_CPUS > 1
159 static void rename_temp_node(u8 node)
163 print_spew("Renaming current temporary node to ");
164 print_spew_hex8(node);
166 val=pci_read_config32(NODE_HT(7), 0x60);
167 val &= (~7); /* clear low bits. */
168 val |= node; /* new node */
169 pci_write_config32(NODE_HT(7), 0x60, val);
171 print_spew(" done.\r\n");
174 static bool check_connection(u8 dest)
176 /* See if we have a valid connection to dest */
179 /* Verify that the coherent hypertransport link is
180 * established and actually working by reading the
181 * remode node's vendor/device id
183 val = pci_read_config32(NODE_HT(dest),0);
184 if(val != 0x11001022)
190 static unsigned read_freq_cap(device_t dev, unsigned pos)
192 /* Handle bugs in valid hypertransport frequency reporting */
196 freq_cap = pci_read_config16(dev, pos);
197 freq_cap &= ~(1 << HT_FREQ_VENDOR); /* Ignore Vendor HT frequencies */
199 id = pci_read_config32(dev, 0);
201 /* AMD 8131 Errata 48 */
202 if (id == (PCI_VENDOR_ID_AMD | (PCI_DEVICE_ID_AMD_8131_PCIX << 16))) {
203 freq_cap &= ~(1 << HT_FREQ_800Mhz);
205 /* AMD 8151 Errata 23 */
206 if (id == (PCI_VENDOR_ID_AMD | (PCI_DEVICE_ID_AMD_8151_SYSCTRL << 16))) {
207 freq_cap &= ~(1 << HT_FREQ_800Mhz);
209 /* AMD K8 Unsupported 1Ghz? */
210 if (id == (PCI_VENDOR_ID_AMD | (0x1100 << 16))) {
211 freq_cap &= ~(1 << HT_FREQ_1000Mhz);
216 static int optimize_connection(device_t node1, uint8_t link1, device_t node2, uint8_t link2)
218 static const uint8_t link_width_to_pow2[]= { 3, 4, 0, 5, 1, 2, 0, 0 };
219 static const uint8_t pow2_to_link_width[] = { 0x7, 4, 5, 0, 1, 3 };
220 uint16_t freq_cap1, freq_cap2, freq_cap, freq_mask;
221 uint8_t width_cap1, width_cap2, width_cap, width, old_width, ln_width1, ln_width2;
222 uint8_t freq, old_freq;
224 /* Set link width and frequency */
226 /* Initially assume everything is already optimized and I don't need a reset */
229 /* Get the frequency capabilities */
230 freq_cap1 = read_freq_cap(node1, link1 + PCI_HT_CAP_HOST_FREQ_CAP);
231 freq_cap2 = read_freq_cap(node2, link2 + PCI_HT_CAP_HOST_FREQ_CAP);
233 /* Calculate the highest possible frequency */
234 freq = log2(freq_cap1 & freq_cap2);
236 /* See if I am changing the link freqency */
237 old_freq = pci_read_config8(node1, link1 + PCI_HT_CAP_HOST_FREQ);
238 needs_reset |= old_freq != freq;
239 old_freq = pci_read_config8(node2, link2 + PCI_HT_CAP_HOST_FREQ);
240 needs_reset |= old_freq != freq;
242 /* Set the Calulcated link frequency */
243 pci_write_config8(node1, link1 + PCI_HT_CAP_HOST_FREQ, freq);
244 pci_write_config8(node2, link2 + PCI_HT_CAP_HOST_FREQ, freq);
246 /* Get the width capabilities */
247 width_cap1 = pci_read_config8(node1, link1 + PCI_HT_CAP_HOST_WIDTH);
248 width_cap2 = pci_read_config8(node2, link2 + PCI_HT_CAP_HOST_WIDTH);
250 /* Calculate node1's input width */
251 ln_width1 = link_width_to_pow2[width_cap1 & 7];
252 ln_width2 = link_width_to_pow2[(width_cap2 >> 4) & 7];
253 if (ln_width1 > ln_width2) {
254 ln_width1 = ln_width2;
256 width = pow2_to_link_width[ln_width1];
257 /* Calculate node1's output width */
258 ln_width1 = link_width_to_pow2[(width_cap1 >> 4) & 7];
259 ln_width2 = link_width_to_pow2[width_cap2 & 7];
260 if (ln_width1 > ln_width2) {
261 ln_width1 = ln_width2;
263 width |= pow2_to_link_width[ln_width1] << 4;
265 /* See if I am changing node1's width */
266 old_width = pci_read_config8(node1, link1 + PCI_HT_CAP_HOST_WIDTH + 1);
267 needs_reset |= old_width != width;
269 /* Set node1's widths */
270 pci_write_config8(node1, link1 + PCI_HT_CAP_HOST_WIDTH + 1, width);
272 /* Calculate node2's width */
273 width = ((width & 0x70) >> 4) | ((width & 0x7) << 4);
275 /* See if I am changing node2's width */
276 old_width = pci_read_config8(node2, link2 + PCI_HT_CAP_HOST_WIDTH + 1);
277 needs_reset |= old_width != width;
279 /* Set node2's widths */
280 pci_write_config8(node2, link2 + PCI_HT_CAP_HOST_WIDTH + 1, width);
285 static void setup_row_local(u8 source, u8 row) // source will be 7 when it is for temp use
290 for(linkn = 0; linkn<3; linkn++) {
293 regpos = 0x98 + 0x20 * linkn;
294 reg = pci_read_config32(NODE_HT(source), regpos);
295 if ((reg & 0x17) != 3) continue; // it is not conherent or not connected
300 fill_row(source,row, val);
303 static void setup_row_direct(u8 source, u8 dest, u8 linkn)
308 val |= 1<<(linkn+1+8); //for direct connect response route should equal to request table
309 fill_row(source,dest, val);
/*
 * Return the number of the lowest link selected in a route mask.
 *
 * Bit 1 of `byte` selects link 0, bit 2 link 1 and bit 3 link 2; the
 * lowest set bit wins.  When none of these bits is set the argument is
 * returned unchanged.
 */
static uint8_t get_linkn_first(uint8_t byte)
{
	if (byte & 0x02) {
		return 0;
	}
	if (byte & 0x04) {
		return 1;
	}
	if (byte & 0x08) {
		return 2;
	}
	return byte;
}
318 static uint8_t get_linkn_last(uint8_t byte)
320 if(byte & 0x02) { byte &= 0x0f; byte |= 0x00; }
321 if(byte & 0x04) { byte &= 0x0f; byte |= 0x10; }
322 if(byte & 0x08) { byte &= 0x0f; byte |= 0x20; }
325 static uint8_t get_linkn_last_count(uint8_t byte)
328 if(byte & 0x02) { byte &= 0xcf; byte |= 0x00; byte+=0x40; }
329 if(byte & 0x04) { byte &= 0xcf; byte |= 0x10; byte+=0x40; }
330 if(byte & 0x08) { byte &= 0xcf; byte |= 0x20; byte+=0x40; }
334 #if CONFIG_MAX_CPUS>2
336 static void setup_row_indirect(u8 source, u8 dest, u8 gateway)
338 static void setup_row_indirect(u8 source, u8 dest, u8 gateway, u8 diff)
341 //for indirect connection, we need to compute the val from val_s(source, source), and val_g(source, gateway)
346 #warning "FIXME is it the way to set the RESPONSE TABLE for indirect?"
347 #warning "FIXME I don't know how to set BROADCAST TABLE for indirect, 1?"
348 val_s = get_row(source, source);
349 val_g = get_row(source, gateway);
355 if(((source&1)!=(dest &1)) && (val_s!=val) ) { // use another connect as response
357 #if CONFIG_MAX_CPUS>4
358 // Some node have two links left
360 byte = get_linkn_last_count(byte);
363 val_s-=link_connection(source, source-2); // - down
365 val_s-=link_connection(source, source+2); // - up
369 val |= (1<<16) | (val_s<<8);
371 val = val_g; // all the same to gateway
374 if(diff && (val_s!=val) ) { // use another connect as response
376 #if CONFIG_MAX_CPUS>4
377 // Some node have two links left
378 // don't worry we only have (2, (3 as source need to handle
380 byte = get_linkn_last_count(byte);
383 val_s-=link_connection(source, source-2); // -down
385 val_s-=link_connection(source, source+2); // -up
389 val |= (1<<16) | (val_s<<8);
391 val = val_g; // all the same to gateway
396 fill_row(source, dest, val);
399 static void setup_row_indirect_group(const u8 *conn, int num)
402 for(i=0; i<num; i+=4) {
404 setup_row_indirect(conn[i*3], conn[i*3+1],conn[i*3+2]);
406 setup_row_indirect(conn[i*4], conn[i*4+1],conn[i*4+2], conn[i*4+3]);
413 static void setup_temp_row(u8 source, u8 dest)
415 // copy val from (source, dest) to (source,7)
416 fill_row(source,7,get_row(source,dest));
419 static void clear_temp_row(u8 source)
421 fill_row(source, 7, DEFAULT);
424 static void setup_remote_row_direct(u8 source, u8 dest, u8 linkn)
429 val |= 1<<(linkn+1+8); //for direct connect response route should equal to request table
430 fill_row(7,dest, val );
432 #if CONFIG_MAX_CPUS>2
433 static void setup_remote_row_indirect(u8 source, u8 dest, u8 gateway)
435 //for indirect connection, we need to compute the val from val_s(source, source), and val_g(source, gateway)
440 val_s = get_row(7, source);
441 val_g = get_row(7, gateway);
446 if(val_s!=val) { // use another connect as response
448 val |= 1 | (val_s<<8);
450 val = val_g; // all the same to gateway
453 fill_row(7, dest, val);
458 static void setup_remote_node(u8 node)
460 static const uint8_t pci_reg[] = {
461 0x44, 0x4c, 0x54, 0x5c, 0x64, 0x6c, 0x74, 0x7c,
462 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78,
463 0x84, 0x8c, 0x94, 0x9c, 0xa4, 0xac, 0xb4, 0xbc,
464 0x80, 0x88, 0x90, 0x98, 0xa0, 0xa8, 0xb0, 0xb8,
465 0xc4, 0xcc, 0xd4, 0xdc,
466 0xc0, 0xc8, 0xd0, 0xd8,
467 0xe0, 0xe4, 0xe8, 0xec,
471 print_spew("setup_remote_node: ");
473 /* copy the default resource map from node 0 */
474 for(i = 0; i < sizeof(pci_reg)/sizeof(pci_reg[0]); i++) {
478 value = pci_read_config32(NODE_MP(0), reg);
479 pci_write_config32(NODE_MP(7), reg, value);
482 print_spew("done\r\n");
487 static void setup_uniprocessor(void)
489 print_spew("Enabling UP settings\r\n");
493 struct setup_smp_result {
498 #if CONFIG_MAX_CPUS > 2
499 static int optimize_connection_group(const u8 *opt_conn, int num) {
502 for(i=0; i<num; i+=2) {
503 needs_reset = optimize_connection(
504 NODE_HT(opt_conn[i*2]), 0x80 + link_to_register(link_connection(opt_conn[i*2],opt_conn[i*2+1])),
505 NODE_HT(opt_conn[i*2+1]), 0x80 + link_to_register(link_connection(opt_conn[i*2+1],opt_conn[i*2])) );
511 #if CONFIG_MAX_CPUS > 1
512 static struct setup_smp_result setup_smp(void)
514 struct setup_smp_result result;
518 result.needs_reset = 0;
520 print_spew("Enabling SMP settings\r\n");
522 setup_row_local(0, 0); // it will update the broadcast RT
525 byte = (val>>16) & 0xfe;
526 if(byte<0x2) { // no coherent connection so get out.
531 /* Setup and check a temporary connection to node 1 */
534 byte = get_linkn_first(byte);
535 setup_row_direct(0,1, byte);
536 setup_temp_row(0, 1);
538 if (!check_connection(7)) {
539 print_spew("No connection to Node 1.\r\n");
540 setup_uniprocessor(); /* and get up working */
545 /* We found 2 nodes so far */
546 val = pci_read_config32(NODE_HT(7), 0x6c);
547 byte = (val>2) & 0x3; // get default link on 7 to 0
548 setup_row_local(7,1);
549 setup_remote_row_direct(1, 0, byte);
551 #if CONFIG_MAX_CPUS>4
553 byte = (val>>16) & 0xfe;
554 byte = get_linkn_last_count(byte);
555 if((byte>>2)==3) { // Oh! we need to treat it as cpu2.
557 byte = (val>>16) & 0xfe;
558 byte = get_linkn_last(byte);
559 setup_row_direct(0,1, byte);
560 setup_temp_row(0, 1);
562 if (!check_connection(7)) {
563 print_spew("No connection to Node 1.\r\n");
564 setup_uniprocessor(); /* and get up working */
569 /* We found 2 nodes so far */
570 val = pci_read_config32(NODE_HT(7), 0x6c);
571 byte = (val>2) & 0x3; // get default link on 7 to 0
572 setup_row_local(7,1);
573 setup_remote_row_direct(1, 0, byte);
577 setup_remote_node(1); /* Setup the regs on the remote node */
578 rename_temp_node(1); /* Rename Node 7 to Node 1 */
579 enable_routing(1); /* Enable routing on Node 1 */
581 // don't need and it is done by clear_dead_links
585 result.needs_reset = optimize_connection(
586 NODE_HT(0), 0x80 + link_to_register(link_connection(0,1)),
587 NODE_HT(1), 0x80 + link_to_register(link_connection(1,0)) );
589 #if CONFIG_MAX_CPUS > 2
592 /* Setup and check temporary connection from Node 0 to Node 2 */
594 byte = ((val>>16) & 0xfe) - link_connection(0,1);
595 byte = get_linkn_last_count(byte);
598 if((byte>>2)==0) { // We should have two coherent for 4p and above
602 byte &= 3; // bit [3,2] is count-1
603 setup_row_direct(0, 2, byte);
604 setup_temp_row(0, 2);
606 if (!check_connection(7) ) {
607 print_spew("No connection to Node 2.\r\n");
612 /* We found 3 nodes so far. Now setup a temporary
613 * connection from node 0 to node 3 via node 1
615 setup_temp_row(0,1); /* temp. link between nodes 0 and 1 */
616 /* here should setup_row_direct(1,3) at first, before that we should find the link in cpu 1 to 3*/
618 byte = ((val>>16) & 0xfe) - link_connection(1,0);
619 byte = get_linkn_first(byte);
620 setup_row_direct(1,3,byte);
621 setup_temp_row(1,3); /* temp. link between nodes 1 and 3 */
623 if (!check_connection(7)) {
624 print_spew("No connection to Node 3.\r\n");
629 /* We found 4 nodes so far. Now setup all nodes for 4p */
630 /* for indirect we will use clockwise routing */
632 static const u8 conn4_1[] = {
637 static const u8 conn4_1[] = {
643 setup_row_indirect_group(conn4_1, sizeof(conn4_1)/sizeof(conn4_1[0]));
647 val = pci_read_config32(NODE_HT(7), 0x6c);
648 byte = (val>2) & 0x3; // get default link on 7 to 0
650 setup_row_local(7,2);
651 setup_remote_row_direct(2, 0, byte);
652 setup_remote_node(2); /* Setup the regs on the remote node */
654 rename_temp_node(2); /* Rename Node 7 to Node 2 */
655 enable_routing(2); /* Enable routing on Node 2 */
661 val = pci_read_config32(NODE_HT(7), 0x6c);
662 byte = (val>2) & 0x3; // get default link on 7 to 0
664 setup_row_local(7,3);
665 setup_remote_row_direct(3, 1, byte);
666 setup_remote_node(3); /* Setup the regs on the remote node */
669 enable_routing(3); /* enable routing on node 3 (temp.) */
671 /* We need to init link between 2, and 3 direct link */
673 byte = ((val>>16) & 0xfe) - link_connection(2,0);
674 byte = get_linkn_last_count(byte);
675 #if CONFIG_MAX_CPUS>4
676 // We need to find out which link goes to CPU3.
677 // The method is to try to reach another temporary node 7, which is actually CPU4.
678 if((byte>>2)==2) { // one to CPU3, one to cpu0, one to CPU4
680 setup_row_direct(2, 4, byte);
681 setup_temp_row(2, 4);
683 if (check_connection(7)) { // so the link is to CPU4
684 //We need to re compute it
686 byte = (val>>16) & 0xfe;
687 byte = get_linkn_first(byte);
691 setup_row_direct(2,3, byte & 0x3);
694 byte = ((val>>16) & 0xfe) - link_connection(3,1);
695 byte = get_linkn_last_count(byte);
696 #if CONFIG_MAX_CPUS>4
697 // We need to find out which link goes to CPU2.
698 // The method is to try to reach another temporary node 7, which is actually CPU5.
699 if((byte>>2)==2) { // one to CPU2, one to cpu1, one to CPU5
702 setup_row_direct(3, 5, byte);
703 setup_temp_row(3, 5);
705 if (check_connection(7)) { // so the link is to CPU5
706 //We need to re compute it
708 byte = (val>>16) & 0xfe;
709 byte = get_linkn_first(byte);
713 setup_row_direct(3,2, byte & 0x3);
715 /* Set indirect connection to 0, and 1 for indirect we will use clockwise routing */
717 static const u8 conn4_2[] = {
722 static const u8 conn4_2[] = {
728 setup_row_indirect_group(conn4_2, sizeof(conn4_2)/sizeof(conn4_2[0]));
730 // We need to do sth to reverse work for setup_temp_row (0,1) (1,3)
732 // it will be done by clear_dead_links
737 /* optimize physical connections - by LYH */
738 static const u8 opt_conn4[] = {
744 result.needs_reset = optimize_connection_group(opt_conn4, sizeof(opt_conn4)/sizeof(opt_conn4[0]));
746 #endif /* CONFIG_MAX_CPUS > 2 */
748 #if CONFIG_MAX_CPUS > 4
751 /* Setup and check temporary connection from Node 0 to Node 4 via 2 */
753 byte = ((val>>16) & 0xfe) - link_connection(2,3) - link_connection(2,0);
754 byte = get_linkn_last_count(byte);
757 if((byte>>2)==0) { // We should have two coherent for 4p and above
761 byte &= 3; // bit [3,2] is count-1
762 setup_row_direct(2, 4, byte);
764 /* Setup and check temporary connection from Node 0 to Node 4 through 2*/
765 for(byte=0; byte<4; byte+=2) {
766 setup_temp_row(byte,byte+2); /* temp. link between nodes 0 and 2 */
769 if (!check_connection(7) ) {
770 print_spew("No connection to Node 4.\r\n");
775 /* Setup and check temporary connection from Node 0 to Node 5 through 1, 3*/
778 byte = ((val>>16) & 0xfe) - link_connection(3,2) - link_connection(3,1);
779 byte = get_linkn_last_count(byte);
781 if((byte>>2)==0) { // We should have two coherent for 4p and above
786 byte &= 3; // bit [3,2] is count-1
787 setup_row_direct(3, 5, byte);
789 setup_temp_row(0,1); /* temp. link between nodes 0 and 1 */
790 for(byte=0; byte<4; byte+=2) {
791 setup_temp_row(byte+1,byte+3); /* temp. link between nodes 1 and 3 */
794 if (!check_connection(7)) {
795 print_spew("No connection to Node 5.\r\n");
800 /* We found 6 nodes so far. Now setup all nodes for 6p */
801 static const u8 conn6_1[] = {
819 setup_row_indirect_group(conn6_1, sizeof(conn6_1)/sizeof(conn6_1[0]));
822 for(byte=0; byte<4; byte+=2) {
823 setup_temp_row(byte,byte+2);
825 val = pci_read_config32(NODE_HT(7), 0x6c);
826 byte = (val>2) & 0x3; // get default link on 7 to 0
828 setup_row_local(7,4);
829 setup_remote_row_direct(4, 2, byte);
830 setup_remote_node(4); /* Setup the regs on the remote node */
835 for(byte=0; byte<4; byte+=2) {
836 setup_temp_row(byte+1,byte+3);
839 val = pci_read_config32(NODE_HT(7), 0x6c);
840 byte = (val>2) & 0x3; // get default link on 7 to 0
841 setup_row_local(7,5);
842 setup_remote_row_direct(5, 3, byte);
843 setup_remote_node(5); /* Setup the regs on the remote node */
846 enable_routing(5); /* enable routing on node 5 (temp.) */
849 /* We need to init link between 4, and 5 direct link */
851 byte = ((val>>16) & 0xfe) - link_connection(4,2);
852 byte = get_linkn_last_count(byte);
853 #if CONFIG_MAX_CPUS>4
854 // We need to find out which link goes to CPU5.
855 // The method is to try to reach another temporary node 7, which is actually CPU6.
856 if((byte>>2)==2) { // one to CPU5, one to cpu2, one to CPU6
859 setup_row_direct(4, 6, byte);
860 setup_temp_row(4, 6);
862 if (check_connection(7)) { // so the link is to CPU4
863 //We need to re compute it
865 byte = (val>>16) & 0xfe;
866 byte = get_linkn_first(byte);
870 setup_row_direct(4,5, byte & 0x3);
873 byte = ((val>>16) & 0xfe) - link_connection(5,3);
874 byte = get_linkn_last_count(byte);
875 #if CONFIG_MAX_CPUS>4
876 // We need to find out which link goes to CPU4.
877 // The method is to try to reach another temporary node 7, which is actually CPU7.
878 if((byte>>2)==2) { // one to CPU4, one to cpu3, one to CPU7
882 setup_row_direct(5, 7, byte);
883 setup_temp_row(5, 7);
885 if (check_connection(7)) { // so the link is to CPU5
886 //We need to re compute it
888 byte = (val>>16) & 0xfe;
889 byte = get_linkn_first(byte);
893 setup_row_direct(5,4, byte & 0x3);
894 #endif // !CROSS_BAR_47_56
896 /* Set indirect connection to 0, to 3 for indirect we will use clockwise routing */
897 static const u8 conn6_2[] = {
917 setup_row_indirect_group(conn6_2, sizeof(conn6_2)/sizeof(conn6_2[0]));
919 // We need to do sth about reverse about setup_temp_row (0,1), (2,4), (1, 3), (3,5)
920 // It will be done by clear_dead_links
921 for(byte=0; byte<4; byte++) {
922 clear_temp_row(byte);
926 /* optimize physical connections - by LYH */
927 static const uint8_t opt_conn6[] ={
934 result.needs_reset = optimize_connection_group(opt_conn6, sizeof(opt_conn6)/sizeof(opt_conn6[0]));
937 #endif /* CONFIG_MAX_CPUS > 4 */
939 #if CONFIG_MAX_CPUS >6
942 /* Setup and check temporary connection from Node 0 to Node 6 via 2 and 4 to 7 */
945 byte = ((val>>16) & 0xfe) - link_connection(4,5) - link_connection(4,2);
947 byte = ((val>>16) & 0xfe) - link_connection(4,2);
949 byte = get_linkn_last_count(byte); // Max link to 6
950 if((byte>>2)==0) { // We should have two coherent for 8p and above
954 byte &= 3; // bit [3,2] is count-1
955 setup_row_direct(4, 6, byte);
957 /* Setup and check temporary connection from Node 0 to Node 6 through 2, and 4*/
958 for(byte=0; byte<6; byte+=2) {
959 setup_temp_row(byte,byte+2); /* temp. link between nodes 0 and 2 */
962 if (!check_connection(7) ) {
963 print_spew("No connection to Node 6.\r\n");
968 /* Setup and check temporary connection from Node 0 to Node 7 through 1, 3, 5 */
971 byte = ((val>>16) & 0xfe) - link_connection(5,4) - link_connection(5,3);
972 byte = get_linkn_first(byte);
973 setup_row_direct(5, 7, byte);
975 setup_temp_row(0,1); /* temp. link between nodes 0 and 1 */
976 for(byte=0; byte<6; byte+=2) {
977 setup_temp_row(byte+1,byte+3); /* temp. link between nodes 1 and 3 */
981 byte = ((val>>16) & 0xfe) - link_connection(4,2) ;
982 byte = get_linkn_first(byte); // min link to 7
983 setup_row_direct(4, 7, byte);
985 /* Setup and check temporary connection from Node 0 to Node 7 through 2, and 4*/
986 for(byte=0; byte<4; byte+=2) {
987 setup_temp_row(byte,byte+2); /* temp. link between nodes 0 and 2 */
989 setup_temp_row(4, 7);
993 if (!check_connection(7)) {
994 print_spew("No connection to Node 7.\r\n");
1000 /* We found 8 nodes so far. Now setup all nodes for 8p */
1001 static const u8 conn8_1[] = {
1002 #if !CROSS_BAR_47_56
1024 setup_row_indirect_group(conn8_1,sizeof(conn8_1)/sizeof(conn8_1[0]));
1026 for(byte=0; byte<6; byte+=2) {
1027 setup_temp_row(byte,byte+2);
1029 val = pci_read_config32(NODE_HT(7), 0x6c);
1030 byte = (val>2) & 0x3; // get default link on 7 to 0
1032 setup_row_local(7,6);
1033 setup_remote_row_direct(6, 4, byte);
1034 setup_remote_node(6); /* Setup the regs on the remote node */
1035 rename_temp_node(6);
1038 #if !CROSS_BAR_47_56
1039 setup_temp_row(0,1);
1040 for(byte=0; byte<6; byte+=2) {
1041 setup_temp_row(byte+1,byte+3);
1044 val = pci_read_config32(NODE_HT(7), 0x6c);
1045 byte = (val>2) & 0x3; // get default link on 7 to 0
1046 setup_row_local(7,7);
1047 setup_remote_row_direct(7, 5, byte);
1050 for(byte=0; byte<4; byte+=2) {
1051 setup_temp_row(byte,byte+2);
1053 setup_temp_row(4,7);
1054 val = pci_read_config32(NODE_HT(7), 0x6c);
1055 byte = (val>2) & 0x3; // get default link on 7 to 0
1057 setup_row_local(7,7);
1058 setup_remote_row_direct(7, 4, byte);
1059 // till now 4-7, 7-4 done.
1061 setup_remote_node(7); /* Setup the regs on the remote node */
1062 // rename_temp_node(7);
1063 enable_routing(7); /* enable routing on node 5 (temp.) */
1066 //here init 5, 6 and 5, 7
1067 /* Setup and check temporary connection from Node 0 to Node 7 through 1, 3, 5 */
1070 byte = ((val>>16) & 0xfe) - link_connection(5,3);
1071 byte = get_linkn_last(byte);
1072 setup_row_direct(5, 7, byte);
1074 setup_temp_row(0,1); /* temp. link between nodes 0 and 1 */
1075 for(byte=0; byte<6; byte+=2) {
1076 setup_temp_row(byte+1,byte+3); /* temp. link between nodes 1 and 3 */
1079 if (!check_connection(7)) {
1080 // We need to recompute link to 7
1082 byte = ((val>>16) & 0xfe) - link_connection(5,3);
1083 byte = get_linkn_first(byte);
1085 byte &= 3; // bit [3,2] is count-1
1086 setup_row_direct(5, 7, byte);
1088 setup_temp_row(0,1); /* temp. link between nodes 0 and 1 */
1089 for(byte=0; byte<6; byte+=2) {
1090 setup_temp_row(byte+1,byte+3); /* temp. link between nodes 1 and 3 */
1093 setup_temp_row(5,7);
1095 check_connection(7);
1097 val = pci_read_config32(NODE_HT(7), 0x6c);
1098 byte = (val>2) & 0x3; // get default link on 7 to 0
1099 // setup_row_local(7,7);
1100 setup_remote_row_direct(7, 5, byte);
1101 //Till now 57, 75 done
1105 byte = ((val>>16) & 0xfe) - link_connection(5,3) - link_connection(5,7);
1106 byte = get_linkn_first(byte);
1107 setup_row_direct(5, 6, byte);
1111 byte = ((val>>16) & 0xfe) - link_connection(6,4);
1112 byte = get_linkn_last(byte);
1113 setup_row_direct(6, 7, byte);
1115 for(byte=0; byte<6; byte+=2) {
1116 setup_temp_row(byte,byte+2); /* temp. link between nodes 0 and 2 */
1118 setup_temp_row(6,7);
1120 if (!check_connection(7)) {
1121 // We need to recompute link to 7
1123 byte = ((val>>16) & 0xfe) - link_connection(6,4);
1124 byte = get_linkn_first(byte);
1126 setup_row_direct(6, 7, byte);
1128 for(byte=0; byte<6; byte+=2) {
1129 setup_temp_row(byte,byte+2); /* temp. link between nodes 0 and 2 */
1132 setup_temp_row(6,7);
1133 check_connection(7);
1135 val = pci_read_config32(NODE_HT(7), 0x6c);
1136 byte = (val>2) & 0x3; // get default link on 7 to 0
1137 // setup_row_local(7,7);
1138 setup_remote_row_direct(7, 6, byte);
1139 //Till now 67, 76 done
1143 byte = ((val>>16) & 0xfe) - link_connection(6,4) - link_connection(6,7);
1144 byte = get_linkn_first(byte);
1145 setup_row_direct(6, 5, byte);
1149 #if !CROSS_BAR_47_56
1150 /* We need to init link between 6, and 7 direct link */
1152 byte = ((val>>16) & 0xfe) - link_connection(6,4);
1153 byte = get_linkn_first(byte);
1154 setup_row_direct(6,7, byte & 0x3);
1157 byte = ((val>>16) & 0xfe) - link_connection(7,5);
1158 byte = get_linkn_first(byte);
1159 setup_row_direct(7,6, byte & 0x3);
1162 /* Set indirect connection to 0, to 3 for indirect we will use clockwise routing */
1163 static const u8 conn8_2[] = {
1164 #if !CROSS_BAR_47_56
1165 0, 7, 1, // restore it
1182 0, 7, 2, 0, // restore it
1200 setup_row_indirect_group(conn8_2, sizeof(conn8_2)/sizeof(conn8_2[0]));
1202 static const uint8_t opt_conn8[] ={
1211 /* optimize physical connections - by LYH */
1212 result.needs_reset = optimize_connection_group(opt_conn8, sizeof(opt_conn6)/sizeof(opt_conn8[0]));
1214 #endif /* CONFIG_MAX_CPUS > 6 */
1216 print_debug_hex8(result.nodes);
1217 print_debug(" nodes initialized.\r\n");
1222 static unsigned verify_mp_capabilities(unsigned nodes)
1224 unsigned node, row, mask;
1228 mask=0x06; /* BigMPCap */
1229 } else if (nodes == 2) {
1230 mask=0x02; /* MPCap */
1232 mask=0x00; /* Non SMP */
1235 for (node=0; node<nodes; node++) {
1236 if ((pci_read_config32(NODE_MC(node), 0xe8) & mask) != mask) {
1245 /* one of our nodes is not mp capable */
1247 print_err("One of the CPUs is not MP capable. Going back to UP\r\n");
1251 static void clear_dead_routes(unsigned nodes)
1255 #if CONFIG_MAX_CPUS>6
1256 if(nodes==8) return;// don't touch (7,7)
1262 for(node = 7; node >= 0; node--) {
1263 for(row = 7; row >= last_row; row--) {
1264 fill_row(node, row, DEFAULT);
1269 static void coherent_ht_finalize(unsigned nodes)
1274 /* set up cpu count and node count and enable Limit
1275 * Config Space Range for all available CPUs.
1276 * Also clear non coherent hypertransport bus range
1277 * registers on Hammer A0 revision.
1280 print_spew("coherent_ht_finalize\r\n");
1281 rev_a0 = is_cpu_rev_a0();
1282 for (node = 0; node < nodes; node++) {
1285 dev = NODE_HT(node);
1287 /* Set the Total CPU and Node count in the system */
1288 val = pci_read_config32(dev, 0x60);
1289 val &= (~0x000F0070);
1290 val |= ((nodes-1)<<16)|((nodes-1)<<4);
1291 pci_write_config32(dev, 0x60, val);
1293 /* Only respond to real cpu pci configuration cycles
1294 * and optimize the HT settings
1296 val=pci_read_config32(dev, 0x68);
1297 val &= ~((HTTC_BUF_REL_PRI_MASK << HTTC_BUF_REL_PRI_SHIFT) |
1298 (HTTC_MED_PRI_BYP_CNT_MASK << HTTC_MED_PRI_BYP_CNT_SHIFT) |
1299 (HTTC_HI_PRI_BYP_CNT_MASK << HTTC_HI_PRI_BYP_CNT_SHIFT));
1300 val |= HTTC_LIMIT_CLDT_CFG |
1301 (HTTC_BUF_REL_PRI_8 << HTTC_BUF_REL_PRI_SHIFT) |
1303 (3 << HTTC_MED_PRI_BYP_CNT_SHIFT) |
1304 (3 << HTTC_HI_PRI_BYP_CNT_SHIFT);
1305 pci_write_config32(dev, 0x68, val);
1308 print_spew("shit it is an old cup\n");
1309 pci_write_config32(dev, 0x94, 0);
1310 pci_write_config32(dev, 0xb4, 0);
1311 pci_write_config32(dev, 0xd4, 0);
1315 print_spew("done\r\n");
1318 static int apply_cpu_errata_fixes(unsigned nodes, int needs_reset)
1321 for(node = 0; node < nodes; node++) {
1324 dev = NODE_MC(node);
1325 if (is_cpu_pre_c0()) {
1328 * Limit the number of downstream posted requests to 1
1330 cmd = pci_read_config32(dev, 0x70);
1331 if ((cmd & (3 << 0)) != 2) {
1334 pci_write_config32(dev, 0x70, cmd );
1337 cmd = pci_read_config32(dev, 0x7c);
1338 if ((cmd & (3 << 4)) != 0) {
1341 pci_write_config32(dev, 0x7c, cmd );
1344 /* Clock Power/Timing Low */
1345 cmd = pci_read_config32(dev, 0xd4);
1346 if (cmd != 0x000D0001) {
1348 pci_write_config32(dev, 0xd4, cmd);
1349 needs_reset = 1; /* Needed? */
1356 * Set Clk Ramp Hystersis to 7
1357 * Clock Power/Timing Low
1359 cmd_ref = 0x04e20707; /* Registered */
1360 cmd = pci_read_config32(dev, 0xd4);
1361 if(cmd != cmd_ref) {
1362 pci_write_config32(dev, 0xd4, cmd_ref );
1363 needs_reset = 1; /* Needed? */
1370 static int optimize_link_read_pointers(unsigned nodes, int needs_reset)
1373 for(node = 0; node < nodes; node = node + 1) {
1374 device_t f0_dev, f3_dev;
1375 uint32_t cmd_ref, cmd;
1377 f0_dev = NODE_HT(node);
1378 f3_dev = NODE_MC(node);
1379 cmd_ref = cmd = pci_read_config32(f3_dev, 0xdc);
1380 for(link = 0; link < 3; link = link + 1) {
1383 /* This works on an Athlon64 because unimplemented links return 0 */
1384 reg = 0x98 + (link * 0x20);
1385 link_type = pci_read_config32(f0_dev, reg);
1386 if (link_type & LinkConnected) {
1387 cmd &= 0xff << (link *8);
1388 /* FIXME this assumes the device on the other
1389 * side is an AMD device
1391 cmd |= 0x25 << (link *8);
1394 if (cmd != cmd_ref) {
1395 pci_write_config32(f3_dev, 0xdc, cmd);
1402 static int setup_coherent_ht_domain(void)
1404 struct setup_smp_result result;
1406 result.needs_reset = 0;
1408 enable_bsp_routing();
1410 #if CONFIG_MAX_CPUS > 1
1411 result = setup_smp();
1413 result.nodes = verify_mp_capabilities(result.nodes);
1414 clear_dead_routes(result.nodes);
1415 if (result.nodes == 1) {
1416 setup_uniprocessor();
1418 coherent_ht_finalize(result.nodes);
1419 result.needs_reset = apply_cpu_errata_fixes(result.nodes, result.needs_reset);
1420 result.needs_reset = optimize_link_read_pointers(result.nodes, result.needs_reset);
1421 return result.needs_reset;