1 /* coherent hypertransport initialization for AMD64
3 * written by Stefan Reinauer <stepan@openbios.org>
4 * (c) 2003-2004 by SuSE Linux AG
6 * (c) 2004 Tyan Computer
7 * 2004.12 yhlu added support to create routing table dynamically.
8 * it also support 8 ways too. (8 ways ladder or 8 ways crossbar)
10 * This code is licensed under GPL.
14 * This algorithm assumes a grid configuration as follows:
17 * org. : 1x1 2x1 2x2 2x3 2x4
66 #include <device/pci_def.h>
67 #include <device/pci_ids.h>
68 #include <device/hypertransport_def.h>
69 #include "arch/romcc_io.h"
72 #define enable_bsp_routing() enable_routing(0)
74 #define NODE_HT(x) PCI_DEV(0,24+x,0)
75 #define NODE_MP(x) PCI_DEV(0,24+x,1)
76 #define NODE_MC(x) PCI_DEV(0,24+x,3)
78 #define DEFAULT 0x00010101 /* default row entry */
83 #ifndef CROSS_BAR_47_56
84 #define CROSS_BAR_47_56 0
87 #ifndef TRY_HIGH_FIRST
88 #define TRY_HIGH_FIRST 0
/* Emit one debug line: the label `strval` followed by `byteval` in hex. */
static inline void print_linkn(const char *strval, uint8_t byteval)
{
	print_debug(strval);
	print_debug_hex8(byteval);
	print_debug("\r\n");
}
99 static void disable_probes(void)
101 /* disable read/write/fill probes for uniprocessor setup
102 * they don't make sense if only one cpu is available
105 /* Hypetransport Transaction Control Register
107 * [ 0: 0] Disable read byte probe
109 * 1 = Probes not issued
110 * [ 1: 1] Disable Read Doubleword probe
112 * 1 = Probes not issued
113 * [ 2: 2] Disable write byte probes
115 * 1 = Probes not issued
116 * [ 3: 3] Disable Write Doubleword Probes
118 * 1 = Probes not issued.
119 * [10:10] Disable Fill Probe
120 * 0 = Probes issued for cache fills
121 * 1 = Probes not issued for cache fills.
126 print_spew("Disabling read/write/fill probes for UP... ");
128 val=pci_read_config32(NODE_HT(0), 0x68);
129 val |= (1<<10)|(1<<9)|(1<<8)|(1<<4)|(1<<3)|(1<<2)|(1<<1)|(1 << 0);
130 pci_write_config32(NODE_HT(0), 0x68, val);
132 print_spew("done.\r\n");
136 #ifndef ENABLE_APIC_EXT_ID
137 #define ENABLE_APIC_EXT_ID 0
140 static void enable_apic_ext_id(u8 node)
142 #if ENABLE_APIC_EXT_ID==1
143 #warning "FIXME Is the right place to enable apic ext id here?"
147 val = pci_read_config32(NODE_HT(node), 0x68);
148 val |= HTTC_APIC_EXT_ID | HTTC_APIC_EXT_BRD_CST ;
149 pci_write_config32(NODE_HT(node), 0x68, val);
153 static void enable_routing(u8 node)
157 /* HT Initialization Control Register
159 * [ 0: 0] Routing Table Disable
160 * 0 = Packets are routed according to routing tables
161 * 1 = Packets are routed according to the default link field
162 * [ 1: 1] Request Disable (BSP should clear this)
163 * 0 = Request packets may be generated
164 * 1 = Request packets may not be generated.
165 * [ 3: 2] Default Link (Read-only)
169 * 11 = CPU on same node
171 * - Scratch bit cleared by a cold reset
172 * [ 5: 5] BIOS Reset Detect
173 * - Scratch bit cleared by a cold reset
174 * [ 6: 6] INIT Detect
175 * - Scratch bit cleared by a warm or cold reset not by an INIT
179 /* Enable routing table */
180 print_spew("Enabling routing table for node ");
181 print_spew_hex8(node);
183 val=pci_read_config32(NODE_HT(node), 0x6c);
184 val &= ~((1<<1)|(1<<0));
185 pci_write_config32(NODE_HT(node), 0x6c, val);
187 print_spew(" done.\r\n");
190 static void fill_row(u8 node, u8 row, u32 value)
192 pci_write_config32(NODE_HT(node), 0x40+(row<<2), value);
195 #if CONFIG_MAX_CPUS > 1
196 static u8 link_to_register(int ldt)
199 * [ 0: 3] Request Route
200 * [0] Route to this node
201 * [1] Route to Link 0
202 * [2] Route to Link 1
203 * [3] Route to Link 2
206 if (ldt&0x08) return 0x40;
207 if (ldt&0x04) return 0x20;
208 if (ldt&0x02) return 0x00;
210 /* we should never get here */
211 print_spew("Unknown Link\n");
215 static u32 get_row(u8 node, u8 row)
217 return pci_read_config32(NODE_HT(node), 0x40+(row<<2));
220 static int link_connection(u8 src, u8 dest)
222 return get_row(src, dest) & 0x0f;
225 static void rename_temp_node(u8 node)
229 print_spew("Renaming current temporary node to ");
230 print_spew_hex8(node);
232 val=pci_read_config32(NODE_HT(7), 0x60);
233 val &= (~7); /* clear low bits. */
234 val |= node; /* new node */
235 pci_write_config32(NODE_HT(7), 0x60, val);
237 print_spew(" done.\r\n");
240 static int check_connection(u8 dest)
242 /* See if we have a valid connection to dest */
245 /* Verify that the coherent hypertransport link is
246 * established and actually working by reading the
247 * remode node's vendor/device id
249 val = pci_read_config32(NODE_HT(dest),0);
250 if(val != 0x11001022)
256 static unsigned read_freq_cap(device_t dev, unsigned pos)
258 /* Handle bugs in valid hypertransport frequency reporting */
262 freq_cap = pci_read_config16(dev, pos);
263 freq_cap &= ~(1 << HT_FREQ_VENDOR); /* Ignore Vendor HT frequencies */
265 id = pci_read_config32(dev, 0);
267 /* AMD 8131 Errata 48 */
268 if (id == (PCI_VENDOR_ID_AMD | (PCI_DEVICE_ID_AMD_8131_PCIX << 16))) {
269 freq_cap &= ~(1 << HT_FREQ_800Mhz);
271 /* AMD 8151 Errata 23 */
272 if (id == (PCI_VENDOR_ID_AMD | (PCI_DEVICE_ID_AMD_8151_SYSCTRL << 16))) {
273 freq_cap &= ~(1 << HT_FREQ_800Mhz);
275 /* AMD K8 Unsupported 1Ghz? */
276 if (id == (PCI_VENDOR_ID_AMD | (0x1100 << 16))) {
277 freq_cap &= ~(1 << HT_FREQ_1000Mhz);
282 static int optimize_connection(device_t node1, uint8_t link1, device_t node2, uint8_t link2)
284 static const uint8_t link_width_to_pow2[]= { 3, 4, 0, 5, 1, 2, 0, 0 };
285 static const uint8_t pow2_to_link_width[] = { 0x7, 4, 5, 0, 1, 3 };
286 uint16_t freq_cap1, freq_cap2, freq_cap, freq_mask;
287 uint8_t width_cap1, width_cap2, width_cap, width, old_width, ln_width1, ln_width2;
288 uint8_t freq, old_freq;
290 /* Set link width and frequency */
292 /* Initially assume everything is already optimized and I don't need a reset */
295 /* Get the frequency capabilities */
296 freq_cap1 = read_freq_cap(node1, link1 + PCI_HT_CAP_HOST_FREQ_CAP);
297 freq_cap2 = read_freq_cap(node2, link2 + PCI_HT_CAP_HOST_FREQ_CAP);
299 /* Calculate the highest possible frequency */
300 freq = log2(freq_cap1 & freq_cap2);
302 /* See if I am changing the link freqency */
303 old_freq = pci_read_config8(node1, link1 + PCI_HT_CAP_HOST_FREQ);
304 needs_reset |= old_freq != freq;
305 old_freq = pci_read_config8(node2, link2 + PCI_HT_CAP_HOST_FREQ);
306 needs_reset |= old_freq != freq;
308 /* Set the Calulcated link frequency */
309 pci_write_config8(node1, link1 + PCI_HT_CAP_HOST_FREQ, freq);
310 pci_write_config8(node2, link2 + PCI_HT_CAP_HOST_FREQ, freq);
312 /* Get the width capabilities */
313 width_cap1 = pci_read_config8(node1, link1 + PCI_HT_CAP_HOST_WIDTH);
314 width_cap2 = pci_read_config8(node2, link2 + PCI_HT_CAP_HOST_WIDTH);
316 /* Calculate node1's input width */
317 ln_width1 = link_width_to_pow2[width_cap1 & 7];
318 ln_width2 = link_width_to_pow2[(width_cap2 >> 4) & 7];
319 if (ln_width1 > ln_width2) {
320 ln_width1 = ln_width2;
322 width = pow2_to_link_width[ln_width1];
323 /* Calculate node1's output width */
324 ln_width1 = link_width_to_pow2[(width_cap1 >> 4) & 7];
325 ln_width2 = link_width_to_pow2[width_cap2 & 7];
326 if (ln_width1 > ln_width2) {
327 ln_width1 = ln_width2;
329 width |= pow2_to_link_width[ln_width1] << 4;
331 /* See if I am changing node1's width */
332 old_width = pci_read_config8(node1, link1 + PCI_HT_CAP_HOST_WIDTH + 1);
333 needs_reset |= old_width != width;
335 /* Set node1's widths */
336 pci_write_config8(node1, link1 + PCI_HT_CAP_HOST_WIDTH + 1, width);
338 /* Calculate node2's width */
339 width = ((width & 0x70) >> 4) | ((width & 0x7) << 4);
341 /* See if I am changing node2's width */
342 old_width = pci_read_config8(node2, link2 + PCI_HT_CAP_HOST_WIDTH + 1);
343 needs_reset |= old_width != width;
345 /* Set node2's widths */
346 pci_write_config8(node2, link2 + PCI_HT_CAP_HOST_WIDTH + 1, width);
351 static void setup_row_local(u8 source, u8 row) /* source will be 7 when it is for temp use*/
356 for(linkn = 0; linkn<3; linkn++) {
359 regpos = 0x98 + 0x20 * linkn;
360 reg = pci_read_config32(NODE_HT(source), regpos);
361 if ((reg & 0x17) != 3) continue; /* it is not conherent or not connected*/
366 fill_row(source,row, val);
369 static void setup_row_direct_x(u8 temp, u8 source, u8 dest, u8 linkn)
374 val |= 1<<(linkn+1+8); /*for direct connect response route should equal to request table*/
376 if(((source &1)!=(dest &1))
378 && (source<4) && (dest<4)
383 /*for CROSS_BAR_47_56 47, 74, 56, 65 should be here too*/
384 val_s = get_row(temp, source);
385 val |= ((val_s>>16) - (1<<(linkn+1)))<<16;
388 fill_row(temp,dest, val );
391 static void setup_row_direct(u8 source, u8 dest, u8 linkn){
392 setup_row_direct_x(source, source, dest, linkn);
395 static void setup_remote_row_direct(u8 source, u8 dest, u8 linkn){
396 setup_row_direct_x(7, source, dest, linkn);
/*
 * Return the number (0..2) of the lowest link whose bit is set in `byte`
 * (bit 1 = link 0, bit 2 = link 1, bit 3 = link 2).  When none of those
 * bits is set the argument is returned unchanged.
 */
static uint8_t get_linkn_first(uint8_t byte)
{
	if (byte & 0x02) {
		return 0;
	}
	if (byte & 0x04) {
		return 1;
	}
	if (byte & 0x08) {
		return 2;
	}
	return byte;
}
/*
 * Return the number (0..2) of the highest link whose bit is set in `byte`
 * (bit 1 = link 0, bit 2 = link 1, bit 3 = link 2).  When none of those
 * bits is set the upper nibble of the argument is returned.
 */
static uint8_t get_linkn_last(uint8_t byte)
{
	uint8_t link;

	link = byte >> 4;	/* result if no link bit is set */
	if (byte & 0x02) {
		link = 0;
	}
	if (byte & 0x04) {
		link = 1;
	}
	if (byte & 0x08) {
		link = 2;
	}
	return link;
}
/*
 * Find the highest link set in `byte` and count how many links are set
 * (bit 1 = link 0, bit 2 = link 1, bit 3 = link 2; other bits ignored).
 *
 * Result layout: bits [1:0] = number of the highest link found,
 *                bits [3:2] = how many link bits were set (0..3).
 */
static uint8_t get_linkn_last_count(uint8_t byte)
{
	uint8_t last;
	uint8_t count;
	uint8_t link;

	last = 0;
	count = 0;
	for (link = 0; link < 3; link++) {
		if (byte & (0x02 << link)) {
			last = link;
			count++;
		}
	}
	return (count << 2) | last;
}
424 static void setup_temp_row(u8 source, u8 dest)
426 /* copy val from (source, dest) to (source,7) */
427 fill_row(source,7,get_row(source,dest));
430 static void clear_temp_row(u8 source)
432 fill_row(source, 7, DEFAULT);
435 static void setup_remote_node(u8 node)
437 static const uint8_t pci_reg[] = {
438 0x44, 0x4c, 0x54, 0x5c, 0x64, 0x6c, 0x74, 0x7c,
439 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78,
440 0x84, 0x8c, 0x94, 0x9c, 0xa4, 0xac, 0xb4, 0xbc,
441 0x80, 0x88, 0x90, 0x98, 0xa0, 0xa8, 0xb0, 0xb8,
442 0xc4, 0xcc, 0xd4, 0xdc,
443 0xc0, 0xc8, 0xd0, 0xd8,
444 0xe0, 0xe4, 0xe8, 0xec,
448 print_spew("setup_remote_node: ");
450 /* copy the default resource map from node 0 */
451 for(i = 0; i < sizeof(pci_reg)/sizeof(pci_reg[0]); i++) {
455 value = pci_read_config32(NODE_MP(0), reg);
456 pci_write_config32(NODE_MP(7), reg, value);
459 print_spew("done\r\n");
462 #endif /* CONFIG_MAX_CPUS > 1*/
465 #if CONFIG_MAX_CPUS > 2
467 static void setup_row_indirect_x(u8 temp, u8 source, u8 dest, u8 gateway)
469 static void setup_row_indirect_x(u8 temp, u8 source, u8 dest, u8 gateway, u8 diff)
472 /*for indirect connection, we need to compute the val from val_s(source, source), and val_g(source, gateway) */
478 val_s = get_row(temp, source);
479 val = get_row(temp, gateway);
486 diff = ((source&1)!=(dest &1));
489 if(diff && (val_s!=(val&0xff)) ) { /* use another connect as response*/
491 #if CONFIG_MAX_CPUS > 4
493 /* Some node have two links left
494 * don't worry we only have (2, (3 as source need to handle
497 byte = get_linkn_last_count(byte);
498 if((byte>>2)>1) { /* make sure not the corner*/
500 val_s-=link_connection(temp, source-2); /* -down*/
502 val_s-=link_connection(temp, source+2); /* -up*/
510 if(diff) { /* cross rung?*/
514 val_s = get_row(temp, source);
515 val |= ((val_s>>16) - link_connection(temp, gateway))<<16;
518 fill_row(temp, dest, val);
523 static void setup_row_indirect(u8 source, u8 dest, u8 gateway)
525 setup_row_indirect_x(source, source, dest, gateway);
528 static void setup_row_indirect(u8 source, u8 dest, u8 gateway, u8 diff)
530 setup_row_indirect_x(source, source, dest, gateway, diff);
534 static void setup_row_indirect_group(const u8 *conn, int num)
539 for(i=0; i<num; i+=3) {
540 setup_row_indirect(conn[i], conn[i+1],conn[i+2]);
542 for(i=0; i<num; i+=4) {
543 setup_row_indirect(conn[i], conn[i+1],conn[i+2], conn[i+3]);
550 static void setup_remote_row_indirect(u8 source, u8 dest, u8 gateway)
552 setup_row_indirect_x(7, source, dest, gateway);
555 static void setup_remote_row_indirect(u8 source, u8 dest, u8 gateway, u8 diff)
557 setup_row_indirect_x(7, source, dest, gateway, diff);
561 static void setup_remote_row_indirect_group(const u8 *conn, int num)
566 for(i=0; i<num; i+=3) {
567 setup_remote_row_indirect(conn[i], conn[i+1],conn[i+2]);
569 for(i=0; i<num; i+=4) {
570 setup_remote_row_indirect(conn[i], conn[i+1],conn[i+2], conn[i+3]);
575 #endif /*CONFIG_MAX_CPUS > 2*/
578 static void setup_uniprocessor(void)
580 print_spew("Enabling UP settings\r\n");
584 struct setup_smp_result {
589 #if CONFIG_MAX_CPUS > 2
590 static int optimize_connection_group(const u8 *opt_conn, int num) {
593 for(i=0; i<num; i+=2) {
594 needs_reset = optimize_connection(
595 NODE_HT(opt_conn[i]), 0x80 + link_to_register(link_connection(opt_conn[i],opt_conn[i+1])),
596 NODE_HT(opt_conn[i+1]), 0x80 + link_to_register(link_connection(opt_conn[i+1],opt_conn[i])) );
602 #if CONFIG_MAX_CPUS > 1
603 static struct setup_smp_result setup_smp2(void)
605 struct setup_smp_result result;
609 result.needs_reset = 0;
611 setup_row_local(0, 0); /* it will update the broadcast RT*/
614 byte = (val>>16) & 0xfe;
615 if(byte<0x2) { /* no coherent connection so get out.*/
620 /* Setup and check a temporary connection to node 1 */
621 #if TRY_HIGH_FIRST == 1
622 byte = get_linkn_last(byte); /* Max Link to node1 */
624 byte = get_linkn_first(byte); /*Min Link to node1 --- according to AMD*/
626 print_linkn("(0,1) link=", byte);
627 setup_row_direct(0,1, byte);
628 setup_temp_row(0, 1);
630 if (!check_connection(7)) {
631 print_spew("No connection to Node 1.\r\n");
636 /* We found 2 nodes so far */
637 val = pci_read_config32(NODE_HT(7), 0x6c);
638 byte = (val>>2) & 0x3; /*get default link on node7 to node0*/
639 print_linkn("(1,0) link=", byte);
640 setup_row_local(7,1);
641 setup_remote_row_direct(1, 0, byte);
643 #if CONFIG_MAX_CPUS > 4
645 byte = (val>>16) & 0xfe;
646 byte = get_linkn_last_count(byte);
647 if((byte>>2)==3) { /* Oh! we need to treat it as node2. So use another link*/
649 byte = (val>>16) & 0xfe;
650 #if TRY_HIGH_FIRST == 1
651 byte = get_linkn_first(byte); /* Min link to Node1 */
653 byte = get_linkn_last(byte); /* Max link to Node1*/
655 print_linkn("-->(0,1) link=", byte);
656 setup_row_direct(0,1, byte);
657 setup_temp_row(0, 1);
659 if (!check_connection(7)) {
660 print_spew("No connection to Node 1.\r\n");
665 /* We found 2 nodes so far */
666 val = pci_read_config32(NODE_HT(7), 0x6c);
667 byte = (val>>2) & 0x3; /* get default link on node7 to node0*/
668 print_linkn("-->(1,0) link=", byte);
669 setup_row_local(7,1);
670 setup_remote_row_direct(1, 0, byte);
674 setup_remote_node(1); /* Setup the regs on the remote node */
675 rename_temp_node(1); /* Rename Node 7 to Node 1 */
676 enable_routing(1); /* Enable routing on Node 1 */
678 /*don't need and it is done by clear_dead_links */
682 result.needs_reset = optimize_connection(
683 NODE_HT(0), 0x80 + link_to_register(link_connection(0,1)),
684 NODE_HT(1), 0x80 + link_to_register(link_connection(1,0)) );
689 #endif /*CONFIG_MAX_CPUS > 1 */
691 #if CONFIG_MAX_CPUS > 2
693 static struct setup_smp_result setup_smp4(int needs_reset)
695 struct setup_smp_result result;
700 result.needs_reset = needs_reset;
702 /* Setup and check temporary connection from Node 0 to Node 2 */
704 byte = ((val>>16) & 0xfe) - link_connection(0,1);
705 byte = get_linkn_last_count(byte);
707 if((byte>>2)==0) { /* We should have two coherent for 4p and above*/
712 byte &= 3; /* bit [3,2] is count-1*/
713 print_linkn("(0,2) link=", byte);
714 setup_row_direct(0, 2, byte); /*(0,2) direct link done*/
715 setup_temp_row(0, 2);
717 if (!check_connection(7) ) {
718 print_spew("No connection to Node 2.\r\n");
723 /* We found 3 nodes so far. Now setup a temporary
724 * connection from node 0 to node 3 via node 1
726 setup_temp_row(0,1); /* temp. link between nodes 0 and 1 */
727 /* here should setup_row_direct(1,3) at first, before that we should find the link in node 1 to 3*/
729 byte = ((val>>16) & 0xfe) - link_connection(1,0);
730 byte = get_linkn_first(byte);
731 print_linkn("(1,3) link=", byte);
732 setup_row_direct(1,3,byte); /* (1, 3) direct link done*/
733 setup_temp_row(1,3); /* temp. link between nodes 1 and 3 */
735 if (!check_connection(7)) {
736 print_spew("No connection to Node 3.\r\n");
741 /* We found 4 nodes so far. Now setup all nodes for 4p */
743 static const u8 conn4_1[] = {
748 static const u8 conn4_1[] = {
754 setup_row_indirect_group(conn4_1, sizeof(conn4_1)/sizeof(conn4_1[0]));
758 val = pci_read_config32(NODE_HT(7), 0x6c);
759 byte = (val>>2) & 0x3; /* get default link on 7 to 0*/
760 print_linkn("(2,0) link=", byte);
762 setup_row_local(7,2);
763 setup_remote_row_direct(2, 0, byte);
764 setup_remote_node(2); /* Setup the regs on the remote node */
766 static const u8 conn4_2[] = {
770 static const u8 conn4_2[] = {
774 setup_remote_row_indirect_group(conn4_2, sizeof(conn4_2)/sizeof(conn4_2[0]));
776 rename_temp_node(2); /* Rename Node 7 to Node 2 */
777 enable_routing(2); /* Enable routing on Node 2 */
782 val = pci_read_config32(NODE_HT(7), 0x6c);
783 byte = (val>>2) & 0x3; /* get default link on 7 to 1*/
784 print_linkn("(3,1) link=", byte);
786 setup_row_local(7,3);
787 setup_remote_row_direct(3, 1, byte);
788 setup_remote_node(3); /* Setup the regs on the remote node */
791 static const u8 conn4_3[] = {
795 static const u8 conn4_3[] = {
799 setup_remote_row_indirect_group(conn4_3, sizeof(conn4_3)/sizeof(conn4_3[0]));
801 /* We need to init link between 2, and 3 direct link */
803 byte = ((val>>16) & 0xfe) - link_connection(2,0);
804 byte = get_linkn_last_count(byte);
805 print_linkn("(2,3) link=", byte & 3);
807 setup_row_direct(2,3, byte & 0x3);
810 check_connection(7); /* to 3*/
812 #if CONFIG_MAX_CPUS > 4
813 /* We need to find out which link is to node3 */
815 if((byte>>2)==2) { /* one to node3, one to node0, one to node4*/
817 if((val>>16) == 1) { /* that link is to node4, because via node3 it has been set, recompute it*/
819 byte = ((val>>16) & 0xfe) - link_connection(2,0);
820 byte = get_linkn_first(byte);
821 print_linkn("-->(2,3) link=", byte);
822 setup_row_direct(2,3,byte);
824 check_connection(7); /* to 3*/
829 val = pci_read_config32(NODE_HT(7), 0x6c);
830 byte = (val>>2) & 0x3; /* get default link on 7 to 2*/
831 print_linkn("(3,2) link=", byte);
832 setup_remote_row_direct(3,2, byte);
834 /* ready to enable RT for Node 3 */
836 enable_routing(3); /* enable routing on node 3 (temp.) */
839 /*We need to do sth to reverse work for setup_temp_row (0,1) (1,3) */
840 /* it will be done by clear_dead_links */
845 /* optimize physical connections - by LYH */
846 static const u8 opt_conn4[] = {
852 result.needs_reset = optimize_connection_group(opt_conn4, sizeof(opt_conn4)/sizeof(opt_conn4[0]));
858 #endif /* CONFIG_MAX_CPUS > 2 */
860 #if CONFIG_MAX_CPUS > 4
862 static struct setup_smp_result setup_smp6(int needs_reset)
864 struct setup_smp_result result;
869 result.needs_reset = needs_reset;
871 /* Setup and check temporary connection from Node 0 to Node 4 via 2 */
873 byte = ((val>>16) & 0xfe) - link_connection(2,3) - link_connection(2,0);
874 byte = get_linkn_last_count(byte);
876 if((byte>>2)==0) { /* We should have two coherent link on node 2 for 6p and above*/
880 byte &= 3; /* bit [3,2] is count-2*/
881 print_linkn("(2,4) link=", byte);
882 setup_row_direct(2, 4, byte);
884 /* Setup and check temporary connection from Node 0 to Node 4 through 2*/
885 for(byte=0; byte<4; byte+=2) {
886 setup_temp_row(byte,byte+2);
889 if (!check_connection(7) ) {
890 print_spew("No connection to Node 4.\r\n");
895 /* Setup and check temporary connection from Node 0 to Node 5 through 1, 3*/
897 byte = ((val>>16) & 0xfe) - link_connection(3,2) - link_connection(3,1);
898 byte = get_linkn_last_count(byte);
899 if((byte>>2)==0) { /* We should have two coherent links on node 3 for 6p and above*/
904 byte &= 3; /*bit [3,2] is count-2*/
905 print_linkn("(3,5) link=", byte);
906 setup_row_direct(3, 5, byte);
908 setup_temp_row(0,1); /* temp. link between nodes 0 and 1 */
909 for(byte=0; byte<4; byte+=2) {
910 setup_temp_row(byte+1,byte+3);
913 if (!check_connection(7)) {
914 print_spew("No connection to Node 5.\r\n");
919 /* We found 6 nodes so far. Now setup all nodes for 6p */
920 #warning "FIXME we need to find out the correct gateway for 6p"
921 static const u8 conn6_1[] = {
939 setup_row_indirect_group(conn6_1, sizeof(conn6_1)/sizeof(conn6_1[0]));
941 for(byte=0; byte<4; byte+=2) {
942 setup_temp_row(byte,byte+2);
944 val = pci_read_config32(NODE_HT(7), 0x6c);
945 byte = (val>>2) & 0x3; /*get default link on 7 to 2*/
946 print_linkn("(4,2) link=", byte);
948 setup_row_local(7,4);
949 setup_remote_row_direct(4, 2, byte);
950 setup_remote_node(4); /* Setup the regs on the remote node */
951 /* Set indirect connection to 0, to 3 */
952 static const u8 conn6_2[] = {
965 setup_remote_row_indirect_group(conn6_2, sizeof(conn6_2)/sizeof(conn6_2[0]));
971 for(byte=0; byte<4; byte+=2) {
972 setup_temp_row(byte+1,byte+3);
975 val = pci_read_config32(NODE_HT(7), 0x6c);
976 byte = (val>>2) & 0x3; /* get default link on 7 to 3*/
977 print_linkn("(5,3) link=", byte);
978 setup_row_local(7,5);
979 setup_remote_row_direct(5, 3, byte);
980 setup_remote_node(5); /* Setup the regs on the remote node */
983 /* We need to init link between 4, and 5 direct link */
985 byte = ((val>>16) & 0xfe) - link_connection(4,2);
986 byte = get_linkn_last_count(byte);
987 print_linkn("(4,5) link=", byte & 3);
989 setup_row_direct(4,5, byte & 0x3);
993 check_connection(7); /* to 5*/
995 #if CONFIG_MAX_CPUS > 6
996 /* We need to find out which link is to node5 */
998 if((byte>>2)==2) { /* one to node5, one to node2, one to node6*/
1000 if((val>>16) == 1) { /* that link is to node6, because via node 3 node 5 has been set*/
1002 byte = ((val>>16) & 0xfe) - link_connection(4,2);
1003 byte = get_linkn_first(byte);
1004 print_linkn("-->(4,5) link=", byte);
1005 setup_row_direct(4,5,byte);
1006 setup_temp_row(4,5);
1007 check_connection(7); /* to 5*/
1012 val = pci_read_config32(NODE_HT(7), 0x6c);
1013 byte = (val>>2) & 0x3; /* get default link on 7 to 4*/
1014 print_linkn("(5,4) link=", byte);
1015 setup_remote_row_direct(5,4, byte);
1018 /* Set indirect connection to 0, to 3 for indirect we will use clockwise routing */
1019 static const u8 conn6_3[] = {
1020 #if !CROSS_BAR_47_56
1032 setup_remote_row_indirect_group(conn6_3, sizeof(conn6_3)/sizeof(conn6_3[0]));
1034 /* ready to enable RT for 5 */
1035 rename_temp_node(5);
1036 enable_routing(5); /* enable routing on node 5 (temp.) */
1039 /* We need to do sth about reverse about setup_temp_row (0,1), (2,4), (1, 3), (3,5)
1040 * It will be done by clear_dead_links
1042 for(byte=0; byte<4; byte++) {
1043 clear_temp_row(byte);
1047 /* optimize physical connections - by LYH */
1048 static const uint8_t opt_conn6[] ={
1051 #if !CROSS_BAR_47_56
1055 result.needs_reset = optimize_connection_group(opt_conn6, sizeof(opt_conn6)/sizeof(opt_conn6[0]));
1061 #endif /* CONFIG_MAX_CPUS > 4 */
1063 #if CONFIG_MAX_CPUS > 6
1065 static struct setup_smp_result setup_smp8(int needs_reset)
1067 struct setup_smp_result result;
1072 result.needs_reset = needs_reset;
1074 /* Setup and check temporary connection from Node 0 to Node 6 via 2 and 4 to 7 */
1076 #if !CROSS_BAR_47_56
1077 byte = ((val>>16) & 0xfe) - link_connection(4,5) - link_connection(4,2);
1079 byte = ((val>>16) & 0xfe) - link_connection(4,2);
1082 #if TRY_HIGH_FIRST == 1
1083 byte = get_linkn_last_count(byte); /* Max link to 6*/
1084 if((byte>>2)==0) { /* We should have two or three coherent links on node 4 for 8p*/
1088 byte &= 3; /* bit [3,2] is count-1 or 2*/
1090 byte = get_linkn_first(byte); /*Min link to 6*/
1092 print_linkn("(4,6) link=", byte);
1093 setup_row_direct(4, 6, byte);
1095 /* Setup and check temporary connection from Node 0 to Node 6 through 2, and 4*/
1096 for(byte=0; byte<6; byte+=2) {
1097 setup_temp_row(byte,byte+2);
1100 if (!check_connection(7) ) {
1101 print_spew("No connection to Node 6.\r\n");
1105 #if !CROSS_BAR_47_56
1106 /* Setup and check temporary connection from Node 0 to Node 7 through 1, 3, 5*/
1108 byte = ((val>>16) & 0xfe) - link_connection(5,4) - link_connection(5,3);
1109 byte = get_linkn_first(byte);
1110 print_linkn("(5,7) link=", byte);
1111 setup_row_direct(5, 7, byte);
1113 setup_temp_row(0,1); /* temp. link between nodes 0 and 1 */
1114 for(byte=0; byte<6; byte+=2) {
1115 setup_temp_row(byte+1,byte+3);
1119 byte = ((val>>16) & 0xfe) - link_connection(4,2) - link_connection(4,6);
1120 byte = get_linkn_first(byte);
1121 print_linkn("(4,7) link=", byte);
1122 setup_row_direct(4, 7, byte);
1124 /* Setup and check temporary connection from Node 0 to Node 7 through 2, and 4*/
1125 for(byte=0; byte<4; byte+=2) {
1126 setup_temp_row(byte,byte+2);
1128 setup_temp_row(4, 7);
1132 if (!check_connection(7)) {
1133 print_spew("No connection to Node 7.\r\n");
1139 /* We found 8 nodes so far. Now setup all nodes for 8p */
1140 static const u8 conn8_1[] = {
1141 #if !CROSS_BAR_47_56
1163 setup_row_indirect_group(conn8_1,sizeof(conn8_1)/sizeof(conn8_1[0]));
1165 for(byte=0; byte<6; byte+=2) {
1166 setup_temp_row(byte,byte+2);
1168 val = pci_read_config32(NODE_HT(7), 0x6c);
1169 byte = (val>>2) & 0x3; /* get default link on 7 to 4*/
1170 print_linkn("(6,4) link=", byte);
1172 setup_row_local(7,6);
1173 setup_remote_row_direct(6, 4, byte);
1174 setup_remote_node(6); /* Setup the regs on the remote node */
1175 /* Set indirect connection to 0, to 3 */
1176 #warning "FIXME we need to find out the correct gateway for 8p"
1177 static const u8 conn8_2[] = {
1178 #if !CROSS_BAR_47_56
1192 setup_remote_row_indirect_group(conn8_2, sizeof(conn8_2)/sizeof(conn8_2[0]));
1194 rename_temp_node(6);
1197 #if !CROSS_BAR_47_56
1198 setup_temp_row(0,1);
1199 for(byte=0; byte<6; byte+=2) {
1200 setup_temp_row(byte+1,byte+3);
1203 val = pci_read_config32(NODE_HT(7), 0x6c);
1204 byte = (val>>2) & 0x3; /* get default link on 7 to 5*/
1205 print_linkn("(7,5) link=", byte);
1206 setup_row_local(7,7);
1207 setup_remote_row_direct(7, 5, byte);
1210 for(byte=0; byte<4; byte+=2) {
1211 setup_temp_row(byte,byte+2);
1213 setup_temp_row(4,7);
1214 val = pci_read_config32(NODE_HT(7), 0x6c);
1215 byte = (val>>2) & 0x3; /* get default link on 7 to 4*/
1216 print_linkn("(7,4) link=", byte);
1217 setup_row_local(7,7);
1218 setup_remote_row_direct(7, 4, byte);
1219 /* till now 4-7, 7-4 done. */
1221 setup_remote_node(7); /* Setup the regs on the remote node */
1224 /* here init 5, 7 */
1225 /* Setup and check temporary connection from Node 0 to Node 5 through 1, 3, 5*/
1227 byte = ((val>>16) & 0xfe) - link_connection(5,3);
1228 byte = get_linkn_last(byte);
1229 print_linkn("(5,7) link=", byte);
1230 setup_row_direct(5, 7, byte);
1232 setup_temp_row(0,1); /* temp. link between nodes 0 and 1 */
1233 for(byte=0; byte<6; byte+=2) {
1234 setup_temp_row(byte+1,byte+3);
1237 if (!check_connection(7)) {
1238 /* We need to recompute link to 7 */
1240 byte = ((val>>16) & 0xfe) - link_connection(5,3);
1241 byte = get_linkn_first(byte);
1243 print_linkn("-->(5,7) link=", byte);
1244 setup_row_direct(5, 7, byte);
1246 setup_temp_row(0,1); /* temp. link between nodes 0 and 1 */
1247 for(byte=0; byte<6; byte+=2) {
1248 setup_temp_row(byte+1,byte+3);
1251 setup_temp_row(5,7);
1253 check_connection(7);
1255 val = pci_read_config32(NODE_HT(7), 0x6c);
1256 byte = (val>>2) & 0x3; /* get default link on 7 to 5*/
1257 print_linkn("(7,5) link=", byte);
1258 setup_remote_row_direct(7, 5, byte);
1259 /*Till now 57, 75 done */
1261 /* init init 5, 6 */
1263 byte = ((val>>16) & 0xfe) - link_connection(5,3) - link_connection(5,7);
1264 byte = get_linkn_first(byte);
1265 print_linkn("(5,6) link=", byte);
1266 setup_row_direct(5, 6, byte);
1270 byte = ((val>>16) & 0xfe) - link_connection(6,4);
1271 byte = get_linkn_last(byte);
1272 print_linkn("(6,7) link=", byte);
1273 setup_row_direct(6, 7, byte);
1275 for(byte=0; byte<6; byte+=2) {
1276 setup_temp_row(byte,byte+2);
1278 setup_temp_row(6,7);
1280 if (!check_connection(7)) {
1281 /* We need to recompute link to 7 */
1283 byte = ((val>>16) & 0xfe) - link_connection(6,4);
1284 byte = get_linkn_first(byte);
1285 print_linkn("-->(6,7) link=", byte);
1287 setup_row_direct(6, 7, byte);
1289 for(byte=0; byte<6; byte+=2) {
1290 setup_temp_row(byte,byte+2); /* temp. link between nodes 0 and 2 */
1293 setup_temp_row(6,7);
1294 check_connection(7);
1296 val = pci_read_config32(NODE_HT(7), 0x6c);
1297 byte = (val>>2) & 0x3; /* get default link on 7 to 6*/
1298 print_linkn("(7,6) link=", byte);
1300 setup_remote_row_direct(7, 6, byte);
1301 /* Till now 67, 76 done*/
1305 byte = ((val>>16) & 0xfe) - link_connection(6,4) - link_connection(6,7);
1306 byte = get_linkn_first(byte);
1307 print_linkn("(6,5) link=", byte);
1308 setup_row_direct(6, 5, byte);
1312 #if !CROSS_BAR_47_56
1313 /* We need to init link between 6, and 7 direct link */
1315 byte = ((val>>16) & 0xfe) - link_connection(6,4);
1316 byte = get_linkn_first(byte);
1317 print_linkn("(6,7) link=", byte);
1318 setup_row_direct(6,7, byte);
1321 byte = ((val>>16) & 0xfe) - link_connection(7,5);
1322 byte = get_linkn_first(byte);
1323 print_linkn("(7,6) link=", byte);
1324 setup_row_direct(7,6, byte);
1327 /* Set indirect connection to 0, to 3 for indirect we will use clockwise routing */
1328 static const u8 conn8_3[] = {
1329 #if !CROSS_BAR_47_56
1330 0, 7, 1, /* restore it*/
1342 0, 7, 2, 0, /* restore it*/
1359 setup_row_indirect_group(conn8_3, sizeof(conn8_3)/sizeof(conn8_3[0]));
1361 /* ready to enable RT for Node 7 */
1362 enable_routing(7); /* enable routing on node 7 (temp.) */
1365 static const uint8_t opt_conn8[] ={
1374 /* optimize physical connections - by LYH */
1375 result.needs_reset = optimize_connection_group(opt_conn8, sizeof(opt_conn8)/sizeof(opt_conn8[0]));
1380 #endif /* CONFIG_MAX_CPUS > 6 */
1383 #if CONFIG_MAX_CPUS > 1
1385 static struct setup_smp_result setup_smp(void)
1387 struct setup_smp_result result;
1389 print_spew("Enabling SMP settings\r\n");
1391 result = setup_smp2();
1392 #if CONFIG_MAX_CPUS > 2
1393 result = setup_smp4(result.needs_reset);
1396 #if CONFIG_MAX_CPUS > 4
1397 result = setup_smp6(result.needs_reset);
1400 #if CONFIG_MAX_CPUS > 6
1401 result = setup_smp6(result.needs_reset);
1404 print_debug_hex8(result.nodes);
1405 print_debug(" nodes initialized.\r\n");
1411 static unsigned verify_mp_capabilities(unsigned nodes)
1413 unsigned node, mask;
1415 mask = 0x06; /* BigMPCap */
1417 for (node=0; node<nodes; node++) {
1418 mask &= pci_read_config32(NODE_MC(node), 0xe8);
1422 #if CONFIG_MAX_CPUS > 2
1423 case 0x02: /* MPCap */
1425 print_err("Going back to DP\r\n");
1430 case 0x00: /* Non SMP */
1432 print_err("Going back to UP\r\n");
1443 static void clear_dead_routes(unsigned nodes)
1447 #if CONFIG_MAX_CPUS > 6
1448 if(nodes==8) return;/* don't touch (7,7)*/
1454 for(node = 7; node >= 0; node--) {
1455 for(row = 7; row >= last_row; row--) {
1456 fill_row(node, row, DEFAULT);
1460 /* Update the local row */
1461 for( node=0; node<nodes; node++) {
1463 for(row =0; row<nodes; row++) {
1464 val |= get_row(node, row);
1466 fill_row(node, node, (((val & 0xff) | ((val >> 8) & 0xff)) << 16) | 0x0101);
1469 #endif /* CONFIG_MAX_CPUS > 1 */
1471 static void coherent_ht_finalize(unsigned nodes)
1476 /* set up cpu count and node count and enable Limit
1477 * Config Space Range for all available CPUs.
1478 * Also clear non coherent hypertransport bus range
1479 * registers on Hammer A0 revision.
1482 print_spew("coherent_ht_finalize\r\n");
1483 rev_a0 = is_cpu_rev_a0();
1484 for (node = 0; node < nodes; node++) {
1487 dev = NODE_HT(node);
1489 /* Set the Total CPU and Node count in the system */
1490 val = pci_read_config32(dev, 0x60);
1491 val &= (~0x000F0070);
1492 val |= ((nodes-1)<<16)|((nodes-1)<<4);
1493 pci_write_config32(dev, 0x60, val);
1495 /* Only respond to real cpu pci configuration cycles
1496 * and optimize the HT settings
1498 val=pci_read_config32(dev, 0x68);
1499 val &= ~((HTTC_BUF_REL_PRI_MASK << HTTC_BUF_REL_PRI_SHIFT) |
1500 (HTTC_MED_PRI_BYP_CNT_MASK << HTTC_MED_PRI_BYP_CNT_SHIFT) |
1501 (HTTC_HI_PRI_BYP_CNT_MASK << HTTC_HI_PRI_BYP_CNT_SHIFT));
1502 val |= HTTC_LIMIT_CLDT_CFG |
1503 (HTTC_BUF_REL_PRI_8 << HTTC_BUF_REL_PRI_SHIFT) |
1505 (3 << HTTC_MED_PRI_BYP_CNT_SHIFT) |
1506 (3 << HTTC_HI_PRI_BYP_CNT_SHIFT);
1507 pci_write_config32(dev, 0x68, val);
1510 print_spew("shit it is an old cup\n");
1511 pci_write_config32(dev, 0x94, 0);
1512 pci_write_config32(dev, 0xb4, 0);
1513 pci_write_config32(dev, 0xd4, 0);
1517 print_spew("done\r\n");
1520 static int apply_cpu_errata_fixes(unsigned nodes, int needs_reset)
1523 for(node = 0; node < nodes; node++) {
1526 dev = NODE_MC(node);
1527 if (is_cpu_pre_c0()) {
1530 * Limit the number of downstream posted requests to 1
1532 cmd = pci_read_config32(dev, 0x70);
1533 if ((cmd & (3 << 0)) != 2) {
1536 pci_write_config32(dev, 0x70, cmd );
1539 cmd = pci_read_config32(dev, 0x7c);
1540 if ((cmd & (3 << 4)) != 0) {
1543 pci_write_config32(dev, 0x7c, cmd );
1546 /* Clock Power/Timing Low */
1547 cmd = pci_read_config32(dev, 0xd4);
1548 if (cmd != 0x000D0001) {
1550 pci_write_config32(dev, 0xd4, cmd);
1551 needs_reset = 1; /* Needed? */
1558 * Set Clk Ramp Hystersis to 7
1559 * Clock Power/Timing Low
1561 cmd_ref = 0x04e20707; /* Registered */
1562 cmd = pci_read_config32(dev, 0xd4);
1563 if(cmd != cmd_ref) {
1564 pci_write_config32(dev, 0xd4, cmd_ref );
1565 needs_reset = 1; /* Needed? */
1572 static int optimize_link_read_pointers(unsigned nodes, int needs_reset)
1575 for(node = 0; node < nodes; node++) {
1576 device_t f0_dev, f3_dev;
1577 uint32_t cmd_ref, cmd;
1579 f0_dev = NODE_HT(node);
1580 f3_dev = NODE_MC(node);
1581 cmd_ref = cmd = pci_read_config32(f3_dev, 0xdc);
1582 for(link = 0; link < 3; link++) {
1585 /* This works on an Athlon64 because unimplemented links return 0 */
1586 reg = 0x98 + (link * 0x20);
1587 link_type = pci_read_config32(f0_dev, reg);
1588 if ((link_type & 7) == 3) {
1589 cmd &= ~(0xff << (link *8));
1590 cmd |= 0x25 << (link *8);
1593 if (cmd != cmd_ref) {
1594 pci_write_config32(f3_dev, 0xdc, cmd);
1601 static int setup_coherent_ht_domain(void)
1603 struct setup_smp_result result;
1605 enable_bsp_routing();
1607 #if CONFIG_MAX_CPUS > 1
1608 result = setup_smp();
1609 result.nodes = verify_mp_capabilities(result.nodes);
1610 clear_dead_routes(result.nodes);
1613 result.needs_reset = 0;
1616 if (result.nodes == 1) {
1617 setup_uniprocessor();
1619 coherent_ht_finalize(result.nodes);
1620 result.needs_reset = apply_cpu_errata_fixes(result.nodes, result.needs_reset);
1621 result.needs_reset = optimize_link_read_pointers(result.nodes, result.needs_reset);
1622 return result.needs_reset;