1 /* coherent hypertransport initialization for AMD64
3 * written by Stefan Reinauer <stepan@openbios.org>
4 * (c) 2003-2004 by SuSE Linux AG
6 * (c) 2004 Tyan Computer
7 * 2004.12 yhlu added support for creating the routing table dynamically.
8 * It also supports 8-way systems (8-way ladder or 8-way crossbar).
10 * This code is licensed under GPL.
14 * This algorithm assumes a grid configuration as follows:
17 * org. : 1x1 2x1 2x2 2x3 2x4
66 #include <device/pci_def.h>
67 #include <device/pci_ids.h>
68 #include <device/hypertransport_def.h>
69 #include "arch/romcc_io.h"
72 #define enable_bsp_routing() enable_routing(0)
74 #define NODE_HT(x) PCI_DEV(0,24+x,0)
75 #define NODE_MP(x) PCI_DEV(0,24+x,1)
76 #define NODE_MC(x) PCI_DEV(0,24+x,3)
78 #define DEFAULT 0x00010101 /* default row entry */
83 #ifndef CROSS_BAR_47_56
84 #define CROSS_BAR_47_56 0
87 #ifndef TRY_HIGH_FIRST
88 #define TRY_HIGH_FIRST 0
/* Print a label, then a byte value in hex, then CR/LF on the debug console. */
static inline void print_linkn(const char *strval, uint8_t byteval)
{
	print_debug(strval);
	print_debug_hex8(byteval);
	print_debug("\r\n");
}
97 static void disable_probes(void)
99 /* disable read/write/fill probes for uniprocessor setup
100 * they don't make sense if only one cpu is available
103 /* Hypetransport Transaction Control Register
105 * [ 0: 0] Disable read byte probe
107 * 1 = Probes not issued
108 * [ 1: 1] Disable Read Doubleword probe
110 * 1 = Probes not issued
111 * [ 2: 2] Disable write byte probes
113 * 1 = Probes not issued
114 * [ 3: 3] Disable Write Doubleword Probes
116 * 1 = Probes not issued.
117 * [10:10] Disable Fill Probe
118 * 0 = Probes issued for cache fills
119 * 1 = Probes not issued for cache fills.
124 print_spew("Disabling read/write/fill probes for UP... ");
126 val=pci_read_config32(NODE_HT(0), 0x68);
127 val |= (1<<10)|(1<<9)|(1<<8)|(1<<4)|(1<<3)|(1<<2)|(1<<1)|(1 << 0);
128 pci_write_config32(NODE_HT(0), 0x68, val);
130 print_spew("done.\r\n");
135 #ifndef ENABLE_APIC_EXT_ID
136 #define ENABLE_APIC_EXT_ID 0
139 static void enable_apic_ext_id(u8 node)
141 #if ENABLE_APIC_EXT_ID==1
142 #warning "FIXME Is the right place to enable apic ext id here?"
146 val = pci_read_config32(NODE_HT(node), 0x68);
147 val |= (HTTC_APIC_EXT_SPUR | HTTC_APIC_EXT_ID | HTTC_APIC_EXT_BRD_CST);
148 pci_write_config32(NODE_HT(node), 0x68, val);
154 static void enable_routing(u8 node)
158 /* HT Initialization Control Register
160 * [ 0: 0] Routing Table Disable
161 * 0 = Packets are routed according to routing tables
162 * 1 = Packets are routed according to the default link field
163 * [ 1: 1] Request Disable (BSP should clear this)
164 * 0 = Request packets may be generated
165 * 1 = Request packets may not be generated.
166 * [ 3: 2] Default Link (Read-only)
170 * 11 = CPU on same node
172 * - Scratch bit cleared by a cold reset
173 * [ 5: 5] BIOS Reset Detect
174 * - Scratch bit cleared by a cold reset
175 * [ 6: 6] INIT Detect
176 * - Scratch bit cleared by a warm or cold reset not by an INIT
180 /* Enable routing table */
181 print_spew("Enabling routing table for node ");
182 print_spew_hex8(node);
184 val=pci_read_config32(NODE_HT(node), 0x6c);
185 val &= ~((1<<1)|(1<<0));
186 pci_write_config32(NODE_HT(node), 0x6c, val);
188 print_spew(" done.\r\n");
191 static void fill_row(u8 node, u8 row, u32 value)
193 pci_write_config32(NODE_HT(node), 0x40+(row<<2), value);
196 #if CONFIG_MAX_CPUS > 1
197 static u8 link_to_register(int ldt)
200 * [ 0: 3] Request Route
201 * [0] Route to this node
202 * [1] Route to Link 0
203 * [2] Route to Link 1
204 * [3] Route to Link 2
207 if (ldt&0x08) return 0x40;
208 if (ldt&0x04) return 0x20;
209 if (ldt&0x02) return 0x00;
211 /* we should never get here */
212 print_spew("Unknown Link\n");
216 static u32 get_row(u8 node, u8 row)
218 return pci_read_config32(NODE_HT(node), 0x40+(row<<2));
221 static int link_connection(u8 src, u8 dest)
223 return get_row(src, dest) & 0x0f;
226 static void rename_temp_node(u8 node)
230 print_spew("Renaming current temporary node to ");
231 print_spew_hex8(node);
233 val=pci_read_config32(NODE_HT(7), 0x60);
234 val &= (~7); /* clear low bits. */
235 val |= node; /* new node */
236 pci_write_config32(NODE_HT(7), 0x60, val);
238 print_spew(" done.\r\n");
241 static int check_connection(u8 dest)
243 /* See if we have a valid connection to dest */
246 /* Verify that the coherent hypertransport link is
247 * established and actually working by reading the
248 * remode node's vendor/device id
250 val = pci_read_config32(NODE_HT(dest),0);
251 if(val != 0x11001022)
/*
 * Read the 16-bit HyperTransport frequency-capability word at config
 * offset `pos` of `dev` and clear the bits for frequencies that must not
 * be used: vendor-specific ones always, 800MHz on the AMD 8131 and 8151,
 * and 1GHz on device id 0x1100.
 * NOTE(review): this listing omits the local declarations, the closing
 * braces and the return statement (presumably freq_cap) -- confirm
 * against the full file, including any conditional-compilation guards.
 */
257 static unsigned read_freq_cap(device_t dev, unsigned pos)
259 /* Handle bugs in valid hypertransport frequency reporting */
263 freq_cap = pci_read_config16(dev, pos);
264 freq_cap &= ~(1 << HT_FREQ_VENDOR); /* Ignore Vendor HT frequencies */
/* The vendor/device id selects which errata apply. */
266 id = pci_read_config32(dev, 0);
268 //is it coherent ht link?
269 /* AMD 8131 Errata 48 */
270 if (id == (PCI_VENDOR_ID_AMD | (PCI_DEVICE_ID_AMD_8131_PCIX << 16))) {
271 freq_cap &= ~(1 << HT_FREQ_800Mhz);
273 /* AMD 8151 Errata 23 */
274 if (id == (PCI_VENDOR_ID_AMD | (PCI_DEVICE_ID_AMD_8151_SYSCTRL << 16))) {
275 freq_cap &= ~(1 << HT_FREQ_800Mhz);
278 /* AMD K8 Unsupported 1Ghz? */
279 if (id == (PCI_VENDOR_ID_AMD | (0x1100 << 16))) {
280 freq_cap &= ~(1 << HT_FREQ_1000Mhz);
285 static int optimize_connection(device_t node1, uint8_t link1, device_t node2, uint8_t link2)
287 static const uint8_t link_width_to_pow2[]= { 3, 4, 0, 5, 1, 2, 0, 0 };
288 static const uint8_t pow2_to_link_width[] = { 0x7, 4, 5, 0, 1, 3 };
289 uint16_t freq_cap1, freq_cap2, freq_cap, freq_mask;
290 uint8_t width_cap1, width_cap2, width_cap, width, old_width, ln_width1, ln_width2;
291 uint8_t freq, old_freq;
293 /* Set link width and frequency */
295 /* Initially assume everything is already optimized and I don't need a reset */
298 /* Get the frequency capabilities */
299 freq_cap1 = read_freq_cap(node1, link1 + PCI_HT_CAP_HOST_FREQ_CAP);
300 freq_cap2 = read_freq_cap(node2, link2 + PCI_HT_CAP_HOST_FREQ_CAP);
302 /* Calculate the highest possible frequency */
303 freq = log2(freq_cap1 & freq_cap2);
305 /* See if I am changing the link freqency */
306 old_freq = pci_read_config8(node1, link1 + PCI_HT_CAP_HOST_FREQ);
307 needs_reset |= old_freq != freq;
308 old_freq = pci_read_config8(node2, link2 + PCI_HT_CAP_HOST_FREQ);
309 needs_reset |= old_freq != freq;
311 /* Set the Calulcated link frequency */
312 pci_write_config8(node1, link1 + PCI_HT_CAP_HOST_FREQ, freq);
313 pci_write_config8(node2, link2 + PCI_HT_CAP_HOST_FREQ, freq);
315 /* Get the width capabilities */
316 width_cap1 = pci_read_config8(node1, link1 + PCI_HT_CAP_HOST_WIDTH);
317 width_cap2 = pci_read_config8(node2, link2 + PCI_HT_CAP_HOST_WIDTH);
319 /* Calculate node1's input width */
320 ln_width1 = link_width_to_pow2[width_cap1 & 7];
321 ln_width2 = link_width_to_pow2[(width_cap2 >> 4) & 7];
322 if (ln_width1 > ln_width2) {
323 ln_width1 = ln_width2;
325 width = pow2_to_link_width[ln_width1];
326 /* Calculate node1's output width */
327 ln_width1 = link_width_to_pow2[(width_cap1 >> 4) & 7];
328 ln_width2 = link_width_to_pow2[width_cap2 & 7];
329 if (ln_width1 > ln_width2) {
330 ln_width1 = ln_width2;
332 width |= pow2_to_link_width[ln_width1] << 4;
334 /* See if I am changing node1's width */
335 old_width = pci_read_config8(node1, link1 + PCI_HT_CAP_HOST_WIDTH + 1);
336 needs_reset |= old_width != width;
338 /* Set node1's widths */
339 pci_write_config8(node1, link1 + PCI_HT_CAP_HOST_WIDTH + 1, width);
341 /* Calculate node2's width */
342 width = ((width & 0x70) >> 4) | ((width & 0x7) << 4);
344 /* See if I am changing node2's width */
345 old_width = pci_read_config8(node2, link2 + PCI_HT_CAP_HOST_WIDTH + 1);
346 needs_reset |= old_width != width;
348 /* Set node2's widths */
349 pci_write_config8(node2, link2 + PCI_HT_CAP_HOST_WIDTH + 1, width);
/*
 * Build routing-table row `row` on node `source` from that node's own link
 * state and store it with fill_row().  Each of the three links is probed
 * through its register at function 0 offset 0x98 + 0x20*linkn; links for
 * which (reg & 0x17) != 3 are skipped.
 * NOTE(review): the statements that accumulate `val` are not visible in
 * this listing -- confirm against the full file before relying on this.
 */
354 static void setup_row_local(u8 source, u8 row) /* source will be 7 when it is for temp use*/
359 for(linkn = 0; linkn<3; linkn++) {
362 regpos = 0x98 + 0x20 * linkn;
363 reg = pci_read_config32(NODE_HT(source), regpos);
364 if ((reg & 0x17) != 3) continue; /* it is not coherent or not connected */
369 fill_row(source,row, val);
/*
 * Write, on node `temp`, the routing row for `dest` when `dest` is directly
 * attached to `source` through link `linkn`.  The response route (bits
 * 8 and up) mirrors the request route on the same link; in the visible
 * branch the top half of the row is taken from `source`'s own row with
 * this link's bit subtracted.
 * NOTE(review): the initial assignment of `val` and the body and braces of
 * the if/else are not visible in this listing -- confirm against the full
 * file.
 */
372 static void setup_row_direct_x(u8 temp, u8 source, u8 dest, u8 linkn)
377 val |= 1<<(linkn+1+8); /* for a direct connection the response route should equal the request route */
379 if(((source &1)!=(dest &1))
381 && (source<4) && (dest<4)
386 /* for CROSS_BAR_47_56: 47, 74, 56, 65 should be here too */
387 val_s = get_row(temp, source);
388 val |= ((val_s>>16) - (1<<(linkn+1)))<<16;
391 fill_row(temp,dest, val );
394 static void setup_row_direct(u8 source, u8 dest, u8 linkn){
395 setup_row_direct_x(source, source, dest, linkn);
398 static void setup_remote_row_direct(u8 source, u8 dest, u8 linkn){
399 setup_row_direct_x(7, source, dest, linkn);
/*
 * Return the number (0..2) of the lowest link whose bit is set in the
 * route mask `byte` (bit 1 = link 0, bit 2 = link 1, bit 3 = link 2).
 * When none of these bits is set, `byte` is returned unchanged.
 */
static uint8_t get_linkn_first(uint8_t byte)
{
	if (byte & 0x02) {
		byte = 0;
	} else if (byte & 0x04) {
		byte = 1;
	} else if (byte & 0x08) {
		byte = 2;
	}
	return byte;
}
/*
 * Return the number (0..2) of the highest link whose bit is set in the
 * route mask `byte` (bit 1 = link 0, bit 2 = link 1, bit 3 = link 2).
 * The link number is accumulated in the upper nibble and shifted down on
 * return; each later match overrides the earlier one.
 */
static uint8_t get_linkn_last(uint8_t byte)
{
	if (byte & 0x02) { byte &= 0x0f; byte |= 0x00; }
	if (byte & 0x04) { byte &= 0x0f; byte |= 0x10; }
	if (byte & 0x08) { byte &= 0x0f; byte |= 0x20; }
	return byte >> 4;
}
/*
 * Like get_linkn_last(), but also counts the links present.
 *
 * Result layout: bits [1:0] = number of the highest link set in `byte`,
 * bits [3:2] = how many of the three link bits are set.  Callers use
 * (result >> 2) as the count and (result & 3) as the link number.
 */
static uint8_t get_linkn_last_count(uint8_t byte)
{
	/* Start with a clean upper nibble so the count begins at zero. */
	byte &= 0x0f;
	/* Bits [5:4] track the last link seen, bits [7:6] the running count. */
	if (byte & 0x02) { byte &= 0xcf; byte |= 0x00; byte += 0x40; }
	if (byte & 0x04) { byte &= 0xcf; byte |= 0x10; byte += 0x40; }
	if (byte & 0x08) { byte &= 0xcf; byte |= 0x20; byte += 0x40; }
	return byte >> 4;
}
427 static void setup_temp_row(u8 source, u8 dest)
429 /* copy val from (source, dest) to (source,7) */
430 fill_row(source,7,get_row(source,dest));
433 static void clear_temp_row(u8 source)
435 fill_row(source, 7, DEFAULT);
438 static void setup_remote_node(u8 node)
440 static const uint8_t pci_reg[] = {
441 0x44, 0x4c, 0x54, 0x5c, 0x64, 0x6c, 0x74, 0x7c,
442 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78,
443 0x84, 0x8c, 0x94, 0x9c, 0xa4, 0xac, 0xb4, 0xbc,
444 0x80, 0x88, 0x90, 0x98, 0xa0, 0xa8, 0xb0, 0xb8,
445 0xc4, 0xcc, 0xd4, 0xdc,
446 0xc0, 0xc8, 0xd0, 0xd8,
447 0xe0, 0xe4, 0xe8, 0xec,
451 print_spew("setup_remote_node: ");
453 /* copy the default resource map from node 0 */
454 for(i = 0; i < sizeof(pci_reg)/sizeof(pci_reg[0]); i++) {
458 value = pci_read_config32(NODE_MP(0), reg);
459 pci_write_config32(NODE_MP(7), reg, value);
462 print_spew("done\r\n");
465 #endif /* CONFIG_MAX_CPUS > 1*/
468 #if CONFIG_MAX_CPUS > 2
470 static void setup_row_indirect_x(u8 temp, u8 source, u8 dest, u8 gateway)
472 static void setup_row_indirect_x(u8 temp, u8 source, u8 dest, u8 gateway, u8 diff)
475 /*for indirect connection, we need to compute the val from val_s(source, source), and val_g(source, gateway) */
481 val_s = get_row(temp, source);
482 val = get_row(temp, gateway);
489 diff = ((source&1)!=(dest &1));
492 if(diff && (val_s!=(val&0xff)) ) { /* use another connect as response*/
494 #if CONFIG_MAX_CPUS > 4
496 /* Some node have two links left
497 * don't worry we only have (2, (3 as source need to handle
500 byte = get_linkn_last_count(byte);
501 if((byte>>2)>1) { /* make sure not the corner*/
503 val_s-=link_connection(temp, source-2); /* -down*/
505 val_s-=link_connection(temp, source+2); /* -up*/
513 if(diff) { /* cross rung?*/
517 val_s = get_row(temp, source);
518 val |= ((val_s>>16) - link_connection(temp, gateway))<<16;
521 fill_row(temp, dest, val);
526 static void setup_row_indirect(u8 source, u8 dest, u8 gateway)
528 setup_row_indirect_x(source, source, dest, gateway);
531 static void setup_row_indirect(u8 source, u8 dest, u8 gateway, u8 diff)
533 setup_row_indirect_x(source, source, dest, gateway, diff);
537 static void setup_row_indirect_group(const u8 *conn, int num)
542 for(i=0; i<num; i+=3) {
543 setup_row_indirect(conn[i], conn[i+1],conn[i+2]);
545 for(i=0; i<num; i+=4) {
546 setup_row_indirect(conn[i], conn[i+1],conn[i+2], conn[i+3]);
553 static void setup_remote_row_indirect(u8 source, u8 dest, u8 gateway)
555 setup_row_indirect_x(7, source, dest, gateway);
558 static void setup_remote_row_indirect(u8 source, u8 dest, u8 gateway, u8 diff)
560 setup_row_indirect_x(7, source, dest, gateway, diff);
564 static void setup_remote_row_indirect_group(const u8 *conn, int num)
569 for(i=0; i<num; i+=3) {
570 setup_remote_row_indirect(conn[i], conn[i+1],conn[i+2]);
572 for(i=0; i<num; i+=4) {
573 setup_remote_row_indirect(conn[i], conn[i+1],conn[i+2], conn[i+3]);
578 #endif /*CONFIG_MAX_CPUS > 2*/
581 static void setup_uniprocessor(void)
583 print_spew("Enabling UP settings\r\n");
/* Outcome of one coherent-HT discovery/setup stage. */
struct setup_smp_result {
	int nodes;        /* number of nodes found and initialized so far */
	int needs_reset;  /* non-zero when a reset is required for link settings to take effect */
};
#if CONFIG_MAX_CPUS > 2
/*
 * Optimize every link in a list of node pairs.
 *
 * opt_conn holds `num` node ids, consumed two at a time as (a, b) pairs;
 * for each pair the link a->b and its counterpart b->a are looked up in
 * the routing tables and handed to optimize_connection().
 * Returns non-zero when ANY of the links needs a reset.  The per-pair
 * results are OR-ed together; a plain assignment would report only the
 * last pair and could hide a required reset.
 */
static int optimize_connection_group(const u8 *opt_conn, int num)
{
	int needs_reset = 0;
	int i;

	for (i = 0; i + 1 < num; i += 2) {
		u8 a = opt_conn[i];
		u8 b = opt_conn[i + 1];

		needs_reset |= optimize_connection(
			NODE_HT(a), 0x80 + link_to_register(link_connection(a, b)),
			NODE_HT(b), 0x80 + link_to_register(link_connection(b, a)));
	}
	return needs_reset;
}
#endif
605 #if CONFIG_MAX_CPUS > 1
606 static struct setup_smp_result setup_smp2(void)
608 struct setup_smp_result result;
612 result.needs_reset = 0;
614 setup_row_local(0, 0); /* it will update the broadcast RT*/
617 byte = (val>>16) & 0xfe;
618 if(byte<0x2) { /* no coherent connection so get out.*/
623 /* Setup and check a temporary connection to node 1 */
624 #if TRY_HIGH_FIRST == 1
625 byte = get_linkn_last(byte); /* Max Link to node1 */
627 byte = get_linkn_first(byte); /*Min Link to node1 --- according to AMD*/
629 print_linkn("(0,1) link=", byte);
630 setup_row_direct(0,1, byte);
631 setup_temp_row(0, 1);
633 if (!check_connection(7)) {
634 print_spew("No connection to Node 1.\r\n");
639 /* We found 2 nodes so far */
640 val = pci_read_config32(NODE_HT(7), 0x6c);
641 byte = (val>>2) & 0x3; /*get default link on node7 to node0*/
642 print_linkn("(1,0) link=", byte);
643 setup_row_local(7,1);
644 setup_remote_row_direct(1, 0, byte);
646 #if CONFIG_MAX_CPUS > 4
648 byte = (val>>16) & 0xfe;
649 byte = get_linkn_last_count(byte);
650 if((byte>>2)==3) { /* Oh! we need to treat it as node2. So use another link*/
652 byte = (val>>16) & 0xfe;
653 #if TRY_HIGH_FIRST == 1
654 byte = get_linkn_first(byte); /* Min link to Node1 */
656 byte = get_linkn_last(byte); /* Max link to Node1*/
658 print_linkn("-->(0,1) link=", byte);
659 setup_row_direct(0,1, byte);
660 setup_temp_row(0, 1);
662 if (!check_connection(7)) {
663 print_spew("No connection to Node 1.\r\n");
668 /* We found 2 nodes so far */
669 val = pci_read_config32(NODE_HT(7), 0x6c);
670 byte = (val>>2) & 0x3; /* get default link on node7 to node0*/
671 print_linkn("-->(1,0) link=", byte);
672 setup_row_local(7,1);
673 setup_remote_row_direct(1, 0, byte);
677 setup_remote_node(1); /* Setup the regs on the remote node */
678 rename_temp_node(1); /* Rename Node 7 to Node 1 */
679 enable_routing(1); /* Enable routing on Node 1 */
681 /*don't need and it is done by clear_dead_links */
685 result.needs_reset = optimize_connection(
686 NODE_HT(0), 0x80 + link_to_register(link_connection(0,1)),
687 NODE_HT(1), 0x80 + link_to_register(link_connection(1,0)) );
691 #endif /*CONFIG_MAX_CPUS > 1 */
693 #if CONFIG_MAX_CPUS > 2
695 static struct setup_smp_result setup_smp4(int needs_reset)
697 struct setup_smp_result result;
702 result.needs_reset = needs_reset;
704 /* Setup and check temporary connection from Node 0 to Node 2 */
706 byte = ((val>>16) & 0xfe) - link_connection(0,1);
707 byte = get_linkn_last_count(byte);
709 if((byte>>2)==0) { /* We should have two coherent for 4p and above*/
714 byte &= 3; /* bit [3,2] is count-1*/
715 print_linkn("(0,2) link=", byte);
716 setup_row_direct(0, 2, byte); /*(0,2) direct link done*/
717 setup_temp_row(0, 2);
719 if (!check_connection(7) ) {
720 print_spew("No connection to Node 2.\r\n");
725 /* We found 3 nodes so far. Now setup a temporary
726 * connection from node 0 to node 3 via node 1
728 setup_temp_row(0,1); /* temp. link between nodes 0 and 1 */
729 /* here should setup_row_direct(1,3) at first, before that we should find the link in node 1 to 3*/
731 byte = ((val>>16) & 0xfe) - link_connection(1,0);
732 byte = get_linkn_first(byte);
733 print_linkn("(1,3) link=", byte);
734 setup_row_direct(1,3,byte); /* (1, 3) direct link done*/
735 setup_temp_row(1,3); /* temp. link between nodes 1 and 3 */
737 if (!check_connection(7)) {
738 print_spew("No connection to Node 3.\r\n");
743 /* We found 4 nodes so far. Now setup all nodes for 4p */
745 static const u8 conn4_1[] = {
750 static const u8 conn4_1[] = {
756 setup_row_indirect_group(conn4_1, sizeof(conn4_1)/sizeof(conn4_1[0]));
760 val = pci_read_config32(NODE_HT(7), 0x6c);
761 byte = (val>>2) & 0x3; /* get default link on 7 to 0*/
762 print_linkn("(2,0) link=", byte);
764 setup_row_local(7,2);
765 setup_remote_row_direct(2, 0, byte);
766 setup_remote_node(2); /* Setup the regs on the remote node */
768 static const u8 conn4_2[] = {
772 static const u8 conn4_2[] = {
776 setup_remote_row_indirect_group(conn4_2, sizeof(conn4_2)/sizeof(conn4_2[0]));
778 rename_temp_node(2); /* Rename Node 7 to Node 2 */
779 enable_routing(2); /* Enable routing on Node 2 */
784 val = pci_read_config32(NODE_HT(7), 0x6c);
785 byte = (val>>2) & 0x3; /* get default link on 7 to 1*/
786 print_linkn("(3,1) link=", byte);
788 setup_row_local(7,3);
789 setup_remote_row_direct(3, 1, byte);
790 setup_remote_node(3); /* Setup the regs on the remote node */
793 static const u8 conn4_3[] = {
797 static const u8 conn4_3[] = {
801 setup_remote_row_indirect_group(conn4_3, sizeof(conn4_3)/sizeof(conn4_3[0]));
803 /* We need to init link between 2, and 3 direct link */
805 byte = ((val>>16) & 0xfe) - link_connection(2,0);
806 byte = get_linkn_last_count(byte);
807 print_linkn("(2,3) link=", byte & 3);
809 setup_row_direct(2,3, byte & 0x3);
812 check_connection(7); /* to 3*/
814 #if CONFIG_MAX_CPUS > 4
815 /* We need to find out which link is to node3 */
817 if((byte>>2)==2) { /* one to node3, one to node0, one to node4*/
819 if((val>>16) == 1) { /* that link is to node4, because via node3 it has been set, recompute it*/
821 byte = ((val>>16) & 0xfe) - link_connection(2,0);
822 byte = get_linkn_first(byte);
823 print_linkn("-->(2,3) link=", byte);
824 setup_row_direct(2,3,byte);
826 check_connection(7); /* to 3*/
831 val = pci_read_config32(NODE_HT(7), 0x6c);
832 byte = (val>>2) & 0x3; /* get default link on 7 to 2*/
833 print_linkn("(3,2) link=", byte);
834 setup_remote_row_direct(3,2, byte);
836 /* ready to enable RT for Node 3 */
838 enable_routing(3); /* enable routing on node 3 (temp.) */
841 /*We need to do sth to reverse work for setup_temp_row (0,1) (1,3) */
842 /* it will be done by clear_dead_links */
847 /* optimize physical connections - by LYH */
848 static const u8 opt_conn4[] = {
854 result.needs_reset = optimize_connection_group(opt_conn4, sizeof(opt_conn4)/sizeof(opt_conn4[0]));
860 #endif /* CONFIG_MAX_CPUS > 2 */
862 #if CONFIG_MAX_CPUS > 4
864 static struct setup_smp_result setup_smp6(int needs_reset)
866 struct setup_smp_result result;
871 result.needs_reset = needs_reset;
873 /* Setup and check temporary connection from Node 0 to Node 4 via 2 */
875 byte = ((val>>16) & 0xfe) - link_connection(2,3) - link_connection(2,0);
876 byte = get_linkn_last_count(byte);
878 if((byte>>2)==0) { /* We should have two coherent link on node 2 for 6p and above*/
882 byte &= 3; /* bit [3,2] is count-2*/
883 print_linkn("(2,4) link=", byte);
884 setup_row_direct(2, 4, byte);
886 /* Setup and check temporary connection from Node 0 to Node 4 through 2*/
887 for(byte=0; byte<4; byte+=2) {
888 setup_temp_row(byte,byte+2);
891 if (!check_connection(7) ) {
892 print_spew("No connection to Node 4.\r\n");
897 /* Setup and check temporary connection from Node 0 to Node 5 through 1, 3*/
899 byte = ((val>>16) & 0xfe) - link_connection(3,2) - link_connection(3,1);
900 byte = get_linkn_last_count(byte);
901 if((byte>>2)==0) { /* We should have two coherent links on node 3 for 6p and above*/
906 byte &= 3; /*bit [3,2] is count-2*/
907 print_linkn("(3,5) link=", byte);
908 setup_row_direct(3, 5, byte);
910 setup_temp_row(0,1); /* temp. link between nodes 0 and 1 */
911 for(byte=0; byte<4; byte+=2) {
912 setup_temp_row(byte+1,byte+3);
915 if (!check_connection(7)) {
916 print_spew("No connection to Node 5.\r\n");
921 /* We found 6 nodes so far. Now setup all nodes for 6p */
922 #warning "FIXME we need to find out the correct gateway for 6p"
923 static const u8 conn6_1[] = {
941 setup_row_indirect_group(conn6_1, sizeof(conn6_1)/sizeof(conn6_1[0]));
943 for(byte=0; byte<4; byte+=2) {
944 setup_temp_row(byte,byte+2);
946 val = pci_read_config32(NODE_HT(7), 0x6c);
947 byte = (val>>2) & 0x3; /*get default link on 7 to 2*/
948 print_linkn("(4,2) link=", byte);
950 setup_row_local(7,4);
951 setup_remote_row_direct(4, 2, byte);
952 setup_remote_node(4); /* Setup the regs on the remote node */
953 /* Set indirect connection to 0, to 3 */
954 static const u8 conn6_2[] = {
967 setup_remote_row_indirect_group(conn6_2, sizeof(conn6_2)/sizeof(conn6_2[0]));
973 for(byte=0; byte<4; byte+=2) {
974 setup_temp_row(byte+1,byte+3);
977 val = pci_read_config32(NODE_HT(7), 0x6c);
978 byte = (val>>2) & 0x3; /* get default link on 7 to 3*/
979 print_linkn("(5,3) link=", byte);
980 setup_row_local(7,5);
981 setup_remote_row_direct(5, 3, byte);
982 setup_remote_node(5); /* Setup the regs on the remote node */
985 /* We need to init link between 4, and 5 direct link */
987 byte = ((val>>16) & 0xfe) - link_connection(4,2);
988 byte = get_linkn_last_count(byte);
989 print_linkn("(4,5) link=", byte & 3);
991 setup_row_direct(4,5, byte & 0x3);
995 check_connection(7); /* to 5*/
997 #if CONFIG_MAX_CPUS > 6
998 /* We need to find out which link is to node5 */
1000 if((byte>>2)==2) { /* one to node5, one to node2, one to node6*/
1002 if((val>>16) == 1) { /* that link is to node6, because via node 3 node 5 has been set*/
1004 byte = ((val>>16) & 0xfe) - link_connection(4,2);
1005 byte = get_linkn_first(byte);
1006 print_linkn("-->(4,5) link=", byte);
1007 setup_row_direct(4,5,byte);
1008 setup_temp_row(4,5);
1009 check_connection(7); /* to 5*/
1014 val = pci_read_config32(NODE_HT(7), 0x6c);
1015 byte = (val>>2) & 0x3; /* get default link on 7 to 4*/
1016 print_linkn("(5,4) link=", byte);
1017 setup_remote_row_direct(5,4, byte);
1020 /* Set indirect connection to 0, to 3 for indirect we will use clockwise routing */
1021 static const u8 conn6_3[] = {
1022 #if !CROSS_BAR_47_56
1034 setup_remote_row_indirect_group(conn6_3, sizeof(conn6_3)/sizeof(conn6_3[0]));
1036 /* ready to enable RT for 5 */
1037 rename_temp_node(5);
1038 enable_routing(5); /* enable routing on node 5 (temp.) */
1041 /* We need to do sth about reverse about setup_temp_row (0,1), (2,4), (1, 3), (3,5)
1042 * It will be done by clear_dead_links
1044 for(byte=0; byte<4; byte++) {
1045 clear_temp_row(byte);
1049 /* optimize physical connections - by LYH */
1050 static const uint8_t opt_conn6[] ={
1053 #if !CROSS_BAR_47_56
1057 result.needs_reset = optimize_connection_group(opt_conn6, sizeof(opt_conn6)/sizeof(opt_conn6[0]));
1063 #endif /* CONFIG_MAX_CPUS > 4 */
1065 #if CONFIG_MAX_CPUS > 6
1067 static struct setup_smp_result setup_smp8(int needs_reset)
1069 struct setup_smp_result result;
1074 result.needs_reset = needs_reset;
1076 /* Setup and check temporary connection from Node 0 to Node 6 via 2 and 4 to 7 */
1078 #if !CROSS_BAR_47_56
1079 byte = ((val>>16) & 0xfe) - link_connection(4,5) - link_connection(4,2);
1081 byte = ((val>>16) & 0xfe) - link_connection(4,2);
1084 #if TRY_HIGH_FIRST == 1
1085 byte = get_linkn_last_count(byte); /* Max link to 6*/
1086 if((byte>>2)==0) { /* We should have two or three coherent links on node 4 for 8p*/
1090 byte &= 3; /* bit [3,2] is count-1 or 2*/
1092 byte = get_linkn_first(byte); /*Min link to 6*/
1094 print_linkn("(4,6) link=", byte);
1095 setup_row_direct(4, 6, byte);
1097 /* Setup and check temporary connection from Node 0 to Node 6 through 2, and 4*/
1098 for(byte=0; byte<6; byte+=2) {
1099 setup_temp_row(byte,byte+2);
1102 if (!check_connection(7) ) {
1103 print_spew("No connection to Node 6.\r\n");
1107 #if !CROSS_BAR_47_56
1108 /* Setup and check temporary connection from Node 0 to Node 7 through 1, 3, 5*/
1110 byte = ((val>>16) & 0xfe) - link_connection(5,4) - link_connection(5,3);
1111 byte = get_linkn_first(byte);
1112 print_linkn("(5,7) link=", byte);
1113 setup_row_direct(5, 7, byte);
1115 setup_temp_row(0,1); /* temp. link between nodes 0 and 1 */
1116 for(byte=0; byte<6; byte+=2) {
1117 setup_temp_row(byte+1,byte+3);
1121 byte = ((val>>16) & 0xfe) - link_connection(4,2) - link_connection(4,6);
1122 byte = get_linkn_first(byte);
1123 print_linkn("(4,7) link=", byte);
1124 setup_row_direct(4, 7, byte);
1126 /* Setup and check temporary connection from Node 0 to Node 7 through 2, and 4*/
1127 for(byte=0; byte<4; byte+=2) {
1128 setup_temp_row(byte,byte+2);
1130 setup_temp_row(4, 7);
1134 if (!check_connection(7)) {
1135 print_spew("No connection to Node 7.\r\n");
1141 /* We found 8 nodes so far. Now setup all nodes for 8p */
1142 static const u8 conn8_1[] = {
1143 #if !CROSS_BAR_47_56
1165 setup_row_indirect_group(conn8_1,sizeof(conn8_1)/sizeof(conn8_1[0]));
1167 for(byte=0; byte<6; byte+=2) {
1168 setup_temp_row(byte,byte+2);
1170 val = pci_read_config32(NODE_HT(7), 0x6c);
1171 byte = (val>>2) & 0x3; /* get default link on 7 to 4*/
1172 print_linkn("(6,4) link=", byte);
1174 setup_row_local(7,6);
1175 setup_remote_row_direct(6, 4, byte);
1176 setup_remote_node(6); /* Setup the regs on the remote node */
1177 /* Set indirect connection to 0, to 3 */
1178 #warning "FIXME we need to find out the correct gateway for 8p"
1179 static const u8 conn8_2[] = {
1180 #if !CROSS_BAR_47_56
1194 setup_remote_row_indirect_group(conn8_2, sizeof(conn8_2)/sizeof(conn8_2[0]));
1196 rename_temp_node(6);
1199 #if !CROSS_BAR_47_56
1200 setup_temp_row(0,1);
1201 for(byte=0; byte<6; byte+=2) {
1202 setup_temp_row(byte+1,byte+3);
1205 val = pci_read_config32(NODE_HT(7), 0x6c);
1206 byte = (val>>2) & 0x3; /* get default link on 7 to 5*/
1207 print_linkn("(7,5) link=", byte);
1208 setup_row_local(7,7);
1209 setup_remote_row_direct(7, 5, byte);
1212 for(byte=0; byte<4; byte+=2) {
1213 setup_temp_row(byte,byte+2);
1215 setup_temp_row(4,7);
1216 val = pci_read_config32(NODE_HT(7), 0x6c);
1217 byte = (val>>2) & 0x3; /* get default link on 7 to 4*/
1218 print_linkn("(7,4) link=", byte);
1219 setup_row_local(7,7);
1220 setup_remote_row_direct(7, 4, byte);
1221 /* till now 4-7, 7-4 done. */
1223 setup_remote_node(7); /* Setup the regs on the remote node */
1226 /* here init 5, 7 */
1227 /* Setup and check temporary connection from Node 0 to Node 5 through 1, 3, 5*/
1229 byte = ((val>>16) & 0xfe) - link_connection(5,3);
1230 byte = get_linkn_last(byte);
1231 print_linkn("(5,7) link=", byte);
1232 setup_row_direct(5, 7, byte);
1234 setup_temp_row(0,1); /* temp. link between nodes 0 and 1 */
1235 for(byte=0; byte<6; byte+=2) {
1236 setup_temp_row(byte+1,byte+3);
1239 if (!check_connection(7)) {
1240 /* We need to recompute link to 7 */
1242 byte = ((val>>16) & 0xfe) - link_connection(5,3);
1243 byte = get_linkn_first(byte);
1245 print_linkn("-->(5,7) link=", byte);
1246 setup_row_direct(5, 7, byte);
1248 setup_temp_row(0,1); /* temp. link between nodes 0 and 1 */
1249 for(byte=0; byte<6; byte+=2) {
1250 setup_temp_row(byte+1,byte+3);
1253 setup_temp_row(5,7);
1255 check_connection(7);
1257 val = pci_read_config32(NODE_HT(7), 0x6c);
1258 byte = (val>>2) & 0x3; /* get default link on 7 to 5*/
1259 print_linkn("(7,5) link=", byte);
1260 setup_remote_row_direct(7, 5, byte);
1261 /*Till now 57, 75 done */
1263 /* init init 5, 6 */
1265 byte = ((val>>16) & 0xfe) - link_connection(5,3) - link_connection(5,7);
1266 byte = get_linkn_first(byte);
1267 print_linkn("(5,6) link=", byte);
1268 setup_row_direct(5, 6, byte);
1272 byte = ((val>>16) & 0xfe) - link_connection(6,4);
1273 byte = get_linkn_last(byte);
1274 print_linkn("(6,7) link=", byte);
1275 setup_row_direct(6, 7, byte);
1277 for(byte=0; byte<6; byte+=2) {
1278 setup_temp_row(byte,byte+2);
1280 setup_temp_row(6,7);
1282 if (!check_connection(7)) {
1283 /* We need to recompute link to 7 */
1285 byte = ((val>>16) & 0xfe) - link_connection(6,4);
1286 byte = get_linkn_first(byte);
1287 print_linkn("-->(6,7) link=", byte);
1289 setup_row_direct(6, 7, byte);
1291 for(byte=0; byte<6; byte+=2) {
1292 setup_temp_row(byte,byte+2); /* temp. link between nodes 0 and 2 */
1295 setup_temp_row(6,7);
1296 check_connection(7);
1298 val = pci_read_config32(NODE_HT(7), 0x6c);
1299 byte = (val>>2) & 0x3; /* get default link on 7 to 6*/
1300 print_linkn("(7,6) link=", byte);
1302 setup_remote_row_direct(7, 6, byte);
1303 /* Till now 67, 76 done*/
1307 byte = ((val>>16) & 0xfe) - link_connection(6,4) - link_connection(6,7);
1308 byte = get_linkn_first(byte);
1309 print_linkn("(6,5) link=", byte);
1310 setup_row_direct(6, 5, byte);
1314 #if !CROSS_BAR_47_56
1315 /* We need to init link between 6, and 7 direct link */
1317 byte = ((val>>16) & 0xfe) - link_connection(6,4);
1318 byte = get_linkn_first(byte);
1319 print_linkn("(6,7) link=", byte);
1320 setup_row_direct(6,7, byte);
1323 byte = ((val>>16) & 0xfe) - link_connection(7,5);
1324 byte = get_linkn_first(byte);
1325 print_linkn("(7,6) link=", byte);
1326 setup_row_direct(7,6, byte);
1329 /* Set indirect connection to 0, to 3 for indirect we will use clockwise routing */
1330 static const u8 conn8_3[] = {
1331 #if !CROSS_BAR_47_56
1332 0, 7, 1, /* restore it*/
1344 0, 7, 2, 0, /* restore it*/
1361 setup_row_indirect_group(conn8_3, sizeof(conn8_3)/sizeof(conn8_3[0]));
1363 /* ready to enable RT for Node 7 */
1364 enable_routing(7); /* enable routing on node 7 (temp.) */
1367 static const uint8_t opt_conn8[] ={
1376 /* optimize physical connections - by LYH */
1377 result.needs_reset = optimize_connection_group(opt_conn8, sizeof(opt_conn8)/sizeof(opt_conn8[0]));
1382 #endif /* CONFIG_MAX_CPUS > 6 */
1385 #if CONFIG_MAX_CPUS > 1
1387 static struct setup_smp_result setup_smp(void)
1389 struct setup_smp_result result;
1391 print_spew("Enabling SMP settings\r\n");
1393 result = setup_smp2();
1394 #if CONFIG_MAX_CPUS > 2
1395 if(result.nodes == 2 )
1396 result = setup_smp4(result.needs_reset);
1399 #if CONFIG_MAX_CPUS > 4
1400 if(result.nodes == 4)
1401 result = setup_smp6(result.needs_reset);
1404 #if CONFIG_MAX_CPUS > 6
1405 if(result.nodes == 6)
1406 result = setup_smp8(result.needs_reset);
1409 print_debug_hex8(result.nodes);
1410 print_debug(" nodes initialized.\r\n");
1416 static unsigned verify_mp_capabilities(unsigned nodes)
1418 unsigned node, mask;
1420 mask = 0x06; /* BigMPCap */
1422 for (node=0; node<nodes; node++) {
1423 mask &= pci_read_config32(NODE_MC(node), 0xe8);
1427 #if CONFIG_MAX_CPUS > 2
1428 case 0x02: /* MPCap */
1430 print_err("Going back to DP\r\n");
1435 case 0x00: /* Non SMP */
1437 print_err("Going back to UP\r\n");
/*
 * Reset routing rows that are no longer needed to DEFAULT and rebuild each
 * node's local row from the rows that remain.
 * NOTE(review): the declarations, the assignment of last_row, the
 * initialisation of val and the closing braces/#endif are not visible in
 * this listing -- confirm against the full file.
 */
1448 static void clear_dead_routes(unsigned nodes)
1452 #if CONFIG_MAX_CPUS > 6
1453 if(nodes==8) return;/* don't touch (7,7)*/
/* Rows 7 down to last_row are reset on every node; counting down to 0 requires signed counters. */
1459 for(node = 7; node >= 0; node--) {
1460 for(row = 7; row >= last_row; row--) {
1461 fill_row(node, row, DEFAULT);
1465 /* Update the local row */
1466 for( node=0; node<nodes; node++) {
/* OR together all rows of this node that target a live node. */
1468 for(row =0; row<nodes; row++) {
1469 val |= get_row(node, row);
/* Bits 16 and up of the local row = union of request (bits 7:0) and response (bits 15:8) routes; the low half is 0x0101. */
1471 fill_row(node, node, (((val & 0xff) | ((val >> 8) & 0xff)) << 16) | 0x0101);
1474 #endif /* CONFIG_MAX_CPUS > 1 */
1476 static void coherent_ht_finalize(unsigned nodes)
1481 /* set up cpu count and node count and enable Limit
1482 * Config Space Range for all available CPUs.
1483 * Also clear non coherent hypertransport bus range
1484 * registers on Hammer A0 revision.
1487 print_spew("coherent_ht_finalize\r\n");
1488 rev_a0 = is_cpu_rev_a0();
1489 for (node = 0; node < nodes; node++) {
1492 dev = NODE_HT(node);
1494 /* Set the Total CPU and Node count in the system */
1495 val = pci_read_config32(dev, 0x60);
1496 val &= (~0x000F0070);
1497 val |= ((nodes-1)<<16)|((nodes-1)<<4);
1498 pci_write_config32(dev, 0x60, val);
1500 /* Only respond to real cpu pci configuration cycles
1501 * and optimize the HT settings
1503 val=pci_read_config32(dev, 0x68);
1504 val &= ~((HTTC_BUF_REL_PRI_MASK << HTTC_BUF_REL_PRI_SHIFT) |
1505 (HTTC_MED_PRI_BYP_CNT_MASK << HTTC_MED_PRI_BYP_CNT_SHIFT) |
1506 (HTTC_HI_PRI_BYP_CNT_MASK << HTTC_HI_PRI_BYP_CNT_SHIFT));
1507 val |= HTTC_LIMIT_CLDT_CFG |
1508 (HTTC_BUF_REL_PRI_8 << HTTC_BUF_REL_PRI_SHIFT) |
1510 (3 << HTTC_MED_PRI_BYP_CNT_SHIFT) |
1511 (3 << HTTC_HI_PRI_BYP_CNT_SHIFT);
1512 pci_write_config32(dev, 0x68, val);
1515 print_spew("shit it is an old cup\n");
1516 pci_write_config32(dev, 0x94, 0);
1517 pci_write_config32(dev, 0xb4, 0);
1518 pci_write_config32(dev, 0xd4, 0);
1522 print_spew("done\r\n");
1525 static int apply_cpu_errata_fixes(unsigned nodes, int needs_reset)
1528 for(node = 0; node < nodes; node++) {
1531 dev = NODE_MC(node);
1532 if (is_cpu_pre_c0()) {
1535 * Limit the number of downstream posted requests to 1
1537 cmd = pci_read_config32(dev, 0x70);
1538 if ((cmd & (3 << 0)) != 2) {
1541 pci_write_config32(dev, 0x70, cmd );
1544 cmd = pci_read_config32(dev, 0x7c);
1545 if ((cmd & (3 << 4)) != 0) {
1548 pci_write_config32(dev, 0x7c, cmd );
1551 /* Clock Power/Timing Low */
1552 cmd = pci_read_config32(dev, 0xd4);
1553 if (cmd != 0x000D0001) {
1555 pci_write_config32(dev, 0xd4, cmd);
1556 needs_reset = 1; /* Needed? */
1563 * Set Clk Ramp Hysteresis to 7
1564 * Clock Power/Timing Low
1566 cmd_ref = 0x04e20707; /* Registered */
1567 cmd = pci_read_config32(dev, 0xd4);
1568 if(cmd != cmd_ref) {
1569 pci_write_config32(dev, 0xd4, cmd_ref );
1570 needs_reset = 1; /* Needed? */
/*
 * For every node, set the byte of function 3 register 0xdc that belongs to
 * each coherent link to 0x25, writing the register back only when the
 * value changed.
 * NOTE(review): the declarations, closing braces, whatever follows the
 * write (presumably setting needs_reset) and the return statement are not
 * visible in this listing -- confirm against the full file.
 */
1577 static int optimize_link_read_pointers(unsigned nodes, int needs_reset)
1580 for(node = 0; node < nodes; node++) {
1581 device_t f0_dev, f3_dev;
1582 uint32_t cmd_ref, cmd;
1584 f0_dev = NODE_HT(node);
1585 f3_dev = NODE_MC(node);
/* cmd_ref keeps the value as read so a change can be detected afterwards. */
1586 cmd_ref = cmd = pci_read_config32(f3_dev, 0xdc);
1587 for(link = 0; link < 3; link++) {
1590 /* This works on an Athlon64 because unimplemented links return 0 */
/* Link type registers sit at function 0 offset 0x98, 0xb8 and 0xd8. */
1591 reg = 0x98 + (link * 0x20);
1592 link_type = pci_read_config32(f0_dev, reg);
1593 if ((link_type & 7) == 3) { /* only handle coherent link here*/
/* Replace this link's byte of the register with 0x25. */
1594 cmd &= ~(0xff << (link *8));
1595 cmd |= 0x25 << (link *8);
1598 if (cmd != cmd_ref) {
1599 pci_write_config32(f3_dev, 0xdc, cmd);
/*
 * Top-level entry: enable routing on the BSP, discover and set up all
 * nodes (when CONFIG_MAX_CPUS > 1), then finalize the fabric and apply
 * errata fixes and link read-pointer tuning.
 * Returns result.needs_reset after the errata and read-pointer passes
 * have updated it.
 * NOTE(review): the #else/#endif lines, the single-node assignment of
 * result.nodes and the braces are not visible in this listing -- confirm
 * against the full file.
 */
1606 static int setup_coherent_ht_domain(void)
1608 struct setup_smp_result result;
1610 enable_bsp_routing();
1612 #if CONFIG_MAX_CPUS > 1
1613 result = setup_smp();
/* Possibly reduce the node count to what the CPUs' MP capabilities allow, then drop routes to unused nodes. */
1614 result.nodes = verify_mp_capabilities(result.nodes);
1615 clear_dead_routes(result.nodes);
1618 result.needs_reset = 0;
1621 if (result.nodes == 1) {
1622 setup_uniprocessor();
1624 coherent_ht_finalize(result.nodes);
/* Both passes take the current reset flag and return the updated one. */
1625 result.needs_reset = apply_cpu_errata_fixes(result.nodes, result.needs_reset);
1626 result.needs_reset = optimize_link_read_pointers(result.nodes, result.needs_reset);
1627 return result.needs_reset;