1 /* coherent hypertransport initialization for AMD64
3 * written by Stefan Reinauer <stepan@openbios.org>
4 * (c) 2003-2004 by SuSE Linux AG
6 * (c) 2004 Tyan Computer
7 * 2004.12 yhlu added support for creating the routing table dynamically.
8 * It also supports 8-way systems (8-way ladder or 8-way crossbar).
10 * This code is licensed under GPL.
14 * This algorithm assumes a grid configuration as follows:
17 * org. : 1x1 2x1 2x2 2x3 2x4
66 #include <device/pci_def.h>
67 #include <device/pci_ids.h>
68 #include <device/hypertransport_def.h>
69 #include "arch/romcc_io.h"
// Helper macros and build-option defaults.
// enable_bsp_routing() enables routing on node 0.
// NODE_HT, NODE_MP and NODE_MC expand to PCI_DEV(0, 24+x, fn) with fn = 0, 1 and 3.
// NOTE(review): x is not parenthesized in 24+x, so pass only simple expressions.
// DEFAULT is the value written into a routing row when it is reset (see clear_temp_row).
// Each #ifndef below supplies a fallback of 0 for an option the build did not define.
72 #define enable_bsp_routing() enable_routing(0)
74 #define NODE_HT(x) PCI_DEV(0,24+x,0)
75 #define NODE_MP(x) PCI_DEV(0,24+x,1)
76 #define NODE_MC(x) PCI_DEV(0,24+x,3)
78 #define DEFAULT 0x00010101 /* default row entry */
83 #ifndef CROSS_BAR_47_56
84 #define CROSS_BAR_47_56 0
87 #ifndef TRY_HIGH_FIRST
88 #define TRY_HIGH_FIRST 0
91 #ifndef K8_HT_FREQ_1G_SUPPORT
92 #define K8_HT_FREQ_1G_SUPPORT 0
95 #ifndef CONFIG_MAX_CPUS_4_BUT_MORE_INSTALLED
96 #define CONFIG_MAX_CPUS_4_BUT_MORE_INSTALLED 0
// Print the label strval, then byteval as a two-digit hex value, then CR/LF on the debug console.
100 static inline void print_linkn (const char *strval, uint8_t byteval)
103 print_debug(strval); print_debug_hex8(byteval); print_debug("\r\n");
// Set the probe-disable bits in register 0x68 of NODE_HT(0) for a uniprocessor system.
// NOTE(review): the value OR-ed in sets bits 0-4 and 8-10, while the register description
// in this function only lists bits 0-3 and 10; confirm the meaning of bits 4, 8 and 9.
107 static void disable_probes(void)
109 /* disable read/write/fill probes for uniprocessor setup
110 * they don't make sense if only one cpu is available
113 /* HyperTransport Transaction Control Register
115 * [ 0: 0] Disable read byte probe
117 * 1 = Probes not issued
118 * [ 1: 1] Disable Read Doubleword probe
120 * 1 = Probes not issued
121 * [ 2: 2] Disable write byte probes
123 * 1 = Probes not issued
124 * [ 3: 3] Disable Write Doubleword Probes
126 * 1 = Probes not issued.
127 * [10:10] Disable Fill Probe
128 * 0 = Probes issued for cache fills
129 * 1 = Probes not issued for cache fills.
134 print_spew("Disabling read/write/fill probes for UP... ");
136 val=pci_read_config32(NODE_HT(0), 0x68);
137 val |= (1<<10)|(1<<9)|(1<<8)|(1<<4)|(1<<3)|(1<<2)|(1<<1)|(1 << 0);
138 pci_write_config32(NODE_HT(0), 0x68, val);
140 print_spew("done.\r\n");
// ENABLE_APIC_EXT_ID falls back to 0 when the build does not define it.
145 #ifndef ENABLE_APIC_EXT_ID
146 #define ENABLE_APIC_EXT_ID 0
// When ENABLE_APIC_EXT_ID is 1, set the HTTC_APIC_EXT_SPUR, HTTC_APIC_EXT_ID and
// HTTC_APIC_EXT_BRD_CST bits in register 0x68 of the HT function of the given node.
149 static void enable_apic_ext_id(u8 node)
151 #if ENABLE_APIC_EXT_ID==1
152 #warning "FIXME Is the right place to enable apic ext id here?"
156 val = pci_read_config32(NODE_HT(node), 0x68);
157 val |= (HTTC_APIC_EXT_SPUR | HTTC_APIC_EXT_ID | HTTC_APIC_EXT_BRD_CST);
158 pci_write_config32(NODE_HT(node), 0x68, val);
// Switch a node to routing-table based routing.
// Clears bit 0 (routing table disable) and bit 1 (request disable) of register 0x6c
// on the HT function of the node, logging the node number before and "done" after.
164 static void enable_routing(u8 node)
168 /* HT Initialization Control Register
170 * [ 0: 0] Routing Table Disable
171 * 0 = Packets are routed according to routing tables
172 * 1 = Packets are routed according to the default link field
173 * [ 1: 1] Request Disable (BSP should clear this)
174 * 0 = Request packets may be generated
175 * 1 = Request packets may not be generated.
176 * [ 3: 2] Default Link (Read-only)
180 * 11 = CPU on same node
182 * - Scratch bit cleared by a cold reset
183 * [ 5: 5] BIOS Reset Detect
184 * - Scratch bit cleared by a cold reset
185 * [ 6: 6] INIT Detect
186 * - Scratch bit cleared by a warm or cold reset not by an INIT
190 /* Enable routing table */
191 print_spew("Enabling routing table for node ");
192 print_spew_hex8(node);
194 val=pci_read_config32(NODE_HT(node), 0x6c);
195 val &= ~((1<<1)|(1<<0));
196 pci_write_config32(NODE_HT(node), 0x6c, val);
198 print_spew(" done.\r\n");
// Write value into routing-table row `row` of `node`.
// Rows are 32-bit registers located at offset 0x40 + 4*row of the node's HT function.
201 static void fill_row(u8 node, u8 row, u32 value)
203 pci_write_config32(NODE_HT(node), 0x40+(row<<2), value);
206 #if CONFIG_MAX_CPUS > 1
// Translate a request-route bit mask into a link register offset.
// Bit 3 (link 2) yields 0x40, bit 2 (link 1) yields 0x20 and bit 1 (link 0) yields 0x00;
// the tests run from the highest link down, so the highest set link bit decides.
// Callers add the result to 0x80 to form the position passed to optimize_connection.
207 static u8 link_to_register(int ldt)
210 * [ 0: 3] Request Route
211 * [0] Route to this node
212 * [1] Route to Link 0
213 * [2] Route to Link 1
214 * [3] Route to Link 2
217 if (ldt&0x08) return 0x40;
218 if (ldt&0x04) return 0x20;
219 if (ldt&0x02) return 0x00;
221 /* we should never get here */
222 print_spew("Unknown Link\n");
// Read routing-table row `row` of `node` (32-bit register at offset 0x40 + 4*row).
226 static u32 get_row(u8 node, u8 row)
228 return pci_read_config32(NODE_HT(node), 0x40+(row<<2));
// Return the low four bits (the request route field) of the row for dest on node src.
231 static int link_connection(u8 src, u8 dest)
233 return get_row(src, dest) & 0x0f;
// Give the node currently addressed as temporary node 7 its final number:
// the low three bits of register 0x60 on NODE_HT(7) are replaced with `node`.
236 static void rename_temp_node(u8 node)
240 print_spew("Renaming current temporary node to ");
241 print_spew_hex8(node);
243 val=pci_read_config32(NODE_HT(7), 0x60);
244 val &= (~7); /* clear low bits. */
245 val |= node; /* new node */
246 pci_write_config32(NODE_HT(7), 0x60, val);
248 print_spew(" done.\r\n");
// Probe node dest: read config dword 0 of its HT function and compare it with
// 0x11001022, the vendor/device id expected from a working remote node.
251 static int check_connection(u8 dest)
253 /* See if we have a valid connection to dest */
256 /* Verify that the coherent hypertransport link is
257 * established and actually working by reading the
258 * remote node's vendor/device id
260 val = pci_read_config32(NODE_HT(dest),0);
261 if(val != 0x11001022)
// Read the 16-bit HT frequency capability at config offset pos of dev and clean it up:
// the vendor-specific frequency bit is always cleared, and the 1000MHz bit is cleared
// when config dword 0 equals the AMD vendor id combined with device id 0x1100.
267 static unsigned read_freq_cap(device_t dev, unsigned pos)
269 /* Handle bugs in valid hypertransport frequency reporting */
273 freq_cap = pci_read_config16(dev, pos);
274 freq_cap &= ~(1 << HT_FREQ_VENDOR); /* Ignore Vendor HT frequencies */
276 /* AMD K8 Unsupported 1Ghz? */
277 id = pci_read_config32(dev, 0);
278 if (id == (PCI_VENDOR_ID_AMD | (0x1100 << 16))) {
279 freq_cap &= ~(1 << HT_FREQ_1000Mhz);
// Program both ends of one link (node1/link1 and node2/link2) to the best common settings.
// Frequency: the highest bit set in both frequency capability masks is written to both ends.
// Width: for each direction the smaller of one end's input width and the other end's
// output width is chosen; node2 gets the same pair with the two nibbles swapped.
// needs_reset is OR-ed with 1 whenever a frequency or width register did not already hold
// the value being written.
// link1 and link2 are config-space positions to which the PCI_HT_CAP_HOST_* offsets are added.
285 static int optimize_connection(device_t node1, uint8_t link1, device_t node2, uint8_t link2)
287 static const uint8_t link_width_to_pow2[]= { 3, 4, 0, 5, 1, 2, 0, 0 };
288 static const uint8_t pow2_to_link_width[] = { 0x7, 4, 5, 0, 1, 3 };
289 uint16_t freq_cap1, freq_cap2, freq_cap, freq_mask;
290 uint8_t width_cap1, width_cap2, width_cap, width, old_width, ln_width1, ln_width2;
291 uint8_t freq, old_freq;
293 /* Set link width and frequency */
295 /* Initially assume everything is already optimized and I don't need a reset */
298 /* Get the frequency capabilities */
299 freq_cap1 = read_freq_cap(node1, link1 + PCI_HT_CAP_HOST_FREQ_CAP);
300 freq_cap2 = read_freq_cap(node2, link2 + PCI_HT_CAP_HOST_FREQ_CAP);
302 /* Calculate the highest possible frequency */
303 freq = log2(freq_cap1 & freq_cap2);
305 /* See if I am changing the link frequency */
306 old_freq = pci_read_config8(node1, link1 + PCI_HT_CAP_HOST_FREQ);
307 needs_reset |= old_freq != freq;
308 old_freq = pci_read_config8(node2, link2 + PCI_HT_CAP_HOST_FREQ);
309 needs_reset |= old_freq != freq;
311 /* Set the calculated link frequency */
312 pci_write_config8(node1, link1 + PCI_HT_CAP_HOST_FREQ, freq);
313 pci_write_config8(node2, link2 + PCI_HT_CAP_HOST_FREQ, freq);
315 /* Get the width capabilities */
316 width_cap1 = pci_read_config8(node1, link1 + PCI_HT_CAP_HOST_WIDTH);
317 width_cap2 = pci_read_config8(node2, link2 + PCI_HT_CAP_HOST_WIDTH);
319 /* Calculate node1's input width */
320 ln_width1 = link_width_to_pow2[width_cap1 & 7];
321 ln_width2 = link_width_to_pow2[(width_cap2 >> 4) & 7];
322 if (ln_width1 > ln_width2) {
323 ln_width1 = ln_width2;
325 width = pow2_to_link_width[ln_width1];
326 /* Calculate node1's output width */
327 ln_width1 = link_width_to_pow2[(width_cap1 >> 4) & 7];
328 ln_width2 = link_width_to_pow2[width_cap2 & 7];
329 if (ln_width1 > ln_width2) {
330 ln_width1 = ln_width2;
332 width |= pow2_to_link_width[ln_width1] << 4;
334 /* See if I am changing node1's width */
335 old_width = pci_read_config8(node1, link1 + PCI_HT_CAP_HOST_WIDTH + 1);
336 needs_reset |= old_width != width;
338 /* Set node1's widths */
339 pci_write_config8(node1, link1 + PCI_HT_CAP_HOST_WIDTH + 1, width);
341 /* Calculate node2's width */
342 width = ((width & 0x70) >> 4) | ((width & 0x7) << 4);
344 /* See if I am changing node2's width */
345 old_width = pci_read_config8(node2, link2 + PCI_HT_CAP_HOST_WIDTH + 1);
346 needs_reset |= old_width != width;
348 /* Set node2's widths */
349 pci_write_config8(node2, link2 + PCI_HT_CAP_HOST_WIDTH + 1, width);
// Build routing row `row` on node `source` from the state of its three links and write it
// with fill_row.  The loop reads the link register at 0x98 + 0x20*linkn for each link and
// skips any link for which (reg & 0x17) != 3, i.e. one that is not coherent or not connected.
354 static void setup_row_local(u8 source, u8 row) /* source will be 7 when it is for temp use*/
359 for(linkn = 0; linkn<3; linkn++) {
362 regpos = 0x98 + 0x20 * linkn;
363 reg = pci_read_config32(NODE_HT(source), regpos);
364 if ((reg & 0x17) != 3) continue; /* it is not coherent or not connected*/
369 fill_row(source,row, val);
// Write, on node temp, the routing row for dest when dest hangs directly off source via
// link linkn.  The response route (bits 8 and up) is set to that same link.  When the
// condition below holds, the field starting at bit 16 is taken from the row that temp
// holds for source, with the bit for this link subtracted out.
372 static void setup_row_direct_x(u8 temp, u8 source, u8 dest, u8 linkn)
377 val |= 1<<(linkn+1+8); /*for direct connect response route should equal to request table*/
379 if(((source &1)!=(dest &1))
381 && (source<4) && (dest<4)
386 /*for CROSS_BAR_47_56 47, 74, 56, 65 should be here too*/
387 val_s = get_row(temp, source);
388 val |= ((val_s>>16) - (1<<(linkn+1)))<<16;
391 fill_row(temp,dest, val );
// Set up the direct row for dest on node source itself (temp == source).
394 static void setup_row_direct(u8 source, u8 dest, u8 linkn){
395 setup_row_direct_x(source, source, dest, linkn);
// Set up the direct row for dest on a node that is still addressed as temporary node 7.
398 static void setup_remote_row_direct(u8 source, u8 dest, u8 linkn){
399 setup_row_direct_x(7, source, dest, linkn);
// Pick the lowest-numbered link whose bit is set in byte:
// bit 1 selects link 0, otherwise bit 2 selects link 1, otherwise bit 3 selects link 2.
402 static uint8_t get_linkn_first(uint8_t byte)
404 if(byte & 0x02) { byte = 0; }
405 else if(byte & 0x04) { byte = 1; }
406 else if(byte & 0x08) { byte = 2; }
// Pick the highest-numbered link whose bit is set in byte.  Each test that matches stores
// its link number (0, 1 or 2) in the high nibble of byte, overwriting any earlier match,
// so the last matching test decides.
410 static uint8_t get_linkn_last(uint8_t byte)
412 if(byte & 0x02) { byte &= 0x0f; byte |= 0x00; }
413 if(byte & 0x04) { byte &= 0x0f; byte |= 0x10; }
414 if(byte & 0x08) { byte &= 0x0f; byte |= 0x20; }
// Like get_linkn_last, but also counts the links found: each matching test stores its link
// number in bits 5:4 and adds 0x40, so bits 7:6 accumulate the number of matches.
// Callers in this file use (result & 3) as the link number and test (result >> 2).
418 static uint8_t get_linkn_last_count(uint8_t byte)
421 if(byte & 0x02) { byte &= 0xcf; byte |= 0x00; byte+=0x40; }
422 if(byte & 0x04) { byte &= 0xcf; byte |= 0x10; byte+=0x40; }
423 if(byte & 0x08) { byte &= 0xcf; byte |= 0x20; byte+=0x40; }
// Copy the row for dest on node source into row 7 of the same node.
// NOTE(review): presumably this makes accesses to temporary node 7 follow the route to
// dest; confirm against the callers.
427 static void setup_temp_row(u8 source, u8 dest)
429 /* copy val from (source, dest) to (source,7) */
430 fill_row(source,7,get_row(source,dest));
// Reset row 7 (the temporary row) on node source to DEFAULT.
433 static void clear_temp_row(u8 source)
435 fill_row(source, 7, DEFAULT);
// Copy every register listed in pci_reg from NODE_MP(0) to NODE_MP(7), so the node that is
// currently addressed as temporary node 7 starts with the same resource map as node 0.
// NOTE(review): the node parameter is not used by any line visible here; confirm.
438 static void setup_remote_node(u8 node)
440 static const uint8_t pci_reg[] = {
441 0x44, 0x4c, 0x54, 0x5c, 0x64, 0x6c, 0x74, 0x7c,
442 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78,
443 0x84, 0x8c, 0x94, 0x9c, 0xa4, 0xac, 0xb4, 0xbc,
444 0x80, 0x88, 0x90, 0x98, 0xa0, 0xa8, 0xb0, 0xb8,
445 0xc4, 0xcc, 0xd4, 0xdc,
446 0xc0, 0xc8, 0xd0, 0xd8,
447 0xe0, 0xe4, 0xe8, 0xec,
451 print_spew("setup_remote_node: ");
453 /* copy the default resource map from node 0 */
454 for(i = 0; i < sizeof(pci_reg)/sizeof(pci_reg[0]); i++) {
458 value = pci_read_config32(NODE_MP(0), reg);
459 pci_write_config32(NODE_MP(7), reg, value);
462 print_spew("done\r\n");
465 #endif /* CONFIG_MAX_CPUS > 1*/
468 #if CONFIG_MAX_CPUS > 2
// Compute and write, on node temp, the routing row for a dest that source reaches through
// a gateway node.  The new row starts from the row temp holds for the gateway; the field
// at bit 16 and up is derived from the row for source minus the link leading to the gateway.
// NOTE(review): two alternative signatures are listed (with and without explicit gateway
// and diff); presumably a preprocessor conditional selects one of them - confirm.
// In the three-argument form gateway is source+2 or source-2 and diff is true when source
// and dest differ in bit 0.
470 static void setup_row_indirect_x(u8 temp, u8 source, u8 dest)
472 static void setup_row_indirect_x(u8 temp, u8 source, u8 dest, u8 gateway, u8 diff)
475 /*for indirect connection, we need to compute the val from val_s(source, source), and val_g(source, gateway) */
482 gateway = source + 2;
484 gateway = source - 2;
487 val_s = get_row(temp, source);
488 val = get_row(temp, gateway);
495 diff = ((source&1)!=(dest &1));
498 if(diff && (val_s!=(val&0xff)) ) { /* use another connect as response*/
500 #if (CONFIG_MAX_CPUS > 4) || (CONFIG_MAX_CPUS_4_BUT_MORE_INSTALLED == 1)
502 /* Some node have two links left
503 * don't worry we only have (2, (3 as source need to handle
506 byte = get_linkn_last_count(byte);
507 if((byte>>2)>1) { /* make sure not the corner*/
509 val_s-=link_connection(temp, source-2); /* -down*/
511 val_s-=link_connection(temp, source+2); /* -up*/
519 if(diff) { /* cross rung?*/
523 val_s = get_row(temp, source);
524 val |= ((val_s>>16) - link_connection(temp, gateway))<<16;
527 fill_row(temp, dest, val);
// Set up an indirect row for dest on node source itself (temp == source).
// Two variants are listed, matching the two signatures of setup_row_indirect_x.
532 static void setup_row_indirect(u8 source, u8 dest)
534 setup_row_indirect_x(source, source, dest);
537 static void setup_row_indirect(u8 source, u8 dest, u8 gateway, u8 diff)
539 setup_row_indirect_x(source, source, dest, gateway, diff);
// Apply setup_row_indirect to every entry of the table conn, which holds num bytes.
// Entries are (source, dest) pairs in the two-argument variant and
// (source, dest, gateway, diff) quadruples in the four-argument variant.
543 static void setup_row_indirect_group(const u8 *conn, int num)
548 for(i=0; i<num; i+=2) {
549 setup_row_indirect(conn[i], conn[i+1]);
551 for(i=0; i<num; i+=4) {
552 setup_row_indirect(conn[i], conn[i+1],conn[i+2], conn[i+3]);
// Set up an indirect row for dest on a node still addressed as temporary node 7.
// Two variants are listed, matching the two signatures of setup_row_indirect_x.
559 static void setup_remote_row_indirect(u8 source, u8 dest)
561 setup_row_indirect_x(7, source, dest);
564 static void setup_remote_row_indirect(u8 source, u8 dest, u8 gateway, u8 diff)
566 setup_row_indirect_x(7, source, dest, gateway, diff);
// Apply setup_remote_row_indirect to every entry of the table conn, which holds num bytes.
// Entries are pairs or quadruples exactly as in setup_row_indirect_group.
570 static void setup_remote_row_indirect_group(const u8 *conn, int num)
575 for(i=0; i<num; i+=2) {
576 setup_remote_row_indirect(conn[i], conn[i+1]);
578 for(i=0; i<num; i+=4) {
579 setup_remote_row_indirect(conn[i], conn[i+1],conn[i+2], conn[i+3]);
584 #endif /*CONFIG_MAX_CPUS > 2*/
// Uniprocessor path, called when only one node was found; logs that UP settings are applied.
587 static void setup_uniprocessor(void)
589 print_spew("Enabling UP settings\r\n");
// Result passed between the SMP setup stages; code in this file reads and writes its
// nodes and needs_reset members.
593 struct setup_smp_result {
598 #if CONFIG_MAX_CPUS > 2
// Run optimize_connection on every (node a, node b) pair in opt_conn, which holds num bytes.
// For each pair the link register position on either end is 0x80 plus the offset that
// link_to_register derives from the route between the two nodes.
// NOTE(review): the loop assigns needs_reset with '=' rather than OR-ing it, so as written
// only the result of the last pair survives; confirm whether '|=' was intended.
599 static int optimize_connection_group(const u8 *opt_conn, int num) {
602 for(i=0; i<num; i+=2) {
603 needs_reset = optimize_connection(
604 NODE_HT(opt_conn[i]), 0x80 + link_to_register(link_connection(opt_conn[i],opt_conn[i+1])),
605 NODE_HT(opt_conn[i+1]), 0x80 + link_to_register(link_connection(opt_conn[i+1],opt_conn[i])) );
611 #if CONFIG_MAX_CPUS > 1
// First SMP stage: discover node 1 from node 0.
// Sets up the local row of node 0, picks one of its coherent links (the highest when
// TRY_HIGH_FIRST is 1, otherwise the lowest), routes temporary node 7 over it, reads the
// default link of the remote node to build its row back to node 0, copies the resource
// map, renames the remote node to 1 and enables its routing.  Finally the 0-1 link is
// optimized and the outcome stored in result.needs_reset.
// With more than four CPUs possible, a second attempt over the other link is made when
// the link count computed for the remote node is 3.
612 static struct setup_smp_result setup_smp2(void)
614 struct setup_smp_result result;
618 result.needs_reset = 0;
620 setup_row_local(0, 0); /* it will update the broadcast RT*/
623 byte = (val>>16) & 0xfe;
624 if(byte<0x2) { /* no coherent connection so get out.*/
629 /* Setup and check a temporary connection to node 1 */
630 #if TRY_HIGH_FIRST == 1
631 byte = get_linkn_last(byte); /* Max Link to node1 */
633 byte = get_linkn_first(byte); /*Min Link to node1 --- according to AMD*/
635 print_linkn("(0,1) link=", byte);
636 setup_row_direct(0,1, byte);
637 setup_temp_row(0, 1);
641 /* We found 2 nodes so far */
642 val = pci_read_config32(NODE_HT(7), 0x6c);
643 byte = (val>>2) & 0x3; /*get default link on node7 to node0*/
644 print_linkn("(1,0) link=", byte);
645 setup_row_local(7,1);
646 setup_remote_row_direct(1, 0, byte);
648 #if (CONFIG_MAX_CPUS > 4) || (CONFIG_MAX_CPUS_4_BUT_MORE_INSTALLED == 1)
650 byte = (val>>16) & 0xfe;
651 byte = get_linkn_last_count(byte);
652 if((byte>>2)==3) { /* Oh! we need to treat it as node2. So use another link*/
654 byte = (val>>16) & 0xfe;
655 #if TRY_HIGH_FIRST == 1
656 byte = get_linkn_first(byte); /* Min link to Node1 */
658 byte = get_linkn_last(byte); /* Max link to Node1*/
660 print_linkn("\t-->(0,1) link=", byte);
661 setup_row_direct(0,1, byte);
662 setup_temp_row(0, 1);
666 /* We found 2 nodes so far */
667 val = pci_read_config32(NODE_HT(7), 0x6c);
668 byte = (val>>2) & 0x3; /* get default link on node7 to node0*/
669 print_linkn("\t-->(1,0) link=", byte);
670 setup_row_local(7,1);
671 setup_remote_row_direct(1, 0, byte);
675 setup_remote_node(1); /* Setup the regs on the remote node */
676 rename_temp_node(1); /* Rename Node 7 to Node 1 */
677 enable_routing(1); /* Enable routing on Node 1 */
679 /*don't need and it is done by clear_dead_links */
683 result.needs_reset = optimize_connection(
684 NODE_HT(0), 0x80 + link_to_register(link_connection(0,1)),
685 NODE_HT(1), 0x80 + link_to_register(link_connection(1,0)) );
689 #endif /*CONFIG_MAX_CPUS > 1 */
691 #if CONFIG_MAX_CPUS > 2
// Second SMP stage: extend a two-node system to four nodes.
// Finds the direct links (0,2) and (1,3), fills the indirect rows from table conn4_1,
// brings up node 2 and then node 3 through temporary node 7 (local row, direct row back,
// resource map copy, rename, enable routing), establishes the (2,3)/(3,2) link, fills the
// remaining indirect rows from conn4_3 and conn4_2, and optimizes the links in opt_conn4.
// When more than four CPUs may be installed it also looks for the (3,5) and (2,4) links.
// NOTE(review): result.needs_reset starts as the incoming needs_reset but is later
// assigned from optimize_connection_group, which appears to replace rather than OR the
// incoming value; confirm.
693 static struct setup_smp_result setup_smp4(int needs_reset)
695 struct setup_smp_result result;
700 result.needs_reset = needs_reset;
702 /* Setup and check temporary connection from Node 0 to Node 2 */
704 byte = ((val>>16) & 0xfe) - link_connection(0,1);
705 byte = get_linkn_last_count(byte);
707 if((byte>>2)==0) { /* We should have two coherent for 4p and above*/
712 byte &= 3; /* bit [3,2] is count-1*/
713 print_linkn("(0,2) link=", byte);
714 setup_row_direct(0, 2, byte); /*(0,2) direct link done*/
716 /* We found 3 nodes so far. Now setup a temporary
717 * connection from node 0 to node 3 via node 1
719 setup_temp_row(0,1); /* temp. link between nodes 0 and 1 */
720 /* here should setup_row_direct(1,3) at first, before that we should find the link in node 1 to 3*/
722 byte = ((val>>16) & 0xfe) - link_connection(1,0);
723 byte = get_linkn_first(byte);
724 print_linkn("(1,3) link=", byte);
725 setup_row_direct(1,3,byte); /* (1, 3) direct link done*/
727 /* We found 4 nodes so far. Now setup all nodes for 4p */
728 // We need to make sure 0,2 and 1,3 link is set already
730 static const u8 conn4_1[] = {
735 static const u8 conn4_1[] = {
741 setup_row_indirect_group(conn4_1, sizeof(conn4_1)/sizeof(conn4_1[0]));
745 val = pci_read_config32(NODE_HT(7), 0x6c);
746 byte = (val>>2) & 0x3; /* get default link on 7 to 0*/
747 print_linkn("(2,0) link=", byte);
749 setup_row_local(7,2);
750 setup_remote_row_direct(2, 0, byte); /* node 2 to node 0 direct link done */
751 setup_remote_node(2); /* Setup the regs on the remote node */
753 rename_temp_node(2); /* Rename Node 7 to Node 2 */
754 enable_routing(2); /* Enable routing on Node 2 */
760 val = pci_read_config32(NODE_HT(7), 0x6c);
761 byte = (val>>2) & 0x3; /* get default link on 7 to 1*/
762 print_linkn("(3,1) link=", byte);
764 setup_row_local(7,3);
765 setup_remote_row_direct(3, 1, byte); /* node 3 to node 1 direct link done */
766 setup_remote_node(3); /* Setup the regs on the remote node */
768 /* We need to init link between 2, and 3 direct link */
770 byte = ((val>>16) & 0xfe) - link_connection(2,0);
771 byte = get_linkn_last_count(byte);
772 print_linkn("(2,3) link=", byte & 3);
774 setup_row_direct(2,3, byte & 0x3);
777 check_connection(7); /* to 3*/
779 #if (CONFIG_MAX_CPUS > 4) || (CONFIG_MAX_CPUS_4_BUT_MORE_INSTALLED == 1)
780 /* We need to find out which link is to node3 */
781 if((byte>>2)==2) { /* one to node3, one to node0, one to node4*/
783 if((val>>16) == 1) { /* that link is to node4, because via node1 it has been set, recompute it*/
785 byte = ((val>>16) & 0xfe) - link_connection(2,0);
786 byte = get_linkn_first(byte);
787 print_linkn("\t-->(2,3) link=", byte);
788 setup_row_direct(2,3,byte);
790 check_connection(7); /* to 3*/
795 val = pci_read_config32(NODE_HT(7), 0x6c);
796 byte = (val>>2) & 0x3; /* get default link on 7 to 2*/
797 print_linkn("(3,2) link=", byte);
798 setup_remote_row_direct(3,2, byte);
800 #if (CONFIG_MAX_CPUS > 4) || (CONFIG_MAX_CPUS_4_BUT_MORE_INSTALLED == 1)
801 /* set link from 3 to 5 before enable it*/
803 byte = ((val>>16) & 0xfe) - link_connection(7,2) - link_connection(7,1);
804 byte = get_linkn_last_count(byte);
805 if((byte>>2)==1) { /* We should have three coherent links on node 3 for 6p and above*/
806 byte &= 3; /*bit [3,2] is count-2*/
807 print_linkn("(3,5) link=", byte);
808 setup_remote_row_direct(3, 5, byte);
812 byte = ((val>>16) & 0xfe) - link_connection(2,3) - link_connection(2,0);
813 byte = get_linkn_last_count(byte);
815 if((byte>>2)==1) { /* We should have three coherent link on node 2 for 6p and above*/
816 byte &= 3; /* bit [3,2] is count-2*/
817 print_linkn("(2,4) link=", byte);
818 setup_row_direct(2, 4, byte);
822 //Beside 3, 1 is set, We need to make sure 3, 5 is set already in case has three link in 3
824 static const u8 conn4_3[] = {
828 static const u8 conn4_3[] = {
832 setup_remote_row_indirect_group(conn4_3, sizeof(conn4_3)/sizeof(conn4_3[0]));
834 /* ready to enable RT for Node 3 */
836 enable_routing(3); /* enable routing on node 3 (temp.) */
838 // beside 2, 0 is set, We need to make sure 2, 4 link is set already in case has three link in 2
840 static const u8 conn4_2[] = {
844 static const u8 conn4_2[] = {
848 setup_row_indirect_group(conn4_2, sizeof(conn4_2)/sizeof(conn4_2[0]));
851 /*We need to do sth to reverse work for setup_temp_row (0,1) (1,3) */
852 /* it will be done by clear_dead_links */
857 /* optimize physical connections - by LYH */
858 static const u8 opt_conn4[] = {
864 result.needs_reset = optimize_connection_group(opt_conn4, sizeof(opt_conn4)/sizeof(opt_conn4[0]));
870 #endif /* CONFIG_MAX_CPUS > 2 */
872 #if CONFIG_MAX_CPUS > 4
// Third SMP stage: extend a four-node system to six nodes.
// Checks that nodes 2 and 3 each have a further coherent link, fills indirect rows from
// conn6_1, then reaches node 4 through nodes 0-2 and node 5 through nodes 1-3 using
// temporary rows.  Each new node gets its local row, a direct row back, the resource map
// copy and its indirect rows (conn6_2, conn6_3).  The (4,5)/(5,4) link is established,
// node 5 is renamed and its routing enabled, the remaining rows come from conn6_4, the
// temporary rows of nodes 0-3 are cleared and the links in opt_conn6 are optimized.
// With CONFIG_MAX_CPUS > 6 it also looks for the (5,7) and (4,6) links.
// NOTE(review): result.needs_reset is finally assigned from optimize_connection_group,
// which appears to replace rather than OR the incoming needs_reset; confirm.
874 static struct setup_smp_result setup_smp6(int needs_reset)
876 struct setup_smp_result result;
881 result.needs_reset = needs_reset;
883 /* Setup and check temporary connection from Node 0 to Node 4 through 2*/
885 byte = ((val>>16) & 0xfe) - link_connection(2,3) - link_connection(2,0);
886 byte = get_linkn_last_count(byte);
888 if((byte>>2)==0) { /* We should have three coherent link on node 2 for 6p and above*/
893 /* Setup and check temporary connection from Node 0 to Node 5 through 1, 3*/
894 /* set link from 3 to 5 before enable it*/
896 byte = ((val>>16) & 0xfe) - link_connection(3,2) - link_connection(3,1);
897 byte = get_linkn_last_count(byte);
898 if((byte>>2)==0) { /* We should have three coherent links on node 3 for 6p and above*/
903 /* We found 6 nodes so far. Now setup all nodes for 6p */
904 #warning "FIXME we need to find out the correct gateway for 6p"
905 static const u8 conn6_1[] = {
923 setup_row_indirect_group(conn6_1, sizeof(conn6_1)/sizeof(conn6_1[0]));
925 for(byte=0; byte<4; byte+=2) {
926 setup_temp_row(byte,byte+2);
929 val = pci_read_config32(NODE_HT(7), 0x6c);
930 byte = (val>>2) & 0x3; /*get default link on 7 to 2*/
931 print_linkn("(4,2) link=", byte);
933 setup_row_local(7,4);
934 setup_remote_row_direct(4, 2, byte);
935 setup_remote_node(4); /* Setup the regs on the remote node */
937 /* Set indirect connection to 0, to 3 */
938 //we only need to set 4,0 here
939 static const u8 conn6_2[] = {
947 setup_remote_row_indirect_group(conn6_2, sizeof(conn6_2)/sizeof(conn6_2[0]));
953 for(byte=0; byte<4; byte+=2) {
954 setup_temp_row(byte+1,byte+3);
958 val = pci_read_config32(NODE_HT(7), 0x6c);
959 byte = (val>>2) & 0x3; /* get default link on 7 to 3*/
960 print_linkn("(5,3) link=", byte);
961 setup_row_local(7,5);
962 setup_remote_row_direct(5, 3, byte);
963 setup_remote_node(5); /* Setup the regs on the remote node */
966 /* We need to init link between 4, and 5 direct link */
968 byte = ((val>>16) & 0xfe) - link_connection(4,2);
969 byte = get_linkn_last_count(byte);
970 print_linkn("(4,5) link=", byte & 3);
972 setup_row_direct(4,5, byte & 0x3);
976 check_connection(7); /* to 5*/
978 #if CONFIG_MAX_CPUS > 6
979 /* We need to find out which link is to node5 */
981 if((byte>>2)==2) { /* one to node5, one to node2, one to node6*/
983 if((val>>16) == 1) { /* that link is to node6, because via node 3 node 5 has been set*/
985 byte = ((val>>16) & 0xfe) - link_connection(4,2);
986 byte = get_linkn_first(byte);
987 print_linkn("\t-->(4,5) link=", byte);
988 setup_row_direct(4,5,byte);
990 check_connection(7); /* to 5*/
995 val = pci_read_config32(NODE_HT(7), 0x6c);
996 byte = (val>>2) & 0x3; /* get default link on 7 to 4*/
997 print_linkn("(5,4) link=", byte);
998 setup_remote_row_direct(5,4, byte);
1002 byte = ((val>>16) & 0xfe) - link_connection(7,4) - link_connection(7,3);
1003 byte = get_linkn_last_count(byte);
1004 if((byte>>2)==1) { /* We should have three coherent links on node 5 for 6p and above*/
1005 byte &= 3; /*bit [3,2] is count-2*/
1006 print_linkn("(5,7) link=", byte);
1007 setup_remote_row_direct(5, 7, byte);
1012 byte = ((val>>16) & 0xfe) - link_connection(4,5) - link_connection(4,2);
1013 byte = get_linkn_last_count(byte);
1015 if((byte>>2)==1) { /* We should have three coherent link on node 4 for 6p and above*/
1016 byte &= 3; /* bit [3,2] is count-2*/
1017 print_linkn("(4,6) link=", byte);
1018 setup_row_direct(4, 6, byte);
1023 //We need to set 5,0 here only, We need to set up 5, 7 to make 5,0
1024 /* Set indirect connection to 0, to 3 for indirect we will use clockwise routing */
1025 static const u8 conn6_3[] = {
1026 #if !CROSS_BAR_47_56
1033 setup_remote_row_indirect_group(conn6_3, sizeof(conn6_3)/sizeof(conn6_3[0]));
1035 /* ready to enable RT for 5 */
1036 rename_temp_node(5);
1037 enable_routing(5); /* enable routing on node 5 (temp.) */
1039 static const u8 conn6_4[] = {
1040 #if !CROSS_BAR_47_56
1059 setup_row_indirect_group(conn6_4, sizeof(conn6_4)/sizeof(conn6_4[0]));
1062 /* We need to do sth about reverse about setup_temp_row (0,1), (2,4), (1, 3), (3,5)
1063 * It will be done by clear_dead_links
1065 for(byte=0; byte<4; byte++) {
1066 clear_temp_row(byte);
1070 /* optimize physical connections - by LYH */
1071 static const uint8_t opt_conn6[] ={
1074 #if !CROSS_BAR_47_56
1078 result.needs_reset = optimize_connection_group(opt_conn6, sizeof(opt_conn6)/sizeof(opt_conn6[0]));
1084 #endif /* CONFIG_MAX_CPUS > 4 */
1086 #if CONFIG_MAX_CPUS > 6
// Fourth SMP stage: extend a six-node system to eight nodes.
// Finds the (4,6) link, fills indirect rows from conn8_1, reaches node 6 through nodes
// 0-2-4 using temporary rows and sets it up (local row, direct row back, resource map,
// indirect rows from conn8_2).  The layout then depends on CROSS_BAR_47_56:
//  - when it is 0, node 7 is reached through nodes 1-3-5 and linked back to node 5;
//  - otherwise the cross links (5,6)/(6,5), (4,7)/(7,4) and (5,7)/(7,5) are set up, with
//    a recomputation of the (5,6) link if the node reached turns out to be the real node 7.
// Afterwards the (6,7) and (7,6) links are set, the remaining indirect rows come from
// conn8_3, routing is enabled on node 7 and the links in opt_conn8 are optimized.
// NOTE(review): result.needs_reset is finally assigned from optimize_connection_group,
// which appears to replace rather than OR the incoming needs_reset; confirm.
1088 static struct setup_smp_result setup_smp8(int needs_reset)
1090 struct setup_smp_result result;
1095 result.needs_reset = needs_reset;
1097 /* Setup and check temporary connection from Node 0 to Node 6 via 2 and 4 to 7 */
1100 byte = ((val>>16) & 0xfe) - link_connection(4,2);
1102 byte = ((val>>16) & 0xfe) - link_connection(4,5) - link_connection(4,2);
1103 byte = get_linkn_last_count(byte); /* Max link to 6*/
1104 if((byte>>2)==0) { /* We should have two or three coherent links on node 4 for 8p*/
1111 byte = get_linkn_last_count(byte); /* Max link to 6*/
1112 if((byte>>2)<2) { /* We should have two or three coherent links on node 4 for 8p*/
1116 #if TRY_HIGH_FIRST == 1
1117 byte &= 3; /* bit [3,2] is count-1 or 2*/
1119 byte = ((val>>16) & 0xfe) - link_connection(4,2);
1120 byte = get_linkn_first(byte); /*Min link to 6*/
1122 print_linkn("(4,6) link=", byte);
1123 setup_row_direct(4, 6, byte);
1126 #if !CROSS_BAR_47_56
1127 /* Setup and check temporary connection from Node 0 to Node 7 through 1, 3, 5*/
1129 byte = ((val>>16) & 0xfe) - link_connection(5,4) - link_connection(5,3);
1130 byte = get_linkn_last_count(byte);
1131 if((byte>>2)==0) { /* We should have three coherent links on node 5 for 6p and above*/
1137 /* We found 8 nodes so far. Now setup all nodes for 8p */
1138 static const u8 conn8_1[] = {
1139 #if !CROSS_BAR_47_56
1161 setup_row_indirect_group(conn8_1,sizeof(conn8_1)/sizeof(conn8_1[0]));
1163 for(byte=0; byte<6; byte+=2) {
1164 setup_temp_row(byte,byte+2);
1166 check_connection(7);
1167 val = pci_read_config32(NODE_HT(7), 0x6c);
1168 byte = (val>>2) & 0x3; /* get default link on 7 to 4*/
1169 print_linkn("(6,4) link=", byte);
1171 setup_row_local(7,6);
1172 setup_remote_row_direct(6, 4, byte);
1173 setup_remote_node(6); /* Setup the regs on the remote node */
1174 /* Set indirect connection to 0, to 3 */
1175 #warning "FIXME we need to find out the correct gateway for 8p"
1176 static const u8 conn8_2[] = {
1177 #if !CROSS_BAR_47_56
1184 setup_remote_row_indirect_group(conn8_2, sizeof(conn8_2)/sizeof(conn8_2[0]));
1188 /* here init 5, 6 */
1189 /* Setup and check temporary connection from Node 0 to Node 5 through 1, 3, 5*/
1191 byte = ((val>>16) & 0xfe) - link_connection(5,3);
1192 #if TRY_HIGH_FIRST == 1
1193 byte = get_linkn_last(byte);
1195 byte = get_linkn_first(byte);
1197 print_linkn("(5,6) link=", byte);
1198 setup_row_direct(5, 6, byte);
1200 setup_temp_row(0,1); /* temp. link between nodes 0 and 1 */
1201 for(byte=0; byte<4; byte+=2) {
1202 setup_temp_row(byte+1,byte+3);
1204 setup_temp_row(5,6);
1206 check_connection(7);
1208 val = get_row(7,6); // check whether this is node6 before renaming
1209 if( (val>>16) == 1) { // it is real node 7 so swap it
1210 /* We need to recompute link to 6 */
1212 byte = ((val>>16) & 0xfe) - link_connection(5,3);
1213 #if TRY_HIGH_FIRST == 1
1214 byte = get_linkn_first(byte);
1216 byte = get_linkn_last(byte);
1218 print_linkn("\t-->(5,6) link=", byte);
1219 setup_row_direct(5, 6, byte);
1221 setup_temp_row(0,1); /* temp. link between nodes 0 and 1 */
1222 for(byte=0; byte<4; byte+=2) {
1223 setup_temp_row(byte+1,byte+3);
1226 setup_temp_row(5,6);
1228 check_connection(7);
1230 val = pci_read_config32(NODE_HT(7), 0x6c);
1231 byte = (val>>2) & 0x3; /* get default link on 7 to 5*/
1232 print_linkn("(6,5) link=", byte);
1233 setup_remote_row_direct(6, 5, byte);
1234 /*Till now 56, 65 done */
1237 rename_temp_node(6);
1240 #if !CROSS_BAR_47_56
1241 setup_temp_row(0,1);
1242 for(byte=0; byte<6; byte+=2) {
1243 setup_temp_row(byte+1,byte+3);
1246 check_connection(7);
1248 val = pci_read_config32(NODE_HT(7), 0x6c);
1249 byte = (val>>2) & 0x3; /* get default link on 7 to 5*/
1250 print_linkn("(7,5) link=", byte);
1251 setup_row_local(7,7);
1252 setup_remote_row_direct(7, 5, byte);
1256 byte = ((val>>16) & 0xfe) - link_connection(4,2) - link_connection(4,6);
1257 byte = get_linkn_first(byte);
1258 print_linkn("(4,7) link=", byte);
1259 setup_row_direct(4, 7, byte);
1261 /* Setup and check temporary connection from Node 0 to Node 7 through 2, and 4*/
1262 for(byte=0; byte<4; byte+=2) {
1263 setup_temp_row(byte,byte+2);
1266 check_connection(7);
1268 val = pci_read_config32(NODE_HT(7), 0x6c);
1269 byte = (val>>2) & 0x3; /* get default link on 7 to 4*/
1270 print_linkn("(7,4) link=", byte);
1271 setup_row_local(7,7);
1272 setup_remote_row_direct(7, 4, byte);
1273 /* till now 4-7, 7-4 done. */
1275 setup_remote_node(7); /* Setup the regs on the remote node */
1278 /* here init 5, 7 */
1279 /* Setup and check temporary connection from Node 0 to Node 5 through 1, 3, 5*/
1281 byte = ((val>>16) & 0xfe) - link_connection(5,3) - link_connection(5,6);
1282 byte = get_linkn_first(byte);
1283 print_linkn("(5,7) link=", byte);
1284 setup_row_direct(5, 7, byte);
1286 setup_temp_row(0,1); /* temp. link between nodes 0 and 1 */
1287 for(byte=0; byte<4; byte+=2) {
1288 setup_temp_row(byte+1,byte+3);
1291 check_connection(7);
1293 val = pci_read_config32(NODE_HT(7), 0x6c);
1294 byte = (val>>2) & 0x3; /* get default link on 7 to 5*/
1295 print_linkn("(7,5) link=", byte);
1296 setup_remote_row_direct(7, 5, byte);
1297 /*Till now 57, 75 done */
1301 /* We need to init link between 6, and 7 direct link */
1303 #if !CROSS_BAR_47_56
1304 byte = ((val>>16) & 0xfe) - link_connection(6,4);
1306 byte = ((val>>16) & 0xfe) - link_connection(6,4) - link_connection(6,5);
1308 byte = get_linkn_first(byte);
1309 print_linkn("(6,7) link=", byte);
1310 setup_row_direct(6,7, byte);
1313 #if !CROSS_BAR_47_56
1314 byte = ((val>>16) & 0xfe) - link_connection(7,5);
1316 byte = ((val>>16) & 0xfe) - link_connection(7,5) - link_connection(7,4);
1318 byte = get_linkn_first(byte);
1319 print_linkn("(7,6) link=", byte);
1320 setup_row_direct(7,6, byte);
1322 /* Set indirect connection to 0, to 3 for indirect we will use clockwise routing */
1323 static const u8 conn8_3[] = {
1324 #if !CROSS_BAR_47_56
1325 0, 7, /* restore it*/
1342 0, 7, 2, 0, /* restore it*/
1361 setup_row_indirect_group(conn8_3, sizeof(conn8_3)/sizeof(conn8_3[0]));
1363 /* ready to enable RT for Node 7 */
1364 enable_routing(7); /* enable routing on node 7 (temp.) */
1367 static const uint8_t opt_conn8[] ={
1376 /* optimize physical connections - by LYH */
1377 result.needs_reset = optimize_connection_group(opt_conn8, sizeof(opt_conn8)/sizeof(opt_conn8[0]));
1382 #endif /* CONFIG_MAX_CPUS > 6 */
1385 #if CONFIG_MAX_CPUS > 1
// Run the SMP discovery stages in order.  setup_smp2 always runs; setup_smp4, setup_smp6
// and setup_smp8 are compiled in only when CONFIG_MAX_CPUS exceeds 2, 4 and 6, and each
// runs only if the previous stage reported exactly 2, 4 or 6 nodes.  The needs_reset flag
// is handed from stage to stage, and the final node count is printed.
1387 static struct setup_smp_result setup_smp(void)
1389 struct setup_smp_result result;
1391 print_spew("Enabling SMP settings\r\n");
1393 result = setup_smp2();
1394 #if CONFIG_MAX_CPUS > 2
1395 if(result.nodes == 2)
1396 result = setup_smp4(result.needs_reset);
1399 #if CONFIG_MAX_CPUS > 4
1400 if(result.nodes == 4)
1401 result = setup_smp6(result.needs_reset);
1404 #if CONFIG_MAX_CPUS > 6
1405 if(result.nodes == 6)
1406 result = setup_smp8(result.needs_reset);
1409 print_debug_hex8(result.nodes);
1410 print_debug(" nodes initialized.\r\n");
// Check that every discovered node supports the multiprocessor configuration in use.
// Starting from mask 0x06, register 0xe8 of NODE_MC is AND-ed in for each node; the
// resulting value selects a case: 0x02 (MPCap) logs a fallback to DP and 0x00 logs a
// fallback to UP.
1416 static unsigned verify_mp_capabilities(unsigned nodes)
1418 unsigned node, mask;
1420 mask = 0x06; /* BigMPCap */
1422 for (node=0; node<nodes; node++) {
1423 mask &= pci_read_config32(NODE_MC(node), 0xe8);
1427 #if CONFIG_MAX_CPUS > 2
1428 case 0x02: /* MPCap */
1430 print_err("Going back to DP\r\n");
1435 case 0x00: /* Non SMP */
1437 print_err("Going back to UP\r\n");
// Clean up the routing tables once the node count is known.
// With CONFIG_MAX_CPUS > 6 and 8 nodes nothing is touched.  Otherwise rows from last_row
// up to 7 are reset to DEFAULT on all eight possible nodes, and then each live node's own
// row is rebuilt: its field at bit 16 and up becomes the union of the low two bytes of all
// its rows, combined with 0x0101 in the low half.
// NOTE(review): the countdown loops test node >= 0 and row >= last_row, which only
// terminate if node and row are signed; confirm their declarations.
1448 static void clear_dead_routes(unsigned nodes)
1452 #if CONFIG_MAX_CPUS > 6
1453 if(nodes==8) return;/* don't touch (7,7)*/
1459 for(node = 7; node >= 0; node--) {
1460 for(row = 7; row >= last_row; row--) {
1461 fill_row(node, row, DEFAULT);
1465 /* Update the local row */
1466 for( node=0; node<nodes; node++) {
1468 for(row =0; row<nodes; row++) {
1469 val |= get_row(node, row);
1471 fill_row(node, node, (((val & 0xff) | ((val >> 8) & 0xff)) << 16) | 0x0101);
1474 #endif /* CONFIG_MAX_CPUS > 1 */
// Final per-node HT configuration for all `nodes` discovered nodes.
// For each node: writes nodes-1 into the fields at bits 4 and 16 of register 0x60, then
// rewrites register 0x68 with HTTC_LIMIT_CLDT_CFG set and the buffer-release and bypass
// count fields replaced.  Registers 0x94, 0xb4 and 0xd4 are zeroed as well; per the
// comment below this is meant for Hammer A0 parts (rev_a0 comes from is_cpu_rev_a0()).
1476 static void coherent_ht_finalize(unsigned nodes)
1481 /* set up cpu count and node count and enable Limit
1482 * Config Space Range for all available CPUs.
1483 * Also clear non coherent hypertransport bus range
1484 * registers on Hammer A0 revision.
1487 print_spew("coherent_ht_finalize\r\n");
1488 rev_a0 = is_cpu_rev_a0();
1489 for (node = 0; node < nodes; node++) {
1492 dev = NODE_HT(node);
1494 /* Set the Total CPU and Node count in the system */
1495 val = pci_read_config32(dev, 0x60);
1496 val &= (~0x000F0070);
1497 val |= ((nodes-1)<<16)|((nodes-1)<<4);
1498 pci_write_config32(dev, 0x60, val);
1500 /* Only respond to real cpu pci configuration cycles
1501 * and optimize the HT settings
1503 val=pci_read_config32(dev, 0x68);
1504 val &= ~((HTTC_BUF_REL_PRI_MASK << HTTC_BUF_REL_PRI_SHIFT) |
1505 (HTTC_MED_PRI_BYP_CNT_MASK << HTTC_MED_PRI_BYP_CNT_SHIFT) |
1506 (HTTC_HI_PRI_BYP_CNT_MASK << HTTC_HI_PRI_BYP_CNT_SHIFT));
1507 val |= HTTC_LIMIT_CLDT_CFG |
1508 (HTTC_BUF_REL_PRI_8 << HTTC_BUF_REL_PRI_SHIFT) |
1510 (3 << HTTC_MED_PRI_BYP_CNT_SHIFT) |
1511 (3 << HTTC_HI_PRI_BYP_CNT_SHIFT);
1512 pci_write_config32(dev, 0x68, val);
1515 print_spew("shit it is an old cup\n");
1516 pci_write_config32(dev, 0x94, 0);
1517 pci_write_config32(dev, 0xb4, 0);
1518 pci_write_config32(dev, 0xd4, 0);
1522 print_spew("done\r\n");
// Apply CPU errata workarounds to the NODE_MC function of every node.
// On pre-C0 CPUs registers 0x70 and 0x7c are adjusted when their checked fields differ
// from the wanted values, and register 0xd4 is rewritten unless it already reads
// 0x000D0001.  The later block writes 0x04e20707 to register 0xd4 unless it already holds
// that value.  The 0xd4 writes set needs_reset to 1.
1525 static int apply_cpu_errata_fixes(unsigned nodes, int needs_reset)
1528 for(node = 0; node < nodes; node++) {
1531 dev = NODE_MC(node);
1532 if (is_cpu_pre_c0()) {
1535 * Limit the number of downstream posted requests to 1
1537 cmd = pci_read_config32(dev, 0x70);
1538 if ((cmd & (3 << 0)) != 2) {
1541 pci_write_config32(dev, 0x70, cmd );
1544 cmd = pci_read_config32(dev, 0x7c);
1545 if ((cmd & (3 << 4)) != 0) {
1548 pci_write_config32(dev, 0x7c, cmd );
1551 /* Clock Power/Timing Low */
1552 cmd = pci_read_config32(dev, 0xd4);
1553 if (cmd != 0x000D0001) {
1555 pci_write_config32(dev, 0xd4, cmd);
1556 needs_reset = 1; /* Needed? */
1563 * Set Clk Ramp Hysteresis to 7
1564 * Clock Power/Timing Low
1566 cmd_ref = 0x04e20707; /* Registered */
1567 cmd = pci_read_config32(dev, 0xd4);
1568 if(cmd != cmd_ref) {
1569 pci_write_config32(dev, 0xd4, cmd_ref );
1570 needs_reset = 1; /* Needed? */
// Tune the per-link byte in register 0xdc of NODE_MC for every node.
// For each of the three links, the link register at 0x98 + 0x20*link on NODE_HT is read;
// if (value & 7) == 3 (a coherent link) the byte for that link in 0xdc is replaced with
// 0x25.  The register is written back only when the computed value differs from the one
// read.
1577 static int optimize_link_read_pointers(unsigned nodes, int needs_reset)
1580 for(node = 0; node < nodes; node++) {
1581 device_t f0_dev, f3_dev;
1582 uint32_t cmd_ref, cmd;
1584 f0_dev = NODE_HT(node);
1585 f3_dev = NODE_MC(node);
1586 cmd_ref = cmd = pci_read_config32(f3_dev, 0xdc);
1587 for(link = 0; link < 3; link++) {
1590 /* This works on an Athlon64 because unimplemented links return 0 */
1591 reg = 0x98 + (link * 0x20);
1592 link_type = pci_read_config32(f0_dev, reg);
1593 if ((link_type & 7) == 3) { /* only handle coherent link here*/
1594 cmd &= ~(0xff << (link *8));
1595 cmd |= 0x25 << (link *8);
1598 if (cmd != cmd_ref) {
1599 pci_write_config32(f3_dev, 0xdc, cmd);
// Top-level entry point for coherent HT setup.
// Enables routing on the BSP; with CONFIG_MAX_CPUS > 1 runs SMP discovery, verifies the
// MP capabilities of the nodes found and clears dead routes.  A single-node result takes
// the uniprocessor path.  Then the HT settings are finalized, CPU errata fixes and link
// read-pointer tuning are applied, and the accumulated needs_reset flag is returned.
1606 static int setup_coherent_ht_domain(void)
1608 struct setup_smp_result result;
1610 enable_bsp_routing();
1612 #if CONFIG_MAX_CPUS > 1
1613 result = setup_smp();
1614 result.nodes = verify_mp_capabilities(result.nodes);
1615 clear_dead_routes(result.nodes);
1618 result.needs_reset = 0;
1621 if (result.nodes == 1) {
1622 setup_uniprocessor();
1624 coherent_ht_finalize(result.nodes);
1625 result.needs_reset = apply_cpu_errata_fixes(result.nodes, result.needs_reset);
1626 result.needs_reset = optimize_link_read_pointers(result.nodes, result.needs_reset);
1627 return result.needs_reset;