/* coherent hypertransport initialization for AMD64 * * written by Stefan Reinauer * (c) 2003-2004 by SuSE Linux AG * * (c) 2004 Tyan Computer * 2004.12 yhlu added support to create support to create routing table dynamically. * it also support 8 ways too. (8 ways ladder or 8 ways crossbar) * This code is licensed under GPL. */ /* * This algorithm assumes a grid configuration as follows: * * nodes : 1 2 4 6 8 * org. : 1x1 2x1 2x2 2x3 2x4 * */ #include #include #include #include "arch/romcc_io.h" #include "amdk8.h" /* when generating a temporary row configuration we * don't want broadcast to be enabled for that node. */ #define enable_bsp_routing() enable_routing(0) #define NODE_HT(x) PCI_DEV(0,24+x,0) #define NODE_MP(x) PCI_DEV(0,24+x,1) #define NODE_MC(x) PCI_DEV(0,24+x,3) #define DEFAULT 0x00010101 /* default row entry */ typedef uint8_t u8; typedef uint32_t u32; typedef int bool; #define TRUE (-1) #define FALSE (0) static u8 link_to_register(int ldt) { /* * [ 0: 3] Request Route * [0] Route to this node * [1] Route to Link 0 * [2] Route to Link 1 * [3] Route to Link 2 */ if (ldt&0x08) return 0x40; if (ldt&0x04) return 0x20; if (ldt&0x02) return 0x00; /* we should never get here */ print_spew("Unknown Link\n"); return 0; } static void disable_probes(void) { /* disable read/write/fill probes for uniprocessor setup * they don't make sense if only one cpu is available */ /* Hypetransport Transaction Control Register * F0:0x68 * [ 0: 0] Disable read byte probe * 0 = Probes issues * 1 = Probes not issued * [ 1: 1] Disable Read Doubleword probe * 0 = Probes issued * 1 = Probes not issued * [ 2: 2] Disable write byte probes * 0 = Probes issued * 1 = Probes not issued * [ 3: 3] Disable Write Doubleword Probes * 0 = Probes issued * 1 = Probes not issued. * [10:10] Disable Fill Probe * 0 = Probes issued for cache fills * 1 = Probes not issued for cache fills. */ u32 val; print_spew("Disabling read/write/fill probes for UP... "); val=pci_read_config32(NODE_HT(0), 0x68); val |= (1<<10)|(1<<9)|(1<<8)|(1<<4)|(1<<3)|(1<<2)|(1<<1)|(1 << 0); pci_write_config32(NODE_HT(0), 0x68, val); print_spew("done.\r\n"); } static void enable_routing(u8 node) { u32 val; /* HT Initialization Control Register * F0:0x6C * [ 0: 0] Routing Table Disable * 0 = Packets are routed according to routing tables * 1 = Packets are routed according to the default link field * [ 1: 1] Request Disable (BSP should clear this) * 0 = Request packets may be generated * 1 = Request packets may not be generated. * [ 3: 2] Default Link (Read-only) * 00 = LDT0 * 01 = LDT1 * 10 = LDT2 * 11 = CPU on same node * [ 4: 4] Cold Reset * - Scratch bit cleared by a cold reset * [ 5: 5] BIOS Reset Detect * - Scratch bit cleared by a cold reset * [ 6: 6] INIT Detect * - Scratch bit cleared by a warm or cold reset not by an INIT * */ /* Enable routing table */ print_spew("Enabling routing table for node "); print_spew_hex8(node); val=pci_read_config32(NODE_HT(node), 0x6c); val &= ~((1<<1)|(1<<0)); pci_write_config32(NODE_HT(node), 0x6c, val); print_spew(" done.\r\n"); } static void fill_row(u8 node, u8 row, u32 value) { pci_write_config32(NODE_HT(node), 0x40+(row<<2), value); } static u32 get_row(u8 node, u8 row) { return pci_read_config32(NODE_HT(node), 0x40+(row<<2)); } static int link_connection(u8 src, u8 dest) { /* we generate the needed link information from the rows * by taking the Request Route of the according row. */ return get_row(src, dest) & 0x0f; } #if CONFIG_MAX_CPUS > 1 static void rename_temp_node(u8 node) { uint32_t val; print_spew("Renaming current temporary node to "); print_spew_hex8(node); val=pci_read_config32(NODE_HT(7), 0x60); val &= (~7); /* clear low bits. */ val |= node; /* new node */ pci_write_config32(NODE_HT(7), 0x60, val); print_spew(" done.\r\n"); } static bool check_connection(u8 dest) { /* See if we have a valid connection to dest */ u32 val; /* Verify that the coherent hypertransport link is * established and actually working by reading the * remode node's vendor/device id */ val = pci_read_config32(NODE_HT(dest),0); if(val != 0x11001022) return 0; return 1; } static unsigned read_freq_cap(device_t dev, unsigned pos) { /* Handle bugs in valid hypertransport frequency reporting */ unsigned freq_cap; uint32_t id; freq_cap = pci_read_config16(dev, pos); freq_cap &= ~(1 << HT_FREQ_VENDOR); /* Ignore Vendor HT frequencies */ id = pci_read_config32(dev, 0); /* AMD 8131 Errata 48 */ if (id == (PCI_VENDOR_ID_AMD | (PCI_DEVICE_ID_AMD_8131_PCIX << 16))) { freq_cap &= ~(1 << HT_FREQ_800Mhz); } /* AMD 8151 Errata 23 */ if (id == (PCI_VENDOR_ID_AMD | (PCI_DEVICE_ID_AMD_8151_SYSCTRL << 16))) { freq_cap &= ~(1 << HT_FREQ_800Mhz); } /* AMD K8 Unsupported 1Ghz? */ if (id == (PCI_VENDOR_ID_AMD | (0x1100 << 16))) { freq_cap &= ~(1 << HT_FREQ_1000Mhz); } return freq_cap; } static int optimize_connection(device_t node1, uint8_t link1, device_t node2, uint8_t link2) { static const uint8_t link_width_to_pow2[]= { 3, 4, 0, 5, 1, 2, 0, 0 }; static const uint8_t pow2_to_link_width[] = { 0x7, 4, 5, 0, 1, 3 }; uint16_t freq_cap1, freq_cap2, freq_cap, freq_mask; uint8_t width_cap1, width_cap2, width_cap, width, old_width, ln_width1, ln_width2; uint8_t freq, old_freq; int needs_reset; /* Set link width and frequency */ /* Initially assume everything is already optimized and I don't need a reset */ needs_reset = 0; /* Get the frequency capabilities */ freq_cap1 = read_freq_cap(node1, link1 + PCI_HT_CAP_HOST_FREQ_CAP); freq_cap2 = read_freq_cap(node2, link2 + PCI_HT_CAP_HOST_FREQ_CAP); /* Calculate the highest possible frequency */ freq = log2(freq_cap1 & freq_cap2); /* See if I am changing the link freqency */ old_freq = pci_read_config8(node1, link1 + PCI_HT_CAP_HOST_FREQ); needs_reset |= old_freq != freq; old_freq = pci_read_config8(node2, link2 + PCI_HT_CAP_HOST_FREQ); needs_reset |= old_freq != freq; /* Set the Calulcated link frequency */ pci_write_config8(node1, link1 + PCI_HT_CAP_HOST_FREQ, freq); pci_write_config8(node2, link2 + PCI_HT_CAP_HOST_FREQ, freq); /* Get the width capabilities */ width_cap1 = pci_read_config8(node1, link1 + PCI_HT_CAP_HOST_WIDTH); width_cap2 = pci_read_config8(node2, link2 + PCI_HT_CAP_HOST_WIDTH); /* Calculate node1's input width */ ln_width1 = link_width_to_pow2[width_cap1 & 7]; ln_width2 = link_width_to_pow2[(width_cap2 >> 4) & 7]; if (ln_width1 > ln_width2) { ln_width1 = ln_width2; } width = pow2_to_link_width[ln_width1]; /* Calculate node1's output width */ ln_width1 = link_width_to_pow2[(width_cap1 >> 4) & 7]; ln_width2 = link_width_to_pow2[width_cap2 & 7]; if (ln_width1 > ln_width2) { ln_width1 = ln_width2; } width |= pow2_to_link_width[ln_width1] << 4; /* See if I am changing node1's width */ old_width = pci_read_config8(node1, link1 + PCI_HT_CAP_HOST_WIDTH + 1); needs_reset |= old_width != width; /* Set node1's widths */ pci_write_config8(node1, link1 + PCI_HT_CAP_HOST_WIDTH + 1, width); /* Calculate node2's width */ width = ((width & 0x70) >> 4) | ((width & 0x7) << 4); /* See if I am changing node2's width */ old_width = pci_read_config8(node2, link2 + PCI_HT_CAP_HOST_WIDTH + 1); needs_reset |= old_width != width; /* Set node2's widths */ pci_write_config8(node2, link2 + PCI_HT_CAP_HOST_WIDTH + 1, width); return needs_reset; } static void setup_row_local(u8 source, u8 row) // source will be 7 when it is for temp use { unsigned linkn; uint32_t val; val = 1; for(linkn = 0; linkn<3; linkn++) { unsigned regpos; uint32_t reg; regpos = 0x98 + 0x20 * linkn; reg = pci_read_config32(NODE_HT(source), regpos); if ((reg & 0x17) != 3) continue; // it is not conherent or not connected val |= 1<<(linkn+1); } val <<= 16; val |= 0x0101; fill_row(source,row, val); } static void setup_row_direct(u8 source, u8 dest, u8 linkn) { uint32_t val; uint32_t val_s; val = 1<<(linkn+1); val |= 1<<(linkn+1+8); //for direct connect response route should equal to request table #if !CROSS_BAR_47_56 if((source &1)!=(dest &1)){ val |= (1<<16); } else { val_s = get_row(source, source); val |= ((val_s>>16) - (1<<(linkn+1)))<<16; } #else if(((source &1)!=(dest &1)) && (source<4) &&(dest<<4)){ val |= (1<<16); } else { //for CROSS_BAR_47_56 47, 74, 56, 65 should be here too val_s = get_row(source, source); val |= ((val_s>>16) - (1<<(linkn+1)))<<16; } #endif fill_row(source,dest, val); } static uint8_t get_linkn_first(uint8_t byte) { if(byte & 0x02) { byte = 0; } else if(byte & 0x04) { byte = 1; } else if(byte & 0x08) { byte = 2; } return byte; } static uint8_t get_linkn_last(uint8_t byte) { if(byte & 0x02) { byte &= 0x0f; byte |= 0x00; } if(byte & 0x04) { byte &= 0x0f; byte |= 0x10; } if(byte & 0x08) { byte &= 0x0f; byte |= 0x20; } return byte>>4; } static uint8_t get_linkn_last_count(uint8_t byte) { byte &= 0x3f; if(byte & 0x02) { byte &= 0xcf; byte |= 0x00; byte+=0x40; } if(byte & 0x04) { byte &= 0xcf; byte |= 0x10; byte+=0x40; } if(byte & 0x08) { byte &= 0xcf; byte |= 0x20; byte+=0x40; } return byte>>4; } #if CONFIG_MAX_CPUS>2 #if !CROSS_BAR_47_56 static void setup_row_indirect(u8 source, u8 dest, u8 gateway) #else static void setup_row_indirect(u8 source, u8 dest, u8 gateway, u8 diff) #endif { //for indirect connection, we need to compute the val from val_s(source, source), and val_g(source, gateway) uint32_t val_s; uint32_t val; val_s = get_row(source, source); val = get_row(source, gateway); val &= 0xffff; val_s >>=16; val_s &=0xfe; #if !CROSS_BAR_47_56 if(((source&1)!=(dest &1)) && (val_s!=(val&0xff)) ) { // use another connect as response val_s -= val & 0xff; #if CONFIG_MAX_CPUS>4 uint8_t byte; // Some node have two links left byte = val_s; byte = get_linkn_last_count(byte); if((byte>>2)>1) { // make sure not the corner if(source>16) - link_connection(source, gateway))<<16; } #else if(diff && (val_s!=(val&0xff)) ) { // use another connect as response val_s -= val & 0xff; #if CONFIG_MAX_CPUS>4 uint8_t byte; // Some node have two links left // don't worry we only have (2, (3 as source need to handle byte = val_s; byte = get_linkn_last_count(byte); if((byte>>2)>1) { // make sure not the corner if(source>16) - link_connection(source, gateway))<<16; } #endif fill_row(source, dest, val); } static void setup_row_indirect_group(const u8 *conn, int num) { int i; for(i=0; i>16) - (1<<(linkn+1)))<<16; } #else if(((source &1)!=(dest &1)) && (source<4) &&(dest<<4)){ val |= (1<<16); } else { //for CROSS_BAR_47_56 47, 74, 56, 65 should be here too val_s = get_row(7, source); val |= ((val_s>>16) - (1<<(linkn+1)))<<16; } #endif fill_row(7,dest, val ); } static void setup_remote_node(u8 node) { static const uint8_t pci_reg[] = { 0x44, 0x4c, 0x54, 0x5c, 0x64, 0x6c, 0x74, 0x7c, 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78, 0x84, 0x8c, 0x94, 0x9c, 0xa4, 0xac, 0xb4, 0xbc, 0x80, 0x88, 0x90, 0x98, 0xa0, 0xa8, 0xb0, 0xb8, 0xc4, 0xcc, 0xd4, 0xdc, 0xc0, 0xc8, 0xd0, 0xd8, 0xe0, 0xe4, 0xe8, 0xec, }; int i; print_spew("setup_remote_node: "); /* copy the default resource map from node 0 */ for(i = 0; i < sizeof(pci_reg)/sizeof(pci_reg[0]); i++) { uint32_t value; uint8_t reg; reg = pci_reg[i]; value = pci_read_config32(NODE_MP(0), reg); pci_write_config32(NODE_MP(7), reg, value); } print_spew("done\r\n"); } #endif static void setup_uniprocessor(void) { print_spew("Enabling UP settings\r\n"); #if CONFIG_LOGICAL_CPUS==1 unsigned tmp = (pci_read_config32(NODE_MC(0), 0xe8) >> 12) & 3; if (tmp>0) return; #endif disable_probes(); } struct setup_smp_result { int nodes; int needs_reset; }; #if CONFIG_MAX_CPUS > 2 static int optimize_connection_group(const u8 *opt_conn, int num) { int needs_reset = 0; int i; for(i=0; i 1 static struct setup_smp_result setup_smp(void) { struct setup_smp_result result; u8 byte; uint32_t val; result.nodes = 2; result.needs_reset = 0; print_spew("Enabling SMP settings\r\n"); setup_row_local(0, 0); // it will update the broadcast RT val = get_row(0,0); byte = (val>>16) & 0xfe; if(byte<0x2) { // no coherent connection so get out. result.nodes = 1; return result; } /* Setup and check a temporary connection to node 1 */ //find out linkn byte = get_linkn_first(byte); setup_row_direct(0,1, byte); setup_temp_row(0, 1); if (!check_connection(7)) { print_spew("No connection to Node 1.\r\n"); setup_uniprocessor(); /* and get up working */ result.nodes = 1; return result; } /* We found 2 nodes so far */ val = pci_read_config32(NODE_HT(7), 0x6c); byte = (val>>2) & 0x3; // get default link on 7 to 0 setup_row_local(7,1); setup_remote_row_direct(1, 0, byte); #if CONFIG_MAX_CPUS>4 val = get_row(7,1); byte = (val>>16) & 0xfe; byte = get_linkn_last_count(byte); if((byte>>2)==3) { // Oh! we need to treat it as cpu2. val = get_row(0,0); byte = (val>>16) & 0xfe; byte = get_linkn_last(byte); setup_row_direct(0,1, byte); setup_temp_row(0, 1); if (!check_connection(7)) { print_spew("No connection to Node 1.\r\n"); setup_uniprocessor(); /* and get up working */ result.nodes = 1; return result; } /* We found 2 nodes so far */ val = pci_read_config32(NODE_HT(7), 0x6c); byte = (val>>2) & 0x3; // get default link on 7 to 0 setup_row_local(7,1); setup_remote_row_direct(1, 0, byte); } #endif setup_remote_node(1); /* Setup the regs on the remote node */ rename_temp_node(1); /* Rename Node 7 to Node 1 */ enable_routing(1); /* Enable routing on Node 1 */ #if 0 // don't need and it is done by clear_dead_links clear_temp_row(0); #endif result.needs_reset = optimize_connection( NODE_HT(0), 0x80 + link_to_register(link_connection(0,1)), NODE_HT(1), 0x80 + link_to_register(link_connection(1,0)) ); #if CONFIG_MAX_CPUS > 2 result.nodes=4; /* Setup and check temporary connection from Node 0 to Node 2 */ val = get_row(0,0); byte = ((val>>16) & 0xfe) - link_connection(0,1); byte = get_linkn_last_count(byte); //find out linkn if((byte>>2)==0) { // We should have two coherent for 4p and above result.nodes = 2; return result; } byte &= 3; // bit [3,2] is count-1 setup_row_direct(0, 2, byte); setup_temp_row(0, 2); if (!check_connection(7) ) { print_spew("No connection to Node 2.\r\n"); result.nodes = 2; return result; } /* We found 3 nodes so far. Now setup a temporary * connection from node 0 to node 3 via node 1 */ setup_temp_row(0,1); /* temp. link between nodes 0 and 1 */ /* here should setup_row_direct(1,3) at first, before that we should find the link in cpu 1 to 3*/ val = get_row(1,1); byte = ((val>>16) & 0xfe) - link_connection(1,0); byte = get_linkn_first(byte); setup_row_direct(1,3,byte); setup_temp_row(1,3); /* temp. link between nodes 1 and 3 */ if (!check_connection(7)) { print_spew("No connection to Node 3.\r\n"); result.nodes = 2; return result; } /* We found 4 nodes so far. Now setup all nodes for 4p */ /* for indirect we will use clockwise routing */ #if !CROSS_BAR_47_56 static const u8 conn4_1[] = { 0,3,1, 1,2,3, }; #else static const u8 conn4_1[] = { 0,3,1,1, 1,2,3,1, }; #endif setup_row_indirect_group(conn4_1, sizeof(conn4_1)/sizeof(conn4_1[0])); setup_temp_row(0,2); val = pci_read_config32(NODE_HT(7), 0x6c); byte = (val>>2) & 0x3; // get default link on 7 to 0 setup_row_local(7,2); setup_remote_row_direct(2, 0, byte); setup_remote_node(2); /* Setup the regs on the remote node */ rename_temp_node(2); /* Rename Node 7 to Node 2 */ enable_routing(2); /* Enable routing on Node 2 */ setup_temp_row(0,1); setup_temp_row(1,3); val = pci_read_config32(NODE_HT(7), 0x6c); byte = (val>>2) & 0x3; // get default link on 7 to 0 setup_row_local(7,3); setup_remote_row_direct(3, 1, byte); setup_remote_node(3); /* Setup the regs on the remote node */ rename_temp_node(3); enable_routing(3); /* enable routing on node 3 (temp.) */ /* We need to init link between 2, and 3 direct link */ val = get_row(2,2); byte = ((val>>16) & 0xfe) - link_connection(2,0); byte = get_linkn_last_count(byte); #if CONFIG_MAX_CPUS>4 // We need to find out which link it so CPU3 // methods is try to access another 7 actully it is cpu4 if((byte>>2)==2) { // one to CPU3, one to cpu0, one to CPU4 setup_temp_row(0,2); setup_row_direct(2, 4, byte); setup_temp_row(2, 4); if (check_connection(7)) { // so the link is to CPU4 //We need to re compute it val = get_row(2,2); byte = (val>>16) & 0xfe; byte = get_linkn_first(byte); } } #endif setup_row_direct(2,3, byte & 0x3); val = get_row(3,3); byte = ((val>>16) & 0xfe) - link_connection(3,1); byte = get_linkn_last_count(byte); #if CONFIG_MAX_CPUS>4 // We need to find out which link it so CPU2 // methods is try to access another 7 actully it is cpu5 if((byte>>2)==2) { // one to CPU2, one to cpu1, one to CPU5 setup_temp_row(0,1); setup_temp_row(1,3); setup_row_direct(3, 5, byte); setup_temp_row(3, 5); if (check_connection(7)) { // so the link is to CPU5 //We need to re compute it val = get_row(3, 3); byte = (val>>16) & 0xfe; byte = get_linkn_first(byte); } } #endif setup_row_direct(3,2, byte & 0x3); /* Set indirect connection to 0, and 1 for indirect we will use clockwise routing */ #if !CROSS_BAR_47_56 static const u8 conn4_2[] = { 2,1,0, 3,0,2, }; #else static const u8 conn4_2[] = { 2,1,0,1, 3,0,2,1, }; #endif setup_row_indirect_group(conn4_2, sizeof(conn4_2)/sizeof(conn4_2[0])); // We need to do sth to reverse work for setup_temp_row (0,1) (1,3) #if 0 // it will be done by clear_dead_links clear_temp_row(0); clear_temp_row(1); #endif /* optimize physical connections - by LYH */ static const u8 opt_conn4[] = { 0,2, 1,3, 2,3, }; result.needs_reset = optimize_connection_group(opt_conn4, sizeof(opt_conn4)/sizeof(opt_conn4[0])); #endif /* CONFIG_MAX_CPUS > 2 */ #if CONFIG_MAX_CPUS > 4 result.nodes=6; /* Setup and check temporary connection from Node 0 to Node 4 via 2 */ val = get_row(2,2); byte = ((val>>16) & 0xfe) - link_connection(2,3) - link_connection(2,0); byte = get_linkn_last_count(byte); //find out linkn if((byte>>2)==0) { // We should have two coherent for 4p and above result.nodes = 4; return result; } byte &= 3; // bit [3,2] is count-1 setup_row_direct(2, 4, byte); /* Setup and check temporary connection from Node 0 to Node 4 through 2*/ for(byte=0; byte<4; byte+=2) { setup_temp_row(byte,byte+2); /* temp. link between nodes 0 and 2 */ } if (!check_connection(7) ) { print_spew("No connection to Node 4.\r\n"); result.nodes = 4; return result; } /* Setup and check temporary connection from Node 0 to Node 5 through 1, 3*/ val = get_row(3,3); byte = ((val>>16) & 0xfe) - link_connection(3,2) - link_connection(3,1); byte = get_linkn_last_count(byte); //find out linkn if((byte>>2)==0) { // We should have two coherent for 4p and above result.nodes = 4; return result; } byte &= 3; // bit [3,2] is count-1 setup_row_direct(3, 5, byte); setup_temp_row(0,1); /* temp. link between nodes 0 and 1 */ for(byte=0; byte<4; byte+=2) { setup_temp_row(byte+1,byte+3); /* temp. link between nodes 1 and 3 */ } if (!check_connection(7)) { print_spew("No connection to Node 5.\r\n"); result.nodes = 4; return result; } /* We found 6 nodes so far. Now setup all nodes for 6p */ static const u8 conn6_1[] = { #if !CROSS_BAR_47_56 0, 4, 2, 0, 5, 1, 1, 4, 3, 1, 5, 3, 2, 5, 3, 3, 4, 5, #else 0, 4, 2, 0, 0, 5, 1, 1, 1, 4, 3, 1, 1, 5, 3, 0, 2, 5, 3, 0, 3, 4, 2, 0, #endif }; setup_row_indirect_group(conn6_1, sizeof(conn6_1)/sizeof(conn6_1[0])); for(byte=0; byte<4; byte+=2) { setup_temp_row(byte,byte+2); } val = pci_read_config32(NODE_HT(7), 0x6c); byte = (val>>2) & 0x3; // get default link on 7 to 0 setup_row_local(7,4); setup_remote_row_direct(4, 2, byte); setup_remote_node(4); /* Setup the regs on the remote node */ rename_temp_node(4); enable_routing(4); setup_temp_row(0,1); for(byte=0; byte<4; byte+=2) { setup_temp_row(byte+1,byte+3); } val = pci_read_config32(NODE_HT(7), 0x6c); byte = (val>>2) & 0x3; // get default link on 7 to 0 setup_row_local(7,5); setup_remote_row_direct(5, 3, byte); setup_remote_node(5); /* Setup the regs on the remote node */ rename_temp_node(5); enable_routing(5); /* enable routing on node 5 (temp.) */ #if !CROSS_BAR_47_56 /* We need to init link between 4, and 5 direct link */ val = get_row(4,4); byte = ((val>>16) & 0xfe) - link_connection(4,2); byte = get_linkn_last_count(byte); #if CONFIG_MAX_CPUS>4 // We need to find out which link it so CPU5 // methods is try to access another 7 actully it is cpu6 if((byte>>2)==2) { // one to CPU5, one to cpu2, one to CPU6 setup_temp_row(0,2); setup_temp_row(2,4); setup_row_direct(4, 6, byte); setup_temp_row(4, 6); if (check_connection(7)) { // so the link is to CPU4 //We need to re compute it val = get_row(4,4); byte = (val>>16) & 0xfe; byte = get_linkn_first(byte); } } #endif setup_row_direct(4,5, byte & 0x3); val = get_row(5,5); byte = ((val>>16) & 0xfe) - link_connection(5,3); byte = get_linkn_last_count(byte); #if CONFIG_MAX_CPUS>4 // We need to find out which link it so CPU4 // methods is try to access another 7 actully it is cpu7 if((byte>>2)==2) { // one to CPU4, one to cpu3, one to CPU7 setup_temp_row(0,1); setup_temp_row(1,3); setup_temp_row(3,7); setup_row_direct(5, 7, byte); setup_temp_row(5, 7); if (check_connection(7)) { // so the link is to CPU5 //We need to re compute it val = get_row(5, 5); byte = (val>>16) & 0xfe; byte = get_linkn_first(byte); } } #endif setup_row_direct(5,4, byte & 0x3); #endif // !CROSS_BAR_47_56 /* Set indirect connection to 0, to 3 for indirect we will use clockwise routing */ static const u8 conn6_2[] = { #if !CROSS_BAR_47_56 4, 0, 2, 4, 1, 2, 4, 3, 2, 5, 0, 4, 5, 2, 4, 5, 1, 3, #else 4, 0, 2, 0, 4, 1, 2, 0, 4, 3, 2, 0, 4, 5, 2, 0, 5, 0, 3, 0, 5, 2, 3, 0, 5, 1, 3, 0, 5, 4, 3, 0, #endif }; setup_row_indirect_group(conn6_2, sizeof(conn6_2)/sizeof(conn6_2[0])); #if 0 // We need to do sth about reverse about setup_temp_row (0,1), (2,4), (1, 3), (3,5) // It will be done by clear_dead_links for(byte=0; byte<4; byte++) { clear_temp_row(byte); } #endif /* optimize physical connections - by LYH */ static const uint8_t opt_conn6[] ={ 2, 4, 3, 5, #if !CROSS_BAR_47_56 4, 5, #endif }; result.needs_reset = optimize_connection_group(opt_conn6, sizeof(opt_conn6)/sizeof(opt_conn6[0])); #endif /* CONFIG_MAX_CPUS > 4 */ #if CONFIG_MAX_CPUS >6 result.nodes=8; /* Setup and check temporary connection from Node 0 to Node 6 via 2 and 4 to 7 */ val = get_row(4,4); #if !CROSS_BAR_47_56 byte = ((val>>16) & 0xfe) - link_connection(4,5) - link_connection(4,2); #else byte = ((val>>16) & 0xfe) - link_connection(4,2); #endif byte = get_linkn_last_count(byte); // Max link to 6 if((byte>>2)==0) { // We should have two coherent for 8p and above result.nodes = 6; return result; } byte &= 3; // bit [3,2] is count-1 setup_row_direct(4, 6, byte); /* Setup and check temporary connection from Node 0 to Node 6 through 2, and 4*/ for(byte=0; byte<6; byte+=2) { setup_temp_row(byte,byte+2); /* temp. link between nodes 0 and 2 */ } if (!check_connection(7) ) { print_spew("No connection to Node 6.\r\n"); result.nodes = 6; return result; } #if !CROSS_BAR_47_56 /* Setup and check temporary connection from Node 0 to Node 5 through 1, 3, 5*/ val = get_row(5,5); byte = ((val>>16) & 0xfe) - link_connection(5,4) - link_connection(5,3); byte = get_linkn_first(byte); setup_row_direct(5, 7, byte); setup_temp_row(0,1); /* temp. link between nodes 0 and 1 */ for(byte=0; byte<6; byte+=2) { setup_temp_row(byte+1,byte+3); /* temp. link between nodes 1 and 3 */ } #else val = get_row(4,4); byte = ((val>>16) & 0xfe) - link_connection(4,2) ; byte = get_linkn_first(byte); // min link to 7 setup_row_direct(4, 7, byte); /* Setup and check temporary connection from Node 0 to Node 7 through 2, and 4*/ for(byte=0; byte<4; byte+=2) { setup_temp_row(byte,byte+2); /* temp. link between nodes 0 and 2 */ } setup_temp_row(4, 7); #endif if (!check_connection(7)) { print_spew("No connection to Node 7.\r\n"); result.nodes = 6; return result; } /* We found 8 nodes so far. Now setup all nodes for 8p */ static const u8 conn8_1[] = { #if !CROSS_BAR_47_56 0, 6, 2, // 0, 7, 1, 1, 6, 3, // 1, 7, 3, 2, 6, 4, // 2, 7, 3, 3, 6, 5, // 3, 7, 5, // 4, 7, 5, #else 0, 6, 2, 0, // 0, 7, 2, 0, 1, 6, 3, 0, // 1, 7, 3, 0, 2, 6, 4, 0, // 2, 7, 4, 0, 3, 6, 5, 0, // 3, 7, 5, 0, #endif }; setup_row_indirect_group(conn8_1,sizeof(conn8_1)/sizeof(conn8_1[0])); for(byte=0; byte<6; byte+=2) { setup_temp_row(byte,byte+2); } val = pci_read_config32(NODE_HT(7), 0x6c); byte = (val>>2) & 0x3; // get default link on 7 to 0 setup_row_local(7,6); setup_remote_row_direct(6, 4, byte); setup_remote_node(6); /* Setup the regs on the remote node */ rename_temp_node(6); enable_routing(6); #if !CROSS_BAR_47_56 setup_temp_row(0,1); for(byte=0; byte<6; byte+=2) { setup_temp_row(byte+1,byte+3); } val = pci_read_config32(NODE_HT(7), 0x6c); byte = (val>>2) & 0x3; // get default link on 7 to 0 setup_row_local(7,7); setup_remote_row_direct(7, 5, byte); #else for(byte=0; byte<4; byte+=2) { setup_temp_row(byte,byte+2); } setup_temp_row(4,7); val = pci_read_config32(NODE_HT(7), 0x6c); byte = (val>>2) & 0x3; // get default link on 7 to 0 setup_row_local(7,7); setup_remote_row_direct(7, 4, byte); // till now 4-7, 7-4 done. #endif setup_remote_node(7); /* Setup the regs on the remote node */ // rename_temp_node(7); enable_routing(7); /* enable routing on node 5 (temp.) */ #if CROSS_BAR_47_56 //here init 5, 6 and 5, 7 /* Setup and check temporary connection from Node 0 to Node 5 through 1, 3, 5*/ val = get_row(5,5); byte = ((val>>16) & 0xfe) - link_connection(5,3); byte = get_linkn_last(byte); setup_row_direct(5, 7, byte); setup_temp_row(0,1); /* temp. link between nodes 0 and 1 */ for(byte=0; byte<6; byte+=2) { setup_temp_row(byte+1,byte+3); /* temp. link between nodes 1 and 3 */ } if (!check_connection(7)) { // We need to recompute link to 7 val = get_row(5,5); byte = ((val>>16) & 0xfe) - link_connection(5,3); byte = get_linkn_first(byte); byte &= 3; // bit [3,2] is count-1 setup_row_direct(5, 7, byte); #if 0 setup_temp_row(0,1); /* temp. link between nodes 0 and 1 */ for(byte=0; byte<6; byte+=2) { setup_temp_row(byte+1,byte+3); /* temp. link between nodes 1 and 3 */ } #else setup_temp_row(5,7); #endif check_connection(7); } val = pci_read_config32(NODE_HT(7), 0x6c); byte = (val>>2) & 0x3; // get default link on 7 to 0 // setup_row_local(7,7); setup_remote_row_direct(7, 5, byte); //Till now 57, 75 done //init 5,6 val = get_row(5,5); byte = ((val>>16) & 0xfe) - link_connection(5,3) - link_connection(5,7); byte = get_linkn_first(byte); setup_row_direct(5, 6, byte); val = get_row(6,6); byte = ((val>>16) & 0xfe) - link_connection(6,4); byte = get_linkn_last(byte); setup_row_direct(6, 7, byte); for(byte=0; byte<6; byte+=2) { setup_temp_row(byte,byte+2); /* temp. link between nodes 0 and 2 */ } setup_temp_row(6,7); if (!check_connection(7)) { // We need to recompute link to 7 val = get_row(6,6); byte = ((val>>16) & 0xfe) - link_connection(6,4); byte = get_linkn_first(byte); setup_row_direct(6, 7, byte); #if 0 for(byte=0; byte<6; byte+=2) { setup_temp_row(byte,byte+2); /* temp. link between nodes 0 and 2 */ } #endif setup_temp_row(6,7); check_connection(7); } val = pci_read_config32(NODE_HT(7), 0x6c); byte = (val>>2) & 0x3; // get default link on 7 to 0 // setup_row_local(7,7); setup_remote_row_direct(7, 6, byte); //Till now 67, 76 done //init 6,5 val = get_row(6,6); byte = ((val>>16) & 0xfe) - link_connection(6,4) - link_connection(6,7); byte = get_linkn_first(byte); setup_row_direct(6, 5, byte); #endif #if !CROSS_BAR_47_56 /* We need to init link between 6, and 7 direct link */ val = get_row(6,6); byte = ((val>>16) & 0xfe) - link_connection(6,4); byte = get_linkn_first(byte); setup_row_direct(6,7, byte & 0x3); val = get_row(7,7); byte = ((val>>16) & 0xfe) - link_connection(7,5); byte = get_linkn_first(byte); setup_row_direct(7,6, byte & 0x3); #endif /* Set indirect connection to 0, to 3 for indirect we will use clockwise routing */ static const u8 conn8_2[] = { #if !CROSS_BAR_47_56 0, 7, 1, // restore it 1, 7, 3, 2, 7, 3, 3, 7, 5, 4, 7, 5, 6, 0, 4, 6, 1, 4, 6, 2, 4, 6, 3, 4, 6, 5, 4, 7, 0, 6, 7, 1, 5, 7, 2, 6, 7, 3, 5, 7, 4, 6, #else 0, 7, 2, 0, // restore it 1, 7, 3, 0, 2, 7, 4, 0, 3, 7, 5, 0, 6, 0, 4, 0, 6, 1, 5, 0, 6, 2, 4, 0, 6, 3, 5, 0, 7, 0, 4, 0, 7, 1, 5, 0, 7, 2, 4, 0, 7, 3, 5, 0, 4, 5, 7, 0, 5, 4, 6, 0, #endif }; setup_row_indirect_group(conn8_2, sizeof(conn8_2)/sizeof(conn8_2[0])); static const uint8_t opt_conn8[] ={ 4, 6, #if CROSS_BAR_47_56 4, 7, 5, 6, #endif 5, 7, 6, 7, }; /* optimize physical connections - by LYH */ result.needs_reset = optimize_connection_group(opt_conn8, sizeof(opt_conn6)/sizeof(opt_conn8[0])); #endif /* CONFIG_MAX_CPUS > 6 */ print_debug_hex8(result.nodes); print_debug(" nodes initialized.\r\n"); return result; } #endif static unsigned verify_mp_capabilities(unsigned nodes) { unsigned node, row, mask; bool mp_cap=TRUE; if (nodes > 2) { mask=0x06; /* BigMPCap */ } else if (nodes == 2) { mask=0x02; /* MPCap */ } else { mask=0x00; /* Non SMP */ } for (node=0; node6 if(nodes==8) return;// don't touch (7,7) #endif last_row = nodes; if (nodes == 1) { last_row = 0; } for(node = 7; node >= 0; node--) { for(row = 7; row >= last_row; row--) { fill_row(node, row, DEFAULT); } } } static void coherent_ht_finalize(unsigned nodes) { unsigned node; bool rev_a0; /* set up cpu count and node count and enable Limit * Config Space Range for all available CPUs. * Also clear non coherent hypertransport bus range * registers on Hammer A0 revision. */ print_spew("coherent_ht_finalize\r\n"); rev_a0 = is_cpu_rev_a0(); for (node = 0; node < nodes; node++) { device_t dev; uint32_t val; dev = NODE_HT(node); /* Set the Total CPU and Node count in the system */ val = pci_read_config32(dev, 0x60); val &= (~0x000F0070); val |= ((nodes-1)<<16)|((nodes-1)<<4); pci_write_config32(dev, 0x60, val); /* Only respond to real cpu pci configuration cycles * and optimize the HT settings */ val=pci_read_config32(dev, 0x68); val &= ~((HTTC_BUF_REL_PRI_MASK << HTTC_BUF_REL_PRI_SHIFT) | (HTTC_MED_PRI_BYP_CNT_MASK << HTTC_MED_PRI_BYP_CNT_SHIFT) | (HTTC_HI_PRI_BYP_CNT_MASK << HTTC_HI_PRI_BYP_CNT_SHIFT)); val |= HTTC_LIMIT_CLDT_CFG | (HTTC_BUF_REL_PRI_8 << HTTC_BUF_REL_PRI_SHIFT) | HTTC_RSP_PASS_PW | (3 << HTTC_MED_PRI_BYP_CNT_SHIFT) | (3 << HTTC_HI_PRI_BYP_CNT_SHIFT); pci_write_config32(dev, 0x68, val); if (rev_a0) { print_spew("shit it is an old cup\n"); pci_write_config32(dev, 0x94, 0); pci_write_config32(dev, 0xb4, 0); pci_write_config32(dev, 0xd4, 0); } } print_spew("done\r\n"); } static int apply_cpu_errata_fixes(unsigned nodes, int needs_reset) { unsigned node; for(node = 0; node < nodes; node++) { device_t dev; uint32_t cmd; dev = NODE_MC(node); if (is_cpu_pre_c0()) { /* Errata 66 * Limit the number of downstream posted requests to 1 */ cmd = pci_read_config32(dev, 0x70); if ((cmd & (3 << 0)) != 2) { cmd &= ~(3<<0); cmd |= (2<<0); pci_write_config32(dev, 0x70, cmd ); needs_reset = 1; } cmd = pci_read_config32(dev, 0x7c); if ((cmd & (3 << 4)) != 0) { cmd &= ~(3<<4); cmd |= (0<<4); pci_write_config32(dev, 0x7c, cmd ); needs_reset = 1; } /* Clock Power/Timing Low */ cmd = pci_read_config32(dev, 0xd4); if (cmd != 0x000D0001) { cmd = 0x000D0001; pci_write_config32(dev, 0xd4, cmd); needs_reset = 1; /* Needed? */ } } else { uint32_t cmd_ref; /* Errata 98 * Set Clk Ramp Hystersis to 7 * Clock Power/Timing Low */ cmd_ref = 0x04e20707; /* Registered */ cmd = pci_read_config32(dev, 0xd4); if(cmd != cmd_ref) { pci_write_config32(dev, 0xd4, cmd_ref ); needs_reset = 1; /* Needed? */ } } } return needs_reset; } static int optimize_link_read_pointers(unsigned nodes, int needs_reset) { unsigned node; for(node = 0; node < nodes; node = node + 1) { device_t f0_dev, f3_dev; uint32_t cmd_ref, cmd; int link; f0_dev = NODE_HT(node); f3_dev = NODE_MC(node); cmd_ref = cmd = pci_read_config32(f3_dev, 0xdc); for(link = 0; link < 3; link = link + 1) { uint32_t link_type; unsigned reg; /* This works on an Athlon64 because unimplemented links return 0 */ reg = 0x98 + (link * 0x20); link_type = pci_read_config32(f0_dev, reg); if (link_type & LinkConnected) { cmd &= 0xff << (link *8); /* FIXME this assumes the device on the other * side is an AMD device */ cmd |= 0x25 << (link *8); } } if (cmd != cmd_ref) { pci_write_config32(f3_dev, 0xdc, cmd); needs_reset = 1; } } return needs_reset; } static int setup_coherent_ht_domain(void) { struct setup_smp_result result; result.nodes = 1; result.needs_reset = 0; enable_bsp_routing(); #if CONFIG_MAX_CPUS > 1 result = setup_smp(); #endif result.nodes = verify_mp_capabilities(result.nodes); clear_dead_routes(result.nodes); if (result.nodes == 1) { setup_uniprocessor(); } coherent_ht_finalize(result.nodes); result.needs_reset = apply_cpu_errata_fixes(result.nodes, result.needs_reset); result.needs_reset = optimize_link_read_pointers(result.nodes, result.needs_reset); return result.needs_reset; }