X-Git-Url: http://wien.tomnetworks.com/gitweb/?a=blobdiff_plain;f=src%2Fnorthbridge%2Famd%2Famdk8%2Fcoherent_ht.c;h=e056f53da74e0200f697a6266fac13307f383d35;hb=1bc5654957dee91bbc20bcbd9484fce09db77a4a;hp=516f0fc6ae283b12e166fe4e2212626c4866753f;hpb=5fb929e6e399ecf41aec9c6053a0340671534a63;p=coreboot.git diff --git a/src/northbridge/amd/amdk8/coherent_ht.c b/src/northbridge/amd/amdk8/coherent_ht.c index 516f0fc6a..e056f53da 100644 --- a/src/northbridge/amd/amdk8/coherent_ht.c +++ b/src/northbridge/amd/amdk8/coherent_ht.c @@ -1,336 +1,11 @@ -#if 0 -static void setup_coherent_ht_domain(void) -{ - static const unsigned int register_values[] = { - /* Routing Table Node i - * F0:0x40 i = 0, - * F0:0x44 i = 1, - * F0:0x48 i = 2, - * F0:0x4c i = 3, - * F0:0x50 i = 4, - * F0:0x54 i = 5, - * F0:0x58 i = 6, - * F0:0x5c i = 7 - * [ 0: 3] Request Route - * [0] Route to this node - * [1] Route to Link 0 - * [2] Route to Link 1 - * [3] Route to Link 2 - * [11: 8] Response Route - * [0] Route to this node - * [1] Route to Link 0 - * [2] Route to Link 1 - * [3] Route to Link 2 - * [19:16] Broadcast route - * [0] Route to this node - * [1] Route to Link 0 - * [2] Route to Link 1 - * [3] Route to Link 2 - */ - PCI_ADDR(0, 0x18, 0, 0x40), 0xfff0f0f0, 0x00010101, - PCI_ADDR(0, 0x18, 0, 0x44), 0xfff0f0f0, 0x00010101, - PCI_ADDR(0, 0x18, 0, 0x48), 0xfff0f0f0, 0x00010101, - PCI_ADDR(0, 0x18, 0, 0x4c), 0xfff0f0f0, 0x00010101, - PCI_ADDR(0, 0x18, 0, 0x50), 0xfff0f0f0, 0x00010101, - PCI_ADDR(0, 0x18, 0, 0x54), 0xfff0f0f0, 0x00010101, - PCI_ADDR(0, 0x18, 0, 0x58), 0xfff0f0f0, 0x00010101, - PCI_ADDR(0, 0x18, 0, 0x5c), 0xfff0f0f0, 0x00010101, - - /* Hypetransport Transaction Control Register - * F0:0x68 - * [ 0: 0] Disable read byte probe - * 0 = Probes issues - * 1 = Probes not issued - * [ 1: 1] Disable Read Doubleword probe - * 0 = Probes issued - * 1 = Probes not issued - * [ 2: 2] Disable write byte probes - * 0 = Probes issued - * 1 = Probes not issued - * [ 3: 3] Disable Write Doubleword Probes - * 0 = Probes issued - * 1 = Probes not issued. - * [ 4: 4] Disable Memroy Controller Target Start - * 0 = TgtStart packets are generated - * 1 = TgtStart packets are not generated. - * [ 5: 5] CPU1 Enable - * 0 = Second CPU disabled or not present - * 1 = Second CPU enabled. - * [ 6: 6] CPU Request PassPW - * 0 = CPU requests do not pass posted writes - * 1 = CPU requests pass posted writes. - * [ 7: 7] CPU read Respons PassPW - * 0 = CPU Responses do not pass posted writes - * 1 = CPU responses pass posted writes. - * [ 8: 8] Disable Probe Memory Cancel - * 0 = Probes may generate MemCancels - * 1 = Probes may not generate MemCancels - * [ 9: 9] Disable Remote Probe Memory Cancel. - * 0 = Probes hitting dirty blocks generate memory cancel packets - * 1 = Only probed caches on the same node as the memory controller - * generate cancel packets. - * [10:10] Disable Fill Probe - * 0 = Probes issued for cache fills - * 1 = Probes not issued for cache fills. - * [11:11] Response PassPw - * 0 = Downstream response PassPW based on original request - * 1 = Downstream response PassPW set to 1 - * [12:12] Change ISOC to Ordered - * 0 = Bit 1 of coherent HT RdSz/WrSz command used for iosynchronous prioritization - * 1 = Bit 1 of coherent HT RdSz/WrSz command used for ordering. - * [14:13] Buffer Release Priority select - * 00 = 64 - * 01 = 16 - * 10 = 8 - * 11 = 2 - * [15:15] Limit Coherent HT Configuration Space Range - * 0 = No coherent HT configuration space restrictions - * 1 = Limit coherent HT configuration space based on node count - * [16:16] Local Interrupt Conversion Enable. - * 0 = ExtInt/NMI interrups unaffected. - * 1 = ExtInt/NMI broadcat interrupts converted to LINT0/1 - * [17:17] APIC Extended Broadcast Enable. - * 0 = APIC broadcast is 0F - * 1 = APIC broadcast is FF - * [18:18] APIC Extended ID Enable - * 0 = APIC ID is 4 bits. - * 1 = APIC ID is 8 bits. - * [19:19] APIC Extended Spurious Vector Enable - * 0 = Lower 4 bits of spurious vector are read-only 1111 - * 1 = Lower 4 bits of spurious vecotr are writeable. - * [20:20] Sequence ID Source Node Enable - * 0 = Normal operation - * 1 = Keep SeqID on routed packets for debugging. - * [22:21] Downstream non-posted request limit - * 00 = No limit - * 01 = Limited to 1 - * 10 = Limited to 4 - * 11 = Limited to 8 - * [23:23] RESERVED - * [25:24] Medium-Priority Bypass Count - * - Maximum # of times a medium priority access can pass a low - * priority access before Medium-Priority mode is disabled for one access. - * [27:26] High-Priority Bypass Count - * - Maximum # of times a high prioirty access can pass a medium or low - * priority access before High-prioirty mode is disabled for one access. - * [28:28] Enable High Priority CPU Reads - * 0 = Cpu reads are medium prioirty - * 1 = Cpu reads are high prioirty - * [29:29] Disable Low Priority Writes - * 0 = Non-isochronous writes are low priority - * 1 = Non-isochronous writes are medium prioirty - * [30:30] Disable High Priority Isochronous writes - * 0 = Isochronous writes are high priority - * 1 = Isochronous writes are medium priority - * [31:31] Disable Medium Priority Isochronous writes - * 0 = Isochronous writes are medium are high - * 1 = With bit 30 set makes Isochrouns writes low priority. - */ - PCI_ADDR(0, 0x18, 0, 0x68), 0x00800000, 0x0f00840f, - /* HT Initialization Control Register - * F0:0x6C ok... - * [ 0: 0] Routing Table Disable - * 0 = Packets are routed according to routing tables - * 1 = Packets are routed according to the default link field - * [ 1: 1] Request Disable (BSP should clear this) - * 0 = Request packets may be generated - * 1 = Request packets may not be generated. - * [ 3: 2] Default Link (Read-only) - * 00 = LDT0 - * 01 = LDT1 - * 10 = LDT2 - * 11 = CPU on same node - * [ 4: 4] Cold Reset - * - Scratch bit cleared by a cold reset - * [ 5: 5] BIOS Reset Detect - * - Scratch bit cleared by a cold reset - * [ 6: 6] INIT Detect - * - Scratch bit cleared by a warm or cold reset not by an INIT - * - */ - PCI_ADDR(0, 0x18, 0, 0x6C), 0xffffff8c, 0x00000000 | (1 << 6) |(1 << 5)| (1 << 4), - /* LDTi Capabilities Registers - * F0:0x80 i = 0, - * F0:0xA0 i = 1, - * F0:0xC0 i = 2, - */ - /* LDTi Link Control Registrs - * F0:0x84 i = 0, - * F0:0xA4 i = 1, - * F0:0xC4 i = 2, - * [ 1: 1] CRC Flood Enable - * 0 = Do not generate sync packets on CRC error - * 1 = Generate sync packets on CRC error - * [ 2: 2] CRC Start Test (Read-Only) - * [ 3: 3] CRC Force Frame Error - * 0 = Do not generate bad CRC - * 1 = Generate bad CRC - * [ 4: 4] Link Failure - * 0 = No link failure detected - * 1 = Link failure detected - * [ 5: 5] Initialization Complete - * 0 = Initialization not complete - * 1 = Initialization complete - * [ 6: 6] Receiver off - * 0 = Recevier on - * 1 = Receiver off - * [ 7: 7] Transmitter Off - * 0 = Transmitter on - * 1 = Transmitter off - * [ 9: 8] CRC_Error - * 00 = No error - * [0] = 1 Error on byte lane 0 - * [1] = 1 Error on byte lane 1 - * [12:12] Isochrnous Enable (Read-Only) - * [13:13] HT Stop Tristate Enable - * 0 = Driven during an LDTSTOP_L - * 1 = Tristated during and LDTSTOP_L - * [14:14] Extended CTL Time - * 0 = CTL is asserted for 16 bit times during link initialization - * 1 = CTL is asserted for 50us during link initialization - * [18:16] Max Link Width In (Read-Only?) - * 000 = 8 bit link - * 001 = 16bit link - * [19:19] Doubleword Flow Control in (Read-Only) - * 0 = This link does not support doubleword flow control - * 1 = This link supports doubleword flow control - * [22:20] Max Link Width Out (Read-Only?) - * 000 = 8 bit link - * 001 = 16bit link - * [23:23] Doubleworld Flow Control out (Read-Only) - * 0 = This link does not support doubleword flow control - * 1 = This link supports doubleworkd flow control - * [26:24] Link Width In - * 000 = Use 8 bits - * 001 = Use 16 bits - * 010 = reserved - * 011 = Use 32 bits - * 100 = Use 2 bits - * 101 = Use 4 bits - * 110 = reserved - * 111 = Link physically not connected - * [27:27] Doubleword Flow Control In Enable - * 0 = Doubleword flow control disabled - * 1 = Doubleword flow control enabled (Not currently supported) - * [30:28] Link Width Out - * 000 = Use 8 bits - * 001 = Use 16 bits - * 010 = reserved - * 011 = Use 32 bits - * 100 = Use 2 bits - * 101 = Use 4 bits - * 110 = reserved - * 111 = Link physically not connected - * [31:31] Doubleworld Flow Control Out Enable - * 0 = Doubleworld flow control disabled - * 1 = Doubleword flow control enabled (Not currently supported) - */ - PCI_ADDR(0, 0x18, 0, 0x84), 0x00009c05, 0x11110020, - /* LDTi Frequency/Revision Registers - * F0:0x88 i = 0, - * F0:0xA8 i = 1, - * F0:0xC8 i = 2, - * [ 4: 0] Minor Revision - * Contains the HT Minor revision - * [ 7: 5] Major Revision - * Contains the HT Major revision - * [11: 8] Link Frequency (Takes effect the next time the link is reconnected) - * 0000 = 200Mhz - * 0001 = reserved - * 0010 = 400Mhz - * 0011 = reserved - * 0100 = 600Mhz - * 0101 = 800Mhz - * 0110 = 1000Mhz - * 0111 = reserved - * 1000 = reserved - * 1001 = reserved - * 1010 = reserved - * 1011 = reserved - * 1100 = reserved - * 1101 = reserved - * 1110 = reserved - * 1111 = 100 Mhz - * [15:12] Error (Not currently Implemented) - * [31:16] Indicates the frequency capabilities of the link - * [16] = 1 encoding 0000 of freq supported - * [17] = 1 encoding 0001 of freq supported - * [18] = 1 encoding 0010 of freq supported - * [19] = 1 encoding 0011 of freq supported - * [20] = 1 encoding 0100 of freq supported - * [21] = 1 encoding 0101 of freq supported - * [22] = 1 encoding 0110 of freq supported - * [23] = 1 encoding 0111 of freq supported - * [24] = 1 encoding 1000 of freq supported - * [25] = 1 encoding 1001 of freq supported - * [26] = 1 encoding 1010 of freq supported - * [27] = 1 encoding 1011 of freq supported - * [28] = 1 encoding 1100 of freq supported - * [29] = 1 encoding 1101 of freq supported - * [30] = 1 encoding 1110 of freq supported - * [31] = 1 encoding 1111 of freq supported - */ - PCI_ADDR(0, 0x18, 0, 0x88), 0xfffff0ff, 0x00000200, - /* LDTi Feature Capability - * F0:0x8C i = 0, - * F0:0xAC i = 1, - * F0:0xCC i = 2, - */ - /* LDTi Buffer Count Registers - * F0:0x90 i = 0, - * F0:0xB0 i = 1, - * F0:0xD0 i = 2, - */ - /* LDTi Bus Number Registers - * F0:0x94 i = 0, - * F0:0xB4 i = 1, - * F0:0xD4 i = 2, - * For NonCoherent HT specifies the bus number downstream (behind the host bridge) - * [ 0: 7] Primary Bus Number - * [15: 8] Secondary Bus Number - * [23:15] Subordiante Bus Number - * [31:24] reserved - */ - PCI_ADDR(0, 0x18, 0, 0x94), 0xff000000, 0x00ff0000, - /* LDTi Type Registers - * F0:0x98 i = 0, - * F0:0xB8 i = 1, - * F0:0xD8 i = 2, - */ - }; - int i; - int max; - print_debug("setting up coherent ht domain....\r\n"); - max = sizeof(register_values)/sizeof(register_values[0]); - for(i = 0; i < max; i += 3) { - device_t dev; - unsigned where; - unsigned long reg; -#if 0 - print_debug_hex32(register_values[i]); - print_debug(" <-"); - print_debug_hex32(register_values[i+2]); - print_debug("\r\n"); -#endif - dev = register_values[i] & ~0xff; - where = register_values[i] & 0xff; - reg = pci_read_config32(dev, where); - reg &= register_values[i+1]; - reg |= register_values[i+2]; - pci_write_config32(dev, where, reg); -#if 0 - reg = pci_read_config32(register_values[i]); - reg &= register_values[i+1]; - reg |= register_values[i+2] & ~register_values[i+1]; - pci_write_config32(register_values[i], reg); -#endif - } - print_debug("done.\r\n"); -} -#else /* coherent hypertransport initialization for AMD64 - * written by Stefan Reinauer - * (c) 2003 by SuSE Linux AG + * + * written by Stefan Reinauer + * (c) 2003-2004 by SuSE Linux AG + * + * (c) 2004 Tyan Computer + * 2004.12 yhlu added support to create routing table dynamically. + * it also support 8 ways too. (8 ways ladder or 8 ways crossbar) * * This code is licensed under GPL. */ @@ -340,23 +15,60 @@ static void setup_coherent_ht_domain(void) * * nodes : 1 2 4 6 8 * org. : 1x1 2x1 2x2 2x3 2x4 - * + Ladder: + CPU7-------------CPU6 + | | + | | + | | + | | + | | + | | + CPU5-------------CPU4 + | | + | | + | | + | | + | | + | | + CPU3-------------CPU2 + | | + | | + | | + | | + | | + | | + CPU1-------------CPU0 + CROSS_BAR_47_56: + CPU7-------------CPU6 + | \____ ___/ | + | \ / | + | \/ | + | /\ | + | / \ | + | ____/ \___ | + CPU5 CPU4 + | | + | | + | | + | | + | | + | | + CPU3-------------CPU2 + | | + | | + | | + | | + | | + | | + CPU1-------------CPU0 */ -#if 0 -#include "compat.h" -#endif - #include +#include +#include #include "arch/romcc_io.h" +#include "amdk8.h" - -/* when generating a temporary row configuration we - * don't want broadcast to be enabled for that node. - */ - -#define generate_temp_row(x...) ((generate_row(x)&(~0x0f0000))|0x010000) -#define clear_temp_row(x) fill_row(x,7,DEFAULT) #define enable_bsp_routing() enable_routing(0) #define NODE_HT(x) PCI_DEV(0,24+x,0) @@ -367,10 +79,22 @@ static void setup_coherent_ht_domain(void) typedef uint8_t u8; typedef uint32_t u32; -typedef int8_t bool; -#define TRUE (-1) -#define FALSE (0) +#ifndef CROSS_BAR_47_56 + #define CROSS_BAR_47_56 0 +#endif + +#ifndef TRY_HIGH_FIRST + #define TRY_HIGH_FIRST 0 +#endif + + +static inline void print_linkn (const char *strval, uint8_t byteval) +{ +#if 1 + print_debug(strval); print_debug_hex8(byteval); print_debug("\r\n"); +#endif +} static void disable_probes(void) { @@ -399,14 +123,31 @@ static void disable_probes(void) u32 val; - print_debug("Disabling read/write/fill probes for UP... "); + print_spew("Disabling read/write/fill probes for UP... "); val=pci_read_config32(NODE_HT(0), 0x68); val |= (1<<10)|(1<<9)|(1<<8)|(1<<4)|(1<<3)|(1<<2)|(1<<1)|(1 << 0); pci_write_config32(NODE_HT(0), 0x68, val); - print_debug("done.\r\n"); + print_spew("done.\r\n"); + +} + +#ifndef ENABLE_APIC_EXT_ID +#define ENABLE_APIC_EXT_ID 0 +#endif + +static void enable_apic_ext_id(u8 node) +{ +#if ENABLE_APIC_EXT_ID==1 +#warning "FIXME Is the right place to enable apic ext id here?" + + u32 val; + val = pci_read_config32(NODE_HT(node), 0x68); + val |= HTTC_APIC_EXT_ID | HTTC_APIC_EXT_BRD_CST ; + pci_write_config32(NODE_HT(node), 0x68, val); +#endif } static void enable_routing(u8 node) @@ -435,170 +176,263 @@ static void enable_routing(u8 node) * */ - /* Enable routing table for BSP */ - print_debug("Enabling routing table for node "); - print_debug_hex32(node); - + /* Enable routing table */ + print_spew("Enabling routing table for node "); + print_spew_hex8(node); + val=pci_read_config32(NODE_HT(node), 0x6c); - val |= (1 << 6) | (1 << 5) | (1 << 4); -#if 0 val &= ~((1<<1)|(1<<0)); -#else - /* Don't enable requests here as the indicated processor starts booting */ - val &= ~(1<<0); -#endif pci_write_config32(NODE_HT(node), 0x6c, val); - print_debug(" done.\r\n"); + print_spew(" done.\r\n"); } -#if MAX_CPUS > 1 +static void fill_row(u8 node, u8 row, u32 value) +{ + pci_write_config32(NODE_HT(node), 0x40+(row<<2), value); +} + +#if CONFIG_MAX_CPUS > 1 +static u8 link_to_register(int ldt) +{ + /* + * [ 0: 3] Request Route + * [0] Route to this node + * [1] Route to Link 0 + * [2] Route to Link 1 + * [3] Route to Link 2 + */ + + if (ldt&0x08) return 0x40; + if (ldt&0x04) return 0x20; + if (ldt&0x02) return 0x00; + + /* we should never get here */ + print_spew("Unknown Link\n"); + return 0; +} + +static u32 get_row(u8 node, u8 row) +{ + return pci_read_config32(NODE_HT(node), 0x40+(row<<2)); +} + +static int link_connection(u8 src, u8 dest) +{ + return get_row(src, dest) & 0x0f; +} static void rename_temp_node(u8 node) { - u32 val; + uint32_t val; - print_debug("Renaming current temp node to "); - print_debug_hex32(node); + print_spew("Renaming current temporary node to "); + print_spew_hex8(node); val=pci_read_config32(NODE_HT(7), 0x60); val &= (~7); /* clear low bits. */ val |= node; /* new node */ pci_write_config32(NODE_HT(7), 0x60, val); - print_debug(" done.\r\n"); - - + print_spew(" done.\r\n"); } -static bool check_connection(u8 src, u8 dest, u8 link) +static int check_connection(u8 dest) { - /* this function does 2 things: - * 1) detect whether the coherent HT link is connected. - * 2) verify that the coherent hypertransport link - * is established and actually working by reading the - * remote node's vendor/device id - */ - -#define UP 0x00 -#define ACROSS 0x20 -#define DOWN 0x40 - + /* See if we have a valid connection to dest */ u32 val; - - /* 1) */ - val=pci_read_config32(NODE_HT(src), 0x98+link); - if ( (val&0x17) != 0x03) - return 0; - /* 2) */ - val=pci_read_config32(NODE_HT(dest),0); + /* Verify that the coherent hypertransport link is + * established and actually working by reading the + * remode node's vendor/device id + */ + val = pci_read_config32(NODE_HT(dest),0); if(val != 0x11001022) return 0; return 1; } -static unsigned int generate_row(u8 node, u8 row, u8 maxnodes) +static unsigned read_freq_cap(device_t dev, unsigned pos) { - /* Routing Table Node i - * - * F0: 0x40, 0x44, 0x48, 0x4c, 0x50, 0x54, 0x58, 0x5c - * i: 0, 1, 2, 3, 4, 5, 6, 7 - * - * [ 0: 3] Request Route - * [0] Route to this node - * [1] Route to Link 0 - * [2] Route to Link 1 - * [3] Route to Link 2 - * [11: 8] Response Route - * [0] Route to this node - * [1] Route to Link 0 - * [2] Route to Link 1 - * [3] Route to Link 2 - * [19:16] Broadcast route - * [0] Route to this node - * [1] Route to Link 0 - * [2] Route to Link 1 - * [3] Route to Link 2 - */ + /* Handle bugs in valid hypertransport frequency reporting */ + unsigned freq_cap; + uint32_t id; - u32 ret=DEFAULT; + freq_cap = pci_read_config16(dev, pos); + freq_cap &= ~(1 << HT_FREQ_VENDOR); /* Ignore Vendor HT frequencies */ - static const unsigned int rows_2p[2][2] = { - { 0x00030101, 0x00010404 }, - { 0x00010404, 0x00030101 } - }; + id = pci_read_config32(dev, 0); - static const unsigned int rows_4p[4][4] = { - { 0x00070101, 0x00010404, 0x00050202, 0x00010402 }, - { 0x00010808, 0x000b0101, 0x00010802, 0x00090202 }, - { 0x00090202, 0x00010802, 0x000b0101, 0x00010808 }, - { 0x00010402, 0x00050202, 0x00010404, 0x00070101 } - }; - - if (!(node>=maxnodes || row>=maxnodes)) { - if (maxnodes==2) - ret=rows_2p[node][row]; - if (maxnodes==4) - ret=rows_4p[node][row]; + /* AMD 8131 Errata 48 */ + if (id == (PCI_VENDOR_ID_AMD | (PCI_DEVICE_ID_AMD_8131_PCIX << 16))) { + freq_cap &= ~(1 << HT_FREQ_800Mhz); + } + /* AMD 8151 Errata 23 */ + if (id == (PCI_VENDOR_ID_AMD | (PCI_DEVICE_ID_AMD_8151_SYSCTRL << 16))) { + freq_cap &= ~(1 << HT_FREQ_800Mhz); } + /* AMD K8 Unsupported 1Ghz? */ + if (id == (PCI_VENDOR_ID_AMD | (0x1100 << 16))) { + freq_cap &= ~(1 << HT_FREQ_1000Mhz); + } + return freq_cap; +} -#if 0 - printk_spew("generating entry n=%d, r=%d, max=%d - row=%x\n", - node,row,maxnodes,ret); -#endif +static int optimize_connection(device_t node1, uint8_t link1, device_t node2, uint8_t link2) +{ + static const uint8_t link_width_to_pow2[]= { 3, 4, 0, 5, 1, 2, 0, 0 }; + static const uint8_t pow2_to_link_width[] = { 0x7, 4, 5, 0, 1, 3 }; + uint16_t freq_cap1, freq_cap2, freq_cap, freq_mask; + uint8_t width_cap1, width_cap2, width_cap, width, old_width, ln_width1, ln_width2; + uint8_t freq, old_freq; + int needs_reset; + /* Set link width and frequency */ + + /* Initially assume everything is already optimized and I don't need a reset */ + needs_reset = 0; + + /* Get the frequency capabilities */ + freq_cap1 = read_freq_cap(node1, link1 + PCI_HT_CAP_HOST_FREQ_CAP); + freq_cap2 = read_freq_cap(node2, link2 + PCI_HT_CAP_HOST_FREQ_CAP); + + /* Calculate the highest possible frequency */ + freq = log2(freq_cap1 & freq_cap2); + + /* See if I am changing the link freqency */ + old_freq = pci_read_config8(node1, link1 + PCI_HT_CAP_HOST_FREQ); + needs_reset |= old_freq != freq; + old_freq = pci_read_config8(node2, link2 + PCI_HT_CAP_HOST_FREQ); + needs_reset |= old_freq != freq; + + /* Set the Calulcated link frequency */ + pci_write_config8(node1, link1 + PCI_HT_CAP_HOST_FREQ, freq); + pci_write_config8(node2, link2 + PCI_HT_CAP_HOST_FREQ, freq); + + /* Get the width capabilities */ + width_cap1 = pci_read_config8(node1, link1 + PCI_HT_CAP_HOST_WIDTH); + width_cap2 = pci_read_config8(node2, link2 + PCI_HT_CAP_HOST_WIDTH); + + /* Calculate node1's input width */ + ln_width1 = link_width_to_pow2[width_cap1 & 7]; + ln_width2 = link_width_to_pow2[(width_cap2 >> 4) & 7]; + if (ln_width1 > ln_width2) { + ln_width1 = ln_width2; + } + width = pow2_to_link_width[ln_width1]; + /* Calculate node1's output width */ + ln_width1 = link_width_to_pow2[(width_cap1 >> 4) & 7]; + ln_width2 = link_width_to_pow2[width_cap2 & 7]; + if (ln_width1 > ln_width2) { + ln_width1 = ln_width2; + } + width |= pow2_to_link_width[ln_width1] << 4; - return ret; + /* See if I am changing node1's width */ + old_width = pci_read_config8(node1, link1 + PCI_HT_CAP_HOST_WIDTH + 1); + needs_reset |= old_width != width; + + /* Set node1's widths */ + pci_write_config8(node1, link1 + PCI_HT_CAP_HOST_WIDTH + 1, width); + + /* Calculate node2's width */ + width = ((width & 0x70) >> 4) | ((width & 0x7) << 4); + + /* See if I am changing node2's width */ + old_width = pci_read_config8(node2, link2 + PCI_HT_CAP_HOST_WIDTH + 1); + needs_reset |= old_width != width; + + /* Set node2's widths */ + pci_write_config8(node2, link2 + PCI_HT_CAP_HOST_WIDTH + 1, width); + + return needs_reset; } -static void fill_row(u8 node, u8 row, u32 value) +static void setup_row_local(u8 source, u8 row) /* source will be 7 when it is for temp use*/ { -#if 0 - print_debug("fill_row: pci_write_config32("); - print_debug_hex32(NODE_HT(node)); - print_debug_char(','); - print_debug_hex32(0x40 + (row << 2)); - print_debug_char(','); - print_debug_hex32(value); - print_debug(")\r\n"); -#endif - pci_write_config32(NODE_HT(node), 0x40+(row<<2), value); + unsigned linkn; + uint32_t val; + val = 1; + for(linkn = 0; linkn<3; linkn++) { + unsigned regpos; + uint32_t reg; + regpos = 0x98 + 0x20 * linkn; + reg = pci_read_config32(NODE_HT(source), regpos); + if ((reg & 0x17) != 3) continue; /* it is not conherent or not connected*/ + val |= 1<<(linkn+1); + } + val <<= 16; + val |= 0x0101; + fill_row(source,row, val); } -static void setup_row(u8 source, u8 dest, u8 cpus) +static void setup_row_direct_x(u8 temp, u8 source, u8 dest, u8 linkn) { -#if 0 - printk_spew("setting up link from node %d to %d (%d cpus)\r\n", - source, dest, cpus); + uint32_t val; + uint32_t val_s; + val = 1<<(linkn+1); + val |= 1<<(linkn+1+8); /*for direct connect response route should equal to request table*/ + + if(((source &1)!=(dest &1)) +#if CROSS_BAR_47_56 + && (source<4) && (dest<4) #endif + ){ + val |= (1<<16); + } else { + /*for CROSS_BAR_47_56 47, 74, 56, 65 should be here too*/ + val_s = get_row(temp, source); + val |= ((val_s>>16) - (1<<(linkn+1)))<<16; + } + + fill_row(temp,dest, val ); +} - fill_row(source,dest,generate_row(source,dest,cpus)); +static void setup_row_direct(u8 source, u8 dest, u8 linkn){ + setup_row_direct_x(source, source, dest, linkn); } -static void setup_temp_row(u8 source, u8 dest, u8 cpus) +static void setup_remote_row_direct(u8 source, u8 dest, u8 linkn){ + setup_row_direct_x(7, source, dest, linkn); +} + +static uint8_t get_linkn_first(uint8_t byte) { -#if 0 - printk_spew("setting up temp. link from node %d to %d (%d cpus)\r\n", - source, dest, cpus); -#endif + if(byte & 0x02) { byte = 0; } + else if(byte & 0x04) { byte = 1; } + else if(byte & 0x08) { byte = 2; } + return byte; +} + +static uint8_t get_linkn_last(uint8_t byte) +{ + if(byte & 0x02) { byte &= 0x0f; byte |= 0x00; } + if(byte & 0x04) { byte &= 0x0f; byte |= 0x10; } + if(byte & 0x08) { byte &= 0x0f; byte |= 0x20; } + return byte>>4; +} - fill_row(source,7,generate_temp_row(source,dest,cpus)); +static uint8_t get_linkn_last_count(uint8_t byte) +{ + byte &= 0x3f; + if(byte & 0x02) { byte &= 0xcf; byte |= 0x00; byte+=0x40; } + if(byte & 0x04) { byte &= 0xcf; byte |= 0x10; byte+=0x40; } + if(byte & 0x08) { byte &= 0xcf; byte |= 0x20; byte+=0x40; } + return byte>>4; } -static void setup_node(u8 node, u8 cpus) +static void setup_temp_row(u8 source, u8 dest) { - u8 row; - for(row=0; row 1*/ + + +#if CONFIG_MAX_CPUS > 2 +#if !CROSS_BAR_47_56 +static void setup_row_indirect_x(u8 temp, u8 source, u8 dest, u8 gateway) +#else +static void setup_row_indirect_x(u8 temp, u8 source, u8 dest, u8 gateway, u8 diff) +#endif +{ + /*for indirect connection, we need to compute the val from val_s(source, source), and val_g(source, gateway) */ + uint32_t val_s; + uint32_t val; +#if !CROSS_BAR_47_56 + u8 diff; +#endif + val_s = get_row(temp, source); + val = get_row(temp, gateway); + + val &= 0xffff; + val_s >>= 16; + val_s &= 0xfe; + +#if !CROSS_BAR_47_56 + diff = ((source&1)!=(dest &1)); #endif + + if(diff && (val_s!=(val&0xff)) ) { /* use another connect as response*/ + val_s -= val & 0xff; +#if CONFIG_MAX_CPUS > 4 + uint8_t byte; + /* Some node have two links left + * don't worry we only have (2, (3 as source need to handle + */ + byte = val_s; + byte = get_linkn_last_count(byte); + if((byte>>2)>1) { /* make sure not the corner*/ + if(source>16) - link_connection(temp, gateway))<<16; + } + + fill_row(temp, dest, val); + +} + +#if !CROSS_BAR_47_56 +static void setup_row_indirect(u8 source, u8 dest, u8 gateway) +{ + setup_row_indirect_x(source, source, dest, gateway); +} +#else +static void setup_row_indirect(u8 source, u8 dest, u8 gateway, u8 diff) +{ + setup_row_indirect_x(source, source, dest, gateway, diff); } +#endif +static void setup_row_indirect_group(const u8 *conn, int num) +{ + int i; + +#if !CROSS_BAR_47_56 + for(i=0; i 2 -static void setup_temp_node(u8 node, u8 cpus) + } +} + +#if !CROSS_BAR_47_56 +static void setup_remote_row_indirect(u8 source, u8 dest, u8 gateway) { - u8 row; - for(row=0; row 2*/ + + +static void setup_uniprocessor(void) { - print_debug("Enabling UP settings\r\n"); + print_spew("Enabling UP settings\r\n"); disable_probes(); - return 1; } -#if MAX_CPUS > 1 -static u8 setup_smp(void) +struct setup_smp_result { + int nodes; + int needs_reset; +}; + +#if CONFIG_MAX_CPUS > 2 +static int optimize_connection_group(const u8 *opt_conn, int num) { + int needs_reset = 0; + int i; + for(i=0; i 1 +static struct setup_smp_result setup_smp2(void) { - u8 cpus=2; + struct setup_smp_result result; + u8 byte; + uint32_t val; + result.nodes = 2; + result.needs_reset = 0; - print_debug("Enabling SMP settings\r\n"); + setup_row_local(0, 0); /* it will update the broadcast RT*/ + + val = get_row(0,0); + byte = (val>>16) & 0xfe; + if(byte<0x2) { /* no coherent connection so get out.*/ + result.nodes = 1; + return result; + } - setup_row(0,0,cpus); /* Setup and check a temporary connection to node 1 */ - setup_temp_row(0,1,cpus); +#if TRY_HIGH_FIRST == 1 + byte = get_linkn_last(byte); /* Max Link to node1 */ +#else + byte = get_linkn_first(byte); /*Min Link to node1 --- according to AMD*/ +#endif + print_linkn("(0,1) link=", byte); + setup_row_direct(0,1, byte); + setup_temp_row(0, 1); - if (!check_connection(0, 7, ACROSS)) { // Link: ACROSS - print_debug("No connection to Node 1.\r\n"); - clear_temp_row(0); /* delete temp connection */ - setup_uniprocessor(); /* and get up working */ - return 1; + if (!check_connection(7)) { + print_spew("No connection to Node 1.\r\n"); + result.nodes = 1; + return result; } /* We found 2 nodes so far */ - setup_node(0, cpus); /* Node 1 is there. Setup Node 0 correctly */ - setup_remote_node(1, cpus); /* Setup the routes on the remote node */ - enable_routing(7); /* Enable routing on Node 1 */ - rename_temp_node(1); /* Rename Node 7 to Node 1 */ + val = pci_read_config32(NODE_HT(7), 0x6c); + byte = (val>>2) & 0x3; /*get default link on node7 to node0*/ + print_linkn("(1,0) link=", byte); + setup_row_local(7,1); + setup_remote_row_direct(1, 0, byte); + +#if CONFIG_MAX_CPUS > 4 + val = get_row(7,1); + byte = (val>>16) & 0xfe; + byte = get_linkn_last_count(byte); + if((byte>>2)==3) { /* Oh! we need to treat it as node2. So use another link*/ + val = get_row(0,0); + byte = (val>>16) & 0xfe; +#if TRY_HIGH_FIRST == 1 + byte = get_linkn_first(byte); /* Min link to Node1 */ +#else + byte = get_linkn_last(byte); /* Max link to Node1*/ +#endif + print_linkn("-->(0,1) link=", byte); + setup_row_direct(0,1, byte); + setup_temp_row(0, 1); - clear_temp_row(0); /* delete temporary connection */ + if (!check_connection(7)) { + print_spew("No connection to Node 1.\r\n"); + result.nodes = 1; + return result; + } + + /* We found 2 nodes so far */ + val = pci_read_config32(NODE_HT(7), 0x6c); + byte = (val>>2) & 0x3; /* get default link on node7 to node0*/ + print_linkn("-->(1,0) link=", byte); + setup_row_local(7,1); + setup_remote_row_direct(1, 0, byte); + } +#endif -#if MAX_CPUS > 2 - cpus=4; + setup_remote_node(1); /* Setup the regs on the remote node */ + rename_temp_node(1); /* Rename Node 7 to Node 1 */ + enable_routing(1); /* Enable routing on Node 1 */ +#if 0 + /*don't need and it is done by clear_dead_links */ + clear_temp_row(0); +#endif + result.needs_reset = optimize_connection( + NODE_HT(0), 0x80 + link_to_register(link_connection(0,1)), + NODE_HT(1), 0x80 + link_to_register(link_connection(1,0)) ); + + + return result; +} +#endif /*CONFIG_MAX_CPUS > 1 */ + +#if CONFIG_MAX_CPUS > 2 + +static struct setup_smp_result setup_smp4(int needs_reset) +{ + struct setup_smp_result result; + u8 byte; + uint32_t val; + + result.nodes=4; + result.needs_reset = needs_reset; + /* Setup and check temporary connection from Node 0 to Node 2 */ - setup_temp_row(0,2,cpus); + val = get_row(0,0); + byte = ((val>>16) & 0xfe) - link_connection(0,1); + byte = get_linkn_last_count(byte); - if (!check_connection(0, 7, UP)) { // Link: UP - print_debug("No connection to Node 2.\r\n"); - clear_temp_row(0); /* delete temp connection */ - return 2; + if((byte>>2)==0) { /* We should have two coherent for 4p and above*/ + result.nodes = 2; + return result; } - /* We found 3 nodes so far. Now setup a temporary - * connection from node 0 to node 3 via node 1 - */ + byte &= 3; /* bit [3,2] is count-1*/ + print_linkn("(0,2) link=", byte); + setup_row_direct(0, 2, byte); /*(0,2) direct link done*/ + setup_temp_row(0, 2); - setup_temp_row(0,1,cpus); /* temp. link between nodes 0 and 1 */ - setup_temp_row(1,3,cpus); /* temp. link between nodes 1 and 3 */ + if (!check_connection(7) ) { + print_spew("No connection to Node 2.\r\n"); + result.nodes = 2; + return result; + } - if (!check_connection(1, 7, UP)) { // Link: UP - print_debug("No connection to Node 3.\r\n"); - clear_temp_row(0); /* delete temp connection */ - clear_temp_row(1); /* delete temp connection */ - return 2; + /* We found 3 nodes so far. Now setup a temporary + * connection from node 0 to node 3 via node 1 + */ + setup_temp_row(0,1); /* temp. link between nodes 0 and 1 */ + /* here should setup_row_direct(1,3) at first, before that we should find the link in node 1 to 3*/ + val = get_row(1,1); + byte = ((val>>16) & 0xfe) - link_connection(1,0); + byte = get_linkn_first(byte); + print_linkn("(1,3) link=", byte); + setup_row_direct(1,3,byte); /* (1, 3) direct link done*/ + setup_temp_row(1,3); /* temp. link between nodes 1 and 3 */ + + if (!check_connection(7)) { + print_spew("No connection to Node 3.\r\n"); + result.nodes = 2; + return result; } /* We found 4 nodes so far. Now setup all nodes for 4p */ +#if !CROSS_BAR_47_56 + static const u8 conn4_1[] = { + 0,3,2, + 1,2,3, + }; +#else + static const u8 conn4_1[] = { + 0,3,2,1, + 1,2,3,1, + }; +#endif + + setup_row_indirect_group(conn4_1, sizeof(conn4_1)/sizeof(conn4_1[0])); + + setup_temp_row(0,2); + + val = pci_read_config32(NODE_HT(7), 0x6c); + byte = (val>>2) & 0x3; /* get default link on 7 to 0*/ + print_linkn("(2,0) link=", byte); + + setup_row_local(7,2); + setup_remote_row_direct(2, 0, byte); + setup_remote_node(2); /* Setup the regs on the remote node */ +#if !CROSS_BAR_47_56 + static const u8 conn4_2[] = { + 2,1,0, + }; +#else + static const u8 conn4_2[] = { + 2,1,0,1, + }; +#endif + setup_remote_row_indirect_group(conn4_2, sizeof(conn4_2)/sizeof(conn4_2[0])); + + rename_temp_node(2); /* Rename Node 7 to Node 2 */ + enable_routing(2); /* Enable routing on Node 2 */ + + setup_temp_row(0,1); + setup_temp_row(1,3); + + val = pci_read_config32(NODE_HT(7), 0x6c); + byte = (val>>2) & 0x3; /* get default link on 7 to 1*/ + print_linkn("(3,1) link=", byte); + + setup_row_local(7,3); + setup_remote_row_direct(3, 1, byte); + setup_remote_node(3); /* Setup the regs on the remote node */ + +#if !CROSS_BAR_47_56 + static const u8 conn4_3[] = { + 3,0,1, + }; +#else + static const u8 conn4_3[] = { + 3,0,1,1, + }; +#endif + setup_remote_row_indirect_group(conn4_3, sizeof(conn4_3)/sizeof(conn4_3[0])); + + /* We need to init link between 2, and 3 direct link */ + val = get_row(2,2); + byte = ((val>>16) & 0xfe) - link_connection(2,0); + byte = get_linkn_last_count(byte); + print_linkn("(2,3) link=", byte & 3); + + setup_row_direct(2,3, byte & 0x3); + setup_temp_row(0,2); + setup_temp_row(2,3); + check_connection(7); /* to 3*/ - setup_node(0, cpus); /* The first 2 nodes are configured */ - setup_node(1, cpus); /* already. Just configure them for 4p */ +#if CONFIG_MAX_CPUS > 4 + /* We need to find out which link is to node3 */ - setup_temp_row(0,2,cpus); - setup_temp_node(2,cpus); - enable_routing(7); - rename_temp_node(2); + if((byte>>2)==2) { /* one to node3, one to node0, one to node4*/ + val = get_row(7,3); + if((val>>16) == 1) { /* that link is to node4, because via node3 it has been set, recompute it*/ + val = get_row(2,2); + byte = ((val>>16) & 0xfe) - link_connection(2,0); + byte = get_linkn_first(byte); + print_linkn("-->(2,3) link=", byte); + setup_row_direct(2,3,byte); + setup_temp_row(2,3); + check_connection(7); /* to 3*/ + } + } +#endif - setup_temp_row(0,1,cpus); - setup_temp_row(1,3,cpus); - setup_temp_node(3,cpus); - enable_routing(7); /* enable routing on node 3 (temp.) */ + val = pci_read_config32(NODE_HT(7), 0x6c); + byte = (val>>2) & 0x3; /* get default link on 7 to 2*/ + print_linkn("(3,2) link=", byte); + setup_remote_row_direct(3,2, byte); + +/* ready to enable RT for Node 3 */ rename_temp_node(3); - + enable_routing(3); /* enable routing on node 3 (temp.) */ + +#if 0 + /*We need to do sth to reverse work for setup_temp_row (0,1) (1,3) */ + /* it will be done by clear_dead_links */ clear_temp_row(0); clear_temp_row(1); - clear_temp_row(2); - clear_temp_row(3); - #endif - print_debug_hex32(cpus); - print_debug(" nodes initialized.\r\n"); - return cpus; + + /* optimize physical connections - by LYH */ + static const u8 opt_conn4[] = { + 0,2, + 1,3, + 2,3, + }; + + result.needs_reset = optimize_connection_group(opt_conn4, sizeof(opt_conn4)/sizeof(opt_conn4[0])); + + return result; + } + +#endif /* CONFIG_MAX_CPUS > 2 */ + +#if CONFIG_MAX_CPUS > 4 + +static struct setup_smp_result setup_smp6(int needs_reset) +{ + struct setup_smp_result result; + u8 byte; + uint32_t val; + + result.nodes=6; + result.needs_reset = needs_reset; + + /* Setup and check temporary connection from Node 0 to Node 4 via 2 */ + val = get_row(2,2); + byte = ((val>>16) & 0xfe) - link_connection(2,3) - link_connection(2,0); + byte = get_linkn_last_count(byte); + + if((byte>>2)==0) { /* We should have two coherent link on node 2 for 6p and above*/ + result.nodes = 4; + return result; + } + byte &= 3; /* bit [3,2] is count-2*/ + print_linkn("(2,4) link=", byte); + setup_row_direct(2, 4, byte); + + /* Setup and check temporary connection from Node 0 to Node 4 through 2*/ + for(byte=0; byte<4; byte+=2) { + setup_temp_row(byte,byte+2); + } + + if (!check_connection(7) ) { + print_spew("No connection to Node 4.\r\n"); + result.nodes = 4; + return result; + } + + /* Setup and check temporary connection from Node 0 to Node 5 through 1, 3*/ + val = get_row(3,3); + byte = ((val>>16) & 0xfe) - link_connection(3,2) - link_connection(3,1); + byte = get_linkn_last_count(byte); + if((byte>>2)==0) { /* We should have two coherent links on node 3 for 6p and above*/ + result.nodes = 4; + return result; + } + + byte &= 3; /*bit [3,2] is count-2*/ + print_linkn("(3,5) link=", byte); + setup_row_direct(3, 5, byte); + + setup_temp_row(0,1); /* temp. link between nodes 0 and 1 */ + for(byte=0; byte<4; byte+=2) { + setup_temp_row(byte+1,byte+3); + } + + if (!check_connection(7)) { + print_spew("No connection to Node 5.\r\n"); + result.nodes = 4; + return result; + } + + /* We found 6 nodes so far. Now setup all nodes for 6p */ +#warning "FIXME we need to find out the correct gateway for 6p" + static const u8 conn6_1[] = { +#if !CROSS_BAR_47_56 + 0, 4, 2, + 0, 5, 1, + 1, 4, 3, + 1, 5, 3, + 2, 5, 3, + 3, 4, 5, +#else + 0, 4, 2, 0, + 0, 5, 1, 1, + 1, 4, 3, 1, + 1, 5, 3, 0, + 2, 5, 3, 0, + 3, 4, 2, 0, #endif + }; -#if MAX_CPUS > 1 -static unsigned detect_mp_capabilities(unsigned cpus) + setup_row_indirect_group(conn6_1, sizeof(conn6_1)/sizeof(conn6_1[0])); + + for(byte=0; byte<4; byte+=2) { + setup_temp_row(byte,byte+2); + } + val = pci_read_config32(NODE_HT(7), 0x6c); + byte = (val>>2) & 0x3; /*get default link on 7 to 2*/ + print_linkn("(4,2) link=", byte); + + setup_row_local(7,4); + setup_remote_row_direct(4, 2, byte); + setup_remote_node(4); /* Setup the regs on the remote node */ + /* Set indirect connection to 0, to 3 */ + static const u8 conn6_2[] = { +#if !CROSS_BAR_47_56 + 4, 0, 2, + 4, 1, 2, + 4, 3, 2, +#else + 4, 0, 2, 0, + 4, 1, 2, 0, + 4, 3, 2, 0, + 4, 5, 2, 0, +#endif + }; + + setup_remote_row_indirect_group(conn6_2, sizeof(conn6_2)/sizeof(conn6_2[0])); + + rename_temp_node(4); + enable_routing(4); + + setup_temp_row(0,1); + for(byte=0; byte<4; byte+=2) { + setup_temp_row(byte+1,byte+3); + } + + val = pci_read_config32(NODE_HT(7), 0x6c); + byte = (val>>2) & 0x3; /* get default link on 7 to 3*/ + print_linkn("(5,3) link=", byte); + setup_row_local(7,5); + setup_remote_row_direct(5, 3, byte); + setup_remote_node(5); /* Setup the regs on the remote node */ + +#if !CROSS_BAR_47_56 + /* We need to init link between 4, and 5 direct link */ + val = get_row(4,4); + byte = ((val>>16) & 0xfe) - link_connection(4,2); + byte = get_linkn_last_count(byte); + print_linkn("(4,5) link=", byte & 3); + + setup_row_direct(4,5, byte & 0x3); + setup_temp_row(0,2); + setup_temp_row(2,4); + setup_temp_row(4,5); + check_connection(7); /* to 5*/ + +#if CONFIG_MAX_CPUS > 6 + /* We need to find out which link is to node5 */ + + if((byte>>2)==2) { /* one to node5, one to node2, one to node6*/ + val = get_row(7,5); + if((val>>16) == 1) { /* that link is to node6, because via node 3 node 5 has been set*/ + val = get_row(4,4); + byte = ((val>>16) & 0xfe) - link_connection(4,2); + byte = get_linkn_first(byte); + print_linkn("-->(4,5) link=", byte); + setup_row_direct(4,5,byte); + setup_temp_row(4,5); + check_connection(7); /* to 5*/ + } + } +#endif + + val = pci_read_config32(NODE_HT(7), 0x6c); + byte = (val>>2) & 0x3; /* get default link on 7 to 4*/ + print_linkn("(5,4) link=", byte); + setup_remote_row_direct(5,4, byte); +#endif + + /* Set indirect connection to 0, to 3 for indirect we will use clockwise routing */ + static const u8 conn6_3[] = { +#if !CROSS_BAR_47_56 + 5, 0, 4, + 5, 2, 4, + 5, 1, 3, +#else + 5, 0, 3, 0, + 5, 2, 3, 0, + 5, 1, 3, 0, + 5, 4, 3, 0, +#endif + }; + + setup_remote_row_indirect_group(conn6_3, sizeof(conn6_3)/sizeof(conn6_3[0])); + +/* ready to enable RT for 5 */ + rename_temp_node(5); + enable_routing(5); /* enable routing on node 5 (temp.) */ + +#if 0 + /* We need to do sth about reverse about setup_temp_row (0,1), (2,4), (1, 3), (3,5) + * It will be done by clear_dead_links + */ + for(byte=0; byte<4; byte++) { + clear_temp_row(byte); + } +#endif + + /* optimize physical connections - by LYH */ + static const uint8_t opt_conn6[] ={ + 2, 4, + 3, 5, +#if !CROSS_BAR_47_56 + 4, 5, +#endif + }; + result.needs_reset = optimize_connection_group(opt_conn6, sizeof(opt_conn6)/sizeof(opt_conn6[0])); + + return result; + +} + +#endif /* CONFIG_MAX_CPUS > 4 */ + +#if CONFIG_MAX_CPUS > 6 + +static struct setup_smp_result setup_smp8(int needs_reset) { - unsigned node, row, mask; - bool mp_cap=TRUE; + struct setup_smp_result result; + u8 byte; + uint32_t val; -#if 1 - print_debug("detect_mp_capabilities: "); - print_debug_hex32(cpus); - print_debug("\r\n"); + result.nodes=8; + result.needs_reset = needs_reset; + + /* Setup and check temporary connection from Node 0 to Node 6 via 2 and 4 to 7 */ + val = get_row(4,4); +#if !CROSS_BAR_47_56 + byte = ((val>>16) & 0xfe) - link_connection(4,5) - link_connection(4,2); +#else + byte = ((val>>16) & 0xfe) - link_connection(4,2); #endif - if (cpus>2) - mask=0x06; /* BigMPCap */ - else - mask=0x02; /* MPCap */ - for (node=0; node>2)==0) { /* We should have two or three coherent links on node 4 for 8p*/ + result.nodes = 6; + return result; } + byte &= 3; /* bit [3,2] is count-1 or 2*/ +#else + byte = get_linkn_first(byte); /*Min link to 6*/ +#endif + print_linkn("(4,6) link=", byte); + setup_row_direct(4, 6, byte); - if (mp_cap) - return cpus; + /* Setup and check temporary connection from Node 0 to Node 6 through 2, and 4*/ + for(byte=0; byte<6; byte+=2) { + setup_temp_row(byte,byte+2); + } - /* one of our cpus is not mp capable */ + if (!check_connection(7) ) { + print_spew("No connection to Node 6.\r\n"); + result.nodes = 6; + return result; + } +#if !CROSS_BAR_47_56 + /* Setup and check temporary connection from Node 0 to Node 7 through 1, 3, 5*/ + val = get_row(5,5); + byte = ((val>>16) & 0xfe) - link_connection(5,4) - link_connection(5,3); + byte = get_linkn_first(byte); + print_linkn("(5,7) link=", byte); + setup_row_direct(5, 7, byte); + + setup_temp_row(0,1); /* temp. link between nodes 0 and 1 */ + for(byte=0; byte<6; byte+=2) { + setup_temp_row(byte+1,byte+3); + } +#else + val = get_row(4,4); + byte = ((val>>16) & 0xfe) - link_connection(4,2) - link_connection(4,6); + byte = get_linkn_first(byte); + print_linkn("(4,7) link=", byte); + setup_row_direct(4, 7, byte); + + /* Setup and check temporary connection from Node 0 to Node 7 through 2, and 4*/ + for(byte=0; byte<4; byte+=2) { + setup_temp_row(byte,byte+2); + } + setup_temp_row(4, 7); - print_debug("One of the CPUs is not MP capable. Going back to UP\r\n"); +#endif + + if (!check_connection(7)) { + print_spew("No connection to Node 7.\r\n"); + result.nodes = 6; + return result; + } + + + /* We found 8 nodes so far. Now setup all nodes for 8p */ + static const u8 conn8_1[] = { +#if !CROSS_BAR_47_56 + 0, 6, 2, + /*0, 7, 1,*/ + 1, 6, 3, + /*1, 7, 3,*/ + 2, 6, 4, + /*2, 7, 3,*/ + 3, 6, 5, + /*3, 7, 5,*/ + /*4, 7, 5,*/ +#else + 0, 6, 2, 0, + /*0, 7, 2, 0,*/ + 1, 6, 3, 0, + /*1, 7, 3, 0,*/ + 2, 6, 4, 0, + /*2, 7, 4, 0,*/ + 3, 6, 5, 0, + /*3, 7, 5, 0,*/ +#endif + }; - for (node=cpus; node>0; node--) - for (row=cpus; row>0; row--) - fill_row(NODE_HT(node-1), row-1, DEFAULT); + setup_row_indirect_group(conn8_1,sizeof(conn8_1)/sizeof(conn8_1[0])); + + for(byte=0; byte<6; byte+=2) { + setup_temp_row(byte,byte+2); + } + val = pci_read_config32(NODE_HT(7), 0x6c); + byte = (val>>2) & 0x3; /* get default link on 7 to 4*/ + print_linkn("(6,4) link=", byte); - return setup_uniprocessor(); -} + setup_row_local(7,6); + setup_remote_row_direct(6, 4, byte); + setup_remote_node(6); /* Setup the regs on the remote node */ + /* Set indirect connection to 0, to 3 */ +#warning "FIXME we need to find out the correct gateway for 8p" + static const u8 conn8_2[] = { +#if !CROSS_BAR_47_56 + 6, 0, 4, + 6, 1, 4, + 6, 2, 4, + 6, 3, 4, + 6, 5, 4, +#else + 6, 0, 4, 0, + /*6, 1, 5, 0,*/ + 6, 2, 4, 0, + /*6, 3, 5, 0,*/ +#endif + }; + setup_remote_row_indirect_group(conn8_2, sizeof(conn8_2)/sizeof(conn8_2[0])); + + rename_temp_node(6); + enable_routing(6); + +#if !CROSS_BAR_47_56 + setup_temp_row(0,1); + for(byte=0; byte<6; byte+=2) { + setup_temp_row(byte+1,byte+3); + } + + val = pci_read_config32(NODE_HT(7), 0x6c); + byte = (val>>2) & 0x3; /* get default link on 7 to 5*/ + print_linkn("(7,5) link=", byte); + setup_row_local(7,7); + setup_remote_row_direct(7, 5, byte); + +#else + for(byte=0; byte<4; byte+=2) { + setup_temp_row(byte,byte+2); + } + setup_temp_row(4,7); + val = pci_read_config32(NODE_HT(7), 0x6c); + byte = (val>>2) & 0x3; /* get default link on 7 to 4*/ + print_linkn("(7,4) link=", byte); + setup_row_local(7,7); + setup_remote_row_direct(7, 4, byte); + /* till now 4-7, 7-4 done. */ +#endif + setup_remote_node(7); /* Setup the regs on the remote node */ + +#if CROSS_BAR_47_56 + /* here init 5, 7 */ + /* Setup and check temporary connection from Node 0 to Node 5 through 1, 3, 5*/ + val = get_row(5,5); + byte = ((val>>16) & 0xfe) - link_connection(5,3); + byte = get_linkn_last(byte); + print_linkn("(5,7) link=", byte); + setup_row_direct(5, 7, byte); + + setup_temp_row(0,1); /* temp. link between nodes 0 and 1 */ + for(byte=0; byte<6; byte+=2) { + setup_temp_row(byte+1,byte+3); + } + + if (!check_connection(7)) { + /* We need to recompute link to 7 */ + val = get_row(5,5); + byte = ((val>>16) & 0xfe) - link_connection(5,3); + byte = get_linkn_first(byte); + + print_linkn("-->(5,7) link=", byte); + setup_row_direct(5, 7, byte); +#if 0 + setup_temp_row(0,1); /* temp. link between nodes 0 and 1 */ + for(byte=0; byte<6; byte+=2) { + setup_temp_row(byte+1,byte+3); + } +#else + setup_temp_row(5,7); #endif + check_connection(7); + } + val = pci_read_config32(NODE_HT(7), 0x6c); + byte = (val>>2) & 0x3; /* get default link on 7 to 5*/ + print_linkn("(7,5) link=", byte); + setup_remote_row_direct(7, 5, byte); + /*Till now 57, 75 done */ + + /* init init 5, 6 */ + val = get_row(5,5); + byte = ((val>>16) & 0xfe) - link_connection(5,3) - link_connection(5,7); + byte = get_linkn_first(byte); + print_linkn("(5,6) link=", byte); + setup_row_direct(5, 6, byte); + + /* init 6,7 */ + val = get_row(6,6); + byte = ((val>>16) & 0xfe) - link_connection(6,4); + byte = get_linkn_last(byte); + print_linkn("(6,7) link=", byte); + setup_row_direct(6, 7, byte); + + for(byte=0; byte<6; byte+=2) { + setup_temp_row(byte,byte+2); + } + setup_temp_row(6,7); + + if (!check_connection(7)) { + /* We need to recompute link to 7 */ + val = get_row(6,6); + byte = ((val>>16) & 0xfe) - link_connection(6,4); + byte = get_linkn_first(byte); + print_linkn("-->(6,7) link=", byte); + + setup_row_direct(6, 7, byte); +#if 0 + for(byte=0; byte<6; byte+=2) { + setup_temp_row(byte,byte+2); /* temp. link between nodes 0 and 2 */ + } +#endif + setup_temp_row(6,7); + check_connection(7); + } + val = pci_read_config32(NODE_HT(7), 0x6c); + byte = (val>>2) & 0x3; /* get default link on 7 to 6*/ + print_linkn("(7,6) link=", byte); + + setup_remote_row_direct(7, 6, byte); + /* Till now 67, 76 done*/ + + /* init 6,5 */ + val = get_row(6,6); + byte = ((val>>16) & 0xfe) - link_connection(6,4) - link_connection(6,7); + byte = get_linkn_first(byte); + print_linkn("(6,5) link=", byte); + setup_row_direct(6, 5, byte); + +#endif + +#if !CROSS_BAR_47_56 + /* We need to init link between 6, and 7 direct link */ + val = get_row(6,6); + byte = ((val>>16) & 0xfe) - link_connection(6,4); + byte = get_linkn_first(byte); + print_linkn("(6,7) link=", byte); + setup_row_direct(6,7, byte); + + val = get_row(7,7); + byte = ((val>>16) & 0xfe) - link_connection(7,5); + byte = get_linkn_first(byte); + print_linkn("(7,6) link=", byte); + setup_row_direct(7,6, byte); +#endif + + /* Set indirect connection to 0, to 3 for indirect we will use clockwise routing */ + static const u8 conn8_3[] = { +#if !CROSS_BAR_47_56 + 0, 7, 1, /* restore it*/ + 1, 7, 3, + 2, 7, 3, + 3, 7, 5, + 4, 7, 5, + + 7, 0, 6, + 7, 1, 5, + 7, 2, 6, + 7, 3, 5, + 7, 4, 6, +#else + 0, 7, 2, 0, /* restore it*/ + 1, 7, 3, 0, + 2, 7, 4, 0, + 3, 7, 5, 0, + + 6, 1, 5, 0, /*???*/ + 6, 3, 5, 0, /*???*/ + + 7, 0, 4, 0, + 7, 1, 5, 0, + 7, 2, 4, 0, + 7, 3, 5, 0, + 4, 5, 7, 0, + 5, 4, 6, 0, +#endif + }; + + setup_row_indirect_group(conn8_3, sizeof(conn8_3)/sizeof(conn8_3[0])); + +/* ready to enable RT for Node 7 */ + enable_routing(7); /* enable routing on node 7 (temp.) */ + + + static const uint8_t opt_conn8[] ={ + 4, 6, +#if CROSS_BAR_47_56 + 4, 7, + 5, 6, +#endif + 5, 7, + 6, 7, + }; + /* optimize physical connections - by LYH */ + result.needs_reset = optimize_connection_group(opt_conn8, sizeof(opt_conn8)/sizeof(opt_conn8[0])); + + return result; +} + +#endif /* CONFIG_MAX_CPUS > 6 */ -/* this is a shrunken cpuid. */ -static unsigned int cpuid(unsigned int op) +#if CONFIG_MAX_CPUS > 1 + +static struct setup_smp_result setup_smp(void) { - unsigned int ret; + struct setup_smp_result result; + + print_spew("Enabling SMP settings\r\n"); + + result = setup_smp2(); +#if CONFIG_MAX_CPUS > 2 + result = setup_smp4(result.needs_reset); +#endif + +#if CONFIG_MAX_CPUS > 4 + result = setup_smp6(result.needs_reset); +#endif + +#if CONFIG_MAX_CPUS > 6 + result = setup_smp6(result.needs_reset); +#endif - asm volatile ( "cpuid" : "=a" (ret) : "a" (op)); + print_debug_hex8(result.nodes); + print_debug(" nodes initialized.\r\n"); + + return result; - return ret; } -static void coherent_ht_finalize(unsigned cpus) +static unsigned verify_mp_capabilities(unsigned nodes) { - int node; - bool rev_a0; + unsigned node, mask; - /* set up cpu count and node count and enable Limit - * Config Space Range for all available CPUs. - * Also clear non coherent hypertransport bus range - * registers on Hammer A0 revision. - */ + mask = 0x06; /* BigMPCap */ -#if 1 - print_debug("coherent_ht_finalize\r\n"); + for (node=0; node 2 + case 0x02: /* MPCap */ + if(nodes > 2) { + print_err("Going back to DP\r\n"); + return 2; + } + break; #endif - rev_a0=((cpuid(1)&0xffff)==0x0f10); + case 0x00: /* Non SMP */ + if(nodes >1 ) { + print_err("Going back to UP\r\n"); + return 1; + } + break; + } + + return nodes; - for (node=0; node 6 + if(nodes==8) return;/* don't touch (7,7)*/ +#endif + last_row = nodes; + if (nodes == 1) { + last_row = 0; + } + for(node = 7; node >= 0; node--) { + for(row = 7; row >= last_row; row--) { + fill_row(node, row, DEFAULT); + } + } + + /* Update the local row */ + for( node=0; node> 8) & 0xff)) << 16) | 0x0101); + } +} +#endif /* CONFIG_MAX_CPUS > 1 */ + +static void coherent_ht_finalize(unsigned nodes) +{ + unsigned node; + int rev_a0; + + /* set up cpu count and node count and enable Limit + * Config Space Range for all available CPUs. + * Also clear non coherent hypertransport bus range + * registers on Hammer A0 revision. + */ + + print_spew("coherent_ht_finalize\r\n"); + rev_a0 = is_cpu_rev_a0(); + for (node = 0; node < nodes; node++) { + device_t dev; + uint32_t val; + dev = NODE_HT(node); - val=pci_read_config32(NODE_HT(node), 0x68); - val |= 0x00008000; - pci_write_config32(NODE_HT(node),0x68,val); + /* Set the Total CPU and Node count in the system */ + val = pci_read_config32(dev, 0x60); + val &= (~0x000F0070); + val |= ((nodes-1)<<16)|((nodes-1)<<4); + pci_write_config32(dev, 0x60, val); + + /* Only respond to real cpu pci configuration cycles + * and optimize the HT settings + */ + val=pci_read_config32(dev, 0x68); + val &= ~((HTTC_BUF_REL_PRI_MASK << HTTC_BUF_REL_PRI_SHIFT) | + (HTTC_MED_PRI_BYP_CNT_MASK << HTTC_MED_PRI_BYP_CNT_SHIFT) | + (HTTC_HI_PRI_BYP_CNT_MASK << HTTC_HI_PRI_BYP_CNT_SHIFT)); + val |= HTTC_LIMIT_CLDT_CFG | + (HTTC_BUF_REL_PRI_8 << HTTC_BUF_REL_PRI_SHIFT) | + HTTC_RSP_PASS_PW | + (3 << HTTC_MED_PRI_BYP_CNT_SHIFT) | + (3 << HTTC_HI_PRI_BYP_CNT_SHIFT); + pci_write_config32(dev, 0x68, val); if (rev_a0) { - pci_write_config32(NODE_HT(node),0x94,0); - pci_write_config32(NODE_HT(node),0xb4,0); - pci_write_config32(NODE_HT(node),0xd4,0); + print_spew("shit it is an old cup\n"); + pci_write_config32(dev, 0x94, 0); + pci_write_config32(dev, 0xb4, 0); + pci_write_config32(dev, 0xd4, 0); } } -#if 1 - print_debug("done\n"); -#endif + print_spew("done\r\n"); +} + +static int apply_cpu_errata_fixes(unsigned nodes, int needs_reset) +{ + unsigned node; + for(node = 0; node < nodes; node++) { + device_t dev; + uint32_t cmd; + dev = NODE_MC(node); + if (is_cpu_pre_c0()) { + + /* Errata 66 + * Limit the number of downstream posted requests to 1 + */ + cmd = pci_read_config32(dev, 0x70); + if ((cmd & (3 << 0)) != 2) { + cmd &= ~(3<<0); + cmd |= (2<<0); + pci_write_config32(dev, 0x70, cmd ); + needs_reset = 1; + } + cmd = pci_read_config32(dev, 0x7c); + if ((cmd & (3 << 4)) != 0) { + cmd &= ~(3<<4); + cmd |= (0<<4); + pci_write_config32(dev, 0x7c, cmd ); + needs_reset = 1; + } + /* Clock Power/Timing Low */ + cmd = pci_read_config32(dev, 0xd4); + if (cmd != 0x000D0001) { + cmd = 0x000D0001; + pci_write_config32(dev, 0xd4, cmd); + needs_reset = 1; /* Needed? */ + } + + } + else { + uint32_t cmd_ref; + /* Errata 98 + * Set Clk Ramp Hystersis to 7 + * Clock Power/Timing Low + */ + cmd_ref = 0x04e20707; /* Registered */ + cmd = pci_read_config32(dev, 0xd4); + if(cmd != cmd_ref) { + pci_write_config32(dev, 0xd4, cmd_ref ); + needs_reset = 1; /* Needed? */ + } + } + } + return needs_reset; +} + +static int optimize_link_read_pointers(unsigned nodes, int needs_reset) +{ + unsigned node; + for(node = 0; node < nodes; node++) { + device_t f0_dev, f3_dev; + uint32_t cmd_ref, cmd; + int link; + f0_dev = NODE_HT(node); + f3_dev = NODE_MC(node); + cmd_ref = cmd = pci_read_config32(f3_dev, 0xdc); + for(link = 0; link < 3; link++) { + uint32_t link_type; + unsigned reg; + /* This works on an Athlon64 because unimplemented links return 0 */ + reg = 0x98 + (link * 0x20); + link_type = pci_read_config32(f0_dev, reg); + if ((link_type & 7) == 3) { + cmd &= ~(0xff << (link *8)); + cmd |= 0x25 << (link *8); + } + } + if (cmd != cmd_ref) { + pci_write_config32(f3_dev, 0xdc, cmd); + needs_reset = 1; + } + } + return needs_reset; } -static void setup_coherent_ht_domain(void) +static int setup_coherent_ht_domain(void) { - unsigned cpus; + struct setup_smp_result result; enable_bsp_routing(); -#if MAX_CPUS == 1 - cpus=setup_uniprocessor(); +#if CONFIG_MAX_CPUS > 1 + result = setup_smp(); + result.nodes = verify_mp_capabilities(result.nodes); + clear_dead_routes(result.nodes); #else - cpus=setup_smp(); - cpus=detect_mp_capabilities(cpus); + result.nodes = 1; + result.needs_reset = 0; #endif - coherent_ht_finalize(cpus); -} -#endif + if (result.nodes == 1) { + setup_uniprocessor(); + } + coherent_ht_finalize(result.nodes); + result.needs_reset = apply_cpu_errata_fixes(result.nodes, result.needs_reset); + result.needs_reset = optimize_link_read_pointers(result.nodes, result.needs_reset); + return result.needs_reset; +}