/*
 * This file is part of the coreboot project.
 *
 * Copyright (C) 2010 Advanced Micro Devices, Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */

/******************************************************************************
 Description: Receiver En and DQS Timing Training feature for DDR 3 MCT
******************************************************************************/

static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat,
				struct DCTStatStruc *pDCTstat, u8 Pass);
static u8 mct_SavePassRcvEnDly_D(struct DCTStatStruc *pDCTstat,
					u8 rcvrEnDly, u8 Channel,
					u8 receiver, u8 Pass);
static u8 mct_CompareTestPatternQW0_D(struct MCTStatStruc *pMCTstat,
					struct DCTStatStruc *pDCTstat,
					u32 addr, u8 channel,
					u8 pattern, u8 Pass);
static void mct_InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
					struct DCTStatStruc *pDCTstat);
static void InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
				struct DCTStatStruc *pDCTstat, u8 Channel);
static void CalcEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat,
				struct DCTStatStruc *pDCTstat, u8 Channel);
static void mct_SetFinalRcvrEnDly_D(struct DCTStatStruc *pDCTstat,
					u8 RcvrEnDly, u8 where,
					u8 Channel, u8 Receiver,
					u32 dev, u32 index_reg,
					u8 Addl_Index, u8 Pass);
static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat,
				u8 Channel, u8 DQSRcvEnDly);
static void fenceDynTraining_D(struct MCTStatStruc *pMCTstat,
				struct DCTStatStruc *pDCTstat, u8 dct);
static void mct_DisableDQSRcvEn_D(struct DCTStatStruc *pDCTstat);

/* Warning:  These must be located so they do not cross a logical 16-bit
 * segment boundary!
 */
const static u32 TestPattern0_D[] = {
	0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
	0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
	0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
	0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
};
const static u32 TestPattern1_D[] = {
	0x55555555, 0x55555555, 0x55555555, 0x55555555,
	0x55555555, 0x55555555, 0x55555555, 0x55555555,
	0x55555555, 0x55555555, 0x55555555, 0x55555555,
	0x55555555, 0x55555555, 0x55555555, 0x55555555,
};
const static u32 TestPattern2_D[] = {
	0x12345678, 0x87654321, 0x23456789, 0x98765432,
	0x59385824, 0x30496724, 0x24490795, 0x99938733,
	0x40385642, 0x38465245, 0x29432163, 0x05067894,
	0x12349045, 0x98723467, 0x12387634, 0x34587623,
};

static void SetupRcvrPattern(struct MCTStatStruc *pMCTstat,
				struct DCTStatStruc *pDCTstat, u32 *buffer, u8 pass)
{
	/*
	 * 1. Copy the alpha and Beta patterns from ROM to Cache,
	 *    aligning on 16 byte boundary
	 * 2. Set the ptr to DCTStatstruc.PtrPatternBufA for Alpha
	 * 3. Set the ptr to DCTStatstruc.PtrPatternBufB for Beta
	 */
	u32 *buf_a;
	u32 *buf_b;
	u32 *p_A;
	u32 *p_B;
	u8 i;

	buf_a = (u32 *)(((u32)buffer + 0x10) & (0xfffffff0));
	buf_b = buf_a + 32;	/* ?? */
	p_A = (u32 *)SetupDqsPattern_1PassB(pass);
	p_B = (u32 *)SetupDqsPattern_1PassA(pass);

	for(i=0;i<16;i++) {
		buf_a[i] = p_A[i];
		buf_b[i] = p_B[i];
	}

	pDCTstat->PtrPatternBufA = (u32)buf_a;
	pDCTstat->PtrPatternBufB = (u32)buf_b;
}

void mct_TrainRcvrEn_D(struct MCTStatStruc *pMCTstat,
			struct DCTStatStruc *pDCTstat, u8 Pass)
{
	if(mct_checkNumberOfDqsRcvEn_1Pass(Pass))
		dqsTrainRcvrEn_SW(pMCTstat, pDCTstat, Pass);
}

static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat,
				struct DCTStatStruc *pDCTstat, u8 Pass)
{
	u8 Channel, RcvrEnDly, RcvrEnDlyRmin;
	u8 Test0, Test1, CurrTest, CurrTestSide0, CurrTestSide1;
	u8 CTLRMaxDelay, _2Ranks, PatternA, PatternB;
	u8 Addl_Index = 0;
	u8 Receiver;
	u8 _DisableDramECC = 0, _Wrap32Dis = 0, _SSE2 = 0;
	u8 RcvrEnDlyLimit, Final_Value, MaxDelay_CH[2];
	u32 TestAddr0, TestAddr1, TestAddr0B, TestAddr1B;
	u32 PatternBuffer[64+4];	/* FIXME: need increase 8? */
	u32 Errors;

	u32 val;
	u32 reg;
	u32 dev;
	u32 index_reg;
	u32 ch_start, ch_end, ch;
	u32 msr;
	u32 cr4;
	u32 lo, hi;

	u8 valid;
	u32 tmp;
	u8 LastTest;

	print_debug_dqs("\nTrainRcvEn: Node", pDCTstat->Node_ID, 0);
	print_debug_dqs("TrainRcvEn: Pass", Pass, 0);

	dev = pDCTstat->dev_dct;
	ch_start = 0;
	if(!pDCTstat->GangedMode) {
		ch_end = 2;
	} else {
		ch_end = 1;
	}

	for (ch = ch_start; ch < ch_end; ch++) {
		reg = 0x78 + (0x100 * ch);
		val = Get_NB32(dev, reg);
		val &= ~(0x3ff << 22);
		val |= (0x0c8 << 22);		/* Max Rd Lat */
		Set_NB32(dev, reg, val);
	}

	Final_Value = 1;
	if (Pass == FirstPass) {
		mct_InitDQSPos4RcvrEn_D(pMCTstat, pDCTstat);
	} else {
		pDCTstat->DimmTrainFail = 0;
		pDCTstat->CSTrainFail = ~pDCTstat->CSPresent;
	}

	cr4 = read_cr4();
	if(cr4 & (1 << 9)) {	/* save the old value */
		_SSE2 = 1;
	}
	cr4 |= (1 << 9);	/* OSFXSR enable SSE2 */
	write_cr4(cr4);

	msr = HWCR;
	_RDMSR(msr, &lo, &hi);
	/* FIXME: Why use SSEDIS */
	if(lo & (1 << 17)) {	/* save the old value */
		_Wrap32Dis = 1;
	}
	lo |= (1 << 17);	/* HWCR.wrap32dis */
	lo &= ~(1 << 15);	/* SSEDIS */
	_WRMSR(msr, lo, hi);	/* Setting wrap32dis allows 64-bit memory references in real mode */

	_DisableDramECC = mct_DisableDimmEccEn_D(pMCTstat, pDCTstat);

	SetupRcvrPattern(pMCTstat, pDCTstat, PatternBuffer, Pass);

	Errors = 0;
	dev = pDCTstat->dev_dct;
	CTLRMaxDelay = 0;

	for (Channel = 0; Channel < 2; Channel++) {
		print_debug_dqs("\tTrainRcvEn51: Node ", pDCTstat->Node_ID, 1);
		print_debug_dqs("\tTrainRcvEn51: Channel ", Channel, 1);
		pDCTstat->Channel = Channel;

		MaxDelay_CH[Channel] = 0;
		index_reg = 0x98 + 0x100 * Channel;

		Receiver = mct_InitReceiver_D(pDCTstat, Channel);
		/* There are four receiver pairs, loosely associated with
		 * chipselects.
		 */
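		/* Receiver 0/2/4/6 selects the even chip select of DIMM0..DIMM3
		 * on this channel; Receiver+1 is the odd (second) rank of the
		 * same DIMM.  The loop below derives
		 * Addl_Index = (Receiver >> 1) * 3 + 0x10, i.e. the per-DIMM
		 * DqsRcvEnDly register group at 0x10, 0x13, 0x16 or 0x19, which
		 * mct_SetRcvrEnDly_D then programs for every byte lane.
		 */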
		for (; Receiver < 8; Receiver += 2) {
			Addl_Index = (Receiver >> 1) * 3 + 0x10;
			LastTest = DQS_FAIL;

			/* mct_ModifyIndex_D */
			RcvrEnDlyRmin = RcvrEnDlyLimit = 0xff;

			print_debug_dqs("\t\tTrainRcvEnd52: index ", Addl_Index, 2);

			if(!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver)) {
				continue;
			}

			TestAddr0 = mct_GetRcvrSysAddr_D(pMCTstat, pDCTstat, Channel, Receiver, &valid);
			if(!valid) {	/* Address not supported on current CS */
				continue;
			}

			TestAddr0B = TestAddr0 + (BigPagex8_RJ8 << 3);

			if(mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver+1)) {
				TestAddr1 = mct_GetRcvrSysAddr_D(pMCTstat, pDCTstat, Channel, Receiver+1, &valid);
				if(!valid) {	/* Address not supported on current CS */
					continue;
				}
				TestAddr1B = TestAddr1 + (BigPagex8_RJ8 << 3);
				_2Ranks = 1;
			} else {
				_2Ranks = TestAddr1 = TestAddr1B = 0;
			}

			print_debug_dqs("\t\tTrainRcvEn53: TestAddr0 ", TestAddr0, 2);
			print_debug_dqs("\t\tTrainRcvEn53: TestAddr0B ", TestAddr0B, 2);
			print_debug_dqs("\t\tTrainRcvEn53: TestAddr1 ", TestAddr1, 2);
			print_debug_dqs("\t\tTrainRcvEn53: TestAddr1B ", TestAddr1B, 2);

			/*
			 * Get starting RcvrEnDly value
			 */
			RcvrEnDly = mct_Get_Start_RcvrEnDly_1Pass(Pass);

			/* mct_GetInitFlag_D */
			if (Pass == FirstPass) {
				pDCTstat->DqsRcvEn_Pass = 0;
			} else {
				pDCTstat->DqsRcvEn_Pass = 0xFF;
			}
			pDCTstat->DqsRcvEn_Saved = 0;

			while(RcvrEnDly < RcvrEnDlyLimit) {	/* sweep Delay value here */
				print_debug_dqs("\t\t\tTrainRcvEn541: RcvrEnDly ", RcvrEnDly, 3);

				/* callback not required
				if(mct_AdjustDelay_D(pDCTstat, RcvrEnDly))
					goto skipDly;
				*/

				/* Odd steps get another pattern such that even
				 * and odd steps alternate.  The pointers to the
				 * patterns will be swapped at the end of the loop
				 * so that they correspond.
				 */
				if(RcvrEnDly & 1) {
					PatternA = 1;
					PatternB = 0;
				} else {
					/* Even step */
					PatternA = 0;
					PatternB = 1;
				}

				mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0, PatternA);	/* rank 0 of DIMM, testpattern 0 */
				mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0B, PatternB);	/* rank 0 of DIMM, testpattern 1 */
				if(_2Ranks) {
					mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1, PatternA);	/* rank 1 of DIMM, testpattern 0 */
					mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1B, PatternB);	/* rank 1 of DIMM, testpattern 1 */
				}

				mct_SetRcvrEnDly_D(pDCTstat, RcvrEnDly, 0, Channel, Receiver, dev, index_reg, Addl_Index, Pass);

				CurrTest = DQS_FAIL;
				CurrTestSide0 = DQS_FAIL;
				CurrTestSide1 = DQS_FAIL;

				mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0);	/* cache fills */
				Test0 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr0, Channel, PatternA, Pass);	/* ROM vs cache compare */
				proc_IOCLFLUSH_D(TestAddr0);
				ResetDCTWrPtr_D(dev, index_reg, Addl_Index);

				print_debug_dqs("\t\t\tTrainRcvEn542: Test0 result ", Test0, 3);

				/* != 0x00 mean pass */

				if(Test0 == DQS_PASS) {
					mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0B);	/* cache fills */
					/* ROM vs cache compare */
					Test1 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr0B, Channel, PatternB, Pass);
					proc_IOCLFLUSH_D(TestAddr0B);
					ResetDCTWrPtr_D(dev, index_reg, Addl_Index);

					print_debug_dqs("\t\t\tTrainRcvEn543: Test1 result ", Test1, 3);

					if(Test1 == DQS_PASS) {
						CurrTestSide0 = DQS_PASS;
					}
				}

				if(_2Ranks) {
					mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1);	/* cache fills */
					/* ROM vs cache compare */
					Test0 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr1, Channel, PatternA, Pass);
					proc_IOCLFLUSH_D(TestAddr1);
					ResetDCTWrPtr_D(dev, index_reg, Addl_Index);

					print_debug_dqs("\t\t\tTrainRcvEn544: Test0 result ", Test0, 3);

					if(Test0 == DQS_PASS) {
						mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1B);	/* cache fills */
						/* ROM vs cache compare */
						Test1 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr1B, Channel, PatternB, Pass);
						proc_IOCLFLUSH_D(TestAddr1B);
						ResetDCTWrPtr_D(dev, index_reg, Addl_Index);

						print_debug_dqs("\t\t\tTrainRcvEn545: Test1 result ", Test1, 3);
						if(Test1 == DQS_PASS) {
							CurrTestSide1 = DQS_PASS;
						}
					}
				}

				if(_2Ranks) {
					if ((CurrTestSide0 == DQS_PASS) && (CurrTestSide1 == DQS_PASS)) {
						CurrTest = DQS_PASS;
					}
				} else if (CurrTestSide0 == DQS_PASS) {
					CurrTest = DQS_PASS;
				}

				/* record first pass DqsRcvEn to stack */
				valid = mct_SavePassRcvEnDly_D(pDCTstat, RcvrEnDly, Channel, Receiver, Pass);

				/* Break(1:RevF,2:DR) or not(0) FIXME: This comment doesn't make sense */
				if(valid == 2 || (LastTest == DQS_FAIL && valid == 1)) {
					RcvrEnDlyRmin = RcvrEnDly;
					break;
				}

				LastTest = CurrTest;

				/* swap the rank 0 pointers */
				tmp = TestAddr0;
				TestAddr0 = TestAddr0B;
				TestAddr0B = tmp;

				/* swap the rank 1 pointers */
				tmp = TestAddr1;
				TestAddr1 = TestAddr1B;
				TestAddr1B = tmp;

				print_debug_dqs("\t\t\tTrainRcvEn56: RcvrEnDly ", RcvrEnDly, 3);

				RcvrEnDly++;
			}	/* while RcvrEnDly */

			print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDly ", RcvrEnDly, 2);
			print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDlyRmin ", RcvrEnDlyRmin, 3);
			print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDlyLimit ", RcvrEnDlyLimit, 3);
			if(RcvrEnDlyRmin == RcvrEnDlyLimit) {
				/* no passing window */
				pDCTstat->ErrStatus |= 1 << SB_NORCVREN;
				Errors |= 1 << SB_NORCVREN;
				pDCTstat->ErrCode = SC_FatalErr;
			}

			if(RcvrEnDly > (RcvrEnDlyLimit - 1)) {
				/* passing window too narrow, too far delayed */
				pDCTstat->ErrStatus |= 1 << SB_SmallRCVR;
				Errors |= 1 << SB_SmallRCVR;
				pDCTstat->ErrCode = SC_FatalErr;
				RcvrEnDly = RcvrEnDlyLimit - 1;
				pDCTstat->CSTrainFail |= 1 << Receiver;
				pDCTstat->DimmTrainFail |= 1 << (Receiver + Channel);
			}

			/* CHB_D0_B0_RCVRDLY set in mct_Average_RcvrEnDly_Pass */
			mct_Average_RcvrEnDly_Pass(pDCTstat, RcvrEnDly, RcvrEnDlyLimit, Channel, Receiver, Pass);

			mct_SetFinalRcvrEnDly_D(pDCTstat, RcvrEnDly, Final_Value, Channel, Receiver, dev, index_reg, Addl_Index, Pass);

			if(pDCTstat->ErrStatus & (1 << SB_SmallRCVR)) {
				Errors |= 1 << SB_SmallRCVR;
			}

			RcvrEnDly += Pass1MemClkDly;
			if(RcvrEnDly > CTLRMaxDelay) {
				CTLRMaxDelay = RcvrEnDly;
			}

		}	/* while Receiver */

		MaxDelay_CH[Channel] = CTLRMaxDelay;
	}	/* for Channel */

	CTLRMaxDelay = MaxDelay_CH[0];
	if (MaxDelay_CH[1] > CTLRMaxDelay)
		CTLRMaxDelay = MaxDelay_CH[1];

	for (Channel = 0; Channel < 2; Channel++) {
		mct_SetMaxLatency_D(pDCTstat, Channel, CTLRMaxDelay);	/* program Ch A/B MaxAsyncLat to correspond with max delay */
	}

	ResetDCTWrPtr_D(dev, index_reg, Addl_Index);

	if(_DisableDramECC) {
		mct_EnableDimmEccEn_D(pMCTstat, pDCTstat, _DisableDramECC);
	}

	if (Pass == FirstPass) {
		/* Disable DQSRcvrEn training mode */
		mct_DisableDQSRcvEn_D(pDCTstat);
	}

	if(!_Wrap32Dis) {
		msr = HWCR;
		_RDMSR(msr, &lo, &hi);
		lo &= ~(1<<17);		/* restore HWCR.wrap32dis */
		_WRMSR(msr, lo, hi);
	}
	if(!_SSE2){
		cr4 = read_cr4();
		cr4 &= ~(1<<9);		/* restore cr4.OSFXSR */
		write_cr4(cr4);
	}

#if DQS_TRAIN_DEBUG > 0
	{
		u8 Channel;
		printk(BIOS_DEBUG, "TrainRcvrEn: CH_MaxRdLat:\n");
		for(Channel = 0; Channel<2; Channel++) {
			printk(BIOS_DEBUG, "Channel:%x: %x\n",
			       Channel, pDCTstat->CH_MaxRdLat[Channel]);
		}
	}
#endif

#if DQS_TRAIN_DEBUG > 0
	{
		u8 val;
		u8 Channel, Receiver;
		u8 i;
		u8 *p;

		printk(BIOS_DEBUG, "TrainRcvrEn: CH_D_B_RCVRDLY:\n");
		for(Channel = 0; Channel < 2; Channel++) {
			printk(BIOS_DEBUG, "Channel:%x\n", Channel);
			for(Receiver = 0; Receiver<8; Receiver+=2) {
				printk(BIOS_DEBUG,
"\t\tReceiver:%x:", Receiver); p = pDCTstat->CH_D_B_RCVRDLY[Channel][Receiver>>1]; for (i=0;i<8; i++) { val = p[i]; printk(BIOS_DEBUG, "%x ", val); } printk(BIOS_DEBUG, "\n"); } } } #endif printk(BIOS_DEBUG, "TrainRcvrEn: Status %x\n", pDCTstat->Status); printk(BIOS_DEBUG, "TrainRcvrEn: ErrStatus %x\n", pDCTstat->ErrStatus); printk(BIOS_DEBUG, "TrainRcvrEn: ErrCode %x\n", pDCTstat->ErrCode); printk(BIOS_DEBUG, "TrainRcvrEn: Done\n\n"); } u8 mct_InitReceiver_D(struct DCTStatStruc *pDCTstat, u8 dct) { if (pDCTstat->DIMMValidDCT[dct] == 0 ) { return 8; } else { return 0; } } static void mct_SetFinalRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u8 RcvrEnDly, u8 where, u8 Channel, u8 Receiver, u32 dev, u32 index_reg, u8 Addl_Index, u8 Pass/*, u8 *p*/) { /* * Program final DqsRcvEnDly to additional index for DQS receiver * enabled delay */ mct_SetRcvrEnDly_D(pDCTstat, RcvrEnDly, where, Channel, Receiver, dev, index_reg, Addl_Index, Pass); } static void mct_DisableDQSRcvEn_D(struct DCTStatStruc *pDCTstat) { u8 ch_end, ch; u32 reg; u32 dev; u32 val; dev = pDCTstat->dev_dct; if (pDCTstat->GangedMode) { ch_end = 1; } else { ch_end = 2; } for (ch=0; chStatus |= 1 << SB_DQSRcvLimit; } /* DimmOffset not needed for CH_D_B_RCVRDLY array */ for(i=0; i < 8; i++) { if(FinalValue) { /*calculate dimm offset */ p = pDCTstat->CH_D_B_RCVRDLY[Channel][Receiver >> 1]; RcvrEnDly = p[i]; } /* if flag=0, set DqsRcvEn value to reg. */ /* get the register index from table */ index = Table_DQSRcvEn_Offset[i >> 1]; index += Addl_Index; /* DIMMx DqsRcvEn byte0 */ val = Get_NB32_index_wait(dev, index_reg, index); if(i & 1) { /* odd byte lane */ val &= ~(0xFF << 16); val |= (RcvrEnDly << 16); } else { /* even byte lane */ val &= ~0xFF; val |= RcvrEnDly; } Set_NB32_index_wait(dev, index_reg, index, val); } } static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u8 DQSRcvEnDly) { u32 dev; u32 reg; u16 SubTotal; u32 index_reg; u32 reg_off; u32 val; u32 valx; if(pDCTstat->GangedMode) Channel = 0; dev = pDCTstat->dev_dct; reg_off = 0x100 * Channel; index_reg = 0x98 + reg_off; /* Multiply the CAS Latency by two to get a number of 1/2 MEMCLKs units.*/ val = Get_NB32(dev, 0x88 + reg_off); SubTotal = ((val & 0x0f) + 4) << 1; /* SubTotal is 1/2 Memclk unit */ /* If registered DIMMs are being used then * add 1 MEMCLK to the sub-total. */ val = Get_NB32(dev, 0x90 + reg_off); if(!(val & (1 << UnBuffDimm))) SubTotal += 2; /* If the address prelaunch is setup for 1/2 MEMCLKs then * add 1, else add 2 to the sub-total. * if (AddrCmdSetup || CsOdtSetup || CkeSetup) then K := K + 2; */ val = Get_NB32_index_wait(dev, index_reg, 0x04); if(!(val & 0x00202020)) SubTotal += 1; else SubTotal += 2; /* If the F2x[1, 0]78[RdPtrInit] field is 4, 5, 6 or 7 MEMCLKs, * then add 4, 3, 2, or 1 MEMCLKs, respectively to the sub-total. */ val = Get_NB32(dev, 0x78 + reg_off); SubTotal += 8 - (val & 0x0f); /* Convert bits 7-5 (also referred to as the course delay) of * the current (or worst case) DQS receiver enable delay to * 1/2 MEMCLKs units, rounding up, and add this to the sub-total. */ SubTotal += DQSRcvEnDly >> 5; /*BOZO-no rounding up */ /* Add 5.5 to the sub-total. 5.5 represents part of the * processor specific constant delay value in the DRAM * clock domain. */ SubTotal <<= 1; /*scale 1/2 MemClk to 1/4 MemClk */ SubTotal += 11; /*add 5.5 1/2MemClk */ /* Convert the sub-total (in 1/2 MEMCLKs) to northbridge * clocks (NCLKs) as follows (assuming DDR400 and assuming * that no P-state or link speed changes have occurred). 
	/* New formula:
	 * SubTotal *= 3*(Fn2xD4[NBFid]+4)/(3+Fn2x94[MemClkFreq])/2
	 */
	val = Get_NB32(dev, 0x94 + reg_off);

	/* SubTotal div 4 to scale 1/4 MemClk back to MemClk */
	val &= 7;
	if (val >= 3) {
		val <<= 1;
	} else
		val += 3;
	valx = val << 2;

	val = Get_NB32(pDCTstat->dev_nbmisc, 0xD4);
	SubTotal *= ((val & 0x1f) + 4) * 3;

	SubTotal /= valx;
	if (SubTotal % valx) {	/* round up */
		SubTotal++;
	}

	/* Add 5 NCLKs to the sub-total. 5 represents part of the
	 * processor specific constant value in the northbridge
	 * clock domain.
	 */
	SubTotal += 5;

	pDCTstat->CH_MaxRdLat[Channel] = SubTotal;
	if(pDCTstat->GangedMode) {
		pDCTstat->CH_MaxRdLat[1] = SubTotal;
	}

	/* Program the F2x[1, 0]78[MaxRdLatency] register with
	 * the total delay value (in NCLKs).
	 */
	reg = 0x78 + reg_off;
	val = Get_NB32(dev, reg);
	val &= ~(0x3ff << 22);
	val |= (SubTotal & 0x3ff) << 22;

	/* program MaxRdLatency to correspond with current delay */
	Set_NB32(dev, reg, val);
}

static u8 mct_SavePassRcvEnDly_D(struct DCTStatStruc *pDCTstat,
					u8 rcvrEnDly, u8 Channel,
					u8 receiver, u8 Pass)
{
	u8 i;
	u8 mask_Saved, mask_Pass;
	u8 *p;

	/* calculate dimm offset
	 * not needed for CH_D_B_RCVRDLY array
	 */

	/* cmp if there has new DqsRcvEnDly to be recorded */
	mask_Pass = pDCTstat->DqsRcvEn_Pass;

	if(Pass == SecondPass) {
		mask_Pass = ~mask_Pass;
	}

	mask_Saved = pDCTstat->DqsRcvEn_Saved;
	if(mask_Pass != mask_Saved) {

		/* find desired stack offset according to channel/dimm/byte */
		if(Pass == SecondPass) {
			/* FIXME: SecondPass is never used for Barcelona
			p = pDCTstat->CH_D_B_RCVRDLY_1[Channel][receiver>>1];
			*/
			p = 0;	/* Keep the compiler happy. */
		} else {
			mask_Saved &= mask_Pass;
			p = pDCTstat->CH_D_B_RCVRDLY[Channel][receiver>>1];
		}
		for(i=0; i < 8; i++) {
			/* cmp per byte lane */
			if(mask_Pass & (1 << i)) {
				if(!(mask_Saved & (1 << i))) {
					/* save RcvEnDly to stack, according to
					 * the related Dimm/byte lane */
					p[i] = (u8)rcvrEnDly;
					mask_Saved |= 1 << i;
				}
			}
		}
		pDCTstat->DqsRcvEn_Saved = mask_Saved;
	}
	return mct_SaveRcvEnDly_D_1Pass(pDCTstat, Pass);
}

static u8 mct_CompareTestPatternQW0_D(struct MCTStatStruc *pMCTstat,
					struct DCTStatStruc *pDCTstat,
					u32 addr, u8 channel,
					u8 pattern, u8 Pass)
{
	/* Compare only the first beat of data.  Since target addrs are cache
	 * line aligned, the Channel parameter is used to determine which
	 * cache QW to compare.
	 */
	u8 *test_buf;
	u8 i;
	u8 result;
	u8 value;

	if(Pass == FirstPass) {
		if(pattern==1) {
			test_buf = (u8 *)TestPattern1_D;
		} else {
			test_buf = (u8 *)TestPattern0_D;
		}
	} else {	/* Second Pass */
		test_buf = (u8 *)TestPattern2_D;
	}

	SetUpperFSbase(addr);
	addr <<= 8;

	if((pDCTstat->Status & (1 << SB_128bitmode)) && channel) {
		addr += 8;	/* second channel */
		test_buf += 8;
	}

	/* compare the first beat of data, one byte per lane */
	for (i = 0; i < 8; i++, addr++) {
		value = read32_fs(addr);
		if (value == test_buf[i]) {
			pDCTstat->DqsRcvEn_Pass |= (1 << i);
		} else {
			pDCTstat->DqsRcvEn_Pass &= ~(1 << i);
		}
	}

	if (Pass == FirstPass) {
		/* if first pass, at least one byte lane pass
		 * ,then DQS_PASS=1 and will set to related reg.
		 */
		if(pDCTstat->DqsRcvEn_Pass != 0) {
			result = DQS_PASS;
		} else {
			result = DQS_FAIL;
		}
	} else {
		/* if second pass, at least one byte lane fail
		 * ,then DQS_FAIL=1 and will set to related reg.
		 */
		if(pDCTstat->DqsRcvEn_Pass != 0xFF) {
			result = DQS_FAIL;
		} else {
			result = DQS_PASS;
		}
	}

	/* if second pass, we can't find the fail until FFh,
	 * then let it fail to save the final delay
	 */
	if((Pass == SecondPass) && (pDCTstat->Status & (1 << SB_DQSRcvLimit))) {
		result = DQS_FAIL;
		pDCTstat->DqsRcvEn_Pass = 0;
	}

	/* second pass needs to be inverted
	 * FIXME? this could be inverted in the above code to start with...
	 */
	if(Pass == SecondPass) {
		if (result == DQS_PASS) {
			result = DQS_FAIL;
		} else if (result == DQS_FAIL) {	/* FIXME: doesn't need to be else if */
			result = DQS_PASS;
		}
	}

	return result;
}

static void mct_InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
					struct DCTStatStruc *pDCTstat)
{
	/* Initialize the DQS Positions in preparation for
	 * Receiver Enable Training.
	 * Write Position is 1/2 Memclock Delay
	 * Read Position is 1/2 Memclock Delay
	 */
	u8 i;
	for(i=0;i<2; i++){
		InitDQSPos4RcvrEn_D(pMCTstat, pDCTstat, i);
	}
}

static void InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
				struct DCTStatStruc *pDCTstat, u8 Channel)
{
	/* Initialize the DQS Positions in preparation for
	 * Receiver Enable Training.
	 * Write Position is no Delay
	 * Read Position is 1/2 Memclock Delay
	 */
	u8 i, j;
	u32 dword;
	u8 dn = 4; /* TODO: Rev C could be 4 */
	u32 dev = pDCTstat->dev_dct;
	u32 index_reg = 0x98 + 0x100 * Channel;

	/* FIXME: add Cx support */
	dword = 0x00000000;
	for(i=1; i<=3; i++) {
		for(j=0; j<dn; j++)
			/* DIMM0 Write Data Timing Low */
			/* DIMM0 Write ECC Timing */
			Set_NB32_index_wait(dev, index_reg, i + 0x100 * j, dword);
	}

	/* errata #180 */
	dword = 0x2f2f2f2f;
	for(i=5; i<=6; i++) {
		for(j=0; j<dn; j++)
			/* DIMM0 Read DQS Timing Control Low */
			Set_NB32_index_wait(dev, index_reg, i + 0x100 * j, dword);
	}

	dword = 0x0000002f;
	for(j=0; j<dn; j++)
		/* DIMM0 Read DQS ECC Timing Control */
		Set_NB32_index_wait(dev, index_reg, 7 + 0x100 * j, dword);
}

static void SetEccDQSRcvrEn_D(struct DCTStatStruc *pDCTstat, u8 Channel)
{
	u32 dev;
	u32 index_reg;
	u32 index;
	u8 ChipSel;
	u8 *p;
	u32 val;

	dev = pDCTstat->dev_dct;
	index_reg = 0x98 + Channel * 0x100;
	index = 0x12;
	p = pDCTstat->CH_D_BC_RCVRDLY[Channel];
	print_debug_dqs("\t\tSetEccDQSRcvrPos: Channel ", Channel, 2);
	for(ChipSel = 0; ChipSel < MAX_CS_SUPPORTED; ChipSel += 2) {
		val = p[ChipSel>>1];
		Set_NB32_index_wait(dev, index_reg, index, val);
		print_debug_dqs_pair("\t\tSetEccDQSRcvrPos: ChipSel ",
					ChipSel, " rcvr_delay ", val, 2);
		index += 3;
	}
}

static void CalcEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat,
				struct DCTStatStruc *pDCTstat, u8 Channel)
{
	u8 ChipSel;
	u16 EccDQSLike;
	u8 EccDQSScale;
	u32 val, val0, val1;

	EccDQSLike = pDCTstat->CH_EccDQSLike[Channel];
	EccDQSScale = pDCTstat->CH_EccDQSScale[Channel];

	for (ChipSel = 0; ChipSel < MAX_CS_SUPPORTED; ChipSel += 2) {
		if(mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, ChipSel)) {
			u8 *p;
			p = pDCTstat->CH_D_B_RCVRDLY[Channel][ChipSel>>1];

			/* DQS Delay Value of Data Bytelane
			 * most like ECC byte lane */
			val0 = p[EccDQSLike & 0x07];

			/* DQS Delay Value of Data Bytelane
			 * 2nd most like ECC byte lane */
			val1 = p[(EccDQSLike>>8) & 0x07];

			if (!(pDCTstat->Status & (1 << SB_Registered))) {
				if(val0 > val1) {
					val = val0 - val1;
				} else {
					val = val1 - val0;
				}

				val *= ~EccDQSScale;
				val >>= 8;	/* /256 */

				if(val0 > val1) {
					val -= val1;
				} else {
					val += val0;
				}
			} else {
				val = val1 - val0;
				val += val1;
			}

			pDCTstat->CH_D_BC_RCVRDLY[Channel][ChipSel>>1] = val;
		}
	}
	SetEccDQSRcvrEn_D(pDCTstat, Channel);
}

void mctSetEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat,
				struct DCTStatStruc *pDCTstatA)
{
	u8 Node;
	u8 i;

	for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
		struct DCTStatStruc *pDCTstat;
		pDCTstat = pDCTstatA + Node;
		if (!pDCTstat->NodePresent)
			break;
		if (pDCTstat->DCTSysLimit) {
			for(i=0; i<2; i++)
				CalcEccDQSRcvrEn_D(pMCTstat, pDCTstat, i);
		}
	}
}

void phyAssistedMemFnceTraining(struct MCTStatStruc *pMCTstat,
				struct DCTStatStruc *pDCTstatA)
{
	u8 Node = 0;
	struct DCTStatStruc *pDCTstat;

	/* FIXME: skip for Ax */
	while (Node < MAX_NODES_SUPPORTED) {
		pDCTstat = pDCTstatA + Node;

		if(pDCTstat->DCTSysLimit) {
			fenceDynTraining_D(pMCTstat, pDCTstat, 0);
			fenceDynTraining_D(pMCTstat, pDCTstat, 1);
		}
		Node++;
	}
}

static void fenceDynTraining_D(struct MCTStatStruc *pMCTstat,
				struct DCTStatStruc *pDCTstat, u8 dct)
{
	u16 avRecValue;
	u32 val;
	u32 dev;
	u32 index_reg = 0x98 + 0x100 * dct;
	u32 index;

	/* BIOS first programs a seed value to the phase recovery engine
	 * (recommended 19) registers.
	 * Dram Phase Recovery Control Register (F2x[1,0]9C_x[51:50] and
	 * F2x[1,0]9C_x52.)
	 */
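	/* In outline (derived from the code below): seed registers 0x50-0x52
	 * with FenceTrnFinDlySeed, pulse PhyFenceTrEn, average the nine 7-bit
	 * phase recovery values (rounding up), subtract 8 on B/C revisions,
	 * and program the result into F2x[1,0]9C_x0C[PhyFence].
	 */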
	dev = pDCTstat->dev_dct;
	for (index = 0x50; index <= 0x52; index ++) {
		val = (FenceTrnFinDlySeed & 0x1F);
		if (index != 0x52) {
			val |= val << 8 | val << 16 | val << 24;
		}
		Set_NB32_index_wait(dev, index_reg, index, val);
	}

	/* Set F2x[1,0]9C_x08[PhyFenceTrEn]=1. */
	val = Get_NB32_index_wait(dev, index_reg, 0x08);
	val |= 1 << PhyFenceTrEn;
	Set_NB32_index_wait(dev, index_reg, 0x08, val);

	/* Wait 200 MEMCLKs. */
	mct_Wait(50000);		/* wait 200us */

	/* Clear F2x[1,0]9C_x08[PhyFenceTrEn]=0. */
	val = Get_NB32_index_wait(dev, index_reg, 0x08);
	val &= ~(1 << PhyFenceTrEn);
	Set_NB32_index_wait(dev, index_reg, 0x08, val);

	/* BIOS reads the phase recovery engine registers
	 * F2x[1,0]9C_x[51:50] and F2x[1,0]9C_x52.
	 */
	avRecValue = 0;
	for (index = 0x50; index <= 0x52; index ++) {
		val = Get_NB32_index_wait(dev, index_reg, index);
		avRecValue += val & 0x7F;
		if (index != 0x52) {
			avRecValue += (val >> 8) & 0x7F;
			avRecValue += (val >> 16) & 0x7F;
			avRecValue += (val >> 24) & 0x7F;
		}
	}

	val = avRecValue / 9;
	if (avRecValue % 9)
		val++;
	avRecValue = val;

	/* Write the (averaged value -8) to F2x[1,0]9C_x0C[PhyFence]. */
	/* inlined mct_AdjustFenceValue() */
	/* The RBC0 is not supported. */
	/* if (pDCTstat->LogicalCPUID & AMD_RB_C0)
		avRecValue -= 3;
	else
	*/
	if (pDCTstat->LogicalCPUID & AMD_DR_Cx)
		avRecValue -= 8;
	else if (pDCTstat->LogicalCPUID & AMD_DR_Bx)
		avRecValue -= 8;

	val = Get_NB32_index_wait(dev, index_reg, 0x0C);
	val &= ~(0x1F << 16);
	val |= (avRecValue & 0x1F) << 16;
	Set_NB32_index_wait(dev, index_reg, 0x0C, val);

	/* Rewrite F2x[1,0]9C_x04-DRAM Address/Command Timing Control Register
	 * delays (both channels).
	 */
	val = Get_NB32_index_wait(dev, index_reg, 0x04);
	Set_NB32_index_wait(dev, index_reg, 0x04, val);
}

void mct_Wait(u32 cycles)
{
	u32 saved;
	u32 hi, lo, msr;

	/* Wait # of 50ns cycles
	   This seems like a hack to me...  */

	cycles <<= 3;		/* x8 (number of 1.25ns ticks) */

	msr = 0x10;		/* TSC */
	_RDMSR(msr, &lo, &hi);
	saved = lo;
	do {
		_RDMSR(msr, &lo, &hi);
	} while (lo - saved < cycles);
}