blob: 4397ebaccbced484c1d6b555416310413985dc4c [file] [log] [blame]
/*
* This file is part of the coreboot project.
*
* Copyright (C) 2010 Advanced Micro Devices, Inc.
* Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
/******************************************************************************
Description: Receiver En and DQS Timing Training feature for DDR 3 MCT
******************************************************************************/
/*
 * Forward declarations of file-local helpers.
 * Only the two dqsTrainRcvrEn_SW_* routines are visibly defined later in this
 * part of the file; the remaining helpers are presumably defined further down
 * (NOTE(review): confirm against the rest of the file).
 */
/* NOTE(review): presumably returns the absolute value of val -- confirm at the definition */
static int32_t abs(int32_t val);
/* Software DQS receiver enable training, Family 10h flavour */
static void dqsTrainRcvrEn_SW_Fam10(struct MCTStatStruc *pMCTstat,
struct DCTStatStruc *pDCTstat, u8 Pass);
/* Software DQS receiver enable training, Family 15h flavour */
static void dqsTrainRcvrEn_SW_Fam15(struct MCTStatStruc *pMCTstat,
struct DCTStatStruc *pDCTstat, u8 Pass);
/* Called by the Family 10h training routine on the first pass only */
static void mct_InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
struct DCTStatStruc *pDCTstat);
static void InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
struct DCTStatStruc *pDCTstat, u8 Channel);
static void CalcEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat,
struct DCTStatStruc *pDCTstat, u8 Channel);
/* Called per channel after training with the largest trained receiver enable delay */
static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u16 DQSRcvEnDly);
static uint32_t fenceDynTraining_D(struct MCTStatStruc *pMCTstat,
struct DCTStatStruc *pDCTstat, u8 dct);
/* Called at the end of the first Family 10h training pass to leave DQSRcvrEn training mode */
static void mct_DisableDQSRcvEn_D(struct DCTStatStruc *pDCTstat);
/* Warning: These must be located so they do not cross a logical 16-bit
 * segment boundary!
 *
 * Each table below holds 16 dwords (64 bytes) of test data.
 */
/* Pattern 0: every dword is 0x55555555 */
static const u32 TestPattern0_D[] = {
0x55555555, 0x55555555, 0x55555555, 0x55555555,
0x55555555, 0x55555555, 0x55555555, 0x55555555,
0x55555555, 0x55555555, 0x55555555, 0x55555555,
0x55555555, 0x55555555, 0x55555555, 0x55555555,
};
/* Pattern 1: every dword is 0xaaaaaaaa (bitwise inverse of pattern 0) */
static const u32 TestPattern1_D[] = {
0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
};
/* Pattern 2: mixed data containing no 0x55/0xaa fill; the Family 10h
 * receiver enable training writes it to a test address and reads it back
 * to flush the receiver FIFO once a DIMM has been trained.
 */
static const u32 TestPattern2_D[] = {
0x12345678, 0x87654321, 0x23456789, 0x98765432,
0x59385824, 0x30496724, 0x24490795, 0x99938733,
0x40385642, 0x38465245, 0x29432163, 0x05067894,
0x12349045, 0x98723467, 0x12387634, 0x34587623,
};
/*
 * Stage the two receiver enable training patterns inside a caller-supplied
 * scratch buffer and publish their addresses through pDCTstat.
 *
 * The first staging area starts at the next 16-byte boundary above 'buffer'
 * (the pointer is advanced by 0x10 and then rounded down); the second one
 * starts 32 dwords after the first. Sixteen dwords are copied into each.
 * Note that the "A" area is filled from SetupDqsPattern_1PassB() and the
 * "B" area from SetupDqsPattern_1PassA().
 *
 * pMCTstat is not used here.
 */
static void SetupRcvrPattern(struct MCTStatStruc *pMCTstat,
		struct DCTStatStruc *pDCTstat, u32 *buffer, u8 pass)
{
	u32 *dst_a = (u32 *)(((u32)buffer + 0x10) & (0xfffffff0));
	u32 *dst_b = dst_a + 32; /* ?? */
	u32 *src_a = (u32 *)SetupDqsPattern_1PassB(pass);
	u32 *src_b = (u32 *)SetupDqsPattern_1PassA(pass);
	u8 idx = 0;

	/* Copy both patterns dword by dword, interleaved */
	while (idx < 16) {
		dst_a[idx] = src_a[idx];
		dst_b[idx] = src_b[idx];
		idx++;
	}

	pDCTstat->PtrPatternBufA = (u32)dst_a;
	pDCTstat->PtrPatternBufB = (u32)dst_b;
}
/*
 * Entry point for DQS receiver enable training.
 *
 * Does nothing unless mct_checkNumberOfDqsRcvEn_1Pass() accepts the given
 * pass; otherwise dispatches to the Family 15h or Family 10h software
 * training routine depending on is_fam15h().
 */
void mct_TrainRcvrEn_D(struct MCTStatStruc *pMCTstat,
		struct DCTStatStruc *pDCTstat, u8 Pass)
{
	if (!mct_checkNumberOfDqsRcvEn_1Pass(Pass))
		return;

	if (!is_fam15h()) {
		dqsTrainRcvrEn_SW_Fam10(pMCTstat, pDCTstat, Pass);
		return;
	}

	dqsTrainRcvrEn_SW_Fam15(pMCTstat, pDCTstat, Pass);
}
static uint16_t fam15_receiver_enable_training_seed(struct DCTStatStruc *pDCTstat, uint8_t dct, uint8_t dimm, uint8_t rank, uint8_t package_type)
{
uint32_t dword;
uint16_t seed = 0;
/* FIXME
* Mainboards need to be able to specify the maximum number of DIMMs installable per channel
* For now assume a maximum of 2 DIMMs per channel can be installed
*/
uint8_t MaxDimmsInstallable = 2;
uint8_t channel = dct;
if (package_type == PT_GR) {
/* Get the internal node number */
dword = Get_NB32(pDCTstat->dev_nbmisc, 0xe8);
dword = (dword >> 30) & 0x3;
if (dword == 1) {
channel += 2;
}
}
if (pDCTstat->Status & (1 << SB_Registered)) {
if (package_type == PT_GR) {
/* Socket G34: Fam15h BKDG v3.14 Table 99 */
if (MaxDimmsInstallable == 1) {
if (channel == 0)
seed = 0x43;
else if (channel == 1)
seed = 0x3f;
else if (channel == 2)
seed = 0x3a;
else if (channel == 3)
seed = 0x35;
} else if (MaxDimmsInstallable == 2) {
if (channel == 0)
seed = 0x54;
else if (channel == 1)
seed = 0x4d;
else if (channel == 2)
seed = 0x45;
else if (channel == 3)
seed = 0x40;
} else if (MaxDimmsInstallable == 3) {
if (channel == 0)
seed = 0x6b;
else if (channel == 1)
seed = 0x5e;
else if (channel == 2)
seed = 0x4b;
else if (channel == 3)
seed = 0x3d;
}
} else if (package_type == PT_C3) {
/* Socket C32: Fam15h BKDG v3.14 Table 100 */
if ((MaxDimmsInstallable == 1) || (MaxDimmsInstallable == 2)) {
if (channel == 0)
seed = 0x3f;
else if (channel == 1)
seed = 0x3e;
} else if (MaxDimmsInstallable == 3) {
if (channel == 0)
seed = 0x47;
else if (channel == 1)
seed = 0x38;
}
}
} else if (pDCTstat->Status & (1 << SB_LoadReduced)) {
if (package_type == PT_GR) {
/* Socket G34: Fam15h BKDG v3.14 Table 99 */
if (MaxDimmsInstallable == 1) {
if (channel == 0)
seed = 0x123;
else if (channel == 1)
seed = 0x122;
else if (channel == 2)
seed = 0x112;
else if (channel == 3)
seed = 0x102;
}
} else if (package_type == PT_C3) {
/* Socket C32: Fam15h BKDG v3.14 Table 100 */
if (channel == 0)
seed = 0x132;
else if (channel == 1)
seed = 0x122;
}
} else {
if (package_type == PT_GR) {
/* Socket G34: Fam15h BKDG v3.14 Table 99 */
if (MaxDimmsInstallable == 1) {
if (channel == 0)
seed = 0x3e;
else if (channel == 1)
seed = 0x38;
else if (channel == 2)
seed = 0x37;
else if (channel == 3)
seed = 0x31;
} else if (MaxDimmsInstallable == 2) {
if (channel == 0)
seed = 0x51;
else if (channel == 1)
seed = 0x4a;
else if (channel == 2)
seed = 0x46;
else if (channel == 3)
seed = 0x3f;
} else if (MaxDimmsInstallable == 3) {
if (channel == 0)
seed = 0x5e;
else if (channel == 1)
seed = 0x52;
else if (channel == 2)
seed = 0x48;
else if (channel == 3)
seed = 0x3c;
}
} else if (package_type == PT_C3) {
/* Socket C32: Fam15h BKDG v3.14 Table 100 */
if ((MaxDimmsInstallable == 1) || (MaxDimmsInstallable == 2)) {
if (channel == 0)
seed = 0x39;
else if (channel == 1)
seed = 0x32;
} else if (MaxDimmsInstallable == 3) {
if (channel == 0)
seed = 0x45;
else if (channel == 1)
seed = 0x37;
}
} else if (package_type == PT_M2) {
/* Socket AM3: Fam15h BKDG v3.14 Table 101 */
seed = 0x3a;
}
}
return seed;
}
/*
 * Read the DQS write timing delay of every byte lane of one DIMM.
 *
 * Lanes are packed two per register: indices 0x30, 0x31, 0x40 and 0x41 hold
 * lanes 0-7 and index 0x32 holds lane 8, each offset by dimm * 3. Even lanes
 * occupy bits 7:0 and odd lanes bits 23:16.
 *
 * current_total_delay: output array with at least MAX_BYTE_LANES entries
 * dev/dct/index_reg:   passed through to Get_NB32_index_wait_DCT()
 */
static void read_dqs_write_timing_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg)
{
	uint8_t lane;

	for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
		uint32_t reg_index;
		uint32_t reg_value;

		/* Select the register holding this lane */
		if (lane <= 1)
			reg_index = 0x30;
		else if (lane <= 3)
			reg_index = 0x31;
		else if (lane <= 5)
			reg_index = 0x40;
		else if (lane <= 7)
			reg_index = 0x41;
		else if (lane == 8)
			reg_index = 0x32;
		reg_index += dimm * 3;

		reg_value = Get_NB32_index_wait_DCT(dev, dct, index_reg, reg_index);

		/* Odd lanes live in the upper half, even lanes in the lower half */
		if (lane <= 8) {
			if (lane & 1)
				current_total_delay[lane] = (reg_value >> 16) & 0xff;
			else
				current_total_delay[lane] = reg_value & 0xff;
		}
	}
}
#ifdef UNUSED_CODE
/*
 * Write the DQS write timing delay of every byte lane of one DIMM.
 *
 * Uses the same register layout as the matching read helper: indices 0x30,
 * 0x31, 0x40, 0x41 (lanes 0-7, two per register) and 0x32 (lane 8), each
 * offset by dimm * 3. Every lane is updated with its own read-modify-write
 * cycle; only the low 8 bits of each delay are stored.
 */
static void write_dqs_write_timing_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg)
{
	uint8_t lane;

	for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
		uint32_t reg_index;
		uint32_t reg_value;

		/* Select the register holding this lane */
		if (lane <= 1)
			reg_index = 0x30;
		else if (lane <= 3)
			reg_index = 0x31;
		else if (lane <= 5)
			reg_index = 0x40;
		else if (lane <= 7)
			reg_index = 0x41;
		else if (lane == 8)
			reg_index = 0x32;
		reg_index += dimm * 3;

		reg_value = Get_NB32_index_wait_DCT(dev, dct, index_reg, reg_index);

		/* Odd lanes live in bits 23:16, even lanes in bits 7:0 */
		if (lane <= 8) {
			uint32_t shift = (lane & 1) ? 16 : 0;

			reg_value &= ~((uint32_t)0xff << shift);
			reg_value |= (uint32_t)(current_total_delay[lane] & 0xff) << shift;
		}

		Set_NB32_index_wait_DCT(dev, dct, index_reg, reg_index, reg_value);
	}
}
#endif
/*
 * Program the write data timing field of every byte lane of one DIMM.
 *
 * Register index 0x1 holds lanes 0-3, 0x2 holds lanes 4-7 and 0x3 holds
 * lane 8; the DIMM number is OR-ed in at bit 8. Each lane owns a 7-bit field
 * at a byte boundary (lane modulo 4 selects the byte; lane 8 uses byte 0).
 * Every lane is updated with its own read-modify-write cycle.
 */
static void write_write_data_timing_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg)
{
	uint8_t lane;

	for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
		uint32_t reg_index;
		uint32_t reg_value;

		/* Calculate Write Data Timing register location */
		if (lane <= 3)
			reg_index = 0x1;
		else if (lane <= 7)
			reg_index = 0x2;
		else if (lane == 8)
			reg_index = 0x3;
		reg_index |= (dimm << 8);

		/* Set Write Data Timing register values */
		reg_value = Get_NB32_index_wait_DCT(dev, dct, index_reg, reg_index);
		if (lane <= 8) {
			uint32_t shift = (lane == 8) ? 0 : (uint32_t)(lane & 3) * 8;

			reg_value &= ~((uint32_t)0x7f << shift);
			reg_value |= (uint32_t)(current_total_delay[lane] & 0x7f) << shift;
		}
		Set_NB32_index_wait_DCT(dev, dct, index_reg, reg_index, reg_value);
	}
}
/*
 * Read the DQS receiver enable delay of every byte lane of one DIMM.
 *
 * Register indices 0x10, 0x11, 0x20 and 0x21 hold lanes 0-7 (two per
 * register) and 0x12 holds lane 8, each offset by dimm * 3. Even lanes sit
 * at bit 0 and odd lanes at bit 16. The field is 10 bits wide when
 * is_fam15h() is true and 9 bits wide otherwise.
 */
static void read_dqs_receiver_enable_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg)
{
	const uint32_t field_mask = is_fam15h() ? 0x3ff : 0x1ff;
	uint8_t lane;

	for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
		uint32_t reg_index;
		uint32_t reg_value;

		/* Select the register holding this lane */
		if (lane <= 1)
			reg_index = 0x10;
		else if (lane <= 3)
			reg_index = 0x11;
		else if (lane <= 5)
			reg_index = 0x20;
		else if (lane <= 7)
			reg_index = 0x21;
		else if (lane == 8)
			reg_index = 0x12;
		reg_index += dimm * 3;

		reg_value = Get_NB32_index_wait_DCT(dev, dct, index_reg, reg_index);

		/* Odd lanes live in the upper half, even lanes in the lower half */
		if (lane <= 8) {
			if (lane & 1)
				current_total_delay[lane] = (reg_value >> 16) & field_mask;
			else
				current_total_delay[lane] = reg_value & field_mask;
		}
	}
}
/*
 * Program the DQS receiver enable delay of every byte lane of one DIMM.
 *
 * Register indices 0x10, 0x11, 0x20 and 0x21 hold lanes 0-7 (two per
 * register) and 0x12 holds lane 8, each offset by dimm * 3. Even lanes sit
 * at bit 0 and odd lanes at bit 16. The field is 10 bits wide when
 * is_fam15h() is true and 9 bits wide otherwise. Every lane is updated with
 * its own read-modify-write cycle.
 */
static void write_dqs_receiver_enable_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg)
{
	const uint32_t field_mask = is_fam15h() ? 0x3ff : 0x1ff;
	uint8_t lane;

	for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
		uint32_t reg_index;
		uint32_t reg_value;

		/* Select the register holding this lane */
		if (lane <= 1)
			reg_index = 0x10;
		else if (lane <= 3)
			reg_index = 0x11;
		else if (lane <= 5)
			reg_index = 0x20;
		else if (lane <= 7)
			reg_index = 0x21;
		else if (lane == 8)
			reg_index = 0x12;
		reg_index += dimm * 3;

		reg_value = Get_NB32_index_wait_DCT(dev, dct, index_reg, reg_index);

		/* Replace only this lane's field */
		if (lane <= 8) {
			uint32_t shift = (lane & 1) ? 16 : 0;

			reg_value &= ~(field_mask << shift);
			reg_value |= (current_total_delay[lane] & field_mask) << shift;
		}

		Set_NB32_index_wait_DCT(dev, dct, index_reg, reg_index, reg_value);
	}
}
/*
 * Read the DRAM phase recovery value of every byte lane.
 *
 * Register index 0x50 holds lanes 0-3, 0x51 holds lanes 4-7 and 0x52 holds
 * lane 8. Each lane owns a 7-bit field at a byte boundary (lane modulo 4
 * selects the byte; lane 8 uses byte 0).
 * The dimm argument does not take part in the register selection.
 */
static void read_dram_phase_recovery_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg)
{
	uint8_t lane;

	for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
		uint32_t reg_index;
		uint32_t reg_value;

		/* Calculate DRAM Phase Recovery Control register location */
		if (lane <= 3)
			reg_index = 0x50;
		else if (lane <= 7)
			reg_index = 0x51;
		else if (lane == 8)
			reg_index = 0x52;

		reg_value = Get_NB32_index_wait_DCT(dev, dct, index_reg, reg_index);

		/* Extract this lane's field */
		if (lane <= 8) {
			uint32_t shift = (lane == 8) ? 0 : (uint32_t)(lane & 3) * 8;

			current_total_delay[lane] = (reg_value >> shift) & 0x7f;
		}
	}
}
/*
 * Program the DRAM phase recovery value of every byte lane.
 *
 * Register index 0x50 holds lanes 0-3, 0x51 holds lanes 4-7 and 0x52 holds
 * lane 8. Each lane owns a 7-bit field at a byte boundary (lane modulo 4
 * selects the byte; lane 8 uses byte 0). Every lane is updated with its own
 * read-modify-write cycle.
 * The dimm argument does not take part in the register selection.
 */
static void write_dram_phase_recovery_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg)
{
	uint8_t lane;

	for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
		uint32_t reg_index;
		uint32_t reg_value;

		/* Calculate DRAM Phase Recovery Control register location */
		if (lane <= 3)
			reg_index = 0x50;
		else if (lane <= 7)
			reg_index = 0x51;
		else if (lane == 8)
			reg_index = 0x52;

		/* Set DRAM Phase Recovery Control register values */
		reg_value = Get_NB32_index_wait_DCT(dev, dct, index_reg, reg_index);
		if (lane <= 8) {
			uint32_t shift = (lane == 8) ? 0 : (uint32_t)(lane & 3) * 8;

			reg_value &= ~((uint32_t)0x7f << shift);
			reg_value |= (uint32_t)(current_total_delay[lane] & 0x7f) << shift;
		}
		Set_NB32_index_wait_DCT(dev, dct, index_reg, reg_index, reg_value);
	}
}
/*
 * Read the read DQS timing value of every byte lane of one DIMM.
 *
 * Register index 0x5 holds lanes 0-3, 0x6 holds lanes 4-7 and 0x7 holds
 * lane 8; the DIMM number is OR-ed in at bit 8. Each lane owns a 6-bit field
 * at a byte boundary (lane modulo 4 selects the byte; lane 8 uses byte 0).
 * When is_fam15h() is true the stored value is additionally shifted right
 * by one bit.
 */
static void read_read_dqs_timing_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg)
{
	uint8_t lane;

	for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
		uint32_t reg_index;
		uint32_t reg_value;

		/* Calculate DRAM Read DQS Timing register location */
		if (lane <= 3)
			reg_index = 0x5;
		else if (lane <= 7)
			reg_index = 0x6;
		else if (lane == 8)
			reg_index = 0x7;
		reg_index |= (dimm << 8);

		reg_value = Get_NB32_index_wait_DCT(dev, dct, index_reg, reg_index);

		/* Extract this lane's field */
		if (lane <= 8) {
			uint32_t shift = (lane == 8) ? 0 : (uint32_t)(lane & 3) * 8;

			current_total_delay[lane] = (reg_value >> shift) & 0x3f;
		}

		if (is_fam15h())
			current_total_delay[lane] >>= 1;
	}
}
/*
 * Turn a test address into the offset used for FS-relative accesses.
 *
 * SetUpperFSbase() is called with the unmodified test address, then the
 * address is shifted left by 8 bits. When the DCT status has SB_128bitmode
 * set and 'channel' is non-zero, 8 is added to reach the second channel.
 */
static uint32_t convert_testaddr_and_channel_to_address(struct DCTStatStruc *pDCTstat, uint32_t testaddr, uint8_t channel)
{
	uint32_t fs_offset;

	SetUpperFSbase(testaddr);
	fs_offset = testaddr << 8;

	if (channel && (pDCTstat->Status & (1<<SB_128bitmode)))
		fs_offset += 8; /* second channel */

	return fs_offset;
}
/* DQS Receiver Enable Training (Family 10h)
 * Algorithm detailed in:
 * The Fam10h BKDG Rev. 3.62 section 2.8.9.9.2
 *
 * Outline of what this routine does:
 *  - programs MaxRdLatency = 0xc8 on each DCT (one DCT in ganged mode, two otherwise)
 *  - temporarily sets CR4.OSFXSR and HWCR.wrap32dis, clears HWCR bit 15 (SSEDIS)
 *    and disables DIMM ECC; OSFXSR, wrap32dis and ECC are restored on exit
 *  - for every enabled receiver pair (DIMM) on both channels, sweeps the
 *    receiver enable delay of byte lanes 0-7 until each lane has seen a
 *    passing window of at least 0x28 delay steps, or 0x80 iterations elapsed
 *  - stores the trained delays in pDCTstat->CH_D_B_RCVRDLY and flags untrained
 *    lanes in ErrStatus / ErrCode / CSTrainFail / DimmTrainFail
 *  - programs the maximum latency on both channels from the largest delay seen
 *
 * Pass: FirstPass additionally initialises the DQS positions before training
 *       and disables DQSRcvEn training mode afterwards; any other value
 *       resets the DimmTrainFail/CSTrainFail bookkeeping instead.
 */
static void dqsTrainRcvrEn_SW_Fam10(struct MCTStatStruc *pMCTstat,
				struct DCTStatStruc *pDCTstat, u8 Pass)
{
	u8 Channel;
	u8 _2Ranks;
	u8 Addl_Index = 0;
	u8 Receiver;
	u8 _DisableDramECC = 0, _Wrap32Dis = 0, _SSE2 = 0;
	u16 CTLRMaxDelay;
	u16 MaxDelay_CH[2];
	u32 TestAddr0, TestAddr1, TestAddr0B, TestAddr1B;
	u32 PatternBuffer[64+4]; /* FIXME: need increase 8? */
	u32 Errors;
	u32 val;
	u32 reg;
	u32 dev;
	u32 index_reg;
	u32 ch_start, ch_end, ch;
	u32 msr;
	u32 cr4;
	u32 lo, hi;
	uint32_t dword;
	uint8_t dimm;
	uint8_t rank;
	uint8_t lane;
	uint16_t current_total_delay[MAX_BYTE_LANES];
	uint16_t candidate_total_delay[8];
	uint8_t data_test_pass_sr[2][8]; /* [rank][lane] */
	uint8_t data_test_pass[8]; /* [lane] */
	uint8_t data_test_pass_prev[8]; /* [lane] */
	uint8_t window_det_toggle[8];
	uint8_t trained[8];
	uint64_t result_qword1;
	uint64_t result_qword2;
	u8 valid;

	print_debug_dqs("\nTrainRcvEn: Node", pDCTstat->Node_ID, 0);
	print_debug_dqs("TrainRcvEn: Pass", Pass, 0);

	dev = pDCTstat->dev_dct;
	ch_start = 0;
	if (!pDCTstat->GangedMode) {
		ch_end = 2;
	} else {
		ch_end = 1;
	}

	for (ch = ch_start; ch < ch_end; ch++) {
		reg = 0x78;
		val = Get_NB32_DCT(dev, ch, reg);
		/* Unsigned constant: 0x3ff << 22 does not fit in a signed int */
		val &= ~(0x3ffU << 22);
		val |= (0x0c8 << 22); /* MaxRdLatency = 0xc8 */
		Set_NB32_DCT(dev, ch, reg, val);
	}

	if (Pass == FirstPass) {
		mct_InitDQSPos4RcvrEn_D(pMCTstat, pDCTstat);
	} else {
		pDCTstat->DimmTrainFail = 0;
		pDCTstat->CSTrainFail = ~pDCTstat->CSPresent;
	}

	cr4 = read_cr4();
	if (cr4 & (1 << 9)) { /* save the old value */
		_SSE2 = 1;
	}
	cr4 |= (1 << 9); /* OSFXSR enable SSE2 */
	write_cr4(cr4);

	msr = HWCR;
	_RDMSR(msr, &lo, &hi);
	/* FIXME: Why use SSEDIS */
	if (lo & (1 << 17)) { /* save the old value */
		_Wrap32Dis = 1;
	}
	lo |= (1 << 17); /* HWCR.wrap32dis */
	lo &= ~(1 << 15); /* SSEDIS */
	_WRMSR(msr, lo, hi); /* Setting wrap32dis allows 64-bit memory references in real mode */

	_DisableDramECC = mct_DisableDimmEccEn_D(pMCTstat, pDCTstat);

	SetupRcvrPattern(pMCTstat, pDCTstat, PatternBuffer, Pass);

	Errors = 0;
	dev = pDCTstat->dev_dct;

	for (Channel = 0; Channel < 2; Channel++) {
		print_debug_dqs("\tTrainRcvEn51: Node ", pDCTstat->Node_ID, 1);
		print_debug_dqs("\tTrainRcvEn51: Channel ", Channel, 1);
		pDCTstat->Channel = Channel;

		CTLRMaxDelay = 0;
		MaxDelay_CH[Channel] = 0;
		index_reg = 0x98;

		Receiver = mct_InitReceiver_D(pDCTstat, Channel);
		/* There are four receiver pairs, loosely associated with chipselects.
		 * This is essentially looping over each DIMM.
		 */
		for (; Receiver < 8; Receiver += 2) {
			Addl_Index = (Receiver >> 1) * 3 + 0x10;
			dimm = (Receiver >> 1);

			print_debug_dqs("\t\tTrainRcvEnd52: index ", Addl_Index, 2);

			if (!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver)) {
				continue;
			}

			/* Clear data structures */
			for (lane = 0; lane < 8; lane++) {
				data_test_pass_prev[lane] = 0;
				trained[lane] = 0;
			}

			/* 2.8.9.9.2 (1, 6)
			 * Retrieve gross and fine timing fields from write DQS registers
			 */
			read_dqs_write_timing_control_registers(current_total_delay, dev, Channel, dimm, index_reg);

			/* 2.8.9.9.2 (1)
			 * Program the Write Data Timing and Write ECC Timing register to
			 * the values stored in the DQS Write Timing Control register
			 * for each lane
			 */
			write_write_data_timing_control_registers(current_total_delay, dev, Channel, dimm, index_reg);

			/* 2.8.9.9.2 (2)
			 * Program the Read DQS Timing Control and the Read DQS ECC Timing Control registers
			 * to 1/2 MEMCLK for all lanes
			 */
			for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
				uint32_t rdt_reg;
				if ((lane == 0) || (lane == 1) || (lane == 2) || (lane == 3))
					rdt_reg = 0x5;
				if ((lane == 4) || (lane == 5) || (lane == 6) || (lane == 7))
					rdt_reg = 0x6;
				if (lane == 8)
					rdt_reg = 0x7;
				rdt_reg |= (dimm << 8);
				if (lane == 8)
					dword = 0x0000003f;
				else
					dword = 0x3f3f3f3f;
				Set_NB32_index_wait_DCT(dev, Channel, index_reg, rdt_reg, dword);
			}

			/* 2.8.9.9.2 (3)
			 * Select two test addresses for each rank present
			 */
			TestAddr0 = mct_GetRcvrSysAddr_D(pMCTstat, pDCTstat, Channel, Receiver, &valid);
			if (!valid) {	/* Address not supported on current CS */
				continue;
			}

			TestAddr0B = TestAddr0 + (BigPagex8_RJ8 << 3);

			if (mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver+1)) {
				TestAddr1 = mct_GetRcvrSysAddr_D(pMCTstat, pDCTstat, Channel, Receiver+1, &valid);
				if (!valid) {	/* Address not supported on current CS */
					continue;
				}
				TestAddr1B = TestAddr1 + (BigPagex8_RJ8 << 3);
				_2Ranks = 1;
			} else {
				_2Ranks = TestAddr1 = TestAddr1B = 0;
			}

			print_debug_dqs("\t\tTrainRcvEn53: TestAddr0 ", TestAddr0, 2);
			print_debug_dqs("\t\tTrainRcvEn53: TestAddr0B ", TestAddr0B, 2);
			print_debug_dqs("\t\tTrainRcvEn53: TestAddr1 ", TestAddr1, 2);
			print_debug_dqs("\t\tTrainRcvEn53: TestAddr1B ", TestAddr1B, 2);

			/* 2.8.9.9.2 (4, 5)
			 * Write 1 cache line of the appropriate test pattern to each test address
			 */
			mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0, 0); /* rank 0 of DIMM, testpattern 0 */
			mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0B, 1); /* rank 0 of DIMM, testpattern 1 */
			if (_2Ranks) {
				mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1, 0); /*rank 1 of DIMM, testpattern 0 */
				mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1B, 1); /*rank 1 of DIMM, testpattern 1 */
			}

#if DQS_TRAIN_DEBUG > 0
			for (lane = 0; lane < 8; lane++) {
				print_debug_dqs("\t\tTrainRcvEn54: lane: ", lane, 2);
				print_debug_dqs("\t\tTrainRcvEn54: current_total_delay ", current_total_delay[lane], 2);
			}
#endif

			/* 2.8.9.9.2 (6)
			 * Write gross and fine timing fields to read DQS registers
			 */
			write_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg);

			/* 2.8.9.9.2 (7)
			 * Loop over all delay values up to 1 MEMCLK (0x40 delay steps) from the initial delay values
			 *
			 * FIXME
			 * It is not clear if training should be discontinued if any test failures occur in the first
			 * 1 MEMCLK window, or if it should be discontinued if no successes occur in the first 1 MEMCLK
			 * window. Therefore, loop over up to 2 MEMCLK (0x80 delay steps) to be on the safe side.
			 */
			uint16_t current_delay_step;

			for (current_delay_step = 0; current_delay_step < 0x80; current_delay_step++) {
				print_debug_dqs("\t\t\tTrainRcvEn541: current_delay_step ", current_delay_step, 3);

				/* 2.8.9.9.2 (7 D)
				 * Terminate if all lanes are trained
				 */
				uint8_t all_lanes_trained = 1;
				for (lane = 0; lane < 8; lane++)
					if (!trained[lane])
						all_lanes_trained = 0;

				if (all_lanes_trained)
					break;

				/* 2.8.9.9.2 (7 A)
				 * Loop over all ranks
				 */
				for (rank = 0; rank < (_2Ranks + 1); rank++) {
					/* Test addresses of the rank under test */
					const u32 rank_addr = (rank == 0) ? TestAddr0 : TestAddr1;
					const u32 rank_addr_b = (rank == 0) ? TestAddr0B : TestAddr1B;

					/* 2.8.9.9.2 (7 A a-d)
					 * Read the first test address of the current rank
					 * Store the first data beat for analysis
					 * Reset read pointer in the DRAM controller FIFO
					 * Read the second test address of the current rank
					 * Store the first data beat for analysis
					 * Reset read pointer in the DRAM controller FIFO
					 */
					if (rank & 1) {
						/* 2.8.9.9.2 (7 D)
						 * Invert read instructions to alternate data read order on the bus
						 */
						proc_IOCLFLUSH_D(rank_addr_b);
						result_qword2 = read64_fs(convert_testaddr_and_channel_to_address(pDCTstat, rank_addr_b, Channel));
						write_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg);
						proc_IOCLFLUSH_D(rank_addr);
						result_qword1 = read64_fs(convert_testaddr_and_channel_to_address(pDCTstat, rank_addr, Channel));
						write_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg);
					} else {
						proc_IOCLFLUSH_D(rank_addr);
						result_qword1 = read64_fs(convert_testaddr_and_channel_to_address(pDCTstat, rank_addr, Channel));
						write_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg);
						proc_IOCLFLUSH_D(rank_addr_b);
						result_qword2 = read64_fs(convert_testaddr_and_channel_to_address(pDCTstat, rank_addr_b, Channel));
						write_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg);
					}

					/* 2.8.9.9.2 (7 A e)
					 * Compare both read patterns and flag passing ranks/lanes
					 */
					uint8_t result_lane_byte1;
					uint8_t result_lane_byte2;
					for (lane = 0; lane < 8; lane++) {
						if (trained[lane] == 1) {
#if DQS_TRAIN_DEBUG > 0
							print_debug_dqs("\t\t\t\t\t\t\t\t lane already trained: ", lane, 4);
#endif
							continue;
						}

						result_lane_byte1 = (result_qword1 >> (lane * 8)) & 0xff;
						result_lane_byte2 = (result_qword2 >> (lane * 8)) & 0xff;
						if ((result_lane_byte1 == 0x55) && (result_lane_byte2 == 0xaa))
							data_test_pass_sr[rank][lane] = 1;
						else
							data_test_pass_sr[rank][lane] = 0;
#if DQS_TRAIN_DEBUG > 0
						print_debug_dqs_pair("\t\t\t\t\t\t\t\t ", 0x55, " | ", result_lane_byte1, 4);
						print_debug_dqs_pair("\t\t\t\t\t\t\t\t ", 0xaa, " | ", result_lane_byte2, 4);
#endif
					}
				}

				/* 2.8.9.9.2 (7 B)
				 * If DIMM is dual rank, only use delays that pass testing for both ranks
				 */
				for (lane = 0; lane < 8; lane++) {
					if (_2Ranks) {
						if ((data_test_pass_sr[0][lane]) && (data_test_pass_sr[1][lane]))
							data_test_pass[lane] = 1;
						else
							data_test_pass[lane] = 0;
					} else {
						data_test_pass[lane] = data_test_pass_sr[0][lane];
					}
				}

				/* 2.8.9.9.2 (7 E)
				 * For each lane, update the DQS receiver delay setting in support of next iteration
				 */
				for (lane = 0; lane < 8; lane++) {
					if (trained[lane] == 1)
						continue;

					/* 2.8.9.9.2 (7 C a)
					 * Save the total delay of the first success after a failure for later use
					 */
					if ((data_test_pass[lane] == 1) && (data_test_pass_prev[lane] == 0)) {
						candidate_total_delay[lane] = current_total_delay[lane];
						window_det_toggle[lane] = 0;
					}

					/* 2.8.9.9.2 (7 C b)
					 * If the current delay failed testing add 1/8 UI to the current delay
					 */
					if (data_test_pass[lane] == 0)
						current_total_delay[lane] += 0x4;

					/* 2.8.9.9.2 (7 C c)
					 * If the current delay passed testing alternately add either 1/32 UI or 1/4 UI to the current delay
					 * If 1.25 UI of delay have been added with no failures the lane is considered trained
					 */
					if (data_test_pass[lane] == 1) {
						/* See if lane is trained */
						if ((current_total_delay[lane] - candidate_total_delay[lane]) >= 0x28) {
							trained[lane] = 1;

							/* Calculate and set final lane delay value
							 * The final delay is the candidate delay + 7/8 UI
							 */
							current_total_delay[lane] = candidate_total_delay[lane] + 0x1c;
						} else {
							if (window_det_toggle[lane] == 0) {
								current_total_delay[lane] += 0x1;
								window_det_toggle[lane] = 1;
							} else {
								current_total_delay[lane] += 0x8;
								window_det_toggle[lane] = 0;
							}
						}
					}
				}

				/* Update delays in hardware */
				write_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg);

				/* Save previous results for comparison in the next iteration */
				for (lane = 0; lane < 8; lane++)
					data_test_pass_prev[lane] = data_test_pass[lane];
			}

#if DQS_TRAIN_DEBUG > 0
			for (lane = 0; lane < 8; lane++)
				print_debug_dqs_pair("\t\tTrainRcvEn55: Lane ", lane, " current_total_delay ", current_total_delay[lane], 2);
#endif

			/* Find highest delay value and save for later use */
			for (lane = 0; lane < 8; lane++)
				if (current_total_delay[lane] > CTLRMaxDelay)
					CTLRMaxDelay = current_total_delay[lane];

			/* See if any lanes failed training, and set error flags appropriately
			 * For all trained lanes, save delay values for later use
			 */
			for (lane = 0; lane < 8; lane++) {
				if (trained[lane]) {
					pDCTstat->CH_D_B_RCVRDLY[Channel][Receiver >> 1][lane] = current_total_delay[lane];
				} else {
					printk(BIOS_WARNING, "TrainRcvrEn: WARNING: Lane %d of receiver %d on channel %d failed training!\n", lane, Receiver, Channel);

					/* Set error flags */
					pDCTstat->ErrStatus |= 1 << SB_NORCVREN;
					Errors |= 1 << SB_NORCVREN;
					pDCTstat->ErrCode = SC_FatalErr;
					pDCTstat->CSTrainFail |= 1 << Receiver;
					pDCTstat->DimmTrainFail |= 1 << (Receiver + Channel);
				}
			}

			/* 2.8.9.9.2 (8)
			 * Flush the receiver FIFO
			 * Write one full cache line of non-0x55/0xaa data to one of the test addresses, then read it back to flush the FIFO
			 */
			WriteLNTestPattern(TestAddr0 << 8, (uint8_t *)TestPattern2_D, 1);
			mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0);
		}
		MaxDelay_CH[Channel] = CTLRMaxDelay;
	}

	/* Use the larger of the two per-channel maxima for both channels */
	CTLRMaxDelay = MaxDelay_CH[0];
	if (MaxDelay_CH[1] > CTLRMaxDelay)
		CTLRMaxDelay = MaxDelay_CH[1];

	for (Channel = 0; Channel < 2; Channel++) {
		mct_SetMaxLatency_D(pDCTstat, Channel, CTLRMaxDelay); /* program Ch A/B MaxAsyncLat to correspond with max delay */
	}

	for (Channel = 0; Channel < 2; Channel++) {
		ResetDCTWrPtr_D(dev, Channel, index_reg, Addl_Index);
	}

	if (_DisableDramECC) {
		mct_EnableDimmEccEn_D(pMCTstat, pDCTstat, _DisableDramECC);
	}

	if (Pass == FirstPass) {
		/*Disable DQSRcvrEn training mode */
		mct_DisableDQSRcvEn_D(pDCTstat);
	}

	if (!_Wrap32Dis) {
		msr = HWCR;
		_RDMSR(msr, &lo, &hi);
		lo &= ~(1<<17); /* restore HWCR.wrap32dis */
		_WRMSR(msr, lo, hi);
	}
	if (!_SSE2) {
		cr4 = read_cr4();
		cr4 &= ~(1<<9); /* restore cr4.OSFXSR */
		write_cr4(cr4);
	}

#if DQS_TRAIN_DEBUG > 0
	{
		u8 ChannelDTD;
		printk(BIOS_DEBUG, "TrainRcvrEn: CH_MaxRdLat:\n");
		for (ChannelDTD = 0; ChannelDTD < 2; ChannelDTD++) {
			printk(BIOS_DEBUG, "Channel:%x: %x\n",
				ChannelDTD, pDCTstat->CH_MaxRdLat[ChannelDTD]);
		}
	}
#endif

#if DQS_TRAIN_DEBUG > 0
	{
		u16 valDTD;
		u8 ChannelDTD, ReceiverDTD;
		u8 i;
		u16 *p;

		printk(BIOS_DEBUG, "TrainRcvrEn: CH_D_B_RCVRDLY:\n");
		for (ChannelDTD = 0; ChannelDTD < 2; ChannelDTD++) {
			printk(BIOS_DEBUG, "Channel:%x\n", ChannelDTD);
			for (ReceiverDTD = 0; ReceiverDTD < 8; ReceiverDTD += 2) {
				printk(BIOS_DEBUG, "\t\tReceiver:%x:", ReceiverDTD);
				p = pDCTstat->CH_D_B_RCVRDLY[ChannelDTD][ReceiverDTD>>1];
				for (i = 0; i < 8; i++) {
					valDTD = p[i];
					printk(BIOS_DEBUG, " %03x", valDTD);
				}
				printk(BIOS_DEBUG, "\n");
			}
		}
	}
#endif

	printk(BIOS_DEBUG, "TrainRcvrEn: Status %x\n", pDCTstat->Status);
	printk(BIOS_DEBUG, "TrainRcvrEn: ErrStatus %x\n", pDCTstat->ErrStatus);
	printk(BIOS_DEBUG, "TrainRcvrEn: ErrCode %x\n", pDCTstat->ErrCode);
	printk(BIOS_DEBUG, "TrainRcvrEn: Done\n\n");
}
/* DQS Receiver Enable Training Pattern Generation (Family 15h)
 * Algorithm detailed in:
 * The Fam15h BKDG Rev. 3.14 section 2.10.5.8.2 (4)
 *
 * Drives the DCT command generator through one training burst on the
 * given chip select:
 *  - enable command test mode (register 0x250, CmdTestEnable)
 *  - send an Activate command to bank 0 / address 0
 *  - clear the DQ mask, set the ECC mask according to DimmECCPresent,
 *    load the PRBS seed and request 192 read commands on target A
 *  - wait for the test to complete, then send a Precharge command
 *  - disable command test mode again
 *
 * All polling loops spin until the hardware clears/sets the awaited bit;
 * there is no timeout.
 *
 * dct:      DCT to operate on
 * Receiver: chip select to target
 */
static void generate_dram_receiver_enable_training_pattern_fam15(struct MCTStatStruc *pMCTstat,
				struct DCTStatStruc *pDCTstat, uint8_t dct, uint8_t Receiver)
{
	uint32_t dword;
	uint32_t dev = pDCTstat->dev_dct;

	/* 2.10.5.7.1.1
	 * It appears that the DCT only supports 8-beat burst length mode,
	 * so do nothing here...
	 */

	/* Wait for CmdSendInProg == 0 */
	do {
		dword = Get_NB32_DCT(dev, dct, 0x250);
	} while (dword & (0x1 << 12));

	/* Set CmdTestEnable = 1 */
	dword = Get_NB32_DCT(dev, dct, 0x250);
	dword |= (0x1 << 2);
	Set_NB32_DCT(dev, dct, 0x250, dword);

	/* 2.10.5.8.6.1.1 Send Activate Command */
	dword = Get_NB32_DCT(dev, dct, 0x28c);
	dword &= ~(0xff << 22);			/* CmdChipSelect = Receiver */
	dword |= ((0x1 << Receiver) << 22);
	dword &= ~(0x7 << 19);			/* CmdBank = 0 */
	dword &= ~(0x3ffff);			/* CmdAddress = 0 */
	/* Unsigned constant: shifting 1 into bit 31 of a signed int is undefined */
	dword |= (0x1U << 31);			/* SendActCmd = 1 */
	Set_NB32_DCT(dev, dct, 0x28c, dword);

	/* Wait for SendActCmd == 0 */
	do {
		dword = Get_NB32_DCT(dev, dct, 0x28c);
	} while (dword & (0x1U << 31));

	/* Wait 75 MEMCLKs. */
	precise_memclk_delay_fam15(pMCTstat, pDCTstat, dct, 75);

	/* 2.10.5.8.6.1.2 */
	Set_NB32_DCT(dev, dct, 0x274, 0x0);	/* DQMask = 0 */
	Set_NB32_DCT(dev, dct, 0x278, 0x0);

	dword = Get_NB32_DCT(dev, dct, 0x27c);
	dword &= ~(0xff);			/* EccMask = 0 */
	if (pDCTstat->DimmECCPresent == 0)
		dword |= 0xff;			/* EccMask = 0xff */
	Set_NB32_DCT(dev, dct, 0x27c, dword);

	/* 2.10.5.8.6.1.2 */
	dword = Get_NB32_DCT(dev, dct, 0x270);
	dword &= ~(0x7ffff);			/* DataPrbsSeed = 0x44443 */
	// dword |= (0x55555);
	dword |= (0x44443);			/* Use AGESA seed */
	Set_NB32_DCT(dev, dct, 0x270, dword);

	/* 2.10.5.8.2 (4) */
	dword = Get_NB32_DCT(dev, dct, 0x260);
	dword &= ~(0x1fffff);			/* CmdCount = 192 */
	dword |= 192;
	Set_NB32_DCT(dev, dct, 0x260, dword);

#if 0
	/* TODO: This applies to Fam15h model 10h and above only */
	/* Program Bubble Count and CmdStreamLen */
	dword = Get_NB32_DCT(dev, dct, 0x25c);
	dword &= ~(0x3ff << 12);		/* BubbleCnt = 0 */
	dword &= ~(0x3ff << 22);		/* BubbleCnt2 = 0 */
	dword &= ~(0xff);			/* CmdStreamLen = 1 */
	dword |= 0x1;
	Set_NB32_DCT(dev, dct, 0x25c, dword);
#endif

	/* Configure Target A */
	dword = Get_NB32_DCT(dev, dct, 0x254);
	dword &= ~(0x7 << 24);			/* TgtChipSelect = Receiver */
	dword |= (Receiver & 0x7) << 24;
	dword &= ~(0x7 << 21);			/* TgtBank = 0 */
	dword &= ~(0x3ff);			/* TgtAddress = 0 */
	Set_NB32_DCT(dev, dct, 0x254, dword);

	dword = Get_NB32_DCT(dev, dct, 0x250);
	dword |= (0x1 << 3);			/* ResetAllErr = 1 */
	dword &= ~(0x1 << 4);			/* StopOnErr = 0 */
	dword &= ~(0x3 << 8);			/* CmdTgt = 0 (Target A) */
	dword &= ~(0x7 << 5);			/* CmdType = 0 (Read) */
	dword |= (0x1 << 11);			/* SendCmd = 1 */
	Set_NB32_DCT(dev, dct, 0x250, dword);

	/* 2.10.5.8.6.1.2 Wait for TestStatus == 1 and CmdSendInProg == 0 */
	do {
		dword = Get_NB32_DCT(dev, dct, 0x250);
	} while ((dword & (0x1 << 12)) || (!(dword & (0x1 << 10))));

	dword = Get_NB32_DCT(dev, dct, 0x250);
	dword &= ~(0x1 << 11);			/* SendCmd = 0 */
	Set_NB32_DCT(dev, dct, 0x250, dword);

	/* 2.10.5.8.6.1.1 Send Precharge Command */
	/* Wait 25 MEMCLKs. */
	precise_memclk_delay_fam15(pMCTstat, pDCTstat, dct, 25);

	dword = Get_NB32_DCT(dev, dct, 0x28c);
	dword &= ~(0xff << 22);			/* CmdChipSelect = Receiver */
	dword |= ((0x1 << Receiver) << 22);
	dword &= ~(0x7 << 19);			/* CmdBank = 0 */
	dword &= ~(0x3ffff);			/* CmdAddress = 0x400 */
	dword |= 0x400;
	dword |= (0x1 << 30);			/* SendPchgCmd = 1 */
	Set_NB32_DCT(dev, dct, 0x28c, dword);

	/* Wait for SendPchgCmd == 0 */
	do {
		dword = Get_NB32_DCT(dev, dct, 0x28c);
	} while (dword & (0x1 << 30));

	/* Wait 25 MEMCLKs. */
	precise_memclk_delay_fam15(pMCTstat, pDCTstat, dct, 25);

	/* Set CmdTestEnable = 0 */
	dword = Get_NB32_DCT(dev, dct, 0x250);
	dword &= ~(0x1 << 2);
	Set_NB32_DCT(dev, dct, 0x250, dword);
}
/* DQS Receiver Enable Training (Family 15h)
 * Algorithm detailed in:
 * The Fam15h BKDG Rev. 3.14 section 2.10.5.8.2
 * This algorithm runs once at the lowest supported MEMCLK,
 * then once again at the highest supported MEMCLK.
 *
 * pMCTstat: system-wide training state; only forwarded to helpers here.
 * pDCTstat: state of the node being trained. The per-lane results are stored
 *           in CH_D_B_RCVRDLY (lanes 0-7) and CH_D_BC_RCVRDLY (lane 8).
 * Pass:     FirstPass derives the seed from
 *           fam15_receiver_enable_training_seed(); any other pass rescales
 *           the delays read back from the receiver enable registers.
 *
 * CR4.OSFXSR and HWCR.wrap32dis are forced on while training runs and are
 * restored afterwards if they were clear on entry. HWCR.SSEDIS is cleared
 * and left cleared.
 */
static void dqsTrainRcvrEn_SW_Fam15(struct MCTStatStruc *pMCTstat,
				struct DCTStatStruc *pDCTstat, u8 Pass)
{
	u8 Channel;
	u8 _2Ranks;
	u8 Addl_Index = 0;
	u8 Receiver;
	u8 _DisableDramECC = 0, _Wrap32Dis = 0, _SSE2 = 0;
	u32 val;
	u32 dev;
	u32 index_reg;
	u32 ch_start, ch_end, ch;
	u32 msr;
	u32 cr4;
	u32 lo, hi;
	uint32_t dword;
	uint8_t dimm;
	uint8_t rank;
	uint8_t lane;
	uint8_t mem_clk;
	uint16_t initial_seed;
	uint16_t current_total_delay[MAX_BYTE_LANES];
	uint16_t dqs_ret_pass1_total_delay[MAX_BYTE_LANES];
	uint16_t rank0_current_total_delay[MAX_BYTE_LANES];
	uint16_t phase_recovery_delays[MAX_BYTE_LANES];
	uint16_t seed[MAX_BYTE_LANES];
	uint16_t seed_gross[MAX_BYTE_LANES];
	uint16_t seed_fine[MAX_BYTE_LANES];
	uint16_t seed_pre_gross[MAX_BYTE_LANES];
	uint8_t package_type = mctGet_NVbits(NV_PACK_TYPE);
	/* MEMCLK frequency in MHz, indexed by the 5-bit field read from DCT register 0x94.
	 * NOTE(review): the field is masked with 0x1f (0..31) but the table only has
	 * 23 entries; values above 22 would index past the end -- confirm they cannot occur.
	 */
	uint16_t fam15h_freq_tab[] = {0, 0, 0, 0, 333, 0, 400, 0, 0, 0, 533, 0, 0, 0, 667, 0, 0, 0, 800, 0, 0, 0, 933};

	print_debug_dqs("\nTrainRcvEn: Node", pDCTstat->Node_ID, 0);
	print_debug_dqs("TrainRcvEn: Pass", Pass, 0);

	dev = pDCTstat->dev_dct;
	index_reg = 0x98;
	ch_start = 0;
	ch_end = 2;

	for (ch = ch_start; ch < ch_end; ch++) {
		uint8_t max_rd_latency = 0x55;
		uint8_t p_state;

		/* 2.10.5.6 */
		fam15EnableTrainingMode(pMCTstat, pDCTstat, ch, 1);

		/* 2.10.5.2 */
		for (p_state = 0; p_state < 3; p_state++) {
			val = Get_NB32_DCT_NBPstate(dev, ch, p_state, 0x210);
			val &= ~(0x3ff << 22);			/* MaxRdLatency = max_rd_latency */
			val |= (max_rd_latency & 0x3ff) << 22;
			Set_NB32_DCT_NBPstate(dev, ch, p_state, 0x210, val);
		}
	}

	if (Pass != FirstPass) {
		pDCTstat->DimmTrainFail = 0;
		pDCTstat->CSTrainFail = ~pDCTstat->CSPresent;
	}

	cr4 = read_cr4();
	if (cr4 & (1 << 9)) {	/* save the old value */
		_SSE2 = 1;
	}
	cr4 |= (1 << 9);	/* OSFXSR enable SSE2 */
	write_cr4(cr4);

	msr = HWCR;
	_RDMSR(msr, &lo, &hi);
	/* FIXME: Why use SSEDIS */
	if (lo & (1 << 17)) {	/* save the old value */
		_Wrap32Dis = 1;
	}
	lo |= (1 << 17);	/* HWCR.wrap32dis */
	lo &= ~(1 << 15);	/* SSEDIS */
	_WRMSR(msr, lo, hi);	/* Setting wrap32dis allows 64-bit memory references in real mode */

	_DisableDramECC = mct_DisableDimmEccEn_D(pMCTstat, pDCTstat);

	for (Channel = 0; Channel < 2; Channel++) {
		print_debug_dqs("\tTrainRcvEn51: Node ", pDCTstat->Node_ID, 1);
		print_debug_dqs("\tTrainRcvEn51: Channel ", Channel, 1);
		pDCTstat->Channel = Channel;

		mem_clk = Get_NB32_DCT(dev, Channel, 0x94) & 0x1f;

		/* Returns 8 for a DCT without valid DIMMs, which skips the loop below */
		Receiver = mct_InitReceiver_D(pDCTstat, Channel);
		/* There are four receiver pairs, loosely associated with chipselects.
		 * This is essentially looping over each DIMM.
		 */
		for (; Receiver < 8; Receiver += 2) {
			Addl_Index = (Receiver >> 1) * 3 + 0x10;
			dimm = (Receiver >> 1);

			print_debug_dqs("\t\tTrainRcvEnd52: index ", Addl_Index, 2);

			if (!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver)) {
				continue;
			}

			/* Retrieve the total delay values from pass 1 of DQS receiver enable training */
			if (Pass != FirstPass) {
				read_dqs_receiver_enable_control_registers(dqs_ret_pass1_total_delay, dev, Channel, dimm, index_reg);
			}

			/* 2.10.5.8.2
			 * Loop over all ranks
			 */
			if (mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver+1))
				_2Ranks = 1;
			else
				_2Ranks = 0;
			for (rank = 0; rank < (_2Ranks + 1); rank++) {
				/* 2.10.5.8.2 (1)
				 * Specify the target DIMM to be trained
				 * Set TrNibbleSel = 0
				 *
				 * TODO: Add support for x4 DIMMs
				 */
				dword = Get_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000008);
				dword &= ~(0x3 << 4);		/* TrDimmSel */
				dword |= ((dimm & 0x3) << 4);
				dword &= ~(0x1 << 2);		/* TrNibbleSel */
				Set_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000008, dword);

				/* 2.10.5.8.2 (2)
				 * Retrieve gross and fine timing fields from write DQS registers
				 */
				read_dqs_write_timing_control_registers(current_total_delay, dev, Channel, dimm, index_reg);

				/* 2.10.5.8.2.1
				 * Generate the DQS Receiver Enable Training Seed Values
				 */
				if (Pass == FirstPass) {
					initial_seed = fam15_receiver_enable_training_seed(pDCTstat, Channel, dimm, rank, package_type);

					/* Adjust seed for the minimum platform supported frequency */
					initial_seed = (uint16_t) (((((uint64_t) initial_seed) *
						fam15h_freq_tab[mem_clk] * 100) / (mctGet_NVbits(NV_MIN_MEMCLK) * 100)));

					for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
						uint16_t wl_pass1_delay;
						wl_pass1_delay = current_total_delay[lane];

						seed[lane] = initial_seed + wl_pass1_delay;
					}
				} else {
					uint8_t addr_prelaunch = 0;		/* TODO: Fetch the correct value from RC2[0] */
					uint16_t register_delay;
					int16_t seed_prescaling;

					/* Start from the delays read back before the rank loop */
					memcpy(current_total_delay, dqs_ret_pass1_total_delay, sizeof(current_total_delay));
					if ((pDCTstat->Status & (1 << SB_Registered))) {
						if (addr_prelaunch)
							register_delay = 0x30;
						else
							register_delay = 0x20;
					} else if ((pDCTstat->Status & (1 << SB_LoadReduced))) {
						/* TODO
						 * Load reduced DIMM support unimplemented
						 */
						register_delay = 0x0;
					} else {
						register_delay = 0x0;
					}

					for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
						/* NOTE(review): a negative seed_prescaling is converted to a
						 * huge uint64_t by the cast below -- confirm it cannot go negative.
						 */
						seed_prescaling = current_total_delay[lane] - register_delay - 0x20;
						seed[lane] = (uint16_t) (register_delay + ((((uint64_t) seed_prescaling) * fam15h_freq_tab[mem_clk] * 100) / (mctGet_NVbits(NV_MIN_MEMCLK) * 100)));
					}
				}

				for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
					/* Split the seed into a 5-bit gross and a 5-bit fine part */
					seed_gross[lane] = (seed[lane] >> 5) & 0x1f;
					seed_fine[lane] = seed[lane] & 0x1f;

					/*if (seed_gross[lane] == 0)
						seed_pre_gross[lane] = 0;
					else */if (seed_gross[lane] & 0x1)
						seed_pre_gross[lane] = 1;
					else
						seed_pre_gross[lane] = 2;

					/* Calculate phase recovery delays */
					phase_recovery_delays[lane] = ((seed_pre_gross[lane] & 0x1f) << 5) | (seed_fine[lane] & 0x1f);

					/* Set the gross delay.
					 * NOTE: While the BKDG states to only program DqsRcvEnGrossDelay, this appears
					 * to have been a misprint as DqsRcvEnFineDelay should be set to zero as well.
					 */
					current_total_delay[lane] = ((seed_gross[lane] & 0x1f) << 5);
				}

				/* 2.10.5.8.2 (2) / 2.10.5.8.2.1 (5 6)
				 * Program PhRecFineDly and PhRecGrossDly
				 */
				write_dram_phase_recovery_control_registers(phase_recovery_delays, dev, Channel, dimm, index_reg);

				/* 2.10.5.8.2 (2) / 2.10.5.8.2.1 (7)
				 * Program the DQS Receiver Enable delay values for each lane
				 */
				write_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg);

				/* 2.10.5.8.2 (3)
				 * Program DqsRcvTrEn = 1
				 */
				dword = Get_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000008);
				dword |= (0x1 << 13);
				Set_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000008, dword);

				/* 2.10.5.8.2 (4)
				 * Issue 192 read requests to the target rank
				 */
				generate_dram_receiver_enable_training_pattern_fam15(pMCTstat, pDCTstat, Channel, Receiver + (rank & 0x1));

				/* 2.10.5.8.2 (5)
				 * Program DqsRcvTrEn = 0
				 */
				dword = Get_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000008);
				dword &= ~(0x1 << 13);
				Set_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000008, dword);

				/* 2.10.5.8.2 (6)
				 * Read PhRecGrossDly, PhRecFineDly
				 */
				read_dram_phase_recovery_control_registers(phase_recovery_delays, dev, Channel, dimm, index_reg);

				/* 2.10.5.8.2 (7)
				 * Calculate and program the DQS Receiver Enable delay values
				 */
				for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
					current_total_delay[lane] = (phase_recovery_delays[lane] & 0x1f);
					current_total_delay[lane] |= ((seed_gross[lane] + ((phase_recovery_delays[lane] >> 5) & 0x1f) - seed_pre_gross[lane] + 1) << 5);
					/* Lane 8 is kept in its own array, separate from lanes 0-7 */
					if (lane == 8)
						pDCTstat->CH_D_BC_RCVRDLY[Channel][dimm] = current_total_delay[lane];
					else
						pDCTstat->CH_D_B_RCVRDLY[Channel][dimm][lane] = current_total_delay[lane];
				}
				write_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg);

				if (rank == 0) {
					/* Back up the Rank 0 delays for later use */
					memcpy(rank0_current_total_delay, current_total_delay, sizeof(current_total_delay));
				}

				if (rank == 1) {
					/* 2.10.5.8.2 (8)
					 * Compute the average delay across both ranks and program the result into
					 * the DQS Receiver Enable delay registers
					 */
					for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
						current_total_delay[lane] = (rank0_current_total_delay[lane] + current_total_delay[lane]) / 2;
						if (lane == 8)
							pDCTstat->CH_D_BC_RCVRDLY[Channel][dimm] = current_total_delay[lane];
						else
							pDCTstat->CH_D_B_RCVRDLY[Channel][dimm][lane] = current_total_delay[lane];
					}
					write_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg);
				}
			}

#if DQS_TRAIN_DEBUG > 0
			for (lane = 0; lane < 8; lane++)
				print_debug_dqs_pair("\t\tTrainRcvEn55: Lane ", lane, " current_total_delay ", current_total_delay[lane], 2);
#endif
		}
	}

	/* Calculate and program MaxRdLatency.
	 * The channel loop above exits with Channel == 2, which is not a valid
	 * DCT index, so the calculation is run explicitly for every DCT that
	 * has DIMMs rather than being handed the stale loop counter.
	 */
	for (Channel = 0; Channel < 2; Channel++) {
		if (!pDCTstat->DIMMValidDCT[Channel])
			continue;
		Calc_SetMaxRdLatency_D_Fam15(pMCTstat, pDCTstat, Channel);
	}

	if (_DisableDramECC) {
		mct_EnableDimmEccEn_D(pMCTstat, pDCTstat, _DisableDramECC);
	}

	if (Pass == FirstPass) {
		/*Disable DQSRcvrEn training mode */
		mct_DisableDQSRcvEn_D(pDCTstat);
	}

	if (!_Wrap32Dis) {
		msr = HWCR;
		_RDMSR(msr, &lo, &hi);
		lo &= ~(1<<17);		/* restore HWCR.wrap32dis */
		_WRMSR(msr, lo, hi);
	}
	if (!_SSE2) {
		cr4 = read_cr4();
		cr4 &= ~(1<<9);		/* restore cr4.OSFXSR */
		write_cr4(cr4);
	}

#if DQS_TRAIN_DEBUG > 0
	{
		u8 ChannelDTD;
		printk(BIOS_DEBUG, "TrainRcvrEn: CH_MaxRdLat:\n");
		for(ChannelDTD = 0; ChannelDTD<2; ChannelDTD++) {
			printk(BIOS_DEBUG, "Channel:%x: %x\n",
				ChannelDTD, pDCTstat->CH_MaxRdLat[ChannelDTD]);
		}
	}
#endif

#if DQS_TRAIN_DEBUG > 0
	{
		u16 valDTD;
		u8 ChannelDTD, ReceiverDTD;
		u8 i;
		u16 *p;

		printk(BIOS_DEBUG, "TrainRcvrEn: CH_D_B_RCVRDLY:\n");
		for(ChannelDTD = 0; ChannelDTD < 2; ChannelDTD++) {
			printk(BIOS_DEBUG, "Channel:%x\n", ChannelDTD);
			for(ReceiverDTD = 0; ReceiverDTD<8; ReceiverDTD+=2) {
				printk(BIOS_DEBUG, "\t\tReceiver:%x:", ReceiverDTD);
				p = pDCTstat->CH_D_B_RCVRDLY[ChannelDTD][ReceiverDTD>>1];
				for (i=0;i<8; i++) {
					valDTD = p[i];
					printk(BIOS_DEBUG, " %03x", valDTD);
				}
				printk(BIOS_DEBUG, "\n");
			}
		}
	}
#endif

	printk(BIOS_DEBUG, "TrainRcvrEn: Status %x\n", pDCTstat->Status);
	printk(BIOS_DEBUG, "TrainRcvrEn: ErrStatus %x\n", pDCTstat->ErrStatus);
	printk(BIOS_DEBUG, "TrainRcvrEn: ErrCode %x\n", pDCTstat->ErrCode);
	printk(BIOS_DEBUG, "TrainRcvrEn: Done\n\n");
}
/* Return the first receiver index to train on the given DCT.
 * A DCT with valid DIMMs starts at receiver 0. A DCT without any gets 8,
 * so a caller looping "while Receiver < 8" does no work for it.
 */
u8 mct_InitReceiver_D(struct DCTStatStruc *pDCTstat, u8 dct)
{
	return (pDCTstat->DIMMValidDCT[dct] != 0) ? 0 : 8;
}
/* Clear the DqsRcvEnTrain bit in DCT register 0x78.
 * Only channel 0 is touched in ganged mode; otherwise both channels are.
 */
static void mct_DisableDQSRcvEn_D(struct DCTStatStruc *pDCTstat)
{
	const u32 dct_dev = pDCTstat->dev_dct;
	const u8 channel_count = pDCTstat->GangedMode ? 1 : 2;
	u8 channel;

	for (channel = 0; channel < channel_count; channel++) {
		u32 ctl = Get_NB32_DCT(dct_dev, channel, 0x78);

		ctl &= ~(1 << DqsRcvEnTrain);
		Set_NB32_DCT(dct_dev, channel, 0x78, ctl);
	}
}
/* mct_ModifyIndex_D
* Function only used once so it was inlined.
*/
/* mct_GetInitFlag_D
* Function only used once so it was inlined.
*/
/* Set F2x[1, 0]9C_x[2B:10] DRAM DQS Receiver Enable Timing Control Registers
 * See BKDG Rev. 3.62 page 268 for more information
 *
 * Programs the 9-bit receiver enable delay of byte lanes 0-7 for one DIMM.
 * RcvrEnDly:  delay written to every lane when FinalValue is 0. A value of
 *             0x1fe also sets the SB_DQSRcvLimit status bit (boundary flag).
 * FinalValue: when non-zero, each lane is instead programmed from
 *             pDCTstat->CH_D_B_RCVRDLY[Channel][Receiver >> 1].
 * Addl_Index: added to the Table_DQSRcvEn_Offset entry to form the register index.
 * Pass:       not used by this function.
 */
void mct_SetRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u16 RcvrEnDly,
			u8 FinalValue, u8 Channel, u8 Receiver, u32 dev,
			u32 index_reg, u8 Addl_Index, u8 Pass)
{
	u8 lane;

	if (RcvrEnDly == 0x1fe)
		pDCTstat->Status |= 1 << SB_DQSRcvLimit;

	/* DimmOffset not needed for CH_D_B_RCVRDLY array */
	for (lane = 0; lane < 8; lane++) {
		/* Two lanes share a register: even lanes at bit 0, odd lanes at bit 16 */
		const u8 field_shift = (lane & 1) ? 16 : 0;
		u32 reg_index;
		u32 reg_value;

		if (FinalValue)
			RcvrEnDly = pDCTstat->CH_D_B_RCVRDLY[Channel][Receiver >> 1][lane];

		reg_index = Table_DQSRcvEn_Offset[lane >> 1] + Addl_Index;

		reg_value = Get_NB32_index_wait_DCT(dev, Channel, index_reg, reg_index);
		reg_value &= ~(0x1ff << field_shift);
		reg_value |= ((RcvrEnDly & 0x1ff) << field_shift);
		Set_NB32_index_wait_DCT(dev, Channel, index_reg, reg_index, reg_value);
	}
}
/* Calculate MaxRdLatency
 * Algorithm detailed in the Fam10h BKDG Rev. 3.62 section 2.8.9.9.5
 *
 * Channel:     DCT to program; forced to 0 in ganged mode.
 * DQSRcvEnDly: current (or worst case) receiver enable delay; only the gross
 *              part (value >> 5) enters the calculation.
 *
 * The result is stored in pDCTstat->CH_MaxRdLat[] (both entries in ganged
 * mode) and written to bits 31:22 of DCT register 0x78.
 */
static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u16 DQSRcvEnDly)
{
	u16 memclk_mhz[] = {400, 533, 667, 800};
	/* Processor-specific constants "N" and "P"; defaults are the
	 * Revision B and below values.
	 */
	uint8_t nb_const_n = 10;
	uint8_t dram_const_p = 11;
	u32 dct_dev;
	u32 latency;
	u32 regval;
	u32 nb_multiplier;
	u32 memclk_index;

	if (pDCTstat->LogicalCPUID & AMD_DR_Dx) {
		/* Revision D and above */
		nb_const_n = 4;
		dram_const_p = 29;
	} else if (pDCTstat->LogicalCPUID & AMD_DR_Cx) {
		/* Revision C: Socket F (1207), AM3 and S1g<x> keep the defaults */
		uint8_t package_type = mctGet_NVbits(NV_PACK_TYPE);

		if ((package_type != PT_L1)
			&& (package_type != PT_M2)
			&& (package_type != PT_S1)) {
			nb_const_n = 4;
			dram_const_p = 29;
		}
	}

	if (pDCTstat->GangedMode)
		Channel = 0;

	dct_dev = pDCTstat->dev_dct;

	/* CAS latency field plus 4, doubled to count in 1/2 MEMCLK units */
	regval = Get_NB32_DCT(dct_dev, Channel, 0x88);
	latency = ((regval & 0x0f) + 4) << 1;

	/* Registered DIMMs cost one extra MEMCLK */
	regval = Get_NB32_DCT(dct_dev, Channel, 0x90);
	if (!(regval & (1 << UnBuffDimm)))
		latency += 2;

	/* Address prelaunch: add 2 if any of AddrCmdSetup, CsOdtSetup or
	 * CkeSetup is set, otherwise add 1.
	 */
	regval = Get_NB32_index_wait_DCT(dct_dev, Channel, 0x98, 0x04);
	latency += (regval & 0x00202020) ? 2 : 1;

	/* F2x[1, 0]78[RdPtrInit] of 4, 5, 6 or 7 adds 4, 3, 2 or 1 respectively */
	regval = Get_NB32_DCT(dct_dev, Channel, 0x78);
	latency += 8 - (regval & 0x0f);

	/* Gross (coarse) portion of the receiver enable delay */
	latency += DQSRcvEnDly >> 5;

	/* Add "P": scale to 1/4 MEMCLK, add, then scale back to 1/2 MEMCLK */
	latency = ((latency << 1) + dram_const_p) >> 1;

	/* Convert the 1/2 MEMCLK count to northbridge clocks (NCLKs), rounding up.
	 * NOTE(review): the table index is (register 0x94 & 0x7) - 3 with no range
	 * check; field values below 3 would index out of bounds -- confirm
	 * they cannot occur here.
	 */
	nb_multiplier = (Get_NB32(pDCTstat->dev_nbmisc, 0xd4) & 0x1f) + 4;
	latency *= 200 * nb_multiplier;
	memclk_index = (Get_NB32_DCT(pDCTstat->dev_dct, Channel, 0x94) & 0x7) - 3;
	latency /= memclk_mhz[memclk_index];
	latency = (latency + (2 - 1)) / 2;

	/* Add "N", the processor-specific constant in the northbridge clock domain */
	latency += nb_const_n / 2;

	pDCTstat->CH_MaxRdLat[Channel] = latency;
	if (pDCTstat->GangedMode)
		pDCTstat->CH_MaxRdLat[1] = latency;

	/* Program F2x[1, 0]78[MaxRdLatency] with the total delay (in NCLKs) */
	regval = Get_NB32_DCT(dct_dev, Channel, 0x78);
	regval &= ~(0x3ff << 22);
	regval |= (latency & 0x3ff) << 22;
	Set_NB32_DCT(dct_dev, Channel, 0x78, regval);
}
/* Initialize the DQS positions of both channels in preparation for
 * Receiver Enable Training by running InitDQSPos4RcvrEn_D() on
 * channel 0 and then channel 1.
 */
static void mct_InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
					struct DCTStatStruc *pDCTstat)
{
	InitDQSPos4RcvrEn_D(pMCTstat, pDCTstat, 0);
	InitDQSPos4RcvrEn_D(pMCTstat, pDCTstat, 1);
}
/* Initialize the DQS Positions of one channel in preparation for
 * Receiver Enable Training.
 * Write Position is no Delay
 * Read Position is 1/2 Memclock Delay
 *
 * For each of the four DIMM slots (register stride 0x100) this writes:
 *   offsets 1-3: 0x00000000  (write data / write ECC timing)
 *   offsets 5-6: 0x2f2f2f2f  (read DQS timing control, errata #180)
 *   offset  7:   0x0000002f  (read DQS ECC timing control)
 */
static void InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
				struct DCTStatStruc *pDCTstat, u8 Channel)
{
	const u8 dimm_count = 4;	/* TODO: Rev C could be 4 */
	const u32 dct_dev = pDCTstat->dev_dct;
	u8 offset;
	u8 slot;

	/* FIXME: add Cx support */
	for (offset = 1; offset <= 7; offset++) {
		u32 timing;

		/* Offset 4 is not part of this initialization */
		if (offset == 4)
			continue;

		if (offset <= 3)
			timing = 0x00000000;
		else if (offset <= 6)
			timing = 0x2f2f2f2f;
		else
			timing = 0x0000002f;

		for (slot = 0; slot < dimm_count; slot++)
			Set_NB32_index_wait_DCT(dct_dev, Channel, 0x98, offset + 0x100 * slot, timing);
	}
}
/* Write the stored ECC lane receiver enable delays of one channel to hardware.
 * One value per chip select pair is taken from
 * pDCTstat->CH_D_BC_RCVRDLY[Channel] and written through index register 0x98,
 * starting at index 0x12 and advancing by 3 for each pair.
 */
void SetEccDQSRcvrEn_D(struct DCTStatStruc *pDCTstat, u8 Channel)
{
	const u32 dct_dev = pDCTstat->dev_dct;
	u16 *ecc_delays = pDCTstat->CH_D_BC_RCVRDLY[Channel];
	u32 reg_index = 0x12;
	u8 cs;

	print_debug_dqs("\t\tSetEccDQSRcvrPos: Channel ", Channel, 2);

	for (cs = 0; cs < MAX_CS_SUPPORTED; cs += 2, reg_index += 3) {
		u32 delay = ecc_delays[cs >> 1];

		Set_NB32_index_wait_DCT(dct_dev, Channel, 0x98, reg_index, delay);
		print_debug_dqs_pair("\t\tSetEccDQSRcvrPos: ChipSel ",
					cs, " rcvr_delay ", delay, 2);
	}
}
/* Derive the ECC lane receiver enable delay of every enabled chip select
 * pair on a channel from two data byte lanes, then program the results
 * with SetEccDQSRcvrEn_D().
 *
 * The low byte of CH_EccDQSLike selects the data lane most like the ECC
 * lane and the high byte selects the second most like lane (3 bits each).
 * CH_EccDQSScale weights the difference between them on non-registered
 * DIMMs; registered DIMMs use (2 * second - first) instead.
 */
static void CalcEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat,
				struct DCTStatStruc *pDCTstat, u8 Channel)
{
	const u16 like_lanes = pDCTstat->CH_EccDQSLike[Channel];
	const u8 scale = pDCTstat->CH_EccDQSScale[Channel];
	u8 cs;

	for (cs = 0; cs < MAX_CS_SUPPORTED; cs += 2) {
		u16 *lane_delays;
		u32 most_like, next_like, ecc_delay;

		if (!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, cs))
			continue;

		lane_delays = pDCTstat->CH_D_B_RCVRDLY[Channel][cs >> 1];
		most_like = lane_delays[like_lanes & 0x07];
		next_like = lane_delays[(like_lanes >> 8) & 0x07];

		if (pDCTstat->Status & (1 << SB_Registered)) {
			ecc_delay = next_like - most_like;
			ecc_delay += next_like;
		} else {
			ecc_delay = (most_like > next_like) ?
					(most_like - next_like) : (next_like - most_like);

			/* NOTE(review): scale is promoted to int before '~', so the
			 * multiplier has all upper bits set rather than being the
			 * 8-bit complement -- confirm this is the intended weighting.
			 */
			ecc_delay *= ~scale;
			ecc_delay >>= 8;	/* /256 */

			if (most_like > next_like)
				ecc_delay -= next_like;
			else
				ecc_delay += most_like;
		}

		pDCTstat->CH_D_BC_RCVRDLY[Channel][cs >> 1] = ecc_delay;
	}

	SetEccDQSRcvrEn_D(pDCTstat, Channel);
}
/* 2.8.9.9.4
* ECC Byte Lane Training
* DQS Receiver Enable Delay
*/
void mctSetEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat,
struct DCTStatStruc *pDCTstatA)
{
u8 Node;
u8 i;
for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
struct DCTStatStruc *pDCTstat;
pDCTstat = pDCTstatA + Node;
if (!pDCTstat->NodePresent)
break;
if (pDCTstat->DCTSysLimit) {
for(i=0; i<2; i++)
CalcEccDQSRcvrEn_D(pMCTstat, pDCTstat, i);
}
}
}
/* Map a 5-bit fence value to the form written to the Fence2 and pad
 * registers: values below 16 get bit 4 set, all other values become 0.
 */
static uint32_t fam15_fence_encode(uint32_t fence)
{
	return (fence < 16) ? (fence | 0x10) : 0;
}

/* Run one fence training step on a DCT: select the fence through FenceTrSel
 * (bits 7:6 of D18F2x9C_x0000_0008), seed the phase recovery registers, run
 * fenceDynTraining_D() and store the low 5 bits of its result into
 * D18F2x9C_x0000_000C at bit position result_shift.
 * Returns the unmasked training result.
 */
static uint32_t fam15_fence_train_step(struct MCTStatStruc *pMCTstat,
					struct DCTStatStruc *pDCTstat, uint8_t dct,
					uint32_t fence_sel, uint8_t result_shift)
{
	const uint32_t index_reg = 0x98;
	const uint32_t dev = pDCTstat->dev_dct;
	uint32_t dword;
	uint32_t result;

	dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000008);
	dword &= ~(0x3 << 6);
	dword |= (fence_sel << 6);
	Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000008, dword);

	/* Set phase recovery seed values */
	Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000050, 0x13131313);
	Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000051, 0x13131313);
	Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000052, 0x00000013);

	result = fenceDynTraining_D(pMCTstat, pDCTstat, dct);

	dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0000000c);
	dword &= ~(0x1f << result_shift);
	dword |= ((result & 0x1f) << result_shift);
	Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0000000c, dword);

	return result;
}

/* Program AlwaysEnDllClks (bits 14:12) of D18F2x9C_x0D0F_0[8:0]0F_dct[1:0] */
static void fam15_fence_set_always_en_dll_clks(uint32_t dev, uint8_t dct, uint32_t setting)
{
	const uint32_t index_reg = 0x98;
	uint32_t dword;
	uint8_t lane;

	for (lane = 0; lane < 0x9; lane++) {
		dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f000f | (lane << 8));
		dword &= ~(0x7 << 12);
		dword |= (setting << 12);
		Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f000f | (lane << 8), dword);
	}
}

/* Write a fence value into bits 4:0 of the three registers base_index,
 * base_index + 0x100 and base_index + 0x200.
 */
static void fam15_fence_write_pad_group(uint32_t dev, uint8_t dct,
					uint32_t base_index, uint32_t fence)
{
	const uint32_t index_reg = 0x98;
	uint32_t dword;
	uint8_t pad;

	for (pad = 0; pad < 0x3; pad++) {
		dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, base_index | (pad << 8));
		dword &= ~(0x1f);
		dword |= (fence & 0x1f);
		Set_NB32_index_wait_DCT(dev, dct, index_reg, base_index | (pad << 8), dword);
	}
}

/* Fam15h BKDG v3.14 section 2.10.5.3.3
 * This picks up where InitDDRPhy left off.
 * Trains the TX DLL, RX DLL and TX pad fences of one DCT and programs the
 * derived pad and Fence2 registers. D18F2x9C_x0000_0004 is saved first and
 * restored at the end.
 */
static void fam15_fence_train_dct(struct MCTStatStruc *pMCTstat,
					struct DCTStatStruc *pDCTstat, uint8_t dct)
{
	const uint32_t index_reg = 0x98;
	const uint32_t dev = pDCTstat->dev_dct;
	uint32_t dword;
	uint32_t datc_backup;
	uint32_t tx_pad_fence;
	uint32_t fence2_config;
	uint8_t lane;

	/* Back up D18F2x9C_x0000_0004_dct[1:0] */
	datc_backup = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000004);

	/* FenceTrSel = 0x2, result saved to the TX DLL field (bits 30:26) */
	fam15_fence_train_step(pMCTstat, pDCTstat, dct, 0x2, 26);

	/* D18F2x9C_x0D0F_0[F,8:0]0F_dct[1:0][AlwaysEnDllClks]=0x1 */
	fam15_fence_set_always_en_dll_clks(dev, dct, 0x1);

	/* FenceTrSel = 0x1, result saved to the RX DLL field (bits 25:21) */
	fam15_fence_train_step(pMCTstat, pDCTstat, dct, 0x1, 21);

	/* D18F2x9C_x0D0F_0[F,8:0]0F_dct[1:0][AlwaysEnDllClks]=0x0 */
	fam15_fence_set_always_en_dll_clks(dev, dct, 0x0);

	/* FenceTrSel = 0x3, result saved to the TX Pad field (bits 20:16) */
	tx_pad_fence = fam15_fence_train_step(pMCTstat, pDCTstat, dct, 0x3, 16);

	/* Program D18F2x9C_x0D0F_[C,8,2][2:0]31_dct[1:0] */
	tx_pad_fence = fam15_fence_encode(tx_pad_fence);
	fam15_fence_write_pad_group(dev, dct, 0x0d0f2031, tx_pad_fence);
	fam15_fence_write_pad_group(dev, dct, 0x0d0f8031, tx_pad_fence);
	fam15_fence_write_pad_group(dev, dct, 0x0d0fc031, tx_pad_fence);

	/* Assemble Fence2 configuration word (Fam15h BKDG v3.14 page 331):
	 * TxPad in bits 4:0, TxDll in bits 9:5, RxDll in bits 14:10.
	 */
	dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0000000c);
	fence2_config = fam15_fence_encode((dword >> 16) & 0x1f);
	fence2_config |= (fam15_fence_encode((dword >> 21) & 0x1f) << 10);
	fence2_config |= (fam15_fence_encode((dword >> 26) & 0x1f) << 5);

	/* Program D18F2x9C_x0D0F_0[F,8:0]31_dct[1:0] */
	for (lane = 0; lane < 0x9; lane++) {
		dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f0031 | (lane << 8));
		dword &= ~(0x7fff);
		dword |= fence2_config;
		Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f0031 | (lane << 8), dword);
	}

	/* Restore D18F2x9C_x0000_0004_dct[1:0] */
	Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000004, datc_backup);
}

/* Phy-assisted memory fence training for every present node that has a
 * non-zero DCTSysLimit. Family 15h runs the multi-step sequence on each DCT
 * with valid DIMMs; other families call fenceDynTraining_D() on both DCTs.
 */
void phyAssistedMemFnceTraining(struct MCTStatStruc *pMCTstat,
				struct DCTStatStruc *pDCTstatA)
{
	u8 node;

	printk(BIOS_DEBUG, "%s: Start\n", __func__);

	/* FIXME: skip for Ax */
	for (node = 0; node < MAX_NODES_SUPPORTED; node++) {
		struct DCTStatStruc *pDCTstat = pDCTstatA + node;
		uint8_t dct;

		if (!pDCTstat->NodePresent)
			continue;
		if (!pDCTstat->DCTSysLimit)
			continue;

		if (!is_fam15h()) {
			fenceDynTraining_D(pMCTstat, pDCTstat, 0);
			fenceDynTraining_D(pMCTstat, pDCTstat, 1);
			continue;
		}

		for (dct = 0; dct < 2; dct++) {
			if (pDCTstat->DIMMValidDCT[dct])
				fam15_fence_train_dct(pMCTstat, pDCTstat, dct);
		}
	}

	printk(BIOS_DEBUG, "%s: Done\n", __func__);
}
/* Run one phy fence training cycle on a DCT and return the adjusted average
 * of the nine phase recovery fine delays.
 *
 * Family 15h: the caller seeds the phase recovery registers; this function
 * only pulses PhyFenceTrEn, averages the 5-bit fine delays (rounding up) and
 * returns the average minus 6, clamped at 0. Nothing is written to PhyFence.
 *
 * Other families: the seed is programmed here, 7-bit delays are averaged
 * (rounding up), 8 is subtracted for Bx/Cx/Dx revisions, the low 5 bits are
 * written to F2x[1,0]9C_x0C bits 20:16 and F2x[1,0]9C_x04 is rewritten with
 * its own value.
 */
static uint32_t fenceDynTraining_D(struct MCTStatStruc *pMCTstat,
			struct DCTStatStruc *pDCTstat, u8 dct)
{
	const u32 index_reg = 0x98;
	const u32 dev = pDCTstat->dev_dct;
	u16 delay_sum = 0;
	u16 fence;
	u32 regval;
	u32 reg;

	if (is_fam15h()) {
		/* Set F2x[1,0]9C_x08[PhyFenceTrEn] */
		regval = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x08);
		regval |= 1 << PhyFenceTrEn;
		Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x08, regval);

		/* Wait 2000 MEMCLKs */
		precise_memclk_delay_fam15(pMCTstat, pDCTstat, dct, 2000);

		/* Clear F2x[1,0]9C_x08[PhyFenceTrEn] */
		regval = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x08);
		regval &= ~(1 << PhyFenceTrEn);
		Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x08, regval);

		/* Sum the fine delay components of F2x[1,0]9C_x[51:50] (four
		 * lanes each) and F2x[1,0]9C_x52 (one lane).
		 */
		for (reg = 0x50; reg <= 0x52; reg++) {
			regval = Get_NB32_index_wait_DCT(dev, dct, index_reg, reg);
			delay_sum += regval & 0x1f;
			if (reg == 0x52)
				continue;
			delay_sum += (regval >> 8) & 0x1f;
			delay_sum += (regval >> 16) & 0x1f;
			delay_sum += (regval >> 24) & 0x1f;
		}

		/* Average over the nine lanes, rounding up */
		fence = (delay_sum + 8) / 9;

		return (fence < 6) ? 0 : (fence - 6);
	}

	/* BIOS first programs a seed value to the phase recovery engine
	 * (recommended 19) registers.
	 * Dram Phase Recovery Control Register (F2x[1,0]9C_x[51:50] and
	 * F2x[1,0]9C_x52.) .
	 */
	for (reg = 0x50; reg <= 0x52; reg++) {
		regval = (FenceTrnFinDlySeed & 0x1F);
		if (reg != 0x52)
			regval *= 0x01010101;	/* replicate the seed into all four bytes */
		Set_NB32_index_wait_DCT(dev, dct, index_reg, reg, regval);
	}

	/* Set F2x[1,0]9C_x08[PhyFenceTrEn]=1. */
	regval = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x08);
	regval |= 1 << PhyFenceTrEn;
	Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x08, regval);

	/* Wait 200 MEMCLKs.
	 * NOTE(review): the original comments say "200 MEMCLKs" and "200us", but
	 * mct_Wait() documents its argument as 50ns units -- confirm the intended delay.
	 */
	mct_Wait(50000);

	/* Clear F2x[1,0]9C_x08[PhyFenceTrEn]=0. */
	regval = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x08);
	regval &= ~(1 << PhyFenceTrEn);
	Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x08, regval);

	/* BIOS reads the phase recovery engine registers
	 * F2x[1,0]9C_x[51:50] and F2x[1,0]9C_x52.
	 */
	for (reg = 0x50; reg <= 0x52; reg++) {
		regval = Get_NB32_index_wait_DCT(dev, dct, index_reg, reg);
		delay_sum += regval & 0x7F;
		if (reg == 0x52)
			continue;
		delay_sum += (regval >> 8) & 0x7F;
		delay_sum += (regval >> 16) & 0x7F;
		delay_sum += (regval >> 24) & 0x7F;
	}

	/* Average over the nine lanes, rounding up */
	fence = (delay_sum + 8) / 9;

	/* Write the (averaged value -8) to F2x[1,0]9C_x0C[PhyFence]. */
	/* inlined mct_AdjustFenceValue() */
	/* TODO: The RBC0 is not supported (it would subtract 3 instead). */
	if (pDCTstat->LogicalCPUID & (AMD_DR_Dx | AMD_DR_Cx | AMD_DR_Bx))
		fence -= 8;

	regval = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0C);
	regval &= ~(0x1F << 16);
	regval |= (fence & 0x1F) << 16;
	Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0C, regval);

	/* Rewrite F2x[1,0]9C_x04-DRAM Address/Command Timing Control Register
	 * delays (both channels).
	 */
	regval = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x04);
	Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x04, regval);

	return fence;
}
/* Busy-wait for the given number of 50ns cycles.
 * The count is multiplied by 8 and compared against the low 32 bits of
 * MSR 0x10 (TSC), i.e. the code treats one TSC tick as 1.25ns.
 * This seems like a hack...
 */
void mct_Wait(u32 cycles)
{
	const u32 tsc_msr = 0x10;		/* TSC */
	const u32 wait_ticks = cycles << 3;	/* x8 (number of 1.25ns ticks) */
	u32 start_lo, now_lo, hi;

	_RDMSR(tsc_msr, &start_lo, &hi);
	do {
		_RDMSR(tsc_msr, &now_lo, &hi);
	} while (now_lo - start_lo < wait_ticks);
}