| /* |
| * This file is part of the coreboot project. |
| * |
| * Copyright (C) 2010 Advanced Micro Devices, Inc. |
| * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering |
| * |
| * This program is free software; you can redistribute it and/or modify |
| * it under the terms of the GNU General Public License as published by |
| * the Free Software Foundation; version 2 of the License. |
| * |
| * This program is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| * GNU General Public License for more details. |
| */ |
| |
| static void CalcEccDQSPos_D(struct MCTStatStruc *pMCTstat, |
| struct DCTStatStruc *pDCTstat, u16 like, |
| u8 scale, u8 ChipSel); |
| static void GetDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat, |
| struct DCTStatStruc *pDCTstat, u8 ChipSel); |
| static void WriteDQSTestPattern_D(struct MCTStatStruc *pMCTstat, |
| struct DCTStatStruc *pDCTstat, |
| u32 TestAddr_lo); |
| static void WriteL18TestPattern_D(struct DCTStatStruc *pDCTstat, |
| u32 TestAddr_lo); |
| static void WriteL9TestPattern_D(struct DCTStatStruc *pDCTstat, |
| u32 TestAddr_lo); |
| static u16 CompareDQSTestPattern_D(struct MCTStatStruc *pMCTstat, |
| struct DCTStatStruc *pDCTstat, |
| u32 addr_lo); |
| static void FlushDQSTestPattern_D(struct DCTStatStruc *pDCTstat, |
| u32 addr_lo); |
| static void SetTargetWTIO_D(u32 TestAddr); |
| static void ResetTargetWTIO_D(void); |
| void ResetDCTWrPtr_D(u32 dev, u32 index_reg, u32 index); |
| u8 mct_DisableDimmEccEn_D(struct MCTStatStruc *pMCTstat, |
| struct DCTStatStruc *pDCTstat); |
| static void mct_SetDQSDelayCSR_D(struct MCTStatStruc *pMCTstat, |
| struct DCTStatStruc *pDCTstat, |
| u8 ChipSel); |
| u32 mct_GetMCTSysAddr_D(struct MCTStatStruc *pMCTstat, |
| struct DCTStatStruc *pDCTstat, u8 Channel, |
| u8 receiver, u8 *valid); |
| static void SetupDqsPattern_D(struct MCTStatStruc *pMCTstat, |
| struct DCTStatStruc *pDCTstat, |
| u32 *buffer); |
| static void proc_IOCLFLUSH_D(u32 addr_hi); |
| |
| static void StoreDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 ChipSel); |
| |
| #define DQS_TRAIN_DEBUG 0 |
| |
| static void print_debug_dqs(const char *str, u32 val, u8 level) |
| { |
| #if DQS_TRAIN_DEBUG > 0 |
| if (DQS_TRAIN_DEBUG >= level) { |
| printk(BIOS_DEBUG, "%s%x\n", str, val); |
| } |
| #endif |
| } |
| |
| static void print_debug_dqs_pair(const char *str, u32 val, const char *str2, u32 val2, u8 level) |
| { |
| #if DQS_TRAIN_DEBUG > 0 |
| if (DQS_TRAIN_DEBUG >= level) { |
| printk(BIOS_DEBUG, "%s%08x%s%08x\n", str, val, str2, val2); |
| } |
| #endif |
| } |
| |
| /*Warning: These must be located so they do not cross a logical 16-bit segment boundary!*/ |
| static const u32 TestPatternJD1a_D[] = { |
| 0x00000000,0x00000000,0xFFFFFFFF,0xFFFFFFFF, /* QW0-1, ALL-EVEN */ |
| 0x00000000,0x00000000,0x00000000,0x00000000, /* QW2-3, ALL-EVEN */ |
| 0x00000000,0x00000000,0xFFFFFFFF,0xFFFFFFFF, /* QW4-5, ALL-EVEN */ |
| 0x00000000,0x00000000,0x00000000,0x00000000, /* QW6-7, ALL-EVEN */ |
| 0xFeFeFeFe,0xFeFeFeFe,0x01010101,0x01010101, /* QW0-1, DQ0-ODD */ |
| 0xFeFeFeFe,0xFeFeFeFe,0x01010101,0x01010101, /* QW2-3, DQ0-ODD */ |
| 0x01010101,0x01010101,0xFeFeFeFe,0xFeFeFeFe, /* QW4-5, DQ0-ODD */ |
| 0xFeFeFeFe,0xFeFeFeFe,0x01010101,0x01010101, /* QW6-7, DQ0-ODD */ |
| 0x02020202,0x02020202,0x02020202,0x02020202, /* QW0-1, DQ1-ODD */ |
| 0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, /* QW2-3, DQ1-ODD */ |
| 0xFdFdFdFd,0xFdFdFdFd,0x02020202,0x02020202, /* QW4-5, DQ1-ODD */ |
| 0x02020202,0x02020202,0x02020202,0x02020202, /* QW6-7, DQ1-ODD */ |
| 0x04040404,0x04040404,0xfBfBfBfB,0xfBfBfBfB, /* QW0-1, DQ2-ODD */ |
| 0x04040404,0x04040404,0x04040404,0x04040404, /* QW2-3, DQ2-ODD */ |
| 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, /* QW4-5, DQ2-ODD */ |
| 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, /* QW6-7, DQ2-ODD */ |
| 0x08080808,0x08080808,0xF7F7F7F7,0xF7F7F7F7, /* QW0-1, DQ3-ODD */ |
| 0x08080808,0x08080808,0x08080808,0x08080808, /* QW2-3, DQ3-ODD */ |
| 0xF7F7F7F7,0xF7F7F7F7,0x08080808,0x08080808, /* QW4-5, DQ3-ODD */ |
| 0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, /* QW6-7, DQ3-ODD */ |
| 0x10101010,0x10101010,0x10101010,0x10101010, /* QW0-1, DQ4-ODD */ |
| 0xeFeFeFeF,0xeFeFeFeF,0x10101010,0x10101010, /* QW2-3, DQ4-ODD */ |
| 0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, /* QW4-5, DQ4-ODD */ |
| 0xeFeFeFeF,0xeFeFeFeF,0x10101010,0x10101010, /* QW6-7, DQ4-ODD */ |
| 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, /* QW0-1, DQ5-ODD */ |
| 0xdFdFdFdF,0xdFdFdFdF,0x20202020,0x20202020, /* QW2-3, DQ5-ODD */ |
| 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, /* QW4-5, DQ5-ODD */ |
| 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, /* QW6-7, DQ5-ODD */ |
| 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, /* QW0-1, DQ6-ODD */ |
| 0x40404040,0x40404040,0xBfBfBfBf,0xBfBfBfBf, /* QW2-3, DQ6-ODD */ |
| 0x40404040,0x40404040,0xBfBfBfBf,0xBfBfBfBf, /* QW4-5, DQ6-ODD */ |
| 0x40404040,0x40404040,0xBfBfBfBf,0xBfBfBfBf, /* QW6-7, DQ6-ODD */ |
| 0x80808080,0x80808080,0x7F7F7F7F,0x7F7F7F7F, /* QW0-1, DQ7-ODD */ |
| 0x80808080,0x80808080,0x7F7F7F7F,0x7F7F7F7F, /* QW2-3, DQ7-ODD */ |
| 0x80808080,0x80808080,0x7F7F7F7F,0x7F7F7F7F, /* QW4-5, DQ7-ODD */ |
| 0x80808080,0x80808080,0x80808080,0x80808080 /* QW6-7, DQ7-ODD */ |
| }; |
| static const u32 TestPatternJD1b_D[] = { |
| 0x00000000,0x00000000,0x00000000,0x00000000, /* QW0,CHA-B, ALL-EVEN */ |
| 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, /* QW1,CHA-B, ALL-EVEN */ |
| 0x00000000,0x00000000,0x00000000,0x00000000, /* QW2,CHA-B, ALL-EVEN */ |
| 0x00000000,0x00000000,0x00000000,0x00000000, /* QW3,CHA-B, ALL-EVEN */ |
| 0x00000000,0x00000000,0x00000000,0x00000000, /* QW4,CHA-B, ALL-EVEN */ |
| 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, /* QW5,CHA-B, ALL-EVEN */ |
| 0x00000000,0x00000000,0x00000000,0x00000000, /* QW6,CHA-B, ALL-EVEN */ |
| 0x00000000,0x00000000,0x00000000,0x00000000, /* QW7,CHA-B, ALL-EVEN */ |
| 0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, /* QW0,CHA-B, DQ0-ODD */ |
| 0x01010101,0x01010101,0x01010101,0x01010101, /* QW1,CHA-B, DQ0-ODD */ |
| 0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, /* QW2,CHA-B, DQ0-ODD */ |
| 0x01010101,0x01010101,0x01010101,0x01010101, /* QW3,CHA-B, DQ0-ODD */ |
| 0x01010101,0x01010101,0x01010101,0x01010101, /* QW4,CHA-B, DQ0-ODD */ |
| 0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, /* QW5,CHA-B, DQ0-ODD */ |
| 0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, /* QW6,CHA-B, DQ0-ODD */ |
| 0x01010101,0x01010101,0x01010101,0x01010101, /* QW7,CHA-B, DQ0-ODD */ |
| 0x02020202,0x02020202,0x02020202,0x02020202, /* QW0,CHA-B, DQ1-ODD */ |
| 0x02020202,0x02020202,0x02020202,0x02020202, /* QW1,CHA-B, DQ1-ODD */ |
| 0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, /* QW2,CHA-B, DQ1-ODD */ |
| 0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, /* QW3,CHA-B, DQ1-ODD */ |
| 0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, /* QW4,CHA-B, DQ1-ODD */ |
| 0x02020202,0x02020202,0x02020202,0x02020202, /* QW5,CHA-B, DQ1-ODD */ |
| 0x02020202,0x02020202,0x02020202,0x02020202, /* QW6,CHA-B, DQ1-ODD */ |
| 0x02020202,0x02020202,0x02020202,0x02020202, /* QW7,CHA-B, DQ1-ODD */ |
| 0x04040404,0x04040404,0x04040404,0x04040404, /* QW0,CHA-B, DQ2-ODD */ |
| 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, /* QW1,CHA-B, DQ2-ODD */ |
| 0x04040404,0x04040404,0x04040404,0x04040404, /* QW2,CHA-B, DQ2-ODD */ |
| 0x04040404,0x04040404,0x04040404,0x04040404, /* QW3,CHA-B, DQ2-ODD */ |
| 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, /* QW4,CHA-B, DQ2-ODD */ |
| 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, /* QW5,CHA-B, DQ2-ODD */ |
| 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, /* QW6,CHA-B, DQ2-ODD */ |
| 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, /* QW7,CHA-B, DQ2-ODD */ |
| 0x08080808,0x08080808,0x08080808,0x08080808, /* QW0,CHA-B, DQ3-ODD */ |
| 0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, /* QW1,CHA-B, DQ3-ODD */ |
| 0x08080808,0x08080808,0x08080808,0x08080808, /* QW2,CHA-B, DQ3-ODD */ |
| 0x08080808,0x08080808,0x08080808,0x08080808, /* QW3,CHA-B, DQ3-ODD */ |
| 0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, /* QW4,CHA-B, DQ3-ODD */ |
| 0x08080808,0x08080808,0x08080808,0x08080808, /* QW5,CHA-B, DQ3-ODD */ |
| 0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, /* QW6,CHA-B, DQ3-ODD */ |
| 0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, /* QW7,CHA-B, DQ3-ODD */ |
| 0x10101010,0x10101010,0x10101010,0x10101010, /* QW0,CHA-B, DQ4-ODD */ |
| 0x10101010,0x10101010,0x10101010,0x10101010, /* QW1,CHA-B, DQ4-ODD */ |
| 0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, /* QW2,CHA-B, DQ4-ODD */ |
| 0x10101010,0x10101010,0x10101010,0x10101010, /* QW3,CHA-B, DQ4-ODD */ |
| 0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, /* QW4,CHA-B, DQ4-ODD */ |
| 0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, /* QW5,CHA-B, DQ4-ODD */ |
| 0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, /* QW6,CHA-B, DQ4-ODD */ |
| 0x10101010,0x10101010,0x10101010,0x10101010, /* QW7,CHA-B, DQ4-ODD */ |
| 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, /* QW0,CHA-B, DQ5-ODD */ |
| 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, /* QW1,CHA-B, DQ5-ODD */ |
| 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, /* QW2,CHA-B, DQ5-ODD */ |
| 0x20202020,0x20202020,0x20202020,0x20202020, /* QW3,CHA-B, DQ5-ODD */ |
| 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, /* QW4,CHA-B, DQ5-ODD */ |
| 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, /* QW5,CHA-B, DQ5-ODD */ |
| 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, /* QW6,CHA-B, DQ5-ODD */ |
| 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, /* QW7,CHA-B, DQ5-ODD */ |
| 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, /* QW0,CHA-B, DQ6-ODD */ |
| 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, /* QW1,CHA-B, DQ6-ODD */ |
| 0x40404040,0x40404040,0x40404040,0x40404040, /* QW2,CHA-B, DQ6-ODD */ |
| 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, /* QW3,CHA-B, DQ6-ODD */ |
| 0x40404040,0x40404040,0x40404040,0x40404040, /* QW4,CHA-B, DQ6-ODD */ |
| 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, /* QW5,CHA-B, DQ6-ODD */ |
| 0x40404040,0x40404040,0x40404040,0x40404040, /* QW6,CHA-B, DQ6-ODD */ |
| 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, /* QW7,CHA-B, DQ6-ODD */ |
| 0x80808080,0x80808080,0x80808080,0x80808080, /* QW0,CHA-B, DQ7-ODD */ |
| 0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F, /* QW1,CHA-B, DQ7-ODD */ |
| 0x80808080,0x80808080,0x80808080,0x80808080, /* QW2,CHA-B, DQ7-ODD */ |
| 0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F, /* QW3,CHA-B, DQ7-ODD */ |
| 0x80808080,0x80808080,0x80808080,0x80808080, /* QW4,CHA-B, DQ7-ODD */ |
| 0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F, /* QW5,CHA-B, DQ7-ODD */ |
| 0x80808080,0x80808080,0x80808080,0x80808080, /* QW6,CHA-B, DQ7-ODD */ |
| 0x80808080,0x80808080,0x80808080,0x80808080 /* QW7,CHA-B, DQ7-ODD */ |
| }; |
| |
| void TrainReceiverEn_D(struct MCTStatStruc *pMCTstat, |
| struct DCTStatStruc *pDCTstatA, u8 Pass) |
| { |
| u8 Node; |
| struct DCTStatStruc *pDCTstat; |
| u32 val; |
| |
| for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) { |
| pDCTstat = pDCTstatA + Node; |
| |
| if (pDCTstat->DCTSysLimit) { |
| val = Get_NB32(pDCTstat->dev_dct, 0x78); |
| val |= 1 <<DqsRcvEnTrain; |
| Set_NB32(pDCTstat->dev_dct, 0x78, val); |
| val = Get_NB32(pDCTstat->dev_dct, 0x78 + 0x100); |
| val |= 1 <<DqsRcvEnTrain; |
| Set_NB32(pDCTstat->dev_dct, 0x78 + 0x100, val); |
| mct_TrainRcvrEn_D(pMCTstat, pDCTstat, Pass); |
| } |
| } |
| } |
| |
| static void SetEccDQSRdWrPos_D(struct MCTStatStruc *pMCTstat, |
| struct DCTStatStruc *pDCTstat, u8 ChipSel) |
| { |
| u8 channel; |
| u8 direction; |
| |
| for (channel = 0; channel < 2; channel++){ |
| for (direction = 0; direction < 2; direction++) { |
| pDCTstat->Channel = channel; /* Channel A or B */ |
| pDCTstat->Direction = direction; /* Read or write */ |
| CalcEccDQSPos_D(pMCTstat, pDCTstat, pDCTstat->CH_EccDQSLike[channel], pDCTstat->CH_EccDQSScale[channel], ChipSel); |
| print_debug_dqs_pair("\t\tSetEccDQSRdWrPos: channel ", channel, direction==DQS_READDIR? " R dqs_delay":" W dqs_delay", pDCTstat->DQSDelay, 2); |
| pDCTstat->ByteLane = 8; |
| StoreDQSDatStrucVal_D(pMCTstat, pDCTstat, ChipSel); |
| mct_SetDQSDelayCSR_D(pMCTstat, pDCTstat, ChipSel); |
| } |
| } |
| } |
| |
| static void CalcEccDQSPos_D(struct MCTStatStruc *pMCTstat, |
| struct DCTStatStruc *pDCTstat, |
| u16 like, u8 scale, u8 ChipSel) |
| { |
| u8 DQSDelay0, DQSDelay1; |
| u16 DQSDelay; |
| |
| if (pDCTstat->Status & (1 << SB_Registered)) { |
| return; |
| } |
| |
| pDCTstat->ByteLane = like & 0xff; |
| GetDQSDatStrucVal_D(pMCTstat, pDCTstat, ChipSel); |
| DQSDelay0 = pDCTstat->DQSDelay; |
| |
| pDCTstat->ByteLane = (like >> 8) & 0xff; |
| GetDQSDatStrucVal_D(pMCTstat, pDCTstat, ChipSel); |
| DQSDelay1 = pDCTstat->DQSDelay; |
| |
| if (DQSDelay0>DQSDelay1) { |
| DQSDelay = DQSDelay0 - DQSDelay1; |
| } else { |
| DQSDelay = DQSDelay1 - DQSDelay0; |
| } |
| |
| DQSDelay = DQSDelay * (~scale); |
| |
| DQSDelay += 0x80; /* round it */ |
| |
| DQSDelay >>= 8; /* 256 */ |
| |
| if (DQSDelay0>DQSDelay1) { |
| DQSDelay = DQSDelay1 - DQSDelay; |
| } else { |
| DQSDelay += DQSDelay1; |
| } |
| |
| pDCTstat->DQSDelay = (u8)DQSDelay; |
| } |
| |
/*
 * Program the write data timing delays of one DIMM.
 *
 * delay:     per-lane delay values; entries 0-7 are the data lanes and
 *            entry 8 is the ECC lane, so at least 9 entries are read
 * dev:       device passed to the indexed register accessors
 * dimm:      DIMM number, placed in bits 8 and up of the register index
 * index_reg: index register passed to the indexed register accessors
 *
 * Only the low 7 bits of each delay are used. Indexed registers 0x1 and 0x2
 * each hold four lanes, one per byte; register 0x3 holds the ECC lane in
 * its low byte. All other register bits are preserved.
 */
static void write_dqs_write_data_timing_registers(uint16_t* delay, uint32_t dev, uint8_t dimm, uint32_t index_reg)
{
	const uint32_t dimm_select = dimm << 8;
	uint32_t packed;
	uint8_t group;
	uint8_t slot;

	/* Group 0: lanes 0 - 3 (index 0x1), group 1: lanes 4 - 7 (index 0x2) */
	for (group = 0; group < 2; group++) {
		const uint32_t index = (0x1 + group) | dimm_select;

		packed = Get_NB32_index_wait(dev, index_reg, index);
		packed &= ~0x7f7f7f7f;
		for (slot = 0; slot < 4; slot++)
			packed |= (uint32_t)(delay[(group * 4) + slot] & 0x7f) << (slot * 8);
		Set_NB32_index_wait(dev, index_reg, index, packed);
	}

	/* Lane 8 (ECC) */
	packed = Get_NB32_index_wait(dev, index_reg, 0x3 | dimm_select);
	packed &= ~0x0000007f;
	packed |= delay[8] & 0x7f;
	Set_NB32_index_wait(dev, index_reg, 0x3 | dimm_select, packed);
}
| |
/*
 * Program the read DQS timing delays of one DIMM.
 *
 * delay:     per-lane delay values; entries 0-7 are the data lanes and
 *            entry 8 is the ECC lane, so at least 9 entries are read
 * dev:       device passed to the indexed register accessors
 * dimm:      DIMM number, placed in bits 8 and up of the register index
 * index_reg: index register passed to the indexed register accessors
 *
 * Only the low 6 bits of each delay are used. Indexed registers 0x5 and 0x6
 * each hold four lanes, one per byte; register 0x7 holds the ECC lane in
 * its low byte. All other register bits are preserved.
 */
static void write_dqs_read_data_timing_registers(uint16_t* delay, uint32_t dev, uint8_t dimm, uint32_t index_reg)
{
	const uint32_t dimm_select = dimm << 8;
	uint32_t packed;
	uint8_t group;
	uint8_t slot;

	/* Group 0: lanes 0 - 3 (index 0x5), group 1: lanes 4 - 7 (index 0x6) */
	for (group = 0; group < 2; group++) {
		const uint32_t index = (0x5 + group) | dimm_select;

		packed = Get_NB32_index_wait(dev, index_reg, index);
		packed &= ~0x3f3f3f3f;
		for (slot = 0; slot < 4; slot++)
			packed |= (uint32_t)(delay[(group * 4) + slot] & 0x3f) << (slot * 8);
		Set_NB32_index_wait(dev, index_reg, index, packed);
	}

	/* Lane 8 (ECC) */
	packed = Get_NB32_index_wait(dev, index_reg, 0x7 | dimm_select);
	packed &= ~0x0000003f;
	packed |= delay[8] & 0x3f;
	Set_NB32_index_wait(dev, index_reg, 0x7 | dimm_select, packed);
}
| |
| /* DQS Position Training |
| * Algorithm detailed in the Fam10h BKDG Rev. 3.62 section 2.8.9.9.3 |
| */ |
| static void TrainDQSRdWrPos_D(struct MCTStatStruc *pMCTstat, |
| struct DCTStatStruc *pDCTstat) |
| { |
| u32 Errors; |
| u8 Channel; |
| u8 Receiver; |
| u8 _DisableDramECC = 0; |
| u32 PatternBuffer[304]; /* 288 + 16 */ |
| u8 _Wrap32Dis = 0, _SSE2 = 0; |
| |
| u32 dev; |
| u32 addr; |
| u8 valid; |
| u32 cr4; |
| u32 lo, hi; |
| u32 index_reg; |
| uint32_t TestAddr; |
| |
| uint8_t dual_rank; |
| uint8_t iter; |
| uint8_t lane; |
| uint16_t bytelane_test_results; |
| uint16_t current_write_dqs_delay[MAX_BYTE_LANES]; |
| uint16_t current_read_dqs_delay[MAX_BYTE_LANES]; |
| uint16_t write_dqs_delay_stepping_done[MAX_BYTE_LANES]; |
| uint8_t dqs_read_results_array[2][MAX_BYTE_LANES][64]; /* [rank][lane][step] */ |
| uint8_t dqs_write_results_array[2][MAX_BYTE_LANES][128]; /* [rank][lane][step] */ |
| |
| uint8_t last_pos = 0; |
| uint8_t cur_count = 0; |
| uint8_t best_pos = 0; |
| uint8_t best_count = 0; |
| |
| print_debug_dqs("\nTrainDQSRdWrPos: Node_ID ", pDCTstat->Node_ID, 0); |
| cr4 = read_cr4(); |
| if (cr4 & (1<<9)) { |
| _SSE2 = 1; |
| } |
| cr4 |= (1<<9); /* OSFXSR enable SSE2 */ |
| write_cr4(cr4); |
| |
| addr = HWCR; |
| _RDMSR(addr, &lo, &hi); |
| if (lo & (1<<17)) { |
| _Wrap32Dis = 1; |
| } |
| lo |= (1<<17); /* HWCR.wrap32dis */ |
| _WRMSR(addr, lo, hi); /* allow 64-bit memory references in real mode */ |
| |
| /* Disable ECC correction of reads on the dram bus. */ |
| _DisableDramECC = mct_DisableDimmEccEn_D(pMCTstat, pDCTstat); |
| |
| SetupDqsPattern_D(pMCTstat, pDCTstat, PatternBuffer); |
| |
| /* mct_BeforeTrainDQSRdWrPos_D */ |
| |
| dev = pDCTstat->dev_dct; |
| pDCTstat->Direction = DQS_READDIR; |
| |
| /* 2.8.9.9.3 (2) |
| * Loop over each channel, lane, and rank |
| */ |
| |
| /* NOTE |
| * The BKDG originally stated to iterate over lane, then rank, however this process is quite slow |
| * compared to an equivalent loop over rank, then lane as the latter allows multiple lanes to be |
| * tested simultaneously, thus improving performance by around 8x. |
| */ |
| |
| Errors = 0; |
| for (Channel = 0; Channel < 2; Channel++) { |
| print_debug_dqs("\tTrainDQSRdWrPos: 1 Channel ", Channel, 1); |
| pDCTstat->Channel = Channel; |
| |
| if (pDCTstat->DIMMValidDCT[Channel] == 0) /* mct_BeforeTrainDQSRdWrPos_D */ |
| continue; |
| |
| index_reg = 0x98 + 0x100 * Channel; |
| |
| dual_rank = 0; |
| Receiver = mct_InitReceiver_D(pDCTstat, Channel); |
| /* There are four receiver pairs, loosely associated with chipselects. |
| * This is essentially looping over each rank of each DIMM. |
| */ |
| for (; Receiver < 8; Receiver++) { |
| if ((Receiver & 0x1) == 0) { |
| /* Even rank of DIMM */ |
| if(mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver+1)) |
| dual_rank = 1; |
| else |
| dual_rank = 0; |
| } |
| |
| if (!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver)) { |
| continue; |
| } |
| |
| /* Select the base test address for the current rank */ |
| TestAddr = mct_GetMCTSysAddr_D(pMCTstat, pDCTstat, Channel, Receiver, &valid); |
| if (!valid) { /* Address not supported on current CS */ |
| continue; |
| } |
| |
| print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 14 TestAddr ", TestAddr, 4); |
| SetUpperFSbase(TestAddr); /* fs:eax=far ptr to target */ |
| |
| print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 12 Receiver ", Receiver, 2); |
| |
| /* 2.8.9.9.3 (DRAM Write Data Timing Loop) |
| * Iterate over all possible DQS delay values (0x0 - 0x7f) |
| */ |
| uint8_t test_write_dqs_delay = 0; |
| uint8_t test_read_dqs_delay = 0; |
| uint8_t passing_dqs_delay_found[MAX_BYTE_LANES]; |
| |
| /* Initialize variables */ |
| for (lane = 0; lane < MAX_BYTE_LANES; lane++) { |
| current_write_dqs_delay[lane] = 0; |
| passing_dqs_delay_found[lane] = 0; |
| write_dqs_delay_stepping_done[lane] = 0; |
| } |
| |
| for (test_write_dqs_delay = 0; test_write_dqs_delay < 128; test_write_dqs_delay++) { |
| print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 16 test_write_dqs_delay ", test_write_dqs_delay, 6); |
| |
| /* Break out of loop if passing window already found, */ |
| if (write_dqs_delay_stepping_done[0] && write_dqs_delay_stepping_done[1] |
| && write_dqs_delay_stepping_done[2] && write_dqs_delay_stepping_done[3] |
| && write_dqs_delay_stepping_done[4] && write_dqs_delay_stepping_done[5] |
| && write_dqs_delay_stepping_done[6] && write_dqs_delay_stepping_done[7]) |
| break; |
| |
| /* Commit the current Write Data Timing settings to the hardware registers */ |
| write_dqs_write_data_timing_registers(current_write_dqs_delay, dev, (Receiver >> 1), index_reg); |
| |
| /* Write the DRAM training pattern to the base test address */ |
| WriteDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8); |
| |
| /* 2.8.9.9.3 (DRAM Read DQS Timing Control Loop) |
| * Iterate over all possible DQS delay values (0x0 - 0x3f) |
| */ |
| for (test_read_dqs_delay = 0; test_read_dqs_delay < 64; test_read_dqs_delay++) { |
| print_debug_dqs("\t\t\t\t\tTrainDQSRdWrPos: 161 test_read_dqs_delay ", test_read_dqs_delay, 6); |
| |
| /* Initialize Read DQS Timing Control settings for this iteration */ |
| for (lane = 0; lane < MAX_BYTE_LANES; lane++) |
| if (!write_dqs_delay_stepping_done[lane]) |
| current_read_dqs_delay[lane] = test_read_dqs_delay; |
| |
| /* Commit the current Read DQS Timing Control settings to the hardware registers */ |
| write_dqs_read_data_timing_registers(current_read_dqs_delay, dev, (Receiver >> 1), index_reg); |
| |
| /* Initialize test result variable */ |
| bytelane_test_results = 0xff; |
| |
| /* Read the DRAM training pattern from the base test address three times |
| * NOTE |
| * While the BKDG states to read three times this is probably excessive! |
| * Decrease training time by only reading the test pattern once per iteration |
| */ |
| for (iter = 0; iter < 1; iter++) { |
| /* Flush caches */ |
| SetTargetWTIO_D(TestAddr); |
| FlushDQSTestPattern_D(pDCTstat, TestAddr << 8); |
| ResetTargetWTIO_D(); |
| |
| /* Read and compare pattern */ |
| bytelane_test_results &= (CompareDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8) & 0xff); /* [Lane 7 :: Lane 0] 0=fail, 1=pass */ |
| |
| /* If all lanes have already failed testing bypass remaining re-read attempt(s) */ |
| if (bytelane_test_results == 0x0) |
| break; |
| } |
| |
| /* Store any lanes that passed testing for later use */ |
| for (lane = 0; lane < 8; lane++) |
| if (!write_dqs_delay_stepping_done[lane]) |
| dqs_read_results_array[Receiver & 0x1][lane][test_read_dqs_delay] = (!!(bytelane_test_results & (1 << lane))); |
| |
| print_debug_dqs("\t\t\t\t\tTrainDQSRdWrPos: 162 bytelane_test_results ", bytelane_test_results, 6); |
| } |
| |
| for (lane = 0; lane < MAX_BYTE_LANES; lane++) { |
| if (write_dqs_delay_stepping_done[lane]) |
| continue; |
| |
| /* Determine location and length of longest consecutive string of passing values |
| * Output is stored in best_pos and best_count |
| */ |
| last_pos = 0; |
| cur_count = 0; |
| best_pos = 0; |
| best_count = 0; |
| for (iter = 0; iter < 64; iter++) { |
| if ((dqs_read_results_array[Receiver & 0x1][lane][iter]) && (iter < 63)) { |
| /* Pass */ |
| cur_count++; |
| } else { |
| /* Failure or end of loop */ |
| if (cur_count > best_count) { |
| best_count = cur_count; |
| best_pos = last_pos; |
| } |
| cur_count = 0; |
| last_pos = iter; |
| } |
| } |
| |
| if (best_count > 2) { |
| /* Exit the DRAM Write Data Timing Loop after programming the Read DQS Timing Control |
| * register with the center of the passing window |
| */ |
| current_read_dqs_delay[lane] = (best_pos + (best_count / 2)); |
| passing_dqs_delay_found[lane] = 1; |
| |
| /* Commit the current Read DQS Timing Control settings to the hardware registers */ |
| write_dqs_read_data_timing_registers(current_read_dqs_delay, dev, (Receiver >> 1), index_reg); |
| |
| /* Exit the DRAM Write Data Timing Loop */ |
| write_dqs_delay_stepping_done[lane] = 1; |
| |
| print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 142 largest passing region ", best_count, 4); |
| print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 143 largest passing region start ", best_pos, 4); |
| } |
| |
| /* Increment the DQS Write Delay value if needed for the next DRAM Write Data Timing Loop iteration */ |
| if (!write_dqs_delay_stepping_done[lane]) |
| current_write_dqs_delay[lane]++; |
| } |
| } |
| |
| /* Flag failure(s) if present */ |
| for (lane = 0; lane < 8; lane++) { |
| if (!passing_dqs_delay_found[lane]) { |
| print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 121 Unable to find passing region for lane ", lane, 2); |
| |
| /* Flag absence of passing window */ |
| Errors |= 1 << SB_NODQSPOS; |
| } |
| } |
| |
| /* Iterate over all possible Write Data Timing values (0x0 - 0x7f) |
| * Note that the Read DQS Timing Control was calibrated / centered in the prior nested loop |
| */ |
| for (test_write_dqs_delay = 0; test_write_dqs_delay < 128; test_write_dqs_delay++) { |
| /* Initialize Write Data Timing settings for this iteration */ |
| for (lane = 0; lane < MAX_BYTE_LANES; lane++) |
| current_write_dqs_delay[lane] = test_write_dqs_delay; |
| |
| /* Commit the current Write Data Timing settings to the hardware registers */ |
| write_dqs_write_data_timing_registers(current_write_dqs_delay, dev, (Receiver >> 1), index_reg); |
| |
| /* Write the DRAM training pattern to the base test address */ |
| WriteDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8); |
| |
| /* Flush caches */ |
| SetTargetWTIO_D(TestAddr); |
| FlushDQSTestPattern_D(pDCTstat, TestAddr << 8); |
| ResetTargetWTIO_D(); |
| |
| /* Read and compare pattern from the base test address */ |
| bytelane_test_results = (CompareDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8) & 0xff); /* [Lane 7 :: Lane 0] 0=fail, 1=pass */ |
| |
| /* Store any lanes that passed testing for later use */ |
| for (lane = 0; lane < 8; lane++) |
| dqs_write_results_array[Receiver & 0x1][lane][test_write_dqs_delay] = (!!(bytelane_test_results & (1 << lane))); |
| } |
| |
| for (lane = 0; lane < 8; lane++) { |
| if ((!dual_rank) || (dual_rank && (Receiver & 0x1))) { |
| |
| #ifdef PRINT_PASS_FAIL_BITMAPS |
| for (iter = 0; iter < 64; iter++) { |
| if (dqs_read_results_array[0][lane][iter]) |
| printk(BIOS_DEBUG, "+"); |
| else |
| printk(BIOS_DEBUG, "."); |
| } |
| printk(BIOS_DEBUG, "\n"); |
| for (iter = 0; iter < 64; iter++) { |
| if (dqs_read_results_array[1][lane][iter]) |
| printk(BIOS_DEBUG, "+"); |
| else |
| printk(BIOS_DEBUG, "."); |
| } |
| printk(BIOS_DEBUG, "\n\n"); |
| for (iter = 0; iter < 128; iter++) { |
| if (dqs_write_results_array[0][lane][iter]) |
| printk(BIOS_DEBUG, "+"); |
| else |
| printk(BIOS_DEBUG, "."); |
| } |
| printk(BIOS_DEBUG, "\n"); |
| for (iter = 0; iter < 128; iter++) { |
| if (dqs_write_results_array[1][lane][iter]) |
| printk(BIOS_DEBUG, "+"); |
| else |
| printk(BIOS_DEBUG, "."); |
| } |
| printk(BIOS_DEBUG, "\n\n"); |
| #endif |
| |
| /* Base rank of single-rank DIMM, or odd rank of dual-rank DIMM */ |
| if (dual_rank) { |
| /* Intersect the passing windows of both ranks */ |
| for (iter = 0; iter < 64; iter++) |
| if (!dqs_read_results_array[1][lane][iter]) |
| dqs_read_results_array[0][lane][iter] = 0; |
| for (iter = 0; iter < 128; iter++) |
| if (!dqs_write_results_array[1][lane][iter]) |
| dqs_write_results_array[0][lane][iter] = 0; |
| } |
| |
| /* Determine location and length of longest consecutive string of passing values for read DQS timing |
| * Output is stored in best_pos and best_count |
| */ |
| last_pos = 0; |
| cur_count = 0; |
| best_pos = 0; |
| best_count = 0; |
| for (iter = 0; iter < 64; iter++) { |
| if ((dqs_read_results_array[0][lane][iter]) && (iter < 63)) { |
| /* Pass */ |
| cur_count++; |
| } else { |
| /* Failure or end of loop */ |
| if (cur_count > best_count) { |
| best_count = cur_count; |
| best_pos = last_pos; |
| } |
| cur_count = 0; |
| last_pos = iter; |
| } |
| } |
| print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 144 largest read passing region ", best_count, 4); |
| if (best_count > 0) { |
| if (best_count < MIN_DQS_WNDW) { |
| /* Flag excessively small passing window */ |
| Errors |= 1 << SB_SMALLDQS; |
| } |
| |
| /* Find the center of the passing window */ |
| current_read_dqs_delay[lane] = (best_pos + (best_count / 2)); |
| |
| /* Commit the current Read DQS Timing Control settings to the hardware registers */ |
| write_dqs_read_data_timing_registers(current_read_dqs_delay, dev, (Receiver >> 1), index_reg); |
| |
| /* Save the final Read DQS Timing Control settings for later use */ |
| pDCTstat->CH_D_DIR_B_DQS[Channel][Receiver >> 1][DQS_READDIR][lane] = current_read_dqs_delay[lane]; |
| } else { |
| print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 122 Unable to find read passing region for lane ", lane, 2); |
| |
| /* Flag absence of passing window */ |
| Errors |= 1 << SB_NODQSPOS; |
| } |
| |
| /* Determine location and length of longest consecutive string of passing values for write DQS timing |
| * Output is stored in best_pos and best_count |
| */ |
| last_pos = 0; |
| cur_count = 0; |
| best_pos = 0; |
| best_count = 0; |
| for (iter = 0; iter < 128; iter++) { |
| if ((dqs_write_results_array[0][lane][iter]) && (iter < 127)) { |
| /* Pass */ |
| cur_count++; |
| } else { |
| /* Failure or end of loop */ |
| if (cur_count > best_count) { |
| best_count = cur_count; |
| best_pos = last_pos; |
| } |
| cur_count = 0; |
| last_pos = iter; |
| } |
| } |
| print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 145 largest write passing region ", best_count, 4); |
| if (best_count > 0) { |
| if (best_count < MIN_DQS_WNDW) { |
| /* Flag excessively small passing window */ |
| Errors |= 1 << SB_SMALLDQS; |
| } |
| |
| /* Find the center of the passing window */ |
| current_write_dqs_delay[lane] = (best_pos + (best_count / 2)); |
| |
| /* Commit the current Write Data Timing settings to the hardware registers */ |
| write_dqs_write_data_timing_registers(current_write_dqs_delay, dev, (Receiver >> 1), index_reg); |
| |
| /* Save the final Write Data Timing settings for later use */ |
| pDCTstat->CH_D_DIR_B_DQS[Channel][Receiver >> 1][DQS_WRITEDIR][lane] = current_write_dqs_delay[lane]; |
| } else { |
| print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 123 Unable to find write passing region for lane ", lane, 2); |
| |
| /* Flag absence of passing window */ |
| Errors |= 1 << SB_NODQSPOS; |
| } |
| } |
| } |
| |
| } |
| } |
| |
| pDCTstat->TrainErrors |= Errors; |
| pDCTstat->ErrStatus |= Errors; |
| |
| #if DQS_TRAIN_DEBUG > 0 |
| { |
| u8 val; |
| u8 i; |
| u8 ChannelDTD, ReceiverDTD, Dir; |
| u8 *p; |
| |
| for (Dir = 0; Dir < 2; Dir++) { |
| if (Dir == 1) { |
| printk(BIOS_DEBUG, "TrainDQSRdWrPos: CH_D_DIR_B_DQS WR:\n"); |
| } else { |
| printk(BIOS_DEBUG, "TrainDQSRdWrPos: CH_D_DIR_B_DQS RD:\n"); |
| } |
| for (ChannelDTD = 0; ChannelDTD < 2; ChannelDTD++) { |
| printk(BIOS_DEBUG, "Channel: %02x\n", ChannelDTD); |
| for (ReceiverDTD = 0; ReceiverDTD < MAX_CS_SUPPORTED; ReceiverDTD += 2) { |
| printk(BIOS_DEBUG, "\t\tReceiver: %02x:", ReceiverDTD); |
| p = pDCTstat->CH_D_DIR_B_DQS[ChannelDTD][ReceiverDTD >> 1][Dir]; |
| for (i=0;i<8; i++) { |
| val = p[i]; |
| printk(BIOS_DEBUG, " %02x", val); |
| } |
| printk(BIOS_DEBUG, "\n"); |
| } |
| } |
| } |
| |
| } |
| #endif |
| if (_DisableDramECC) { |
| mct_EnableDimmEccEn_D(pMCTstat, pDCTstat, _DisableDramECC); |
| } |
| if (!_Wrap32Dis) { |
| addr = HWCR; |
| _RDMSR(addr, &lo, &hi); |
| lo &= ~(1<<17); /* restore HWCR.wrap32dis */ |
| _WRMSR(addr, lo, hi); |
| } |
| if (!_SSE2){ |
| cr4 = read_cr4(); |
| cr4 &= ~(1<<9); /* restore cr4.OSFXSR */ |
| write_cr4(cr4); |
| } |
| |
| printk(BIOS_DEBUG, "TrainDQSRdWrPos: Status %x\n", pDCTstat->Status); |
| printk(BIOS_DEBUG, "TrainDQSRdWrPos: TrainErrors %x\n", pDCTstat->TrainErrors); |
| printk(BIOS_DEBUG, "TrainDQSRdWrPos: ErrStatus %x\n", pDCTstat->ErrStatus); |
| printk(BIOS_DEBUG, "TrainDQSRdWrPos: ErrCode %x\n", pDCTstat->ErrCode); |
| printk(BIOS_DEBUG, "TrainDQSRdWrPos: Done\n\n"); |
| } |
| |
| static void SetupDqsPattern_D(struct MCTStatStruc *pMCTstat, |
| struct DCTStatStruc *pDCTstat, u32 *buffer) |
| { |
| /* 1. Set the Pattern type (0 or 1) in DCTStatstruc.Pattern |
| * 2. Copy the pattern from ROM to Cache, aligning on 16 byte boundary |
| * 3. Set the ptr to Cacheable copy in DCTStatstruc.PtrPatternBufA |
| */ |
| |
| u32 *buf; |
| u16 i; |
| |
| buf = (u32 *)(((u32)buffer + 0x10) & (0xfffffff0)); |
| if (pDCTstat->Status & (1<<SB_128bitmode)) { |
| pDCTstat->Pattern = 1; /* 18 cache lines, alternating qwords */ |
| for (i=0; i<16*18; i++) |
| buf[i] = TestPatternJD1b_D[i]; |
| } else { |
| pDCTstat->Pattern = 0; /* 9 cache lines, sequential qwords */ |
| for (i=0; i<16*9; i++) |
| buf[i] = TestPatternJD1a_D[i]; |
| } |
| pDCTstat->PtrPatternBufA = (u32)buf; |
| } |
| |
| static void StoreDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat, |
| struct DCTStatStruc *pDCTstat, u8 ChipSel) |
| { |
| /* Store the DQSDelay value, found during a training sweep, into the DCT |
| * status structure for this node |
| */ |
| |
| /* When 400, 533, 667, it will support dimm0/1/2/3, |
| * and set conf for dimm0, hw will copy to dimm1/2/3 |
| * set for dimm1, hw will copy to dimm3 |
| * Rev A/B only support DIMM0/1 when 800MHz and above + 0x100 to next dimm |
| * Rev C support DIMM0/1/2/3 when 800MHz and above + 0x100 to next dimm |
| */ |
| |
| /* FindDQSDatDimmVal_D is not required since we use an array */ |
| u8 dn = 0; |
| |
| dn = ChipSel>>1; /* if odd or even logical DIMM */ |
| |
| pDCTstat->CH_D_DIR_B_DQS[pDCTstat->Channel][dn][pDCTstat->Direction][pDCTstat->ByteLane] = |
| pDCTstat->DQSDelay; |
| } |
| |
| static void GetDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat, |
| struct DCTStatStruc *pDCTstat, u8 ChipSel) |
| { |
| u8 dn = 0; |
| |
| /* When 400, 533, 667, it will support dimm0/1/2/3, |
| * and set conf for dimm0, hw will copy to dimm1/2/3 |
| * set for dimm1, hw will copy to dimm3 |
| * Rev A/B only support DIMM0/1 when 800MHz and above + 0x100 to next dimm |
| * Rev C support DIMM0/1/2/3 when 800MHz and above + 0x100 to next dimm |
| */ |
| |
| /* FindDQSDatDimmVal_D is not required since we use an array */ |
| dn = ChipSel >> 1; /*if odd or even logical DIMM */ |
| |
| pDCTstat->DQSDelay = |
| pDCTstat->CH_D_DIR_B_DQS[pDCTstat->Channel][dn][pDCTstat->Direction][pDCTstat->ByteLane]; |
| } |
| |
| /* FindDQSDatDimmVal_D is not required since we use an array */ |
| |
| static void proc_IOCLFLUSH_D(u32 addr_hi) |
| { |
| SetTargetWTIO_D(addr_hi); |
| proc_CLFLUSH(addr_hi); |
| ResetTargetWTIO_D(); |
| } |
| |
| static u8 ChipSelPresent_D(struct MCTStatStruc *pMCTstat, |
| struct DCTStatStruc *pDCTstat, |
| u8 Channel, u8 ChipSel) |
| { |
| u32 val; |
| u32 reg; |
| u32 dev = pDCTstat->dev_dct; |
| u32 reg_off; |
| u8 ret = 0; |
| |
| if (!pDCTstat->GangedMode) { |
| reg_off = 0x100 * Channel; |
| } else { |
| reg_off = 0; |
| } |
| |
| if (ChipSel < MAX_CS_SUPPORTED){ |
| reg = 0x40 + (ChipSel << 2) + reg_off; |
| val = Get_NB32(dev, reg); |
| if (val & ( 1 << 0)) |
| ret = 1; |
| } |
| |
| return ret; |
| } |
| |
| /* proc_CLFLUSH_D located in mct_gcc.h */ |
| |
| static void WriteDQSTestPattern_D(struct MCTStatStruc *pMCTstat, |
| struct DCTStatStruc *pDCTstat, |
| u32 TestAddr_lo) |
| { |
| /* Write a pattern of 72 bit times (per DQ), to test dram functionality. |
| * The pattern is a stress pattern which exercises both ISI and |
| * crosstalk. The number of cache lines to fill is dependent on DCT |
| * width mode and burstlength. |
| * Mode BL Lines Pattern no. |
| * ----+---+------------------- |
| * 64 4 9 0 |
| * 64 8 9 0 |
| * 64M 4 9 0 |
| * 64M 8 9 0 |
| * 128 4 18 1 |
| * 128 8 N/A - |
| */ |
| if (pDCTstat->Pattern == 0) |
| WriteL9TestPattern_D(pDCTstat, TestAddr_lo); |
| else |
| WriteL18TestPattern_D(pDCTstat, TestAddr_lo); |
| } |
| |
| static void WriteL18TestPattern_D(struct DCTStatStruc *pDCTstat, |
| u32 TestAddr_lo) |
| { |
| u8 *buf; |
| |
| buf = (u8 *)pDCTstat->PtrPatternBufA; |
| WriteLNTestPattern(TestAddr_lo, buf, 18); |
| |
| } |
| |
| static void WriteL9TestPattern_D(struct DCTStatStruc *pDCTstat, |
| u32 TestAddr_lo) |
| { |
| u8 *buf; |
| |
| buf = (u8 *)pDCTstat->PtrPatternBufA; |
| WriteLNTestPattern(TestAddr_lo, buf, 9); |
| } |
| |
| static u16 CompareDQSTestPattern_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u32 addr_lo) |
| { |
| /* Compare a pattern of 72 bit times (per DQ), to test dram functionality. |
| * The pattern is a stress pattern which exercises both ISI and |
| * crosstalk. The number of cache lines to fill is dependent on DCT |
| * width mode and burstlength. |
| * Mode BL Lines Pattern no. |
| * ----+---+------------------- |
| * 64 4 9 0 |
| * 64 8 9 0 |
| * 64M 4 9 0 |
| * 64M 8 9 0 |
| * 128 4 18 1 |
| * 128 8 N/A - |
| */ |
| |
| u32 *test_buf; |
| u16 MEn1Results, bitmap; |
| u8 bytelane; |
| u8 i; |
| u32 value; |
| u8 j; |
| u32 value_test; |
| u32 value_r = 0, value_r_test = 0; |
| u8 pattern, channel, BeatCnt; |
| struct DCTStatStruc *ptrAddr; |
| |
| ptrAddr = pDCTstat; |
| pattern = pDCTstat->Pattern; |
| channel = pDCTstat->Channel; |
| test_buf = (u32 *)pDCTstat->PtrPatternBufA; |
| |
| if (pattern && channel) { |
| addr_lo += 8; /* second channel */ |
| test_buf += 2; |
| } |
| |
| bytelane = 0; /* bytelane counter */ |
| bitmap = 0xFFFF; /* bytelane test bitmap, 1=pass */ |
| MEn1Results = 0xFFFF; |
| BeatCnt = 0; |
| for (i = 0; i < (9 * 64 / 4); i++) { /* sizeof testpattern. /4 due to next loop */ |
| value = read32_fs(addr_lo); |
| value_test = *test_buf; |
| |
| print_debug_dqs_pair("\t\t\t\t\t\ttest_buf = ", (u32)test_buf, " value = ", value_test, 7); |
| print_debug_dqs_pair("\t\t\t\t\t\ttaddr_lo = ", addr_lo, " value = ", value, 7); |
| |
| if (pDCTstat->Direction == DQS_READDIR) { |
| if (BeatCnt != 0) { |
| value_r = *test_buf; |
| if (pattern) /* if multi-channel */ |
| value_r_test = read32_fs(addr_lo - 16); |
| else |
| value_r_test = read32_fs(addr_lo - 8); |
| } |
| print_debug_dqs_pair("\t\t\t\t\t\t\ttest_buf = ", (u32)test_buf, " value_r_test = ", value_r, 7); |
| print_debug_dqs_pair("\t\t\t\t\t\t\ttaddr_lo = ", addr_lo, " value_r = ", value_r_test, 7); |
| } |
| |
| for (j = 0; j < (4 * 8); j += 8) { /* go through a 32bit data, on 1 byte step. */ |
| if (((value >> j) & 0xff) != ((value_test >> j) & 0xff)) { |
| bitmap &= ~(1 << bytelane); |
| } |
| |
| if (pDCTstat->Direction == DQS_READDIR) { |
| if (BeatCnt != 0) { |
| if (((value_r >> j) & 0xff) != ((value_r_test >> j) & 0xff)) { |
| MEn1Results &= ~(1 << bytelane); |
| } |
| } |
| } |
| bytelane++; |
| bytelane &= 0x7; |
| } |
| |
| print_debug_dqs("\t\t\t\t\t\tbitmap = ", bitmap, 7); |
| print_debug_dqs("\t\t\t\t\t\tMEn1Results = ", MEn1Results, 7); |
| |
| if (!bitmap) |
| break; |
| |
| if (bytelane == 0){ |
| BeatCnt += 4; |
| if (!(pDCTstat->Status & (1 << SB_128bitmode))) { |
| if (BeatCnt == 8) BeatCnt = 0; /* 8 beat burst */ |
| } else { |
| if (BeatCnt == 4) BeatCnt = 0; /* 4 beat burst */ |
| } |
| if (pattern == 1) { /* dual channel */ |
| addr_lo += 8; /* skip over other channel's data */ |
| test_buf += 2; |
| } |
| } |
| addr_lo += 4; |
| test_buf += 1; |
| } |
| |
| if (pDCTstat->Direction == DQS_READDIR) { |
| bitmap &= 0xFF; |
| bitmap |= MEn1Results << 8; |
| } |
| |
| print_debug_dqs("\t\t\t\t\t\tbitmap = ", bitmap, 6); |
| |
| return bitmap; |
| } |
| |
| static void FlushDQSTestPattern_D(struct DCTStatStruc *pDCTstat, |
| u32 addr_lo) |
| { |
| /* Flush functions in mct_gcc.h */ |
| if (pDCTstat->Pattern == 0){ |
| FlushDQSTestPattern_L9(addr_lo); |
| } else { |
| FlushDQSTestPattern_L18(addr_lo); |
| } |
| } |
| |
| static void SetTargetWTIO_D(u32 TestAddr) |
| { |
| u32 lo, hi; |
| hi = TestAddr >> 24; |
| lo = TestAddr << 8; |
| _WRMSR(0xC0010016, lo, hi); /* IORR0 Base */ |
| hi = 0xFF; |
| lo = 0xFC000800; /* 64MB Mask */ |
| _WRMSR(0xC0010017, lo, hi); /* IORR0 Mask */ |
| } |
| |
| static void ResetTargetWTIO_D(void) |
| { |
| u32 lo, hi; |
| |
| hi = 0; |
| lo = 0; |
| _WRMSR(0xc0010017, lo, hi); /* IORR0 Mask */ |
| } |
| |
| u32 SetUpperFSbase(u32 addr_hi) |
| { |
| /* Set the upper 32-bits of the Base address, 4GB aligned) for the |
| * FS selector. |
| */ |
| u32 lo, hi; |
| u32 addr; |
| lo = 0; |
| hi = addr_hi>>24; |
| addr = FS_Base; |
| _WRMSR(addr, lo, hi); |
| return addr_hi << 8; |
| } |
| |
| void ResetDCTWrPtr_D(u32 dev, u32 index_reg, u32 index) |
| { |
| u32 val; |
| |
| val = Get_NB32_index_wait(dev, index_reg, index); |
| Set_NB32_index_wait(dev, index_reg, index, val); |
| } |
| |
| void mct_TrainDQSPos_D(struct MCTStatStruc *pMCTstat, |
| struct DCTStatStruc *pDCTstatA) |
| { |
| u8 Node; |
| u8 ChipSel; |
| struct DCTStatStruc *pDCTstat; |
| |
| for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) { |
| pDCTstat = pDCTstatA + Node; |
| if (pDCTstat->DCTSysLimit) { |
| TrainDQSRdWrPos_D(pMCTstat, pDCTstat); |
| for (ChipSel = 0; ChipSel < MAX_CS_SUPPORTED; ChipSel += 2) { |
| SetEccDQSRdWrPos_D(pMCTstat, pDCTstat, ChipSel); |
| } |
| } |
| } |
| } |
| |
| /* mct_BeforeTrainDQSRdWrPos_D |
| * Function is inline. |
| */ |
| u8 mct_DisableDimmEccEn_D(struct MCTStatStruc *pMCTstat, |
| struct DCTStatStruc *pDCTstat) |
| { |
| u8 _DisableDramECC = 0; |
| u32 val; |
| u32 reg; |
| u32 dev; |
| |
| /*Disable ECC correction of reads on the dram bus. */ |
| |
| dev = pDCTstat->dev_dct; |
| reg = 0x90; |
| val = Get_NB32(dev, reg); |
| if (val & (1<<DimmEcEn)) { |
| _DisableDramECC |= 0x01; |
| val &= ~(1<<DimmEcEn); |
| Set_NB32(dev, reg, val); |
| } |
| if (!pDCTstat->GangedMode) { |
| reg = 0x190; |
| val = Get_NB32(dev, reg); |
| if (val & (1<<DimmEcEn)) { |
| _DisableDramECC |= 0x02; |
| val &= ~(1<<DimmEcEn); |
| Set_NB32(dev, reg, val); |
| } |
| } |
| return _DisableDramECC; |
| } |
| |
| void mct_EnableDimmEccEn_D(struct MCTStatStruc *pMCTstat, |
| struct DCTStatStruc *pDCTstat, u8 _DisableDramECC) |
| { |
| u32 val; |
| u32 reg; |
| u32 dev; |
| |
| /* Enable ECC correction if it was previously disabled */ |
| |
| dev = pDCTstat->dev_dct; |
| |
| if ((_DisableDramECC & 0x01) == 0x01) { |
| reg = 0x90; |
| val = Get_NB32(dev, reg); |
| val |= (1<<DimmEcEn); |
| Set_NB32(dev, reg, val); |
| } |
| if ((_DisableDramECC & 0x02) == 0x02) { |
| reg = 0x190; |
| val = Get_NB32(dev, reg); |
| val |= (1<<DimmEcEn); |
| Set_NB32(dev, reg, val); |
| } |
| } |
| |
| /* |
| * Set DQS delay value to related register |
| */ |
| static void mct_SetDQSDelayCSR_D(struct MCTStatStruc *pMCTstat, |
| struct DCTStatStruc *pDCTstat, u8 ChipSel) |
| { |
| u8 ByteLane; |
| u32 val; |
| u32 index_reg = 0x98 + 0x100 * pDCTstat->Channel; |
| u8 shift; |
| u32 dqs_delay = (u32)pDCTstat->DQSDelay; |
| u32 dev = pDCTstat->dev_dct; |
| u32 index; |
| |
| ByteLane = pDCTstat->ByteLane; |
| |
| if (!(pDCTstat->DqsRdWrPos_Saved & (1 << ByteLane))) { |
| /* Channel is offset */ |
| if (ByteLane < 4) { |
| index = 1; |
| } else if (ByteLane <8) { |
| index = 2; |
| } else { |
| index = 3; |
| } |
| |
| if (pDCTstat->Direction == DQS_READDIR) { |
| index += 4; |
| } |
| |
| /* get the proper register index */ |
| shift = ByteLane % 4; |
| shift <<= 3; /* get bit position of bytelane, 8 bit */ |
| |
| index += (ChipSel>>1) << 8; |
| |
| val = Get_NB32_index_wait(dev, index_reg, index); |
| if (ByteLane < 8) { |
| if (pDCTstat->Direction == DQS_WRITEDIR) { |
| dqs_delay += pDCTstat->CH_D_B_TxDqs[pDCTstat->Channel][ChipSel>>1][ByteLane]; |
| } else { |
| dqs_delay <<= 1; |
| } |
| } |
| val &= ~(0x7f << shift); |
| val |= (dqs_delay << shift); |
| Set_NB32_index_wait(dev, index_reg, index, val); |
| } |
| } |
| |
| u8 mct_RcvrRankEnabled_D(struct MCTStatStruc *pMCTstat, |
| struct DCTStatStruc *pDCTstat, |
| u8 Channel, u8 ChipSel) |
| { |
| u8 ret; |
| |
| ret = ChipSelPresent_D(pMCTstat, pDCTstat, Channel, ChipSel); |
| return ret; |
| } |
| |
| u32 mct_GetRcvrSysAddr_D(struct MCTStatStruc *pMCTstat, |
| struct DCTStatStruc *pDCTstat, |
| u8 channel, u8 receiver, u8 *valid) |
| { |
| return mct_GetMCTSysAddr_D(pMCTstat, pDCTstat, channel, receiver, valid); |
| } |
| |
| u32 mct_GetMCTSysAddr_D(struct MCTStatStruc *pMCTstat, |
| struct DCTStatStruc *pDCTstat, |
| u8 Channel, u8 receiver, u8 *valid) |
| { |
| u32 val; |
| u32 reg_off = 0; |
| u32 reg; |
| u32 dword; |
| u32 dev = pDCTstat->dev_dct; |
| |
| *valid = 0; |
| |
| |
| if (!pDCTstat->GangedMode) { |
| reg_off = 0x100 * Channel; |
| } |
| |
| /* get the local base addr of the chipselect */ |
| reg = 0x40 + (receiver << 2) + reg_off; |
| val = Get_NB32(dev, reg); |
| |
| val &= ~0xe007c01f; |
| |
| /* unganged mode DCT0+DCT1, sys addr of DCT1=node |
| * base+DctSelBaseAddr+local ca base*/ |
| if ((Channel) && (pDCTstat->GangedMode == 0) && ( pDCTstat->DIMMValidDCT[0] > 0)) { |
| reg = 0x110; |
| dword = Get_NB32(dev, reg); |
| dword &= 0xfffff800; |
| dword <<= 8; /* scale [47:27] of F2x110[31:11] to [39:8]*/ |
| val += dword; |
| |
| /* if DCTSelBaseAddr < Hole, and eax > HoleBase, then add Hole size to test address */ |
| if ((val >= pDCTstat->DCTHoleBase) && (pDCTstat->DCTHoleBase > dword)) { |
| dword = (~(pDCTstat->DCTHoleBase >> (24 - 8)) + 1) & 0xFF; |
| dword <<= (24 - 8); |
| val += dword; |
| } |
| } else { |
| /* sys addr=node base+local cs base */ |
| val += pDCTstat->DCTSysBase; |
| |
| /* New stuff */ |
| if (pDCTstat->DCTHoleBase && (val >= pDCTstat->DCTHoleBase)) { |
| val -= pDCTstat->DCTSysBase; |
| dword = Get_NB32(pDCTstat->dev_map, 0xF0); /* get Hole Offset */ |
| val += (dword & 0x0000ff00) << (24-8-8); |
| } |
| } |
| |
| /* New stuff */ |
| val += ((1 << 21) >> 8); /* Add 2MB offset to avoid compat area */ |
| if (val >= MCT_TRNG_KEEPOUT_START) { |
| while(val < MCT_TRNG_KEEPOUT_END) |
| val += (1 << (15-8)); /* add 32K */ |
| } |
| |
| /* Add a node seed */ |
| val += (((1 * pDCTstat->Node_ID) << 20) >> 8); /* Add 1MB per node to avoid aliases */ |
| |
| /* HW remap disabled? */ |
| if (!(pDCTstat->Status & (1 << SB_HWHole))) { |
| if (!(pDCTstat->Status & (1 << SB_SWNodeHole))) { |
| /* SW memhole disabled */ |
| u32 lo, hi; |
| _RDMSR(TOP_MEM, &lo, &hi); |
| lo >>= 8; |
| if ((val >= lo) && (val < _4GB_RJ8)) { |
| val = 0; |
| *valid = 0; |
| goto exitGetAddr; |
| } else { |
| *valid = 1; |
| goto exitGetAddrWNoError; |
| } |
| } else { |
| *valid = 1; |
| goto exitGetAddrWNoError; |
| } |
| } else { |
| *valid = 1; |
| goto exitGetAddrWNoError; |
| } |
| |
| exitGetAddrWNoError: |
| |
| /* Skip if Address is in UMA region */ |
| dword = pMCTstat->Sub4GCacheTop; |
| dword >>= 8; |
| if (dword != 0) { |
| if ((val >= dword) && (val < _4GB_RJ8)) { |
| val = 0; |
| *valid = 0; |
| } else { |
| *valid = 1; |
| } |
| } |
| print_debug_dqs("mct_GetMCTSysAddr_D: receiver ", receiver, 2); |
| print_debug_dqs("mct_GetMCTSysAddr_D: Channel ", Channel, 2); |
| print_debug_dqs("mct_GetMCTSysAddr_D: base_addr ", val, 2); |
| print_debug_dqs("mct_GetMCTSysAddr_D: valid ", *valid, 2); |
| print_debug_dqs("mct_GetMCTSysAddr_D: status ", pDCTstat->Status, 2); |
| print_debug_dqs("mct_GetMCTSysAddr_D: SysBase ", pDCTstat->DCTSysBase, 2); |
| print_debug_dqs("mct_GetMCTSysAddr_D: HoleBase ", pDCTstat->DCTHoleBase, 2); |
| print_debug_dqs("mct_GetMCTSysAddr_D: Cachetop ", pMCTstat->Sub4GCacheTop, 2); |
| |
| exitGetAddr: |
| return val; |
| } |
| |
| static void mct_Write1LTestPattern_D(struct MCTStatStruc *pMCTstat, |
| struct DCTStatStruc *pDCTstat, |
| u32 TestAddr, u8 pattern) |
| { |
| |
| u8 *buf; |
| |
| /* Issue the stream of writes. When F2x11C[MctWrLimit] is reached |
| * (or when F2x11C[FlushWr] is set again), all the writes are written |
| * to DRAM. |
| */ |
| |
| SetUpperFSbase(TestAddr); |
| |
| if (pattern) |
| buf = (u8 *)pDCTstat->PtrPatternBufB; |
| else |
| buf = (u8 *)pDCTstat->PtrPatternBufA; |
| |
| WriteLNTestPattern(TestAddr << 8, buf, 1); |
| } |
| |
| void mct_Read1LTestPattern_D(struct MCTStatStruc *pMCTstat, |
| struct DCTStatStruc *pDCTstat, u32 addr) |
| { |
| u32 value; |
| |
| /* BIOS issues the remaining (Ntrain - 2) reads after checking that |
| * F2x11C[PrefDramTrainMode] is cleared. These reads must be to |
| * consecutive cache lines (i.e., 64 bytes apart) and must not cross |
| * a naturally aligned 4KB boundary. These reads hit the prefetches and |
| * read the data from the prefetch buffer. |
| */ |
| |
| /* get data from DIMM */ |
| SetUpperFSbase(addr); |
| |
| /* 1st move causes read fill (to exclusive or shared)*/ |
| value = read32_fs(addr << 8); |
| } |