blob: 7378391507dbd91689e5ba8bebb32d638da40e99 [file] [log] [blame]
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include <device/mmio.h>
#include <console/console.h>
#include <delay.h>
#include <string.h>
#include <types.h>
#include "x4x.h"
#include "iomap.h"
/*
 * Log one DLL setting in the form "coarse.clk_delay.tap.pi:db_en.db_sel".
 *
 * A non-zero default_verbose selects the BIOS_DEBUG log level, otherwise the
 * line is emitted at RAM_DEBUG.
 */
static void print_dll_setting(const struct dll_setting *dll_setting,
			u8 default_verbose)
{
	u8 level;

	if (default_verbose)
		level = BIOS_DEBUG;
	else
		level = RAM_DEBUG;

	printk(level, "%d.%d.%d.%d:%d.%d\n",
		dll_setting->coarse, dll_setting->clk_delay,
		dll_setting->tap, dll_setting->pi,
		dll_setting->db_en, dll_setting->db_sel);
}
/*
 * Two (tap, pi) thresholds that split the delay range into three regions,
 * each of which gets its own db_en/db_sel combination in set_db().
 */
struct db_limit {
	u8 tap0;
	u8 tap1;
	u8 pi0;
	u8 pi1;
};

/*
 * Derive db_en and db_sel of a DQ/DQS DLL setting from its tap and pi values.
 *
 * The thresholds depend on the selected memory clock; unknown clocks use the
 * 800MHz thresholds. Only db_en and db_sel of *dq_dqs_setting are written.
 */
static void set_db(const struct sysinfo *s, struct dll_setting *dq_dqs_setting)
{
	struct dll_setting *const d = dq_dqs_setting;
	struct db_limit limit;

	switch (s->selected_timings.mem_clk) {
	case MEM_CLOCK_1066MHz:
		limit = (struct db_limit){ .tap0 = 2, .tap1 = 8, .pi0 = 6, .pi1 = 7 };
		break;
	case MEM_CLOCK_1333MHz:
		/* TO CHECK: pi0/pi1 might be reversed since this makes little sense */
		limit = (struct db_limit){ .tap0 = 3, .tap1 = 11, .pi0 = 6, .pi1 = 4 };
		break;
	case MEM_CLOCK_800MHz:
	default:
		limit = (struct db_limit){ .tap0 = 3, .tap1 = 10, .pi0 = 2, .pi1 = 3 };
		break;
	}

	/* (tap, pi) is compared against each threshold, tap first and then pi. */
	const int below_first = (d->tap < limit.tap0)
		|| ((d->tap == limit.tap0) && (d->pi < limit.pi0));
	const int below_second = (d->tap < limit.tap1)
		|| ((d->tap == limit.tap1) && (d->pi < limit.pi1));

	if (below_first) {
		d->db_en = 1;
		d->db_sel = 1;
	} else if (below_second) {
		d->db_en = 0;
		d->db_sel = 0;
	} else {
		d->db_en = 1;
		d->db_sel = 0;
	}
}
/*
 * Highest tap value per memory clock, indexed by (mem_clk - MEM_CLOCK_800MHz).
 * There is no bounds check on that index: callers rely on mem_clk being one of
 * the three clocks covered by this table.
 */
static const u8 max_tap[3] = {12, 10, 13};

/*
 * Advance a DQ/DQS DLL setting by one step.
 *
 * The setting behaves like a multi-digit counter: pi (0..6) is the lowest
 * digit, then tap (0..max_tap), then clk_delay (0..2). Once all three are at
 * their maximum and coarse is still 0, coarse is raised to 1 and clk_delay is
 * lowered by one. db_en/db_sel are recomputed after every successful step.
 *
 * Returns CB_SUCCESS, or CB_ERR (setting untouched) when no further step is
 * possible.
 */
static int increment_dq_dqs(const struct sysinfo *s,
			struct dll_setting *dq_dqs_setting)
{
	struct dll_setting *const d = dq_dqs_setting;
	const u8 tap_limit = max_tap[s->selected_timings.mem_clk
				     - MEM_CLOCK_800MHz];

	if (d->pi < 6) {
		d->pi++;
	} else {
		/* pi is about to wrap: carry into the next higher field. */
		if (d->tap < tap_limit) {
			d->tap++;
		} else if (d->clk_delay < 2) {
			d->tap = 0;
			d->clk_delay++;
		} else if (d->coarse < 1) {
			d->tap = 0;
			d->clk_delay--;
			d->coarse++;
		} else {
			return CB_ERR;
		}
		d->pi = 0;
	}

	set_db(s, d);
	return CB_SUCCESS;
}
static int decrement_dq_dqs(const struct sysinfo *s,
struct dll_setting *dq_dqs_setting)
{
u8 max_tap_val = max_tap[s->selected_timings.mem_clk
- MEM_CLOCK_800MHz];
if (dq_dqs_setting->pi > 0) {
dq_dqs_setting->pi -= 1;
} else if (dq_dqs_setting->tap > 0) {
dq_dqs_setting->pi = 6;
dq_dqs_setting->tap -= 1;
} else if (dq_dqs_setting->clk_delay > 0) {
dq_dqs_setting->pi = 6;
dq_dqs_setting->tap = max_tap_val;
dq_dqs_setting->clk_delay -= 1;
} else if (dq_dqs_setting->coarse > 0) {
dq_dqs_setting->pi = 6;
dq_dqs_setting->tap = max_tap_val;
dq_dqs_setting->clk_delay += 1;
dq_dqs_setting->coarse -= 1;
} else {
return CB_ERR;
}
set_db(s, dq_dqs_setting);
return CB_SUCCESS;
}
/*
 * Data patterns used by DQ write training.
 *
 * test_dq_aligned() writes every word of this table to the test address of a
 * rank and reads the words back one by one. Every word consists of four
 * identical bytes.
 */
#define WT_PATTERN_SIZE 80
static const u32 write_training_schedule[WT_PATTERN_SIZE] = {
0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
0x03030303, 0x04040404, 0x09090909, 0x10101010,
0x21212121, 0x40404040, 0x81818181, 0x00000000,
0x03030303, 0x04040404, 0x09090909, 0x10101010,
0x21212121, 0x40404040, 0x81818181, 0x00000000,
0xfdfdfdfd, 0xfafafafa, 0xf7f7f7f7, 0xeeeeeeee,
0xdfdfdfdf, 0xbebebebe, 0x7f7f7f7f, 0xfefefefe,
0xfdfdfdfd, 0xfafafafa, 0xf7f7f7f7, 0xeeeeeeee,
0xdfdfdfdf, 0xbebebebe, 0x7f7f7f7f, 0xfefefefe,
};
/*
 * Result a limit search is looking for.
 *
 * The values are compared directly against the per-bytelane error bit
 * returned by the test functions, so SUCCEEDING must stay 0 (error bit clear)
 * and FAILING must stay 1 (error bit set).
 */
enum training_modes {
SUCCEEDING = 0,
FAILING = 1
};
/*
 * Run the write-training pattern test on every populated rank of a channel.
 *
 * For each entry of write_training_schedule the complete schedule is first
 * written to the rank's test address, then that single entry is read back and
 * compared bytelane by bytelane.
 *
 * Returns a bitmask in which bit N is set when bytelane N read back wrong data
 * at least once on any rank.
 */
static u8 test_dq_aligned(const struct sysinfo *s,
			const u8 channel)
{
	int rank, lane;
	u8 check_idx, fill_idx;
	u8 readback[8];
	u8 failed_lanes = 0;

	FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, channel, rank) {
		const u32 address = test_address(channel, rank);
		u32 *const base = (u32 *)address;

		for (check_idx = 0; check_idx < WT_PATTERN_SIZE; check_idx++) {
			/* Rewrite the whole schedule before each read-back. */
			for (fill_idx = 0; fill_idx < WT_PATTERN_SIZE; fill_idx++) {
				u32 *const dst = base + 8 * fill_idx;

				if ((fill_idx % 16) == 0)
					MCHBAR32(0xf90) = 1;

				const u32 pattern = write_training_schedule[fill_idx];
				write32(dst, pattern);
				write32(dst + 4, pattern);
			}

			const u32 good = write_training_schedule[check_idx];
			u32 *const src = base + 8 * check_idx;

			/* Lanes 0-3 come from the first word, lanes 4-7 from the second. */
			write32(&readback[0], read32(src));
			write32(&readback[4], read32(src + 4));

			FOR_EACH_BYTELANE(lane) {
				const u8 expected = (good >> ((lane % 4) * 8)) & 0xff;

				if (readback[lane] != expected)
					failed_lanes |= 1 << lane;
			}
		}
	}
	return failed_lanes;
}
#define CONSISTENCY 10
/*
 * Step the DQ delay of every bytelane until all lanes show the expected
 * result (failing or succeeding writes).
 *
 * A lane that shows the expected result is still stepped CONSISTENCY more
 * times to make sure the result is stable; only then is it considered done.
 * A lane that shows the wrong result is always stepped, even if it had been
 * considered done earlier. This is probably done because lanes cannot be
 * trained independently from each other.
 *
 * dq_setting[] is advanced and programmed in place, dq_lim[] counts the steps
 * taken per lane. Returns CB_ERR as soon as a lane cannot be stepped further.
 */
static int find_dq_limit(const struct sysinfo *s, const u8 channel,
			struct dll_setting dq_setting[TOTAL_BYTELANES],
			u8 dq_lim[TOTAL_BYTELANES],
			const enum training_modes expected_result)
{
	int lane;
	u8 pass_count[TOTAL_BYTELANES];
	u8 pending_lanes = 0xff;

	printk(RAM_DEBUG, "Looking for %s writes on channel %d\n",
		expected_result == FAILING ? "failing" : "succeeding", channel);

	memset(pass_count, 0, sizeof(pass_count));

	while (pending_lanes) {
		const u8 lane_errors = test_dq_aligned(s, channel);

		FOR_EACH_BYTELANE(lane) {
			const int as_expected =
				((lane_errors >> lane) & 1) == expected_result;

			/* Stable for long enough: this lane is done. */
			if (as_expected && pass_count[lane] >= CONSISTENCY) {
				pending_lanes &= ~(1 << lane);
				continue;
			}

			if (as_expected)
				pass_count[lane]++;

			const int status = increment_dq_dqs(s, &dq_setting[lane]);
			dqset(channel, lane, &dq_setting[lane]);
			dq_lim[lane]++;

			if (status == CB_ERR) {
				printk(BIOS_CRIT, "Could not find a case of %s "
					"writes on CH%d, lane %d\n",
					expected_result == FAILING ? "failing"
					: "succeeding", channel, lane);
				return CB_ERR;
			}
		}
	}
	return CB_SUCCESS;
}
/*
 * Find the ideal DQ delay to account for the skew between the DQ and the DQS
 * signal.
 *
 * The training works this way:
 * - start from the DQS delay values (DQ is always later than DQS)
 * - increment the DQ delay until a succeeding write is found on all bytelanes,
 *   on all ranks of a channel, and remember the number of steps
 * - keep incrementing the DQ delay until writes start to fail on all bytelanes
 *   and remember that number of steps as well
 * - use the mean of the two step counts
 * - note: bytelanes cannot be trained independently, so the delays need to be
 *   adjusted and tested for all of them at the same time
 *
 * Returns CB_SUCCESS, or CB_ERR when one of the two limits cannot be found.
 */
int do_write_training(struct sysinfo *s)
{
	int step;
	u8 channel, lane;
	u8 dq_lower[TOTAL_BYTELANES];
	u8 dq_upper[TOTAL_BYTELANES];
	struct dll_setting dq_setting[TOTAL_BYTELANES];

	printk(BIOS_DEBUG, "Starting DQ write training\n");

	FOR_EACH_POPULATED_CHANNEL(s->dimms, channel) {
		printk(BIOS_DEBUG, "Doing DQ write training on CH%d\n", channel);

		/* Hardware and stored DQ settings both start at the DQS values */
		FOR_EACH_BYTELANE(lane) {
			struct dll_setting *dqs = &s->dqs_settings[channel][lane];

			dqset(channel, lane, dqs);
			s->dq_settings[channel][lane] = *dqs;
		}

		/* The search itself works on a scratch copy of the DQS settings */
		memcpy(dq_setting, s->dqs_settings[channel], sizeof(dq_setting));
		memset(dq_lower, 0, sizeof(dq_lower));

		if (find_dq_limit(s, channel, dq_setting, dq_lower,
					SUCCEEDING)) {
			printk(BIOS_CRIT,
				"Could not find working lower limit DQ setting\n");
			return CB_ERR;
		}

		/* The upper limit search continues where the lower one stopped */
		memcpy(dq_upper, dq_lower, sizeof(dq_lower));

		if (find_dq_limit(s, channel, dq_setting, dq_upper,
					FAILING)) {
			printk(BIOS_WARNING,
				"Could not find failing upper limit DQ setting\n");
			return CB_ERR;
		}

		FOR_EACH_BYTELANE(lane) {
			/* Take back the extra steps done for the consistency check */
			const u8 lower = dq_lower[lane] - (CONSISTENCY - 1);
			const u8 upper = dq_upper[lane] - (CONSISTENCY - 1);
			const u8 dq_center = (upper + lower) / 2;

			printk(RAM_DEBUG, "Centered value for DQ DLL:"
				" ch%d, lane %d, #steps = %d\n",
				channel, lane, dq_center);

			/* Advance the stored DQ setting (still at DQS) to the center */
			for (step = 0; step < dq_center; step++) {
				/* Should never happen */
				if (increment_dq_dqs(s, &s->dq_settings[channel][lane])
						== CB_ERR)
					printk(BIOS_ERR,
						"Huh? write training overflowed!!\n");
			}
		}

		/* Program the centered DQ DLL settings */
		printk(BIOS_DEBUG, "Final DQ timings on CH%d\n", channel);
		FOR_EACH_BYTELANE(lane) {
			printk(BIOS_DEBUG, "\tlane%d: ", lane);
			print_dll_setting(&s->dq_settings[channel][lane], 1);
			dqset(channel, lane, &s->dq_settings[channel][lane]);
		}
	}

	printk(BIOS_DEBUG, "Done DQ write training\n");
	return CB_SUCCESS;
}
/*
 * Data patterns used by DQS read training.
 *
 * do_read_training() writes every word of this table to the test address of
 * each populated rank; test_dqs_aligned() reads them back and compares each
 * bytelane against the low byte of the word. Every word consists of four
 * identical bytes.
 */
#define RT_PATTERN_SIZE 40
static const u32 read_training_schedule[RT_PATTERN_SIZE] = {
0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
0x03030303, 0x04040404, 0x09090909, 0x10101010,
0x21212121, 0x40404040, 0x81818181, 0x00000000,
0xfdfdfdfd, 0xfafafafa, 0xf7f7f7f7, 0xeeeeeeee,
0xdfdfdfdf, 0xbebebebe, 0x7f7f7f7f, 0xfefefefe
};
/*
 * Advance a read-training DQS setting by one step.
 *
 * pi counts from 0 to 7 and then carries into tap, which counts up to 14.
 * Returns CB_SUCCESS, or CB_ERR (setting untouched) when both fields are
 * already at their maximum.
 */
static int rt_increment_dqs(struct rt_dqs_setting *setting)
{
	if (setting->pi < 7) {
		setting->pi++;
		return CB_SUCCESS;
	}

	if (setting->tap < 14) {
		setting->tap++;
		setting->pi = 0;
		return CB_SUCCESS;
	}

	return CB_ERR;
}
/*
 * Read back the read-training patterns from every populated rank of a channel
 * and compare them with read_training_schedule.
 *
 * The patterns must already have been written to the test address by the
 * caller. Every bytelane is compared against the low byte of the expected
 * word.
 *
 * Returns a bitmask in which bit N is set when bytelane N read back wrong data
 * at least once on any rank.
 */
static u8 test_dqs_aligned(const struct sysinfo *s, const u8 channel)
{
	int idx, rank, lane;
	volatile u8 readback[8];
	u8 failed_lanes = 0;

	FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, channel, rank) {
		const u32 address = test_address(channel, rank);
		u32 *const base = (u32 *)address;

		for (idx = 0; idx < RT_PATTERN_SIZE; idx++) {
			const u32 good = read_training_schedule[idx];
			u32 *const src = base + idx * 8;

			/* Lanes 0-3 come from the first word, lanes 4-7 from the second. */
			write32(&readback[0], read32(src));
			write32(&readback[4], read32(src + 4));

			FOR_EACH_BYTELANE(lane) {
				if (readback[lane] != (good & 0xff))
					failed_lanes |= 1 << lane;
			}
		}
	}
	return failed_lanes;
}
/*
 * Step the read DQS delay of every bytelane until all lanes show the expected
 * result (succeeding or failing reads).
 *
 * dqs_setting[] is programmed for rank 0 and then advanced in place;
 * dqs_lim[] is increased by one for every step that was actually applied to a
 * lane, so it always matches the distance travelled by dqs_setting[].
 *
 * A lane that has reached its maximum delay is left where it is while the
 * remaining lanes keep being stepped. The search ends when the test shows the
 * expected result on all lanes, or when no lane that still needs stepping can
 * be advanced any further.
 *
 * Returns CB_SUCCESS when the expected result was reached. When the range is
 * exhausted, looking for SUCCEEDING reads is an error (CB_ERR), while looking
 * for FAILING reads is tolerated (CB_SUCCESS): reads then work over the whole
 * remaining range.
 */
static int rt_find_dqs_limit(struct sysinfo *s, u8 channel,
			struct rt_dqs_setting dqs_setting[TOTAL_BYTELANES],
			u8 dqs_lim[TOTAL_BYTELANES],
			const enum training_modes expected_result)
{
	int lane;
	int advanced;
	u8 test_result;
	/* Error mask that means "every lane shows the expected result" */
	const u8 done_mask = (expected_result == SUCCEEDING) ? 0 : 0xff;

	FOR_EACH_BYTELANE(lane)
		rt_set_dqs(channel, lane, 0, &dqs_setting[lane]);

	do {
		test_result = test_dqs_aligned(s, channel);
		if (test_result == done_mask)
			return CB_SUCCESS;

		advanced = 0;
		FOR_EACH_BYTELANE(lane) {
			/* This lane already shows the expected result */
			if (((test_result >> lane) & 1) == expected_result)
				continue;

			/*
			 * A saturated lane must not be counted: its setting did
			 * not change, and it must not hide the progress (or
			 * lack thereof) of the other lanes.
			 */
			if (rt_increment_dqs(&dqs_setting[lane]) != CB_SUCCESS)
				continue;

			dqs_lim[lane]++;
			rt_set_dqs(channel, lane, 0, &dqs_setting[lane]);
			advanced = 1;
		}
	} while (advanced);

	if (expected_result == SUCCEEDING) {
		printk(BIOS_CRIT,
			"Could not find RT DQS setting\n");
		return CB_ERR;
	}

	printk(RAM_DEBUG,
		"Read succeeded over all DQS"
		" settings, continuing\n");
	return CB_SUCCESS;
}
#define RT_LOOPS 3
/*
 * Find the ideal delay for DQS on reads (rx).
 *
 * The training works this way:
 * - write the test patterns to every populated rank
 * - start from the lowest possible delay (0) on all bytelanes
 * - increment the DQS rx delays until a succeeding read is found on all
 *   bytelanes, on all ranks of a channel, and remember the number of steps
 * - keep incrementing the DQS rx delay until reads start to fail on all
 *   bytelanes and remember that number of steps as well
 * - use the mean of the two step counts
 * - repeat the above RT_LOOPS times and average the results
 * - note0: bytelanes cannot be trained independently, so the delays need to be
 *   adjusted and tested for all of them at the same time
 * - note1: At this stage all ranks effectively use rank0's rt_dqs settings,
 *   but later on their respective settings are used (TODO where is the
 *   'switch' register??). So the results are programmed for all ranks at the
 *   end of the training. Programming all ranks instead of only the populated
 *   ones seems to be required, most likely because the signals can't really
 *   be generated separately.
 *
 * Returns CB_SUCCESS, or CB_ERR when a limit search fails.
 */
int do_read_training(struct sysinfo *s)
{
	int loop, channel, i, lane, rank;
	u32 address, content;
	u8 dqs_lower[TOTAL_BYTELANES];
	u8 dqs_upper[TOTAL_BYTELANES];
	struct rt_dqs_setting dqs_setting[TOTAL_BYTELANES];
	/* Sum of the per-loop centers; turned into the average further down */
	u16 saved_dqs_center[TOTAL_CHANNELS][TOTAL_BYTELANES];

	memset(saved_dqs_center, 0, sizeof(saved_dqs_center));

	printk(BIOS_DEBUG, "Starting DQS read training\n");

	for (loop = 0; loop < RT_LOOPS; loop++) {
		FOR_EACH_POPULATED_CHANNEL(s->dimms, channel) {
			printk(RAM_DEBUG, "Doing DQS read training on CH%d\n",
				channel);

			/* Write pattern to strobe address */
			FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, channel, rank) {
				address = test_address(channel, rank);
				for (i = 0; i < RT_PATTERN_SIZE; i++) {
					content = read_training_schedule[i];
					write32((u32 *)address + 8 * i, content);
					write32((u32 *)address + 8 * i + 4, content);
				}
			}

			/* Both searches start from delay 0 with zero steps taken */
			memset(dqs_lower, 0, sizeof(dqs_lower));
			memset(&dqs_setting, 0, sizeof(dqs_setting));
			if (rt_find_dqs_limit(s, channel, dqs_setting, dqs_lower,
						SUCCEEDING)) {
				printk(BIOS_CRIT,
					"Could not find working lower limit DQS setting\n");
				return CB_ERR;
			}

			/* The upper limit search continues where the lower one stopped */
			FOR_EACH_BYTELANE(lane)
				dqs_upper[lane] = dqs_lower[lane];

			if (rt_find_dqs_limit(s, channel, dqs_setting, dqs_upper,
						FAILING)) {
				printk(BIOS_CRIT,
					"Could not find failing upper limit DQS setting\n");
				return CB_ERR;
			}

			printk(RAM_DEBUG, "Centered values, loop %d:\n", loop);
			FOR_EACH_BYTELANE(lane) {
				u8 center = (dqs_lower[lane] + dqs_upper[lane]) / 2;
				printk(RAM_DEBUG, "\t lane%d: #%d\n", lane, center);
				saved_dqs_center[channel][lane] += center;
			}
		} /* END FOR_EACH_POPULATED_CHANNEL */
	} /* end RT_LOOPS */

	memset(s->rt_dqs, 0, sizeof(s->rt_dqs));

	FOR_EACH_POPULATED_CHANNEL(s->dimms, channel) {
		printk(BIOS_DEBUG, "Final timings on CH%d:\n", channel);
		FOR_EACH_BYTELANE(lane) {
			/* Average over all loops, then step from 0 up to that value */
			saved_dqs_center[channel][lane] /= RT_LOOPS;
			while (saved_dqs_center[channel][lane]--) {
				if (rt_increment_dqs(&s->rt_dqs[channel][lane])
						== CB_ERR)
					/* Should never happen */
					printk(BIOS_ERR,
						"Huh? read training overflowed!!\n");
			}
			/* Later on separate settings for each rank are used so program
			   all of them */
			FOR_EACH_RANK_IN_CHANNEL(rank)
				rt_set_dqs(channel, lane, rank,
					&s->rt_dqs[channel][lane]);
			printk(BIOS_DEBUG, "\tlane%d: %d.%d\n",
				lane, s->rt_dqs[channel][lane].tap,
				s->rt_dqs[channel][lane].pi);
		}
	}
	printk(BIOS_DEBUG, "Done DQS read training\n");
	return CB_SUCCESS;
}
/*
 * Send an EMRS1 command to one rank to enter or leave write leveling mode.
 *
 * With wl_enable set, the ODT value for config_rank is taken from emrs1_lut,
 * write leveling is enabled if config_rank is the rank being leveled
 * (target_rank) and the output is disabled on every other rank.
 * With wl_enable clear, the regular ODT value from
 * ddr3_emrs1_rtt_nom_config is used and neither of these bits is set.
 *
 * s:           raminit state, provides dimm_config[] for the normal ODT value
 * channel:     channel the command is sent on
 * config:      index of the channel's DIMM/rank configuration in emrs1_lut
 * config_rank: rank that receives the EMRS1 command
 * target_rank: rank that is being write leveled
 * wl_enable:   non-zero to enter write leveling mode, zero to leave it
 */
static void set_rank_write_level(struct sysinfo *s, u8 channel, u8 config,
u8 config_rank, u8 target_rank, int wl_enable)
{
u32 emrs1;
/* Stored shifted right by 2 bits so that u8 can be used to reduce size;
   the value is shifted back left by 2 before it is sent */
static const u8 emrs1_lut[8][4][4] = { /* [Config][Leveling Rank][Rank] */
{ /* Config 0: 2R2R */
{0x11, 0x00, 0x91, 0x00},
{0x00, 0x11, 0x91, 0x00},
{0x91, 0x00, 0x11, 0x00},
{0x91, 0x00, 0x00, 0x11}
},
{ // Config 1: 2R1R
{0x11, 0x00, 0x91, 0x00},
{0x00, 0x11, 0x91, 0x00},
{0x91, 0x00, 0x11, 0x00},
{0x00, 0x00, 0x00, 0x00}
},
{ // Config 2: 1R2R
{0x11, 0x00, 0x91, 0x00},
{0x00, 0x00, 0x00, 0x00},
{0x91, 0x00, 0x11, 0x00},
{0x91, 0x00, 0x00, 0x11}
},
{ // Config 3: 1R1R
{0x11, 0x00, 0x91, 0x00},
{0x00, 0x00, 0x00, 0x00},
{0x91, 0x00, 0x11, 0x00},
{0x00, 0x00, 0x00, 0x00}
},
{ // Config 4: 2R0R
{0x11, 0x00, 0x00, 0x00},
{0x00, 0x11, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00}
},
{ // Config 5: 0R2R
{0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x11, 0x00},
{0x00, 0x00, 0x00, 0x11}
},
{ // Config 6: 1R0R
{0x11, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00}
},
{ // Config 7: 0R1R
{0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x11, 0x00},
{0x00, 0x00, 0x00, 0x00}
}
};
if (wl_enable) {
printk(RAM_DEBUG, "Entering WL mode\n");
printk(RAM_DEBUG, "Using WL ODT values\n");
emrs1 = emrs1_lut[config][target_rank][config_rank];
} else {
printk(RAM_DEBUG, "Exiting WL mode\n");
emrs1 = ddr3_emrs1_rtt_nom_config[s->dimm_config[channel]][config_rank];
}
/* Debug output only: decode the (still unshifted) ODT value.
   NOTE(review): 0x91, which emrs1_lut uses for the non-leveled DIMM, has no
   case below and is therefore reported as "ODT value Undefined!" - confirm
   whether a case is missing or the table value is intentional. */
printk(RAM_DEBUG, "Setting ODT for rank%d to ", config_rank);
switch (emrs1) {
case 0:
printk(RAM_DEBUG, "High-Z\n");
break;
case 0x11:
printk(RAM_DEBUG, "40 Ohm\n");
break;
case 0x81:
printk(RAM_DEBUG, "30 Ohm\n");
break;
case 0x80:
printk(RAM_DEBUG, "20 Ohm\n");
break;
case 0x10:
printk(RAM_DEBUG, "120 Ohm\n");
break;
case 0x01:
printk(RAM_DEBUG, "60 Ohm\n");
break;
default:
printk(BIOS_WARNING, "ODT value Undefined!\n");
break;
}
/* Undo the right shift applied to the table values */
emrs1 <<= 2;
/* Set output drive strength to 34 Ohm during write levelling */
emrs1 |= (1 << 1);
/* Bit 12: disable the output of ranks that are not being leveled */
if (wl_enable && (target_rank != config_rank)) {
printk(RAM_DEBUG, "Disabling output for rank%d\n", config_rank);
emrs1 |= (1 << 12);
}
/* Bit 7: enable write leveling on the rank that is being leveled */
if (wl_enable && (target_rank == config_rank)) {
printk(RAM_DEBUG, "Enabling WL for rank%d\n", config_rank);
emrs1 |= (1 << 7);
}
send_jedec_cmd(s, config_rank, channel, EMRS1_CMD, emrs1);
}
#define N_SAMPLES 5
/*
 * Sample the write leveling feedback of every bytelane N_SAMPLES times.
 *
 * Each round writes twice to the test address of the rank, waits 5us and then
 * reads bit 7 of the per-lane register at 0x561 + 0x400 * channel + 4 * lane.
 * On return high_found[lane] holds how many of the N_SAMPLES rounds saw that
 * bit set (0 .. N_SAMPLES).
 */
static void sample_dq(const struct sysinfo *s, u8 channel, u8 rank,
		u8 high_found[8]) {
	const u32 address = test_address(channel, rank);
	u32 *const target = (u32 *)address;
	int round, lane;

	memset(high_found, 0, TOTAL_BYTELANES * sizeof(*high_found));

	for (round = 0; round < N_SAMPLES; round++) {
		/* Trigger DQS strobes on the rank under test */
		write32(target, 0x12341234);
		write32(target + 4, 0x12341234);
		udelay(5);

		FOR_EACH_BYTELANE(lane) {
			const u8 status = MCHBAR8(0x561 + 0x400 * channel
						+ (lane * 4));

			if (status & 0x80)
				high_found[lane]++;
		}
	}
}
/*
 * Move the DQS (tx) delay of every bytelane of a channel onto the low-to-high
 * edge of the write leveling feedback.
 *
 * Starting from s->dqs_settings[channel], loop 0 decreases the delay of each
 * lane until none of the N_SAMPLES samples is high, then loop 1 increases it
 * until all N_SAMPLES samples are high. The resulting settings are stored back
 * into s->dqs_settings[channel].
 *
 * Returns CB_SUCCESS, or CB_ERR when a lane runs out of delay range in either
 * direction (s->dqs_settings is left untouched in that case).
 */
static enum cb_err increment_to_dqs_edge(struct sysinfo *s, u8 channel, u8 rank)
{
int lane;
u8 saved_24d;
struct dll_setting dqs_setting[TOTAL_BYTELANES];
/* Bitmask of the lanes that are finished in the current loop */
u8 bytelane_ok = 0;
u8 dq_sample[TOTAL_BYTELANES];
/* Work on a local copy; it is only committed after both loops succeeded */
memcpy(dqs_setting, s->dqs_settings[channel], sizeof(dqs_setting));
FOR_EACH_BYTELANE(lane)
dqsset(channel, lane, &dqs_setting[lane]);
/* Restored at the end of the function.
   NOTE(review): not restored on the CB_ERR returns below; the only visible
   caller dies on error - confirm that this is acceptable. */
saved_24d = MCHBAR8(0x24d + 0x400 * channel);
/* Loop 0: Find DQ sample low, by decreasing */
while (bytelane_ok != 0xff) {
sample_dq(s, channel, rank, dq_sample);
FOR_EACH_BYTELANE(lane) {
if (bytelane_ok & (1 << lane))
continue;
printk(RAM_SPEW, "%d, %d, %02d, %d,"
" lane%d sample: %d\n",
dqs_setting[lane].coarse,
dqs_setting[lane].clk_delay,
dqs_setting[lane].tap,
dqs_setting[lane].pi,
lane,
dq_sample[lane]);
/* Any high sample means the lane is not yet low: step back further */
if (dq_sample[lane] > 0) {
if (decrement_dq_dqs(s, &dqs_setting[lane])) {
printk(BIOS_EMERG,
"DQS setting channel%d, "
"lane %d reached a minimum!\n",
channel, lane);
return CB_ERR;
}
} else {
bytelane_ok |= (1 << lane);
}
dqsset(channel, lane, &dqs_setting[lane]);
}
}
printk(RAM_DEBUG, "DQS settings on PASS #0:\n");
FOR_EACH_BYTELANE(lane) {
printk(RAM_DEBUG, "lane %d: ", lane);
print_dll_setting(&dqs_setting[lane], 0);
}
/* Loop 1: Find DQ sample high, by increasing */
bytelane_ok = 0;
while (bytelane_ok != 0xff) {
sample_dq(s, channel, rank, dq_sample);
FOR_EACH_BYTELANE(lane) {
if (bytelane_ok & (1 << lane))
continue;
printk(RAM_SPEW, "%d, %d, %02d, %d, lane%d sample: %d\n",
dqs_setting[lane].coarse,
dqs_setting[lane].clk_delay,
dqs_setting[lane].tap,
dqs_setting[lane].pi,
lane,
dq_sample[lane]);
/* The lane is done once every single sample is high */
if (dq_sample[lane] == N_SAMPLES) {
bytelane_ok |= (1 << lane);
} else {
if (increment_dq_dqs(s, &dqs_setting[lane])) {
printk(BIOS_EMERG,
"DQS setting channel%d, "
"lane %d reached a maximum!\n",
channel, lane);
return CB_ERR;
}
}
dqsset(channel, lane, &dqs_setting[lane]);
}
}
printk(RAM_DEBUG, "DQS settings on PASS #1:\n");
FOR_EACH_BYTELANE(lane) {
printk(RAM_DEBUG, "lane %d: ", lane);
print_dll_setting(&dqs_setting[lane], 0);
}
/* Commit the trained values */
printk(BIOS_DEBUG, "final WL DQS settings on CH%d\n", channel);
FOR_EACH_BYTELANE(lane) {
printk(BIOS_DEBUG, "\tlane%d: ", lane);
print_dll_setting(&dqs_setting[lane], 1);
s->dqs_settings[channel][lane] = dqs_setting[lane];
}
MCHBAR8(0x24d + 0x400 * channel) = saved_24d;
return CB_SUCCESS;
}
/*
 * DDR3 uses flyby topology where the clock signal takes a different path
 * than the data signal, to allow for better signal integrity.
 * Therefore the delay on the data signals needs to account for this.
 * This is done by sampling the DQS write (tx) signal back over the DQ signal
 * and looking for delay values where the sample transitions from low to high.
 *
 * Here the following is done:
 * - enable write levelling on the first populated rank
 * - disable output on other populated ranks
 * - start from safe DQS (tx) delays (other transitions can be
 *   found at different starting values but are generally bad)
 * - loop0: decrease DQS (tx) delays until low is sampled,
 *   loop1: increase DQS (tx) delays until high is sampled,
 *   That way we are sure to hit a low-high transition
 * - put all ranks in normal mode of operation again
 * - note: All ranks need to be leveled together
 *
 * Afterwards the DQ delay of every populated channel is programmed to the
 * trained DQS delay plus a fixed, clock dependent number of steps.
 *
 * Does not return on failure: die() is called when levelling fails or when
 * the DQ delay cannot be moved far enough past DQS.
 */
void search_write_leveling(struct sysinfo *s)
{
	int i, ch, count;
	u8 config, rank0, rank1, lane;
	struct dll_setting dq_setting;

	/* Maps s->dimm_config[ch] to a [Config] index of the tables below */
	static const u8 chanconfig_lut[16] = {
		0, 6, 4, 6, 7, 3, 1, 3, 5, 2, 0, 2, 7, 3, 1, 3
	};

	static const u8 odt_force[8][4] = { /* [Config][leveling rank] */
		{0x5, 0x6, 0x5, 0x9},
		{0x5, 0x6, 0x5, 0x0},
		{0x5, 0x0, 0x5, 0x9},
		{0x5, 0x0, 0x5, 0x0},
		{0x1, 0x2, 0x0, 0x0},
		{0x0, 0x0, 0x4, 0x8},
		{0x1, 0x0, 0x0, 0x0},
		{0x0, 0x0, 0x4, 0x0}
	};

	printk(BIOS_DEBUG, "Starting write levelling.\n");

	FOR_EACH_POPULATED_CHANNEL(s->dimms, ch) {
		printk(BIOS_DEBUG, "\tCH%d\n", ch);
		config = chanconfig_lut[s->dimm_config[ch]];

		MCHBAR8(0x5d8 + 0x400 * ch) =
			MCHBAR8(0x5d8 + 0x400 * ch) & ~0x0e;
		MCHBAR16(0x5c4 + 0x400 * ch) = (MCHBAR16(0x5c4 + 0x400 * ch) &
			~0x3fff) | 0x3fff;
		MCHBAR8(0x265 + 0x400 * ch) =
			MCHBAR8(0x265 + 0x400 * ch) & ~0x1f;

		/* find the first populated rank */
		FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, ch, rank0)
			break;

		/* Enable WL for the first populated rank and disable output
		   for others */
		FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, ch, rank1)
			set_rank_write_level(s, ch, config, rank1, rank0, 1);

		MCHBAR8(0x298 + 2 + 0x400 * ch) =
			(MCHBAR8(0x298 + 2 + 0x400 * ch) & ~0x0f)
			| odt_force[config][rank0];
		MCHBAR8(0x271 + 0x400 * ch) = (MCHBAR8(0x271 + 0x400 * ch)
			& ~0x7e) | 0x4e;
		MCHBAR8(0x5d9 + 0x400 * ch) =
			(MCHBAR8(0x5d9 + 0x400 * ch) & ~0x04) | 0x04;
		MCHBAR32(0x1a0) = (MCHBAR32(0x1a0) & ~0x07ffffff)
			| 0x00014000;

		if (increment_to_dqs_edge(s, ch, rank0))
			die("Write Leveling failed!");

		MCHBAR8(0x298 + 2 + 0x400 * ch) =
			MCHBAR8(0x298 + 2 + 0x400 * ch) & ~0x0f;
		MCHBAR8(0x271 + 0x400 * ch) =
			(MCHBAR8(0x271 + 0x400 * ch) & ~0x7e)
			| 0x0e;
		MCHBAR8(0x5d9 + 0x400 * ch) =
			(MCHBAR8(0x5d9 + 0x400 * ch) & ~0x04);
		MCHBAR32(0x1a0) = (MCHBAR32(0x1a0)
			& ~0x07ffffff) | 0x00555801;

		/* Disable WL on the trained rank */
		set_rank_write_level(s, ch, config, rank0, rank0, 0);
		send_jedec_cmd(s, rank0, ch, NORMALOP_CMD, 1 << 12);

		MCHBAR8(0x5d8 + 0x400 * ch) = (MCHBAR8(0x5d8 + 0x400 * ch)
			& ~0x0e) | 0x0e;
		MCHBAR16(0x5c4 + 0x400 * ch) = (MCHBAR16(0x5c4 + 0x400 * ch)
			& ~0x3fff) | 0x1807;
		MCHBAR8(0x265 + 0x400 * ch) = MCHBAR8(0x265 + 0x400 * ch) & ~0x1f;

		/* Disable write level mode for all ranks */
		FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, ch, rank0)
			set_rank_write_level(s, ch, config, rank0, rank0, 0);
	}

	MCHBAR8(0x5dc) = (MCHBAR8(0x5dc) & ~0x80) | 0x80;

	/* Increment DQ (rx) dll setting by a standard amount past DQS,
	   This is further trained in write training. */
	switch (s->selected_timings.mem_clk) {
	default:
	case MEM_CLOCK_800MHz:
		count = 39;
		break;
	case MEM_CLOCK_1066MHz:
		count = 32;
		break;
	case MEM_CLOCK_1333MHz:
		count = 42;
		break;
	}

	FOR_EACH_POPULATED_CHANNEL_AND_BYTELANE(s->dimms, ch, lane) {
		/* Step a copy so that the trained DQS setting itself is kept */
		dq_setting = s->dqs_settings[ch][lane];
		for (i = 0; i < count; i++)
			if (increment_dq_dqs(s, &dq_setting))
				die("Can't further increase DQ past DQS delay");
		dqset(ch, lane, &dq_setting);
	}
	printk(BIOS_DEBUG, "Done write levelling.\n");
}