| /* |
| * This file is part of the coreboot project. |
| * |
| * Copyright (C) 2017-2018 Arthur Heymans <arthur@aheymans.xyz> |
| * |
| * This program is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU General Public License as |
| * published by the Free Software Foundation; either version 2 of |
| * the License, or (at your option) any later version. |
| * |
| * This program is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| * GNU General Public License for more details. |
| */ |
| |
| #include <device/mmio.h> |
| #include <console/console.h> |
| #include <delay.h> |
| #include <stdint.h> |
| #include <string.h> |
| #include <types.h> |
| #include "x4x.h" |
| #include "iomap.h" |
| |
| static void print_dll_setting(const struct dll_setting *dll_setting, |
| u8 default_verbose) |
| { |
| u8 debug_level = default_verbose ? BIOS_DEBUG : RAM_DEBUG; |
| |
| printk(debug_level, "%d.%d.%d.%d:%d.%d\n", dll_setting->coarse, |
| dll_setting->clk_delay, dll_setting->tap, |
| dll_setting->pi, dll_setting->db_en, |
| dll_setting->db_sel); |
| } |
| |
| struct db_limit { |
| u8 tap0; |
| u8 tap1; |
| u8 pi0; |
| u8 pi1; |
| }; |
| |
| static void set_db(const struct sysinfo *s, struct dll_setting *dq_dqs_setting) |
| { |
| struct db_limit limit; |
| |
| switch (s->selected_timings.mem_clk) { |
| default: |
| case MEM_CLOCK_800MHz: |
| limit.tap0 = 3; |
| limit.tap1 = 10; |
| limit.pi0 = 2; |
| limit.pi1 = 3; |
| break; |
| case MEM_CLOCK_1066MHz: |
| limit.tap0 = 2; |
| limit.tap1 = 8; |
| limit.pi0 = 6; |
| limit.pi1 = 7; |
| break; |
| case MEM_CLOCK_1333MHz: |
| limit.tap0 = 3; |
| limit.tap1 = 11; |
| /* TO CHECK: Might be reverse since this makes little sense */ |
| limit.pi0 = 6; |
| limit.pi1 = 4; |
| break; |
| } |
| |
| if (dq_dqs_setting->tap < limit.tap0) { |
| dq_dqs_setting->db_en = 1; |
| dq_dqs_setting->db_sel = 1; |
| } else if ((dq_dqs_setting->tap == limit.tap0) |
| && (dq_dqs_setting->pi < limit.pi0)) { |
| dq_dqs_setting->db_en = 1; |
| dq_dqs_setting->db_sel = 1; |
| } else if (dq_dqs_setting->tap < limit.tap1) { |
| dq_dqs_setting->db_en = 0; |
| dq_dqs_setting->db_sel = 0; |
| } else if ((dq_dqs_setting->tap == limit.tap1) |
| && (dq_dqs_setting->pi < limit.pi1)) { |
| dq_dqs_setting->db_en = 0; |
| dq_dqs_setting->db_sel = 0; |
| } else { |
| dq_dqs_setting->db_en = 1; |
| dq_dqs_setting->db_sel = 0; |
| } |
| } |
| |
| const static u8 max_tap[3] = {12, 10, 13}; |
| |
| static int increment_dq_dqs(const struct sysinfo *s, |
| struct dll_setting *dq_dqs_setting) |
| { |
| u8 max_tap_val = max_tap[s->selected_timings.mem_clk |
| - MEM_CLOCK_800MHz]; |
| |
| if (dq_dqs_setting->pi < 6) { |
| dq_dqs_setting->pi += 1; |
| } else if (dq_dqs_setting->tap < max_tap_val) { |
| dq_dqs_setting->pi = 0; |
| dq_dqs_setting->tap += 1; |
| } else if (dq_dqs_setting->clk_delay < 2) { |
| dq_dqs_setting->pi = 0; |
| dq_dqs_setting->tap = 0; |
| dq_dqs_setting->clk_delay += 1; |
| } else if (dq_dqs_setting->coarse < 1) { |
| dq_dqs_setting->pi = 0; |
| dq_dqs_setting->tap = 0; |
| dq_dqs_setting->clk_delay -= 1; |
| dq_dqs_setting->coarse += 1; |
| } else { |
| return CB_ERR; |
| } |
| set_db(s, dq_dqs_setting); |
| return CB_SUCCESS; |
| } |
| |
| static int decrement_dq_dqs(const struct sysinfo *s, |
| struct dll_setting *dq_dqs_setting) |
| { |
| u8 max_tap_val = max_tap[s->selected_timings.mem_clk |
| - MEM_CLOCK_800MHz]; |
| |
| if (dq_dqs_setting->pi > 0) { |
| dq_dqs_setting->pi -= 1; |
| } else if (dq_dqs_setting->tap > 0) { |
| dq_dqs_setting->pi = 6; |
| dq_dqs_setting->tap -= 1; |
| } else if (dq_dqs_setting->clk_delay > 0) { |
| dq_dqs_setting->pi = 6; |
| dq_dqs_setting->tap = max_tap_val; |
| dq_dqs_setting->clk_delay -= 1; |
| } else if (dq_dqs_setting->coarse > 0) { |
| dq_dqs_setting->pi = 6; |
| dq_dqs_setting->tap = max_tap_val; |
| dq_dqs_setting->clk_delay += 1; |
| dq_dqs_setting->coarse -= 1; |
| } else { |
| return CB_ERR; |
| } |
| set_db(s, dq_dqs_setting); |
| return CB_SUCCESS; |
| } |
| |
| |
| #define WT_PATTERN_SIZE 80 |
| |
| static const u32 write_training_schedule[WT_PATTERN_SIZE] = { |
| 0xffffffff, 0x00000000, 0xffffffff, 0x00000000, |
| 0xffffffff, 0x00000000, 0xffffffff, 0x00000000, |
| 0xffffffff, 0x00000000, 0xffffffff, 0x00000000, |
| 0xffffffff, 0x00000000, 0xffffffff, 0x00000000, |
| 0xefefefef, 0x10101010, 0xefefefef, 0x10101010, |
| 0xefefefef, 0x10101010, 0xefefefef, 0x10101010, |
| 0xefefefef, 0x10101010, 0xefefefef, 0x10101010, |
| 0xefefefef, 0x10101010, 0xefefefef, 0x10101010, |
| 0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010, |
| 0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010, |
| 0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010, |
| 0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010, |
| 0x03030303, 0x04040404, 0x09090909, 0x10101010, |
| 0x21212121, 0x40404040, 0x81818181, 0x00000000, |
| 0x03030303, 0x04040404, 0x09090909, 0x10101010, |
| 0x21212121, 0x40404040, 0x81818181, 0x00000000, |
| 0xfdfdfdfd, 0xfafafafa, 0xf7f7f7f7, 0xeeeeeeee, |
| 0xdfdfdfdf, 0xbebebebe, 0x7f7f7f7f, 0xfefefefe, |
| 0xfdfdfdfd, 0xfafafafa, 0xf7f7f7f7, 0xeeeeeeee, |
| 0xdfdfdfdf, 0xbebebebe, 0x7f7f7f7f, 0xfefefefe, |
| }; |
| |
/*
 * Test outcome a limit search is looking for. The values are compared
 * directly against a per-lane error bit (0 = no error, 1 = error).
 */
enum training_modes {
	SUCCEEDING = 0,	/* lane reads back the expected data */
	FAILING = 1	/* lane reads back wrong data */
};
| |
| static u8 test_dq_aligned(const struct sysinfo *s, |
| const u8 channel) |
| { |
| u32 address; |
| int rank, lane; |
| u8 count, count1; |
| u8 data[8]; |
| u8 lane_error = 0; |
| |
| FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, channel, rank) { |
| address = test_address(channel, rank); |
| for (count = 0; count < WT_PATTERN_SIZE; count++) { |
| for (count1 = 0; count1 < WT_PATTERN_SIZE; count1++) { |
| if ((count1 % 16) == 0) |
| MCHBAR32(0xf90) = 1; |
| const u32 pattern = |
| write_training_schedule[count1]; |
| write32((u32 *)address + 8 * count1, pattern); |
| write32((u32 *)address + 8 * count1 + 4, |
| pattern); |
| } |
| |
| const u32 good = write_training_schedule[count]; |
| write32(&data[0], read32((u32 *)address + 8 * count)); |
| write32(&data[4], |
| read32((u32 *)address + 8 * count + 4)); |
| FOR_EACH_BYTELANE(lane) { |
| u8 expected = (good >> ((lane % 4) * 8)) & 0xff; |
| if (data[lane] != expected) |
| lane_error |= 1 << lane; |
| } |
| } |
| } |
| return lane_error; |
| } |
| |
| #define CONSISTENCY 10 |
| |
| /* |
| * This function finds either failing or succeeding writes by increasing DQ. |
| * When it has found a failing or succeeding setting it will increase DQ |
| * another 10 times to make sure the result is consistent. |
| * This is probably done because lanes cannot be trained independent from |
| * each other. |
| */ |
| static int find_dq_limit(const struct sysinfo *s, const u8 channel, |
| struct dll_setting dq_setting[TOTAL_BYTELANES], |
| u8 dq_lim[TOTAL_BYTELANES], |
| const enum training_modes expected_result) |
| { |
| int status = CB_SUCCESS; |
| int lane; |
| u8 test_result; |
| u8 pass_count[TOTAL_BYTELANES]; |
| u8 succes_mask = 0xff; |
| |
| printk(RAM_DEBUG, "Looking for %s writes on channel %d\n", |
| expected_result == FAILING ? "failing" : "succeeding", channel); |
| memset(pass_count, 0, sizeof(pass_count)); |
| |
| while(succes_mask) { |
| test_result = test_dq_aligned(s, channel); |
| FOR_EACH_BYTELANE(lane) { |
| if (((test_result >> lane) & 1) != expected_result) { |
| status = increment_dq_dqs(s, &dq_setting[lane]); |
| dqset(channel, lane, &dq_setting[lane]); |
| dq_lim[lane]++; |
| } else if (pass_count[lane] < CONSISTENCY) { |
| status = increment_dq_dqs(s, &dq_setting[lane]); |
| dqset(channel, lane, &dq_setting[lane]); |
| dq_lim[lane]++; |
| pass_count[lane]++; |
| } else if (pass_count[lane] == CONSISTENCY) { |
| succes_mask &= ~(1 << lane); |
| } |
| if (status == CB_ERR) { |
| printk(BIOS_CRIT, "Could not find a case of %s " |
| "writes on CH%d, lane %d\n", |
| expected_result == FAILING ? "failing" |
| : "succeeding", channel, lane); |
| return CB_ERR; |
| } |
| } |
| } |
| return CB_SUCCESS; |
| } |
| |
| /* |
| * This attempts to find the ideal delay for DQ to account for the skew between |
| * the DQ and the DQS signal. |
| * The training works this way: |
| * - start from the DQS delay values (DQ is always later than DQS) |
| * - increment the DQ delay until a succeeding write is found on all bytelayes, |
| * on all ranks on a channel and save these values |
| * - again increment the DQ delay until write start to fail on all bytelanes and |
| * save that value |
| * - use the mean between the saved succeeding and failing value |
| * - note: bytelanes cannot be trained independently, so the delays need to be |
| * adjusted and tested for all of them at the same time |
| */ |
| int do_write_training(struct sysinfo *s) |
| { |
| int i; |
| u8 channel, lane; |
| u8 dq_lower[TOTAL_BYTELANES]; |
| u8 dq_upper[TOTAL_BYTELANES]; |
| struct dll_setting dq_setting[TOTAL_BYTELANES]; |
| |
| printk(BIOS_DEBUG, "Starting DQ write training\n"); |
| |
| FOR_EACH_POPULATED_CHANNEL(s->dimms, channel) { |
| printk(BIOS_DEBUG, "Doing DQ write training on CH%d\n", channel); |
| |
| /* Start all lanes at DQS values */ |
| FOR_EACH_BYTELANE(lane) { |
| dqset(channel, lane, &s->dqs_settings[channel][lane]); |
| s->dq_settings[channel][lane] = s->dqs_settings[channel][lane]; |
| } |
| memset(dq_lower, 0, sizeof(dq_lower)); |
| /* Start from DQS settings */ |
| memcpy(dq_setting, s->dqs_settings[channel], sizeof(dq_setting)); |
| |
| if (find_dq_limit(s, channel, dq_setting, dq_lower, |
| SUCCEEDING)) { |
| printk(BIOS_CRIT, |
| "Could not find working lower limit DQ setting\n"); |
| return CB_ERR; |
| } |
| |
| memcpy(dq_upper, dq_lower, sizeof(dq_lower)); |
| |
| if (find_dq_limit(s, channel, dq_setting, dq_upper, |
| FAILING)) { |
| printk(BIOS_WARNING, |
| "Could not find failing upper limit DQ setting\n"); |
| return CB_ERR; |
| } |
| |
| FOR_EACH_BYTELANE(lane) { |
| dq_lower[lane] -= CONSISTENCY - 1; |
| dq_upper[lane] -= CONSISTENCY - 1; |
| u8 dq_center = (dq_upper[lane] + dq_lower[lane]) / 2; |
| |
| printk(RAM_DEBUG, "Centered value for DQ DLL:" |
| " ch%d, lane %d, #steps = %d\n", |
| channel, lane, dq_center); |
| for (i = 0; i < dq_center; i++) { |
| /* Should never happen */ |
| if (increment_dq_dqs(s, &s->dq_settings[channel][lane]) |
| == CB_ERR) |
| printk(BIOS_ERR, |
| "Huh? write training overflowed!!\n"); |
| } |
| } |
| |
| /* Reset DQ DLL settings and increment with centered value*/ |
| printk(BIOS_DEBUG, "Final DQ timings on CH%d\n", channel); |
| FOR_EACH_BYTELANE(lane) { |
| printk(BIOS_DEBUG, "\tlane%d: ", lane); |
| print_dll_setting(&s->dq_settings[channel][lane], 1); |
| dqset(channel, lane, &s->dq_settings[channel][lane]); |
| } |
| } |
| printk(BIOS_DEBUG, "Done DQ write training\n"); |
| return CB_SUCCESS; |
| } |
| |
| #define RT_PATTERN_SIZE 40 |
| |
| static const u32 read_training_schedule[RT_PATTERN_SIZE] = { |
| 0xffffffff, 0x00000000, 0xffffffff, 0x00000000, |
| 0xffffffff, 0x00000000, 0xffffffff, 0x00000000, |
| 0xefefefef, 0x10101010, 0xefefefef, 0x10101010, |
| 0xefefefef, 0x10101010, 0xefefefef, 0x10101010, |
| 0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010, |
| 0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010, |
| 0x03030303, 0x04040404, 0x09090909, 0x10101010, |
| 0x21212121, 0x40404040, 0x81818181, 0x00000000, |
| 0xfdfdfdfd, 0xfafafafa, 0xf7f7f7f7, 0xeeeeeeee, |
| 0xdfdfdfdf, 0xbebebebe, 0x7f7f7f7f, 0xfefefefe |
| }; |
| |
| static int rt_increment_dqs(struct rt_dqs_setting *setting) |
| { |
| if (setting->pi < 7) { |
| setting->pi++; |
| } else if (setting->tap < 14) { |
| setting->pi = 0; |
| setting->tap++; |
| } else { |
| return CB_ERR; |
| } |
| return CB_SUCCESS; |
| } |
| |
| static u8 test_dqs_aligned(const struct sysinfo *s, const u8 channel) |
| { |
| int i, rank, lane; |
| volatile u8 data[8]; |
| u32 address; |
| u8 bytelane_error = 0; |
| |
| FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, channel, rank) { |
| address = test_address(channel, rank); |
| for (i = 0; i < RT_PATTERN_SIZE; i++) { |
| const u32 good = read_training_schedule[i]; |
| write32(&data[0], read32((u32 *)address + i * 8)); |
| write32(&data[4], read32((u32 *)address + i * 8 + 4)); |
| |
| FOR_EACH_BYTELANE(lane) { |
| if (data[lane] != (good & 0xff)) |
| bytelane_error |= 1 << lane; |
| } |
| } |
| } |
| return bytelane_error; |
| } |
| |
| static int rt_find_dqs_limit(struct sysinfo *s, u8 channel, |
| struct rt_dqs_setting dqs_setting[TOTAL_BYTELANES], |
| u8 dqs_lim[TOTAL_BYTELANES], |
| const enum training_modes expected_result) |
| { |
| int lane; |
| u8 test_result; |
| int status = CB_SUCCESS; |
| |
| FOR_EACH_BYTELANE(lane) |
| rt_set_dqs(channel, lane, 0, &dqs_setting[lane]); |
| |
| while(status == CB_SUCCESS) { |
| test_result = test_dqs_aligned(s, channel); |
| if (test_result == (expected_result == SUCCEEDING ? 0 : 0xff)) |
| return CB_SUCCESS; |
| FOR_EACH_BYTELANE(lane) { |
| if (((test_result >> lane) & 1) != expected_result) { |
| status = rt_increment_dqs(&dqs_setting[lane]); |
| dqs_lim[lane]++; |
| rt_set_dqs(channel, lane, 0, &dqs_setting[lane]); |
| } |
| } |
| } |
| |
| if (expected_result == SUCCEEDING) { |
| printk(BIOS_CRIT, |
| "Could not find RT DQS setting\n"); |
| return CB_ERR; |
| } else { |
| printk(RAM_DEBUG, |
| "Read succeeded over all DQS" |
| " settings, continuing\n"); |
| return CB_SUCCESS; |
| } |
| } |
| |
| #define RT_LOOPS 3 |
| |
| /* |
| * This attempts to find the ideal delay for DQS on reads (rx). |
| * The training works this way: |
| * - start from the lowest possible delay (0) on all bytelanes |
| * - increment the DQS rx delays until a succeeding write is found on all |
| * bytelayes, on all ranks on a channel and save these values |
| * - again increment the DQS rx delay until write start to fail on all bytelanes |
| * and save that value |
| * - use the mean between the saved succeeding and failing value |
| * - note0: bytelanes cannot be trained independently, so the delays need to be |
| * adjusted and tested for all of them at the same time |
| * - note1: At this stage all ranks effectively use the rank0's rt_dqs settings, |
| * but later on their respective settings are used (TODO where is the |
| * 'switch' register??). So programming the results for all ranks at the end |
| * of the training. Programming on all ranks instead of all populated ranks, |
| * seems to be required, most likely because the signals can't really be generated |
| * separately. |
| */ |
| int do_read_training(struct sysinfo *s) |
| { |
| int loop, channel, i, lane, rank; |
| u32 address, content; |
| u8 dqs_lower[TOTAL_BYTELANES]; |
| u8 dqs_upper[TOTAL_BYTELANES]; |
| struct rt_dqs_setting dqs_setting[TOTAL_BYTELANES]; |
| u16 saved_dqs_center[TOTAL_CHANNELS][TOTAL_BYTELANES]; |
| |
| memset(saved_dqs_center, 0, sizeof(saved_dqs_center)); |
| |
| printk(BIOS_DEBUG, "Starting DQS read training\n"); |
| |
| for (loop = 0; loop < RT_LOOPS; loop++) { |
| FOR_EACH_POPULATED_CHANNEL(s->dimms, channel) { |
| printk(RAM_DEBUG, "Doing DQS read training on CH%d\n", |
| channel); |
| |
| /* Write pattern to strobe address */ |
| FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, channel, rank) { |
| address = test_address(channel, rank); |
| for (i = 0; i < RT_PATTERN_SIZE; i++) { |
| content = read_training_schedule[i]; |
| write32((u32 *)address + 8 * i, content); |
| write32((u32 *)address + 8 * i + 4, content); |
| } |
| } |
| |
| memset(dqs_lower, 0, sizeof(dqs_lower)); |
| memset(&dqs_setting, 0, sizeof(dqs_setting)); |
| if (rt_find_dqs_limit(s, channel, dqs_setting, dqs_lower, |
| SUCCEEDING)) { |
| printk(BIOS_CRIT, |
| "Could not find working lower limit DQS setting\n"); |
| return CB_ERR; |
| } |
| |
| FOR_EACH_BYTELANE(lane) |
| dqs_upper[lane] = dqs_lower[lane]; |
| |
| if (rt_find_dqs_limit(s, channel, dqs_setting, dqs_upper, |
| FAILING)) { |
| printk(BIOS_CRIT, |
| "Could not find failing upper limit DQ setting\n"); |
| return CB_ERR; |
| } |
| |
| printk(RAM_DEBUG, "Centered values, loop %d:\n", loop); |
| FOR_EACH_BYTELANE(lane) { |
| u8 center = (dqs_lower[lane] + dqs_upper[lane]) / 2; |
| printk(RAM_DEBUG, "\t lane%d: #%d\n", lane, center); |
| saved_dqs_center[channel][lane] += center; |
| } |
| } /* END FOR_EACH_POPULATED_CHANNEL */ |
| } /* end RT_LOOPS */ |
| |
| memset(s->rt_dqs, 0, sizeof(s->rt_dqs)); |
| |
| FOR_EACH_POPULATED_CHANNEL(s->dimms, channel) { |
| printk(BIOS_DEBUG, "Final timings on CH%d:\n", channel); |
| FOR_EACH_BYTELANE(lane) { |
| saved_dqs_center[channel][lane] /= RT_LOOPS; |
| while (saved_dqs_center[channel][lane]--) { |
| if(rt_increment_dqs(&s->rt_dqs[channel][lane]) |
| == CB_ERR) |
| /* Should never happen */ |
| printk(BIOS_ERR, |
| "Huh? read training overflowed!!\n"); |
| } |
| /* Later on separate settings for each rank are used so program |
| all of them */ |
| FOR_EACH_RANK_IN_CHANNEL(rank) |
| rt_set_dqs(channel, lane, rank, |
| &s->rt_dqs[channel][lane]); |
| printk(BIOS_DEBUG, "\tlane%d: %d.%d\n", |
| lane, s->rt_dqs[channel][lane].tap, |
| s->rt_dqs[channel][lane].pi); |
| } |
| } |
| printk(BIOS_DEBUG, "Done DQS read training\n"); |
| return CB_SUCCESS; |
| } |
| |
| /* Enable write leveling on selected rank and disable output on other ranks */ |
| static void set_rank_write_level(struct sysinfo *s, u8 channel, u8 config, |
| u8 config_rank, u8 target_rank, int wl_enable) |
| { |
| u32 emrs1; |
| |
| /* Is shifted by bits 2 later so u8 can be used to reduce size */ |
| const static u8 emrs1_lut[8][4][4]={ /* [Config][Leveling Rank][Rank] */ |
| { /* Config 0: 2R2R */ |
| {0x11, 0x00, 0x91, 0x00}, |
| {0x00, 0x11, 0x91, 0x00}, |
| {0x91, 0x00, 0x11, 0x00}, |
| {0x91, 0x00, 0x00, 0x11} |
| }, |
| { // Config 1: 2R1R |
| {0x11, 0x00, 0x91, 0x00}, |
| {0x00, 0x11, 0x91, 0x00}, |
| {0x91, 0x00, 0x11, 0x00}, |
| {0x00, 0x00, 0x00, 0x00} |
| }, |
| { // Config 2: 1R2R |
| {0x11, 0x00, 0x91, 0x00}, |
| {0x00, 0x00, 0x00, 0x00}, |
| {0x91, 0x00, 0x11, 0x00}, |
| {0x91, 0x00, 0x00, 0x11} |
| }, |
| { // Config 3: 1R1R |
| {0x11, 0x00, 0x91, 0x00}, |
| {0x00, 0x00, 0x00, 0x00}, |
| {0x91, 0x00, 0x11, 0x00}, |
| {0x00, 0x00, 0x00, 0x00} |
| }, |
| { // Config 4: 2R0R |
| {0x11, 0x00, 0x00, 0x00}, |
| {0x00, 0x11, 0x00, 0x00}, |
| {0x00, 0x00, 0x00, 0x00}, |
| {0x00, 0x00, 0x00, 0x00} |
| }, |
| { // Config 5: 0R2R |
| {0x00, 0x00, 0x00, 0x00}, |
| {0x00, 0x00, 0x00, 0x00}, |
| {0x00, 0x00, 0x11, 0x00}, |
| {0x00, 0x00, 0x00, 0x11} |
| }, |
| { // Config 6: 1R0R |
| {0x11, 0x00, 0x00, 0x00}, |
| {0x00, 0x00, 0x00, 0x00}, |
| {0x00, 0x00, 0x00, 0x00}, |
| {0x00, 0x00, 0x00, 0x00} |
| }, |
| { // Config 7: 0R1R |
| {0x00, 0x00, 0x00, 0x00}, |
| {0x00, 0x00, 0x00, 0x00}, |
| {0x00, 0x00, 0x11, 0x00}, |
| {0x00, 0x00, 0x00, 0x00} |
| } |
| }; |
| |
| if (wl_enable) { |
| printk(RAM_DEBUG, "Entering WL mode\n"); |
| printk(RAM_DEBUG, "Using WL ODT values\n"); |
| emrs1 = emrs1_lut[config][target_rank][config_rank]; |
| } else { |
| printk(RAM_DEBUG, "Exiting WL mode\n"); |
| emrs1 = ddr3_emrs1_rtt_nom_config[s->dimm_config[channel]][config_rank]; |
| } |
| printk(RAM_DEBUG, "Setting ODT for rank%d to ", config_rank); |
| switch (emrs1) { |
| case 0: |
| printk(RAM_DEBUG, "High-Z\n"); |
| break; |
| case 0x11: |
| printk(RAM_DEBUG, "40 Ohm\n"); |
| break; |
| case 0x81: |
| printk(RAM_DEBUG, "30 Ohm\n"); |
| break; |
| case 0x80: |
| printk(RAM_DEBUG, "20 Ohm\n"); |
| break; |
| case 0x10: |
| printk(RAM_DEBUG, "120 Ohm\n"); |
| break; |
| case 0x01: |
| printk(RAM_DEBUG, "60 Ohm\n"); |
| break; |
| default: |
| printk(BIOS_WARNING, "ODT value Undefined!\n"); |
| break; |
| } |
| |
| emrs1 <<= 2; |
| /* Set output drive strength to 34 Ohm during write levelling */ |
| emrs1 |= (1 << 1); |
| |
| if (wl_enable && (target_rank != config_rank)) { |
| printk(RAM_DEBUG, "Disabling output for rank%d\n", config_rank); |
| emrs1 |= (1 << 12); |
| } |
| if (wl_enable && (target_rank == config_rank)) { |
| printk(RAM_DEBUG, "Enabling WL for rank%d\n", config_rank); |
| emrs1 |= (1 << 7); |
| } |
| send_jedec_cmd(s, config_rank, channel, EMRS1_CMD, emrs1); |
| } |
| |
| #define N_SAMPLES 5 |
| |
| static void sample_dq(const struct sysinfo *s, u8 channel, u8 rank, |
| u8 high_found[8]) { |
| u32 address = test_address(channel, rank); |
| int samples, lane; |
| |
| memset(high_found, 0, TOTAL_BYTELANES * sizeof(high_found[0])); |
| for (samples = 0; samples < N_SAMPLES; samples++) { |
| write32((u32 *)address, 0x12341234); |
| write32((u32 *)address + 4, 0x12341234); |
| udelay(5); |
| FOR_EACH_BYTELANE(lane) { |
| u8 dq_high = (MCHBAR8(0x561 + 0x400 * channel |
| + (lane * 4)) >> 7) & 1; |
| high_found[lane] += dq_high; |
| } |
| } |
| } |
| |
| static enum cb_err increment_to_dqs_edge(struct sysinfo *s, u8 channel, u8 rank) |
| { |
| int lane; |
| u8 saved_24d; |
| struct dll_setting dqs_setting[TOTAL_BYTELANES]; |
| u8 bytelane_ok = 0; |
| u8 dq_sample[TOTAL_BYTELANES]; |
| |
| memcpy(dqs_setting, s->dqs_settings[channel], sizeof(dqs_setting)); |
| FOR_EACH_BYTELANE(lane) |
| dqsset(channel, lane, &dqs_setting[lane]); |
| |
| saved_24d = MCHBAR8(0x24d + 0x400 * channel); |
| |
| /* Loop 0: Find DQ sample low, by decreasing */ |
| while (bytelane_ok != 0xff) { |
| sample_dq(s, channel, rank, dq_sample); |
| FOR_EACH_BYTELANE(lane) { |
| if (bytelane_ok & (1 << lane)) |
| continue; |
| |
| printk(RAM_SPEW, "%d, %d, %02d, %d," |
| " lane%d sample: %d\n", |
| dqs_setting[lane].coarse, |
| dqs_setting[lane].clk_delay, |
| dqs_setting[lane].tap, |
| dqs_setting[lane].pi, |
| lane, |
| dq_sample[lane]); |
| |
| if (dq_sample[lane] > 0) { |
| if (decrement_dq_dqs(s, &dqs_setting[lane])) { |
| printk(BIOS_EMERG, |
| "DQS setting channel%d, " |
| "lane %d reached a minimum!\n", |
| channel, lane); |
| return CB_ERR; |
| } |
| } else { |
| bytelane_ok |= (1 << lane); |
| } |
| dqsset(channel, lane, &dqs_setting[lane]); |
| } |
| } |
| |
| printk(RAM_DEBUG, "DQS settings on PASS #0:\n"); |
| FOR_EACH_BYTELANE(lane) { |
| printk(RAM_DEBUG, "lane %d: ", lane); |
| print_dll_setting(&dqs_setting[lane], 0); |
| } |
| |
| /* Loop 1: Find DQ sample high, by increasing */ |
| bytelane_ok = 0; |
| while (bytelane_ok != 0xff) { |
| sample_dq(s, channel, rank, dq_sample); |
| FOR_EACH_BYTELANE(lane) { |
| if (bytelane_ok & (1 << lane)) |
| continue; |
| |
| printk(RAM_SPEW, "%d, %d, %02d, %d, lane%d sample: %d\n", |
| dqs_setting[lane].coarse, |
| dqs_setting[lane].clk_delay, |
| dqs_setting[lane].tap, |
| dqs_setting[lane].pi, |
| lane, |
| dq_sample[lane]); |
| |
| if (dq_sample[lane] == N_SAMPLES) { |
| bytelane_ok |= (1 << lane); |
| } else { |
| if (increment_dq_dqs(s, &dqs_setting[lane])) { |
| printk(BIOS_EMERG, |
| "DQS setting channel%d, " |
| "lane %d reached a maximum!\n", |
| channel, lane); |
| return CB_ERR; |
| } |
| } |
| dqsset(channel, lane, &dqs_setting[lane]); |
| } |
| } |
| |
| printk(RAM_DEBUG, "DQS settings on PASS #1:\n"); |
| FOR_EACH_BYTELANE(lane) { |
| printk(RAM_DEBUG, "lane %d: ", lane); |
| print_dll_setting(&dqs_setting[lane], 0); |
| } |
| |
| printk(BIOS_DEBUG, "final WL DQS settings on CH%d\n", channel); |
| FOR_EACH_BYTELANE(lane) { |
| printk(BIOS_DEBUG, "\tlane%d: ", lane); |
| print_dll_setting(&dqs_setting[lane], 1); |
| s->dqs_settings[channel][lane] = dqs_setting[lane]; |
| } |
| |
| MCHBAR8(0x24d + 0x400 * channel) = saved_24d; |
| return CB_SUCCESS; |
| } |
| |
| /* |
| * DDR3 uses flyby topology where the clock signal takes a different path |
| * than the data signal, to allow for better signal intergrity. |
| * Therefore the delay on the data signals needs to account for this. |
| * This is done by by sampleling the the DQS write (tx) signal back over |
| * the DQ signal and looking for delay values where the sample transitions |
| * from high to low. |
| * Here the following is done: |
| * - enable write levelling on the first populated rank |
| * - disable output on other populated ranks |
| * - start from safe DQS (tx) delays (other transitions can be |
| * found at different starting values but are generally bad) |
| * - loop0: decrease DQS (tx) delays until low is sampled, |
| * loop1: increase DQS (tx) delays until high is sampled, |
| * That way we are sure to hit a low-high transition |
| * - put all ranks in normal mode of operation again |
| * - note: All ranks need to be leveled together |
| */ |
| void search_write_leveling(struct sysinfo *s) |
| { |
| int i, ch, count; |
| u8 config, rank0, rank1, lane; |
| struct dll_setting dq_setting; |
| |
| u8 chanconfig_lut[16]={0, 6, 4, 6, 7, 3, 1, 3, 5, 2, 0, 2, 7, 3, 1, 3}; |
| |
| u8 odt_force[8][4] = { /* [Config][leveling rank] */ |
| {0x5, 0x6, 0x5, 0x9}, |
| {0x5, 0x6, 0x5, 0x0}, |
| {0x5, 0x0, 0x5, 0x9}, |
| {0x5, 0x0, 0x5, 0x0}, |
| {0x1, 0x2, 0x0, 0x0}, |
| {0x0, 0x0, 0x4, 0x8}, |
| {0x1, 0x0, 0x0, 0x0}, |
| {0x0, 0x0, 0x4, 0x0} |
| }; |
| |
| printk(BIOS_DEBUG, "Starting write levelling.\n"); |
| |
| FOR_EACH_POPULATED_CHANNEL(s->dimms, ch) { |
| printk(BIOS_DEBUG, "\tCH%d\n", ch); |
| config = chanconfig_lut[s->dimm_config[ch]]; |
| |
| MCHBAR8(0x5d8 + 0x400 * ch) = |
| MCHBAR8(0x5d8 + 0x400 * ch) & ~0x0e; |
| MCHBAR16(0x5c4 + 0x400 * ch) = (MCHBAR16(0x5c4 + 0x400 * ch) & |
| ~0x3fff) | 0x3fff; |
| MCHBAR8(0x265 + 0x400 * ch) = |
| MCHBAR8(0x265 + 0x400 * ch) & ~0x1f; |
| /* find the first populated rank */ |
| FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, ch, rank0) |
| break; |
| |
| /* Enable WL for the first populated rank and disable output |
| for others */ |
| FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, ch, rank1) |
| set_rank_write_level(s, ch, config, rank1, rank0, 1); |
| |
| MCHBAR8(0x298 + 2 + 0x400 * ch) = |
| (MCHBAR8(0x298 + 2 + 0x400 * ch) & ~0x0f) |
| | odt_force[config][rank0]; |
| MCHBAR8(0x271 + 0x400 * ch) = (MCHBAR8(0x271 + 0x400 * ch) |
| & ~0x7e) | 0x4e; |
| MCHBAR8(0x5d9 + 0x400 * ch) = |
| (MCHBAR8(0x5d9 + 0x400 * ch) & ~0x04) | 0x04; |
| MCHBAR32(0x1a0) = (MCHBAR32(0x1a0) & ~0x07ffffff) |
| | 0x00014000; |
| |
| if (increment_to_dqs_edge(s, ch, rank0)) |
| die("Write Leveling failed!"); |
| |
| MCHBAR8(0x298 + 2 + 0x400 * ch) = |
| MCHBAR8(0x298 + 2 + 0x400 * ch) & ~0x0f; |
| MCHBAR8(0x271 + 0x400 * ch) = |
| (MCHBAR8(0x271 + 0x400 * ch) & ~0x7e) |
| | 0x0e; |
| MCHBAR8(0x5d9 + 0x400 * ch) = |
| (MCHBAR8(0x5d9 + 0x400 * ch) & ~0x04); |
| MCHBAR32(0x1a0) = (MCHBAR32(0x1a0) |
| & ~0x07ffffff) | 0x00555801; |
| |
| /* Disable WL on the trained rank */ |
| set_rank_write_level(s, ch, config, rank0, rank0, 0); |
| send_jedec_cmd(s, rank0, ch, NORMALOP_CMD, 1 << 12); |
| |
| MCHBAR8(0x5d8 + 0x400 * ch) = (MCHBAR8(0x5d8 + 0x400 * ch) |
| & ~0x0e) | 0x0e; |
| MCHBAR16(0x5c4 + 0x400 * ch) = (MCHBAR16(0x5c4 + 0x400 * ch) |
| & ~0x3fff) | 0x1807; |
| MCHBAR8(0x265 + 0x400 * ch) = MCHBAR8(0x265 + 0x400 * ch) & ~0x1f; |
| |
| /* Disable write level mode for all ranks */ |
| FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, ch, rank0) |
| set_rank_write_level(s, ch, config, rank0, rank0, 0); |
| } |
| |
| MCHBAR8(0x5dc) = (MCHBAR8(0x5dc) & ~0x80) | 0x80; |
| |
| /* Increment DQ (rx) dll setting by a standard amount past DQS, |
| This is further trained in write training. */ |
| switch (s->selected_timings.mem_clk) { |
| default: |
| case MEM_CLOCK_800MHz: |
| count = 39; |
| break; |
| case MEM_CLOCK_1066MHz: |
| count = 32; |
| break; |
| case MEM_CLOCK_1333MHz: |
| count = 42; |
| break; |
| } |
| |
| FOR_EACH_POPULATED_CHANNEL_AND_BYTELANE(s->dimms, ch, lane) { |
| dq_setting = s->dqs_settings[ch][lane]; |
| for (i = 0; i < count; i++) |
| if (increment_dq_dqs(s, &dq_setting)) |
| die("Can't further increase DQ past DQS delay"); |
| dqset(ch, lane, &dq_setting); |
| } |
| |
| printk(BIOS_DEBUG, "Done write levelling.\n"); |
| } |