| /* SPDX-License-Identifier: GPL-2.0-or-later */ |
| |
| #include <console/console.h> |
| #include <commonlib/helpers.h> |
| #include <string.h> |
| #include <arch/io.h> |
| #include <device/mmio.h> |
| #include <device/pci_ops.h> |
| #include <device/smbus_host.h> |
| #include <cpu/x86/msr.h> |
| #include <cpu/x86/cache.h> |
| #include <cbmem.h> |
| #include <cf9_reset.h> |
| #include <ip_checksum.h> |
| #include <option.h> |
| #include <device/pci_def.h> |
| #include <device/device.h> |
| #include <halt.h> |
| #include <spd.h> |
| #include <timestamp.h> |
| #include <cpu/x86/mtrr.h> |
| #include <cpu/intel/speedstep.h> |
| #include <cpu/intel/turbo.h> |
| #include <mrc_cache.h> |
| #include <southbridge/intel/ibexpeak/me.h> |
| #include <southbridge/intel/common/pmbase.h> |
| #include <delay.h> |
| #include <types.h> |
| |
| #include "chip.h" |
| #include "ironlake.h" |
| #include "raminit.h" |
| #include "raminit_tables.h" |
| |
/* PCI_DEV() handles defined as shorthand for use in this file. */
#define NORTHBRIDGE	PCI_DEV(0, 0x00, 0)
#define SOUTHBRIDGE	PCI_DEV(0, 0x1f, 0)
#define GMA		PCI_DEV(0, 0x02, 0)
| |
/*
 * Rank iteration helpers.
 *
 * Each macro expands to nested for-loops over the caller's own `channel`,
 * `slot` and `rank` variables (they must already be declared) and must be
 * followed by the statement or block to run for each rank.
 *
 * The *_POPULATED_* variants end in an `if` that tests
 * info->populated_ranks[channel][slot][rank], so `info` must be in scope as
 * well, and an `else` written directly after the loop body would bind to
 * that hidden `if`.
 */

/* Visit every (channel, slot, rank) combination, populated or not. */
#define FOR_ALL_RANKS						\
	for (channel = 0; channel < NUM_CHANNELS; channel++)	\
		for (slot = 0; slot < NUM_SLOTS; slot++)		\
			for (rank = 0; rank < NUM_RANKS; rank++)

/* Visit only the ranks flagged in info->populated_ranks, channel 0 first. */
#define FOR_POPULATED_RANKS					\
	for (channel = 0; channel < NUM_CHANNELS; channel++)	\
		for (slot = 0; slot < NUM_SLOTS; slot++)		\
			for (rank = 0; rank < NUM_RANKS; rank++)	\
				if (info->populated_ranks[channel][slot][rank])

/*
 * Same as FOR_POPULATED_RANKS, but channels are visited from the highest
 * index down to 0. The countdown is written as `channel-- > 0` so that it
 * also terminates when the caller's `channel` is unsigned (a `channel >= 0`
 * test is always true for unsigned types). With a signed `channel` the
 * variable still ends at -1 once the loop is done.
 */
#define FOR_POPULATED_RANKS_BACKWARDS				\
	for (channel = NUM_CHANNELS; channel-- > 0;)		\
		for (slot = 0; slot < NUM_SLOTS; slot++)		\
			for (rank = 0; rank < NUM_RANKS; rank++)	\
				if (info->populated_ranks[channel][slot][rank])
| |
| #include <lib.h> /* Prototypes */ |
| |
/* A 128-bit quantity kept as two 64-bit halves; `lo` is laid out first. */
typedef struct _u128 {
	u64 lo;		/* first 8 bytes */
	u64 hi;		/* following 8 bytes */
} u128;
| |
| static void read128(u32 addr, u64 * out) |
| { |
| u128 ret; |
| u128 stor; |
| asm volatile ("movdqu %%xmm0, %0\n" |
| "movdqa (%2), %%xmm0\n" |
| "movdqu %%xmm0, %1\n" |
| "movdqu %0, %%xmm0":"+m" (stor), "=m"(ret):"r"(addr)); |
| out[0] = ret.lo; |
| out[1] = ret.hi; |
| } |
| |
| /* |
| * Ironlake memory I/O timings are located in scan chains, accessible |
| * through MCHBAR register groups. Each channel has a scan chain, and |
| * there's a global scan chain too. Each chain is broken into smaller |
| * sections of N bits, where N <= 32. Each section allows reading and |
| * writing a certain parameter. Each section contains N - 2 data bits |
| * and two additional bits: a Mask bit, and a Halt bit. |
| */ |
| |
| /* OK */ |
| static void write_1d0(u32 val, u16 addr, int bits, int flag) |
| { |
| mchbar_write32(0x1d0, 0); |
| while (mchbar_read32(0x1d0) & (1 << 23)) |
| ; |
| mchbar_write32(0x1d4, (val & ((1 << bits) - 1)) | 2 << bits | flag << bits); |
| mchbar_write32(0x1d0, 1 << 30 | addr); |
| while (mchbar_read32(0x1d0) & (1 << 23)) |
| ; |
| } |
| |
| /* OK */ |
| static u16 read_1d0(u16 addr, int split) |
| { |
| u32 val; |
| mchbar_write32(0x1d0, 0); |
| while (mchbar_read32(0x1d0) & (1 << 23)) |
| ; |
| mchbar_write32(0x1d0, 1 << 31 | (((mchbar_read8(0x246) >> 2) & 3) + 0x361 - addr)); |
| while (mchbar_read32(0x1d0) & (1 << 23)) |
| ; |
| val = mchbar_read32(0x1d8); |
| write_1d0(0, 0x33d, 0, 0); |
| write_1d0(0, 0x33d, 0, 0); |
| val &= ((1 << split) - 1); |
| // printk (BIOS_ERR, "R1D0C [%x] => %x\n", addr, val); |
| return val; |
| } |
| |
| static void sfence(void) |
| { |
| asm volatile ("sfence"); |
| } |
| |
/*
 * Compute the base offset of a lane's register group for the given slot and
 * rank: 0x124 per lane, 11 per rank, 22 per slot, plus 0x23e for lanes with
 * bit 2 set, minus 0x452 for lane 8. The result is truncated to 16 bits.
 */
static inline u16 get_lane_offset(int slot, int rank, int lane)
{
	int offset = 0x124 * lane;

	if (lane & 4)
		offset += 0x23e;
	offset += 11 * rank + 22 * slot;
	if (lane == 8)
		offset -= 0x452;

	return offset;
}
| |
| static inline u16 get_timing_register_addr(int lane, int tm, int slot, int rank) |
| { |
| const u16 offs[] = { 0x1d, 0xa8, 0xe6, 0x5c }; |
| return get_lane_offset(slot, rank, lane) + offs[(tm + 3) % 4]; |
| } |
| |
/*
 * Debug tap: hands `in` back unchanged. `line` is only of interest to a
 * trace print that is currently disabled:
 *	printk (BIOS_DEBUG, "%d: GAV: %x\n", line, in);
 */
static u32 gav_real(int line, u32 in)
{
	(void)line;
	return in;
}

/* Wrap an expression so that its value passes through gav_real() with the current source line. */
#define gav(x) gav_real(__LINE__, (x))
| |
/* File-scope (non-static) array of 64 timing_bounds_t entries. */
| timing_bounds_t timings_car[64]; |
| |
| /* OK */ |
| static u16 |
| read_500(struct raminfo *info, int channel, u16 addr, int split) |
| { |
| u32 val; |
| info->last_500_command[channel] = 1 << 31; |
| mchbar_write32(0x500 + (channel << 10), 0); |
| while (mchbar_read32(0x500 + (channel << 10)) & (1 << 23)) |
| ; |
| mchbar_write32(0x500 + (channel << 10), |
| 1 << 31 | (((mchbar_read8(0x246 + (channel << 10)) >> 2) & 3) + 0xb88 - addr)); |
| while (mchbar_read32(0x500 + (channel << 10)) & (1 << 23)) |
| ; |
| val = mchbar_read32(0x508 + (channel << 10)); |
| return val & ((1 << split) - 1); |
| } |
| |
| /* OK */ |
| static void |
| write_500(struct raminfo *info, int channel, u32 val, u16 addr, int bits, |
| int flag) |
| { |
| if (info->last_500_command[channel] == 1 << 31) { |
| info->last_500_command[channel] = 1 << 30; |
| write_500(info, channel, 0, 0xb61, 0, 0); |
| } |
| mchbar_write32(0x500 + (channel << 10), 0); |
| while (mchbar_read32(0x500 + (channel << 10)) & (1 << 23)) |
| ; |
| mchbar_write32(0x504 + (channel << 10), |
| (val & ((1 << bits) - 1)) | 2 << bits | flag << bits); |
| mchbar_write32(0x500 + (channel << 10), 1 << 30 | addr); |
| while (mchbar_read32(0x500 + (channel << 10)) & (1 << 23)) |
| ; |
| } |
| |
| static void rmw_500(struct raminfo *info, int channel, u16 addr, int bits, u32 and, u32 or) |
| { |
| const u32 val = read_500(info, channel, addr, bits) & and; |
| write_500(info, channel, val | or, addr, bits, 1); |
| } |
| |
| static int rw_test(int rank) |
| { |
| const u32 mask = 0xf00fc33c; |
| int ok = 0xff; |
| int i; |
| for (i = 0; i < 64; i++) |
| write32p((rank << 28) | (i << 2), 0); |
| sfence(); |
| for (i = 0; i < 64; i++) |
| gav(read32p((rank << 28) | (i << 2))); |
| sfence(); |
| for (i = 0; i < 32; i++) { |
| u32 pat = (((mask >> i) & 1) ? 0xffffffff : 0); |
| write32p((rank << 28) | (i << 3), pat); |
| write32p((rank << 28) | (i << 3) | 4, pat); |
| } |
| sfence(); |
| for (i = 0; i < 32; i++) { |
| u8 pat = (((mask >> i) & 1) ? 0xff : 0); |
| int j; |
| u32 val; |
| gav(val = read32p((rank << 28) | (i << 3))); |
| for (j = 0; j < 4; j++) |
| if (((val >> (j * 8)) & 0xff) != pat) |
| ok &= ~(1 << j); |
| gav(val = read32p((rank << 28) | (i << 3) | 4)); |
| for (j = 0; j < 4; j++) |
| if (((val >> (j * 8)) & 0xff) != pat) |
| ok &= ~(16 << j); |
| } |
| sfence(); |
| for (i = 0; i < 64; i++) |
| write32p((rank << 28) | (i << 2), 0); |
| sfence(); |
| for (i = 0; i < 64; i++) |
| gav(read32p((rank << 28) | (i << 2))); |
| |
| return ok; |
| } |
| |
| static void |
| program_timings(struct raminfo *info, u16 base, int channel, int slot, int rank) |
| { |
| int lane; |
| for (lane = 0; lane < 8; lane++) { |
| write_500(info, channel, |
| base + |
| info->training. |
| lane_timings[2][channel][slot][rank][lane], |
| get_timing_register_addr(lane, 2, slot, rank), 9, 0); |
| write_500(info, channel, |
| base + |
| info->training. |
| lane_timings[3][channel][slot][rank][lane], |
| get_timing_register_addr(lane, 3, slot, rank), 9, 0); |
| } |
| } |
| |
| static void write_26c(int channel, u16 si) |
| { |
| mchbar_write32(0x26c + (channel << 10), 0x03243f35); |
| mchbar_write32(0x268 + (channel << 10), 0xcfc00000 | si << 9); |
| mchbar_write16(0x2b9 + (channel << 10), si); |
| } |
| |
| static void toggle_1d0_142_5ff(void) |
| { |
| u32 reg32 = gav(read_1d0(0x142, 3)); |
| if (reg32 & (1 << 1)) |
| write_1d0(0, 0x142, 3, 1); |
| |
| mchbar_write8(0x5ff, 0); |
| mchbar_write8(0x5ff, 1 << 7); |
| if (reg32 & (1 << 1)) |
| write_1d0(0x2, 0x142, 3, 1); |
| } |
| |
| static u32 get_580(int channel, u8 addr) |
| { |
| u32 ret; |
| toggle_1d0_142_5ff(); |
| mchbar_write32(0x580 + (channel << 10), 0x8493c012 | addr); |
| mchbar_setbits8(0x580 + (channel << 10), 1 << 0); |
| while (!((ret = mchbar_read32(0x580 + (channel << 10))) & (1 << 16))) |
| ; |
| mchbar_clrbits8(0x580 + (channel << 10), 1 << 0); |
| return ret; |
| } |
| |
/*
 * Same values as the `<< 28` (rank) and `<< 10` (channel) shifts that the
 * nearby code spells out as literals.
 */
#define RANK_SHIFT	28
#define CHANNEL_SHIFT	10
| |
| static void seq9(struct raminfo *info, int channel, int slot, int rank) |
| { |
| int i, lane; |
| |
| for (i = 0; i < 2; i++) |
| for (lane = 0; lane < 8; lane++) |
| write_500(info, channel, |
| info->training.lane_timings[i + |
| 1][channel][slot] |
| [rank][lane], get_timing_register_addr(lane, |
| i + 1, |
| slot, |
| rank), |
| 9, 0); |
| |
| write_1d0(1, 0x103, 6, 1); |
| for (lane = 0; lane < 8; lane++) |
| write_500(info, channel, |
| info->training. |
| lane_timings[0][channel][slot][rank][lane], |
| get_timing_register_addr(lane, 0, slot, rank), 9, 0); |
| |
| for (i = 0; i < 2; i++) { |
| for (lane = 0; lane < 8; lane++) |
| write_500(info, channel, |
| info->training.lane_timings[i + |
| 1][channel][slot] |
| [rank][lane], get_timing_register_addr(lane, |
| i + 1, |
| slot, |
| rank), |
| 9, 0); |
| gav(get_580(channel, ((i + 1) << 2) | (rank << 5))); |
| } |
| |
| toggle_1d0_142_5ff(); |
| write_1d0(0x2, 0x142, 3, 1); |
| |
| for (lane = 0; lane < 8; lane++) { |
| // printk (BIOS_ERR, "before: %x\n", info->training.lane_timings[2][channel][slot][rank][lane]); |
| info->training.lane_timings[2][channel][slot][rank][lane] = |
| read_500(info, channel, |
| get_timing_register_addr(lane, 2, slot, rank), 9); |
| //printk (BIOS_ERR, "after: %x\n", info->training.lane_timings[2][channel][slot][rank][lane]); |
| info->training.lane_timings[3][channel][slot][rank][lane] = |
| info->training.lane_timings[2][channel][slot][rank][lane] + |
| 0x20; |
| } |
| } |
| |
| static int count_ranks_in_channel(struct raminfo *info, int channel) |
| { |
| int slot, rank; |
| int res = 0; |
| for (slot = 0; slot < NUM_SLOTS; slot++) |
| for (rank = 0; rank < NUM_SLOTS; rank++) |
| res += info->populated_ranks[channel][slot][rank]; |
| return res; |
| } |
| |
/*
 * Configure one rank: run seq9(), program the timings with base 0x80 and
 * then with base 0, and reset scan-chain sections 0x142 and 0x103.
 *
 * Unless `s3resume` is set, rw_test() runs once after the first timing
 * programming and twice after the second. The rank number passed to
 * rw_test() is `rank`, plus the number of ranks in channel 1 when
 * channel 0 is being configured.
 */
static void
config_rank(struct raminfo *info, int s3resume, int channel, int slot, int rank)
{
	int add;

	write_1d0(0, 0x178, 7, 1);
	seq9(info, channel, slot, rank);
	program_timings(info, 0x80, channel, slot, rank);

	add = (channel == 0) ? count_ranks_in_channel(info, 1) : 0;

	if (!s3resume)
		gav(rw_test(rank + add));

	program_timings(info, 0x00, channel, slot, rank);

	if (!s3resume) {
		gav(rw_test(rank + add));
		gav(rw_test(rank + add));
	}

	write_1d0(0, 0x142, 3, 1);
	write_1d0(0, 0x103, 6, 1);

	gav(get_580(channel, 0xc | (rank << 5)));
	gav(read_1d0(0x142, 3));

	mchbar_write8(0x5ff, 0);
	mchbar_write8(0x5ff, 1 << 7);
}
| |
| static void set_4cf(struct raminfo *info, int channel, u8 bit, u8 val) |
| { |
| const u16 regtable[] = { 0x4cf, 0x659, 0x697 }; |
| |
| val &= 1; |
| for (int i = 0; i < ARRAY_SIZE(regtable); i++) |
| rmw_500(info, channel, regtable[i], 4, ~(1 << bit), val << bit); |
| } |
| |
| static void set_334(int zero) |
| { |
| int j, k, channel; |
| const u32 val3[] = { 0x2a2b2a2b, 0x26272627, 0x2e2f2e2f, 0x2a2b }; |
| u32 vd8[2][16]; |
| |
| for (channel = 0; channel < NUM_CHANNELS; channel++) { |
| for (j = 0; j < 4; j++) { |
| u32 a = (j == 1) ? 0x29292929 : 0x31313131; |
| u32 lmask = (j == 3) ? 0xffff : 0xffffffff; |
| u16 c; |
| if ((j == 0 || j == 3) && zero) |
| c = 0; |
| else if (j == 3) |
| c = 0x5f; |
| else |
| c = 0x5f5f; |
| |
| for (k = 0; k < 2; k++) { |
| mchbar_write32(0x138 + 8 * k, channel << 26 | j << 24); |
| gav(vd8[1][(channel << 3) | (j << 1) | k] = |
| mchbar_read32(0x138 + 8 * k)); |
| gav(vd8[0][(channel << 3) | (j << 1) | k] = |
| mchbar_read32(0x13c + 8 * k)); |
| } |
| |
| mchbar_write32(0x334 + (channel << 10) + j * 0x44, zero ? 0 : val3[j]); |
| mchbar_write32(0x32c + (channel << 10) + j * 0x44, |
| zero ? 0 : 0x18191819 & lmask); |
| mchbar_write16(0x34a + (channel << 10) + j * 0x44, c); |
| mchbar_write32(0x33c + (channel << 10) + j * 0x44, |
| zero ? 0 : a & lmask); |
| mchbar_write32(0x344 + (channel << 10) + j * 0x44, |
| zero ? 0 : a & lmask); |
| } |
| } |
| |
| mchbar_setbits32(0x130, 1 << 0); |
| while (mchbar_read8(0x130) & 1) |
| ; |
| } |
| |
| static void rmw_1d0(u16 addr, u32 and, u32 or, int split) |
| { |
| u32 v; |
| v = read_1d0(addr, split); |
| write_1d0((v & and) | or, addr, split, 1); |
| } |
| |
/*
 * Return the index (0-15) of the most significant set bit of `val`,
 * or -1 when `val` is 0.
 */
static int find_highest_bit_set(u16 val)
{
	int pos = -1;

	while (val) {
		val >>= 1;
		pos++;
	}
	return pos;
}
| |
/*
 * Return the index (0-31) of the least significant set bit of `val`,
 * or -1 when `val` is 0.
 *
 * The probe mask is built from an unsigned 1 so that testing bit 31 does
 * not shift into the sign bit of a signed int.
 */
static int find_lowest_bit_set32(u32 val)
{
	int i;

	for (i = 0; i < 32; i++)
		if (val & (1U << i))
			return i;
	return -1;
}
| |
/*
 * Byte offsets used to index the per-DIMM SPD data held in
 * info->spd[channel][slot][...].
 */
enum {
	DEVICE_TYPE		= 2,
	MODULE_TYPE		= 3,
	DENSITY			= 4,
	RANKS_AND_DQ		= 7,
	MEMORY_BUS_WIDTH	= 8,
	TIMEBASE_DIVIDEND	= 10,
	TIMEBASE_DIVISOR	= 11,
	CYCLETIME		= 12,

	CAS_LATENCIES_LSB	= 14,
	CAS_LATENCIES_MSB	= 15,
	CAS_LATENCY_TIME	= 16,
	THERMAL_AND_REFRESH	= 31,
	REFERENCE_RAW_CARD_USED	= 62,
	RANK1_ADDRESS_MAPPING	= 63
};
| |
| static void calculate_timings(struct raminfo *info) |
| { |
| unsigned int cycletime; |
| unsigned int cas_latency_time; |
| unsigned int supported_cas_latencies; |
| unsigned int channel, slot; |
| unsigned int clock_speed_index; |
| unsigned int min_cas_latency; |
| unsigned int cas_latency; |
| unsigned int max_clock_index; |
| |
| /* Find common CAS latency */ |
| supported_cas_latencies = 0x3fe; |
| for (channel = 0; channel < NUM_CHANNELS; channel++) |
| for (slot = 0; slot < NUM_SLOTS; slot++) |
| if (info->populated_ranks[channel][slot][0]) |
| supported_cas_latencies &= |
| 2 * |
| (info-> |
| spd[channel][slot][CAS_LATENCIES_LSB] | |
| (info-> |
| spd[channel][slot][CAS_LATENCIES_MSB] << |
| 8)); |
| |
| max_clock_index = MIN(3, info->max_supported_clock_speed_index); |
| |
| cycletime = min_cycletime[max_clock_index]; |
| cas_latency_time = min_cas_latency_time[max_clock_index]; |
| |
| for (channel = 0; channel < NUM_CHANNELS; channel++) |
| for (slot = 0; slot < NUM_SLOTS; slot++) |
| if (info->populated_ranks[channel][slot][0]) { |
| unsigned int timebase; |
| timebase = |
| 1000 * |
| info-> |
| spd[channel][slot][TIMEBASE_DIVIDEND] / |
| info->spd[channel][slot][TIMEBASE_DIVISOR]; |
| cycletime = |
| MAX(cycletime, |
| timebase * |
| info->spd[channel][slot][CYCLETIME]); |
| cas_latency_time = |
| MAX(cas_latency_time, |
| timebase * |
| info-> |
| spd[channel][slot][CAS_LATENCY_TIME]); |
| } |
| if (cycletime > min_cycletime[0]) |
| die("RAM init: Decoded SPD DRAM freq is slower than the controller minimum!"); |
| for (clock_speed_index = 0; clock_speed_index < 3; clock_speed_index++) { |
| if (cycletime == min_cycletime[clock_speed_index]) |
| break; |
| if (cycletime > min_cycletime[clock_speed_index]) { |
| clock_speed_index--; |
| cycletime = min_cycletime[clock_speed_index]; |
| break; |
| } |
| } |
| min_cas_latency = DIV_ROUND_UP(cas_latency_time, cycletime); |
| cas_latency = 0; |
| while (supported_cas_latencies) { |
| cas_latency = find_highest_bit_set(supported_cas_latencies) + 3; |
| if (cas_latency <= min_cas_latency) |
| break; |
| supported_cas_latencies &= |
| ~(1 << find_highest_bit_set(supported_cas_latencies)); |
| } |
| |
| if (cas_latency != min_cas_latency && clock_speed_index) |
| clock_speed_index--; |
| |
| if (cas_latency * min_cycletime[clock_speed_index] > 20000) |
| die("Couldn't configure DRAM"); |
| info->clock_speed_index = clock_speed_index; |
| info->cas_latency = cas_latency; |
| } |
| |
| static void program_base_timings(struct raminfo *info) |
| { |
| unsigned int channel; |
| unsigned int slot, rank, lane; |
| unsigned int extended_silicon_revision; |
| int i; |
| |
| extended_silicon_revision = info->silicon_revision; |
| if (info->silicon_revision == 0) |
| for (channel = 0; channel < NUM_CHANNELS; channel++) |
| for (slot = 0; slot < NUM_SLOTS; slot++) |
| if ((info-> |
| spd[channel][slot][MODULE_TYPE] & 0xF) == |
| 3) |
| extended_silicon_revision = 4; |
| |
| for (channel = 0; channel < NUM_CHANNELS; channel++) { |
| for (slot = 0; slot < NUM_SLOTS; slot++) |
| for (rank = 0; rank < NUM_SLOTS; rank++) { |
| int card_timing_2; |
| if (!info->populated_ranks[channel][slot][rank]) |
| continue; |
| |
| for (lane = 0; lane < 9; lane++) { |
| int tm_reg; |
| int card_timing; |
| |
| card_timing = 0; |
| if ((info-> |
| spd[channel][slot][MODULE_TYPE] & |
| 0xF) == 3) { |
| int reference_card; |
| reference_card = |
| info-> |
| spd[channel][slot] |
| [REFERENCE_RAW_CARD_USED] & |
| 0x1f; |
| if (reference_card == 3) |
| card_timing = |
| u16_ffd1188[0][lane] |
| [info-> |
| clock_speed_index]; |
| if (reference_card == 5) |
| card_timing = |
| u16_ffd1188[1][lane] |
| [info-> |
| clock_speed_index]; |
| } |
| |
| info->training. |
| lane_timings[0][channel][slot][rank] |
| [lane] = |
| u8_FFFD1218[info-> |
| clock_speed_index]; |
| info->training. |
| lane_timings[1][channel][slot][rank] |
| [lane] = 256; |
| |
| for (tm_reg = 2; tm_reg < 4; tm_reg++) |
| info->training. |
| lane_timings[tm_reg] |
| [channel][slot][rank][lane] |
| = |
| u8_FFFD1240[channel] |
| [extended_silicon_revision] |
| [lane][2 * slot + |
| rank][info-> |
| clock_speed_index] |
| + info->max4048[channel] |
| + |
| u8_FFFD0C78[channel] |
| [extended_silicon_revision] |
| [info-> |
| mode4030[channel]][slot] |
| [rank][info-> |
| clock_speed_index] |
| + card_timing; |
| for (tm_reg = 0; tm_reg < 4; tm_reg++) |
| write_500(info, channel, |
| info->training. |
| lane_timings[tm_reg] |
| [channel][slot][rank] |
| [lane], |
| get_timing_register_addr |
| (lane, tm_reg, slot, |
| rank), 9, 0); |
| } |
| |
| card_timing_2 = 0; |
| if (!(extended_silicon_revision != 4 |
| || (info-> |
| populated_ranks_mask[channel] & 5) == |
| 5)) { |
| if ((info-> |
| spd[channel][slot] |
| [REFERENCE_RAW_CARD_USED] & 0x1F) |
| == 3) |
| card_timing_2 = |
| u16_FFFE0EB8[0][info-> |
| clock_speed_index]; |
| if ((info-> |
| spd[channel][slot] |
| [REFERENCE_RAW_CARD_USED] & 0x1F) |
| == 5) |
| card_timing_2 = |
| u16_FFFE0EB8[1][info-> |
| clock_speed_index]; |
| } |
| |
| for (i = 0; i < 3; i++) |
| write_500(info, channel, |
| (card_timing_2 + |
| info->max4048[channel] |
| + |
| u8_FFFD0EF8[channel] |
| [extended_silicon_revision] |
| [info-> |
| mode4030[channel]][info-> |
| clock_speed_index]), |
| u16_fffd0c50[i][slot][rank], |
| 8, 1); |
| write_500(info, channel, |
| (info->max4048[channel] + |
| u8_FFFD0C78[channel] |
| [extended_silicon_revision][info-> |
| mode4030 |
| [channel]] |
| [slot][rank][info-> |
| clock_speed_index]), |
| u16_fffd0c70[slot][rank], 7, 1); |
| } |
| if (!info->populated_ranks_mask[channel]) |
| continue; |
| for (i = 0; i < 3; i++) |
| write_500(info, channel, |
| (info->max4048[channel] + |
| info->avg4044[channel] |
| + |
| u8_FFFD17E0[channel] |
| [extended_silicon_revision][info-> |
| mode4030 |
| [channel]][info-> |
| clock_speed_index]), |
| u16_fffd0c68[i], 8, 1); |
| } |
| } |
| |
/* Duration of one full clock cycle in ps: two half cycles. */
static unsigned int cycle_ps(struct raminfo *info)
{
	return halfcycle_ps(info) * 2;
}
| |
/*
 * Frequency in 0.1 MHz units, obtained by scaling frequency_11() by 100/9
 * (which suggests frequency_11() counts in 10/9 MHz steps; confirm against
 * its definition).
 */
static unsigned int frequency_01(struct raminfo *info)
{
	return frequency_11(info) * 100 / 9;
}
| |
/*
 * Convert a duration in ps to a number of half cycles, rounding down:
 * ps * (2 * frequency_11) / 900000.
 */
static unsigned int ps_to_halfcycles(struct raminfo *info, unsigned int ps)
{
	return ps * (2 * frequency_11(info)) / 900000;
}
| |
/*
 * Convert a duration in ns to a number of full cycles, rounding down:
 * ns * frequency_11 / 900.
 */
static unsigned int ns_to_cycles(struct raminfo *info, unsigned int ns)
{
	return ns * frequency_11(info) / 900;
}
| |
| static void compute_derived_timings(struct raminfo *info) |
| { |
| unsigned int channel, slot, rank; |
| int extended_silicon_revision; |
| int some_delay_1_ps; |
| int some_delay_2_ps; |
| int some_delay_2_halfcycles_ceil; |
| int some_delay_2_halfcycles_floor; |
| int some_delay_3_ps; |
| int some_delay_3_ps_rounded; |
| int some_delay_1_cycle_ceil; |
| int some_delay_1_cycle_floor; |
| |
| some_delay_3_ps_rounded = 0; |
| extended_silicon_revision = info->silicon_revision; |
| if (!info->silicon_revision) |
| for (channel = 0; channel < NUM_CHANNELS; channel++) |
| for (slot = 0; slot < NUM_SLOTS; slot++) |
| if ((info-> |
| spd[channel][slot][MODULE_TYPE] & 0xF) == |
| 3) |
| extended_silicon_revision = 4; |
| if (info->board_lane_delay[7] < 5) |
| info->board_lane_delay[7] = 5; |
| info->revision_flag_1 = 2; |
| if (info->silicon_revision == 2 || info->silicon_revision == 3) |
| info->revision_flag_1 = 0; |
| if (info->revision < 16) |
| info->revision_flag_1 = 0; |
| |
| if (info->revision < 8) |
| info->revision_flag_1 = 0; |
| if (info->revision >= 8 && (info->silicon_revision == 0 |
| || info->silicon_revision == 1)) |
| some_delay_2_ps = 735; |
| else |
| some_delay_2_ps = 750; |
| |
| if (info->revision >= 0x10 && (info->silicon_revision == 0 |
| || info->silicon_revision == 1)) |
| some_delay_1_ps = 3929; |
| else |
| some_delay_1_ps = 3490; |
| |
| some_delay_1_cycle_floor = some_delay_1_ps / cycle_ps(info); |
| some_delay_1_cycle_ceil = some_delay_1_ps / cycle_ps(info); |
| if (some_delay_1_ps % cycle_ps(info)) |
| some_delay_1_cycle_ceil++; |
| else |
| some_delay_1_cycle_floor--; |
| info->some_delay_1_cycle_floor = some_delay_1_cycle_floor; |
| if (info->revision_flag_1) |
| some_delay_2_ps = halfcycle_ps(info) >> 6; |
| some_delay_2_ps += |
| MAX(some_delay_1_ps - 30, |
| 2 * halfcycle_ps(info) * (some_delay_1_cycle_ceil - 1) + 1000) + |
| 375; |
| some_delay_3_ps = |
| halfcycle_ps(info) - some_delay_2_ps % halfcycle_ps(info); |
| if (info->revision_flag_1) { |
| if (some_delay_3_ps >= 150) { |
| const int some_delay_3_halfcycles = |
| (some_delay_3_ps << 6) / halfcycle_ps(info); |
| some_delay_3_ps_rounded = |
| halfcycle_ps(info) * some_delay_3_halfcycles >> 6; |
| } |
| } |
| some_delay_2_halfcycles_ceil = |
| (some_delay_2_ps + halfcycle_ps(info) - 1) / halfcycle_ps(info) - |
| 2 * (some_delay_1_cycle_ceil - 1); |
| if (info->revision_flag_1 && some_delay_3_ps < 150) |
| some_delay_2_halfcycles_ceil++; |
| some_delay_2_halfcycles_floor = some_delay_2_halfcycles_ceil; |
| if (info->revision < 0x10) |
| some_delay_2_halfcycles_floor = |
| some_delay_2_halfcycles_ceil - 1; |
| if (!info->revision_flag_1) |
| some_delay_2_halfcycles_floor++; |
| /* FIXME: this variable is unused. Should it be used? */ |
| (void)some_delay_2_halfcycles_floor; |
| info->some_delay_2_halfcycles_ceil = some_delay_2_halfcycles_ceil; |
| info->some_delay_3_ps_rounded = some_delay_3_ps_rounded; |
| if ((info->populated_ranks[0][0][0] && info->populated_ranks[0][1][0]) |
| || (info->populated_ranks[1][0][0] |
| && info->populated_ranks[1][1][0])) |
| info->max_slots_used_in_channel = 2; |
| else |
| info->max_slots_used_in_channel = 1; |
| for (channel = 0; channel < NUM_CHANNELS; channel++) |
| mchbar_write32(0x244 + (channel << 10), |
| ((info->revision < 8) ? 1 : 0x200) | |
| ((2 - info->max_slots_used_in_channel) << 17) | |
| (channel << 21) | |
| (info->some_delay_1_cycle_floor << 18) | 0x9510); |
| if (info->max_slots_used_in_channel == 1) { |
| info->mode4030[0] = (count_ranks_in_channel(info, 0) == 2); |
| info->mode4030[1] = (count_ranks_in_channel(info, 1) == 2); |
| } else { |
| info->mode4030[0] = ((count_ranks_in_channel(info, 0) == 1) || (count_ranks_in_channel(info, 0) == 2)) ? 2 : 3; /* 2 if 1 or 2 ranks */ |
| info->mode4030[1] = ((count_ranks_in_channel(info, 1) == 1) |
| || (count_ranks_in_channel(info, 1) == |
| 2)) ? 2 : 3; |
| } |
| for (channel = 0; channel < NUM_CHANNELS; channel++) { |
| int max_of_unk; |
| int min_of_unk_2; |
| |
| int i, count; |
| int sum; |
| |
| if (!info->populated_ranks_mask[channel]) |
| continue; |
| |
| max_of_unk = 0; |
| min_of_unk_2 = 32767; |
| |
| sum = 0; |
| count = 0; |
| for (i = 0; i < 3; i++) { |
| int unk1; |
| if (info->revision < 8) |
| unk1 = |
| u8_FFFD1891[0][channel][info-> |
| clock_speed_index] |
| [i]; |
| else if (! |
| (info->revision >= 0x10 |
| || info->revision_flag_1)) |
| unk1 = |
| u8_FFFD1891[1][channel][info-> |
| clock_speed_index] |
| [i]; |
| else |
| unk1 = 0; |
| for (slot = 0; slot < NUM_SLOTS; slot++) |
| for (rank = 0; rank < NUM_RANKS; rank++) { |
| int a = 0; |
| int b = 0; |
| |
| if (!info-> |
| populated_ranks[channel][slot] |
| [rank]) |
| continue; |
| if (extended_silicon_revision == 4 |
| && (info-> |
| populated_ranks_mask[channel] & |
| 5) != 5) { |
| if ((info-> |
| spd[channel][slot] |
| [REFERENCE_RAW_CARD_USED] & |
| 0x1F) == 3) { |
| a = u16_ffd1178[0] |
| [info-> |
| clock_speed_index]; |
| b = u16_fe0eb8[0][info-> |
| clock_speed_index]; |
| } else |
| if ((info-> |
| spd[channel][slot] |
| [REFERENCE_RAW_CARD_USED] |
| & 0x1F) == 5) { |
| a = u16_ffd1178[1] |
| [info-> |
| clock_speed_index]; |
| b = u16_fe0eb8[1][info-> |
| clock_speed_index]; |
| } |
| } |
| min_of_unk_2 = MIN(min_of_unk_2, a); |
| min_of_unk_2 = MIN(min_of_unk_2, b); |
| if (rank == 0) { |
| sum += a; |
| count++; |
| } |
| { |
| int t; |
| t = b + |
| u8_FFFD0EF8[channel] |
| [extended_silicon_revision] |
| [info-> |
| mode4030[channel]][info-> |
| clock_speed_index]; |
| if (unk1 >= t) |
| max_of_unk = |
| MAX(max_of_unk, |
| unk1 - t); |
| } |
| } |
| { |
| int t = |
| u8_FFFD17E0[channel] |
| [extended_silicon_revision][info-> |
| mode4030 |
| [channel]] |
| [info->clock_speed_index] + min_of_unk_2; |
| if (unk1 >= t) |
| max_of_unk = MAX(max_of_unk, unk1 - t); |
| } |
| } |
| |
| if (count == 0) |
| die("No memory ranks found for channel %u\n", channel); |
| |
| info->avg4044[channel] = sum / count; |
| info->max4048[channel] = max_of_unk; |
| } |
| } |
| |
| static void jedec_read(struct raminfo *info, |
| int channel, int slot, int rank, |
| int total_rank, u8 addr3, unsigned int value) |
| { |
| /* Handle mirrored mapping. */ |
| if ((rank & 1) && (info->spd[channel][slot][RANK1_ADDRESS_MAPPING] & 1)) { |
| addr3 = (addr3 & 0xCF) | ((addr3 & 0x10) << 1) | ((addr3 >> 1) & 0x10); |
| value = (value & ~0x1f8) | ((value >> 1) & 0xa8) | ((value & 0xa8) << 1); |
| } |
| |
| mchbar_clrsetbits8(0x271, 0x1f << 1, addr3); |
| mchbar_clrsetbits8(0x671, 0x1f << 1, addr3); |
| |
| read32p((value << 3) | (total_rank << 28)); |
| |
| mchbar_clrsetbits8(0x271, 0x1f << 1, 1 << 1); |
| mchbar_clrsetbits8(0x671, 0x1f << 1, 1 << 1); |
| |
| read32p(total_rank << 28); |
| } |
| |
/*
 * Bit values combined by jedec_init() into the words it passes to
 * jedec_read(). Going by their names these are DRAM mode register (MR0,
 * MR1, MR2) fields; confirm against the DRAM specification.
 */
enum {
	MR1_RZQ12	= 1 << 9,
	MR1_RZQ2	= 1 << 6,
	MR1_RZQ4	= 1 << 2,
	MR1_ODS34OHM	= 1 << 1
};

enum {
	MR0_BT_INTERLEAVED	= 1 << 3,
	MR0_DLL_RESET_ON	= 1 << 8
};

enum {
	MR2_RTT_WR_DISABLED	= 0,
	MR2_RZQ2		= 1 << 10
};
| |
| static void jedec_init(struct raminfo *info) |
| { |
| int write_recovery; |
| int channel, slot, rank; |
| int total_rank; |
| int dll_on; |
| int self_refresh_temperature; |
| int auto_self_refresh; |
| |
| auto_self_refresh = 1; |
| self_refresh_temperature = 1; |
| if (info->board_lane_delay[3] <= 10) { |
| if (info->board_lane_delay[3] <= 8) |
| write_recovery = info->board_lane_delay[3] - 4; |
| else |
| write_recovery = 5; |
| } else { |
| write_recovery = 6; |
| } |
| FOR_POPULATED_RANKS { |
| auto_self_refresh &= |
| (info->spd[channel][slot][THERMAL_AND_REFRESH] >> 2) & 1; |
| self_refresh_temperature &= |
| info->spd[channel][slot][THERMAL_AND_REFRESH] & 1; |
| } |
| if (auto_self_refresh == 1) |
| self_refresh_temperature = 0; |
| |
| dll_on = ((info->silicon_revision != 2 && info->silicon_revision != 3) |
| || (info->populated_ranks[0][0][0] |
| && info->populated_ranks[0][1][0]) |
| || (info->populated_ranks[1][0][0] |
| && info->populated_ranks[1][1][0])); |
| |
| total_rank = 0; |
| |
| for (channel = NUM_CHANNELS - 1; channel >= 0; channel--) { |
| int rtt, rtt_wr = MR2_RTT_WR_DISABLED; |
| int rzq_reg58e; |
| |
| if (info->silicon_revision == 2 || info->silicon_revision == 3) { |
| rzq_reg58e = 64; |
| rtt = MR1_RZQ2; |
| if (info->clock_speed_index != 0) { |
| rzq_reg58e = 4; |
| if (info->populated_ranks_mask[channel] == 3) |
| rtt = MR1_RZQ4; |
| } |
| } else { |
| if ((info->populated_ranks_mask[channel] & 5) == 5) { |
| rtt = MR1_RZQ12; |
| rzq_reg58e = 64; |
| rtt_wr = MR2_RZQ2; |
| } else { |
| rzq_reg58e = 4; |
| rtt = MR1_RZQ4; |
| } |
| } |
| |
| mchbar_write16(0x588 + (channel << 10), 0); |
| mchbar_write16(0x58a + (channel << 10), 4); |
| mchbar_write16(0x58c + (channel << 10), rtt | MR1_ODS34OHM); |
| mchbar_write16(0x58e + (channel << 10), rzq_reg58e | 0x82); |
| mchbar_write16(0x590 + (channel << 10), 0x1282); |
| |
| for (slot = 0; slot < NUM_SLOTS; slot++) |
| for (rank = 0; rank < NUM_RANKS; rank++) |
| if (info->populated_ranks[channel][slot][rank]) { |
| jedec_read(info, channel, slot, rank, |
| total_rank, 0x28, |
| rtt_wr | (info-> |
| clock_speed_index |
| << 3) |
| | (auto_self_refresh << 6) | |
| (self_refresh_temperature << |
| 7)); |
| jedec_read(info, channel, slot, rank, |
| total_rank, 0x38, 0); |
| jedec_read(info, channel, slot, rank, |
| total_rank, 0x18, |
| rtt | MR1_ODS34OHM); |
| jedec_read(info, channel, slot, rank, |
| total_rank, 6, |
| (dll_on << 12) | |
| (write_recovery << 9) |
| | ((info->cas_latency - 4) << |
| 4) | MR0_BT_INTERLEAVED | |
| MR0_DLL_RESET_ON); |
| total_rank++; |
| } |
| } |
| } |
| |
| static void program_modules_memory_map(struct raminfo *info, int pre_jedec) |
| { |
| unsigned int channel, slot, rank; |
| unsigned int total_mb[2] = { 0, 0 }; /* total memory per channel in MB */ |
| unsigned int channel_0_non_interleaved; |
| |
| FOR_ALL_RANKS { |
| if (info->populated_ranks[channel][slot][rank]) { |
| total_mb[channel] += |
| pre_jedec ? 256 : (256 << info-> |
| density[channel][slot] >> info-> |
| is_x16_module[channel][slot]); |
| mchbar_write8(0x208 + rank + 2 * slot + (channel << 10), |
| (pre_jedec ? (1 | ((1 + 1) << 1)) : |
| (info->is_x16_module[channel][slot] | |
| ((info->density[channel][slot] + 1) << 1))) | |
| 0x80); |
| } |
| mchbar_write16(0x200 + (channel << 10) + 4 * slot + 2 * rank, |
| total_mb[channel] >> 6); |
| } |
| |
| info->total_memory_mb = total_mb[0] + total_mb[1]; |
| |
| info->interleaved_part_mb = |
| pre_jedec ? 0 : 2 * MIN(total_mb[0], total_mb[1]); |
| info->non_interleaved_part_mb = |
| total_mb[0] + total_mb[1] - info->interleaved_part_mb; |
| channel_0_non_interleaved = total_mb[0] - info->interleaved_part_mb / 2; |
| mchbar_write32(0x100, channel_0_non_interleaved | info->non_interleaved_part_mb << 16); |
| if (!pre_jedec) |
| mchbar_write16(0x104, info->interleaved_part_mb); |
| } |
| |
/*
 * Program the MCHBAR timing registers (global and per channel) from the
 * values already gathered in *info: revision, silicon_revision,
 * clock_speed_index, cas_latency, max_slots_used_in_channel,
 * board_lane_delay[], populated_ranks and use_ecc.  Afterwards
 * program_modules_memory_map(info, 1) is called and the eight SAD DRAM rules
 * in QPI_SAD are written.
 *
 * The register offsets and most constants are magic values with no visible
 * documentation; the statement order is kept exactly as found.
 */
| static void program_board_delay(struct raminfo *info) |
| { |
| int cas_latency_shift; |
| int some_delay_ns; |
| int some_delay_3_half_cycles; |
| |
| unsigned int channel, i; |
| int high_multiplier; |
| int lane_3_delay; |
| int cas_latency_derived; |
| |
/* Defaults; several of them are overridden further down. */
| high_multiplier = 0; |
| some_delay_ns = 200; |
| some_delay_3_half_cycles = 4; |
| cas_latency_shift = info->silicon_revision == 0 |
| || info->silicon_revision == 1 ? 1 : 0; |
/* Revisions below 8 use a longer delay and never shift the CAS latency. */
| if (info->revision < 8) { |
| some_delay_ns = 600; |
| cas_latency_shift = 0; |
| } |
| { |
| int speed_bit; |
/*
 * speed_bit is 1 when (clock_speed_index > 1 or silicon_revision is neither
 * 2 nor 3), inverted for revisions >= 0x10.
 */
| speed_bit = |
| ((info->clock_speed_index > 1 |
| || (info->silicon_revision != 2 |
| && info->silicon_revision != 3))) ^ (info->revision >= |
| 0x10); |
/* Bit 1 of the value is set when ECC is NOT in use. */
| write_500(info, 0, speed_bit | ((!info->use_ecc) << 1), 0x60e, |
| 3, 1); |
| write_500(info, 1, speed_bit | ((!info->use_ecc) << 1), 0x60e, |
| 3, 1); |
| if (info->revision >= 0x10 && info->clock_speed_index <= 1 |
| && (info->silicon_revision == 2 |
| || info->silicon_revision == 3)) |
| rmw_1d0(0x116, 5, 2, 4); |
| } |
| mchbar_write32(0x120, 1 << (info->max_slots_used_in_channel + 28) | 0x188e7f9f); |
| |
| mchbar_write8(0x124, info->board_lane_delay[4] + (frequency_01(info) + 999) / 1000); |
| mchbar_write16(0x125, 0x1360); |
| mchbar_write8(0x127, 0x40); |
/* Only taken when fsb_frequency is below half of frequency_11(info). */
| if (info->fsb_frequency < frequency_11(info) / 2) { |
| unsigned int some_delay_2_half_cycles; |
| high_multiplier = 1; |
| some_delay_2_half_cycles = ps_to_halfcycles(info, |
| ((3 * |
| fsbcycle_ps(info)) |
| >> 1) + |
| (halfcycle_ps(info) |
| * |
| reg178_min[info-> |
| clock_speed_index] |
| >> 6) |
| + |
| 4 * |
| halfcycle_ps(info) |
| + 2230); |
/* The result is capped at 7. */
| some_delay_3_half_cycles = |
| MIN((some_delay_2_half_cycles + |
| (frequency_11(info) * 2) * (28 - |
| some_delay_2_half_cycles) / |
| (frequency_11(info) * 2 - |
| 4 * (info->fsb_frequency))) >> 3, 7); |
| } |
/* Bit 0 of MCHBAR 0x2ca9 overrides whatever was computed above. */
| if (mchbar_read8(0x2ca9) & 1) |
| some_delay_3_half_cycles = 3; |
/* Per-channel registers are addressed as base + (channel << 10). */
| for (channel = 0; channel < NUM_CHANNELS; channel++) { |
| mchbar_setbits32(0x220 + (channel << 10), 0x18001117); |
| mchbar_write32(0x224 + (channel << 10), |
| (info->max_slots_used_in_channel - 1) | |
| (info->cas_latency - 5 - info->clock_speed_index) |
| << 21 | (info->max_slots_used_in_channel + |
| info->cas_latency - cas_latency_shift - 4) << 16 | |
| (info->cas_latency - cas_latency_shift - 4) << 26 | |
| (info->cas_latency - info->clock_speed_index + |
| info->max_slots_used_in_channel - 6) << 8); |
| mchbar_write32(0x228 + (channel << 10), info->max_slots_used_in_channel); |
| mchbar_write8(0x239 + (channel << 10), 32); |
| mchbar_write32(0x248 + (channel << 10), high_multiplier << 24 | |
| some_delay_3_half_cycles << 25 | 0x840000); |
| mchbar_write32(0x278 + (channel << 10), 0xc362042); |
| mchbar_write32(0x27c + (channel << 10), 0x8b000062); |
| mchbar_write32(0x24c + (channel << 10), |
| (!!info->clock_speed_index) << 17 | |
| ((2 + info->clock_speed_index - |
| (!!info->clock_speed_index))) << 12 | 0x10200); |
| |
| mchbar_write8(0x267 + (channel << 10), 4); |
| mchbar_write16(0x272 + (channel << 10), 0x155); |
| mchbar_clrsetbits32(0x2bc + (channel << 10), 0xffffff, 0x707070); |
| |
/*
 * Four-bit mask with one bit per rank of this channel; a bit is set when
 * the corresponding rank is NOT populated.
 */
| write_500(info, channel, |
| ((!info->populated_ranks[channel][1][1]) |
| | (!info->populated_ranks[channel][1][0] << 1) |
| | (!info->populated_ranks[channel][0][1] << 2) |
| | (!info->populated_ranks[channel][0][0] << 3)), |
| 0x4c9, 4, 1); |
| } |
| |
| mchbar_write8(0x2c4, (1 + (info->clock_speed_index != 0)) << 6 | 0xc); |
| { |
/* The initial value is always replaced by one branch of the chain below. */
| u8 freq_divisor = 2; |
| if (info->fsb_frequency == frequency_11(info)) |
| freq_divisor = 3; |
| else if (2 * info->fsb_frequency < 3 * (frequency_11(info) / 2)) |
| freq_divisor = 1; |
| else |
| freq_divisor = 2; |
| mchbar_write32(0x2c0, freq_divisor << 11 | 0x6009c400); |
| } |
| |
/* Quantise board_lane_delay[3]: <= 8 is used as is, 9..10 -> 10, else 12. */
| if (info->board_lane_delay[3] <= 10) { |
| if (info->board_lane_delay[3] <= 8) |
| lane_3_delay = info->board_lane_delay[3]; |
| else |
| lane_3_delay = 10; |
| } else { |
| lane_3_delay = 12; |
| } |
| cas_latency_derived = info->cas_latency - info->clock_speed_index + 2; |
| if (info->clock_speed_index > 1) |
| cas_latency_derived++; |
/* Second per-channel pass: registers built from board_lane_delay[]. */
| for (channel = 0; channel < NUM_CHANNELS; channel++) { |
| mchbar_write32(0x240 + (channel << 10), |
| ((info->clock_speed_index == 0) * 0x11000) | |
| 0x1002100 | (2 + info->clock_speed_index) << 4 | |
| (info->cas_latency - 3)); |
| write_500(info, channel, (info->clock_speed_index << 1) | 1, |
| 0x609, 6, 1); |
| write_500(info, channel, |
| info->clock_speed_index + 2 * info->cas_latency - 7, |
| 0x601, 6, 1); |
| |
| mchbar_write32(0x250 + (channel << 10), |
| (lane_3_delay + info->clock_speed_index + 9) << 6 | |
| info->board_lane_delay[7] << 2 | |
| info->board_lane_delay[4] << 16 | |
| info->board_lane_delay[1] << 25 | |
| info->board_lane_delay[1] << 29 | 1); |
| mchbar_write32(0x254 + (channel << 10), |
| info->board_lane_delay[1] >> 3 | |
| (info->board_lane_delay[8] + 4 * info->use_ecc) << 6 | |
| 0x80 | info->board_lane_delay[6] << 1 | |
| info->board_lane_delay[2] << 28 | |
| cas_latency_derived << 16 | 0x4700000); |
| mchbar_write32(0x258 + (channel << 10), |
| (info->board_lane_delay[5] + info->clock_speed_index + 9) << 12 | |
| (info->clock_speed_index - info->cas_latency + 12) << 8 | |
| info->board_lane_delay[2] << 17 | |
| info->board_lane_delay[4] << 24 | 0x47); |
| mchbar_write32(0x25c + (channel << 10), |
| info->board_lane_delay[1] << 1 | |
| info->board_lane_delay[0] << 8 | 0x1da50000); |
| mchbar_write8(0x264 + (channel << 10), 0xff); |
| mchbar_write8(0x5f8 + (channel << 10), cas_latency_shift << 3 | info->use_ecc); |
| } |
| |
| program_modules_memory_map(info, 1); |
| |
/* The value derived from some_delay_ns is halved and capped at 127. */
| mchbar_clrsetbits16(0x610, 0xfe3c, |
| MIN(ns_to_cycles(info, some_delay_ns) / 2, 127) << 9 | 0x3c); |
| mchbar_setbits16(0x612, 1 << 8); |
| mchbar_setbits16(0x214, 0x3e00); |
/*
 * All eight SAD DRAM rules get (total_memory_mb - 64) with bit 1 set; only
 * rule 0 additionally gets bit 0 (!i).  Interleave lists are cleared.
 */
| for (i = 0; i < 8; i++) { |
| pci_write_config32(QPI_SAD, SAD_DRAM_RULE(i), |
| (info->total_memory_mb - 64) | !i | 2); |
| pci_write_config32(QPI_SAD, SAD_INTERLEAVE_LIST(i), 0); |
| } |
| } |
| |
/*
 * Size assigned to mmio_size below and subtracted from 4096 when TOLUD is
 * computed (presumably MiB, like the other *_mb values - confirm).
 */
| #define DEFAULT_PCI_MMIO_SIZE 2048 |
| |
/*
 * Compute and program the top-level memory map: TOM, TOLUD, TOUUD, the
 * optional remap window, the IGD/GTT stolen-memory bases, TSEG and the SAD
 * DRAM rules.
 *
 * Sizes are taken from info->total_memory_mb,
 * info->memory_reserved_for_heci_mb, the GGC register (when
 * info->uma_enabled) and QPI_SAD register 0x68.  Bases are shifted left by 20
 * before being written to IGD_BASE, GTT_BASE and TSEG, and
 * CONFIG_SMM_TSEG_SIZE is shifted right by 20, so the local arithmetic
 * appears to be in MiB.
 */
| static void program_total_memory_map(struct raminfo *info) |
| { |
| unsigned int tom, tolud, touud; |
| unsigned int quickpath_reserved; |
| unsigned int remap_base; |
| unsigned int uma_base_igd; |
| unsigned int uma_base_gtt; |
| unsigned int mmio_size; |
| int memory_remap; |
| unsigned int memory_map[8]; |
| int i; |
| unsigned int current_limit; |
| unsigned int tseg_base; |
| int uma_size_igd = 0, uma_size_gtt = 0; |
| |
| memset(memory_map, 0, sizeof(memory_map)); |
| |
/* Without UMA both stolen sizes stay 0. */
| if (info->uma_enabled) { |
| u16 t = pci_read_config16(NORTHBRIDGE, GGC); |
| gav(t); |
| const int uma_sizes_gtt[16] = |
| { 0, 1, 0, 2, 0, 0, 0, 0, 0, 2, 3, 4, 42, 42, 42, 42 }; |
| /* Igd memory */ |
| const int uma_sizes_igd[16] = { |
| 0, 0, 0, 0, 0, 32, 48, 64, 128, 256, 96, 160, 224, 352, |
| 256, 512 |
| }; |
| |
/* GGC bits 7:4 index the IGD table, bits 11:8 the GTT table. */
| uma_size_igd = uma_sizes_igd[(t >> 4) & 0xF]; |
| uma_size_gtt = uma_sizes_gtt[(t >> 8) & 0xF]; |
| } |
| |
| mmio_size = DEFAULT_PCI_MMIO_SIZE; |
| |
| tom = info->total_memory_mb; |
/* A total of exactly 4096 is reduced to 4032; the reason is not visible here. */
| if (tom == 4096) |
| tom = 4032; |
| touud = ALIGN_DOWN(tom - info->memory_reserved_for_heci_mb, 64); |
| tolud = ALIGN_DOWN(MIN(4096 - mmio_size + ALIGN_UP(uma_size_igd + uma_size_gtt, 64) |
| , touud), 64); |
| memory_remap = 0; |
/*
 * When more than 64 lies between TOLUD and TOUUD, that part is remapped
 * starting at max(4096, touud) and TOUUD is moved up accordingly.
 * remap_base is only assigned, and only read, on this path.
 */
| if (touud - tolud > 64) { |
| memory_remap = 1; |
| remap_base = MAX(4096, touud); |
| touud = touud - tolud + 4096; |
| } |
/* Bit 0 of a memory_map entry is copied into the SAD rule further down. */
| if (touud > 4096) |
| memory_map[2] = touud | 1; |
| quickpath_reserved = 0; |
| |
| u32 t = pci_read_config32(QPI_SAD, 0x68); |
| |
| gav(t); |
| |
/*
 * With bit 11 set, the reserved size is 1 << (index of the lowest set bit
 * of bits 31:20).
 */
| if (t & 0x800) { |
| u32 shift = t >> 20; |
| if (shift == 0) |
| die("Quickpath value is 0\n"); |
| quickpath_reserved = (u32)1 << find_lowest_bit_set32(shift); |
| } |
| |
/* The reservation is taken from TOUUD when remapping, otherwise from TSEG. */
| if (memory_remap) |
| touud -= quickpath_reserved; |
| |
/* Stacked downwards from TOLUD: IGD, then GTT, then TSEG. */
| uma_base_igd = tolud - uma_size_igd; |
| uma_base_gtt = uma_base_igd - uma_size_gtt; |
| tseg_base = ALIGN_DOWN(uma_base_gtt, 64) - (CONFIG_SMM_TSEG_SIZE >> 20); |
| if (!memory_remap) |
| tseg_base -= quickpath_reserved; |
| tseg_base = ALIGN_DOWN(tseg_base, 8); |
| |
| pci_write_config16(NORTHBRIDGE, TOLUD, tolud << 4); |
| pci_write_config16(NORTHBRIDGE, TOM, tom >> 6); |
| if (memory_remap) { |
| pci_write_config16(NORTHBRIDGE, REMAPBASE, remap_base >> 6); |
| pci_write_config16(NORTHBRIDGE, REMAPLIMIT, (touud - 64) >> 6); |
| } |
| pci_write_config16(NORTHBRIDGE, TOUUD, touud); |
| |
| if (info->uma_enabled) { |
| pci_write_config32(NORTHBRIDGE, IGD_BASE, uma_base_igd << 20); |
| pci_write_config32(NORTHBRIDGE, GTT_BASE, uma_base_gtt << 20); |
| } |
| pci_write_config32(NORTHBRIDGE, TSEG, tseg_base << 20); |
| |
/*
 * Each SAD rule gets the running maximum of the limits seen so far (minus
 * one, aligned down to 64), the entry's bit 0 and bit 1 always set.
 */
| current_limit = 0; |
| memory_map[0] = ALIGN_DOWN(uma_base_gtt, 64) | 1; |
| memory_map[1] = 4096; |
| for (i = 0; i < ARRAY_SIZE(memory_map); i++) { |
| current_limit = MAX(current_limit, memory_map[i] & ~1); |
| pci_write_config32(QPI_SAD, SAD_DRAM_RULE(i), |
| (memory_map[i] & 1) | ALIGN_DOWN(current_limit - |
| 1, 64) | 2); |
| pci_write_config32(QPI_SAD, SAD_INTERLEAVE_LIST(i), 0); |
| } |
| } |
| |
| static void collect_system_info(struct raminfo *info) |
| { |
| u32 capid0[3]; |
| int i; |
| unsigned int channel; |
| |
| for (i = 0; i < 3; i++) { |
| capid0[i] = pci_read_config32(NORTHBRIDGE, CAPID0 | (i << 2)); |
| printk(BIOS_DEBUG, "CAPID0[%d] = 0x%08x\n", i, capid0[i]); |
| } |
| info->revision = pci_read_config8(NORTHBRIDGE, PCI_REVISION_ID); |
| printk(BIOS_DEBUG, "Revision ID: 0x%x\n", info->revision); |
| printk(BIOS_DEBUG, "Device ID: 0x%x\n", pci_read_config16(NORTHBRIDGE, PCI_DEVICE_ID)); |
| |
| info->max_supported_clock_speed_index = (~capid0[1] & 7); |
| |
| if ((capid0[1] >> 11) & 1) |
| info->uma_enabled = 0; |
| else |
| gav(info->uma_enabled = |
| pci_read_config8(NORTHBRIDGE, DEVEN) & 8); |
| /* Unrecognised: [0000:fffd3d2d] 37f81.37f82 ! CPUID: eax: 00000001; ecx: 00000e00 => 00020655.00010800.029ae3ff.bfebfbff */ |
| info->silicon_revision = 0; |
| |
| if (capid0[2] & 2) { |
| info->silicon_revision = 0; |
| info->max_supported_clock_speed_index = 2; |
| for (channel = 0; channel < NUM_CHANNELS; channel++) |
| if (info->populated_ranks[channel][0][0] |
| && (info->spd[channel][0][MODULE_TYPE] & 0xf) == |
| 3) { |
| info->silicon_revision = 2; |
| info->max_supported_clock_speed_index = 1; |
| } |
| } else { |
| switch (((capid0[2] >> 18) & 1) + 2 * ((capid0[1] >> 3) & 1)) { |
| case 1: |
| case 2: |
| info->silicon_revision = 3; |
| break; |
| case 3: |
| info->silicon_revision = 0; |
| break; |
| case 0: |
| info->silicon_revision = 2; |
| break; |
| } |
| switch (pci_read_config16(NORTHBRIDGE, PCI_DEVICE_ID)) { |
| case 0x40: |
| info->silicon_revision = 0; |
| break; |
| case 0x48: |
| info->silicon_revision = 1; |
| break; |
| } |
| } |
| } |
| |
| static void write_training_data(struct raminfo *info) |
| { |
| int tm, channel, slot, rank, lane; |
| if (info->revision < 8) |
| return; |
| |
| for (tm = 0; tm < 4; tm++) |
| for (channel = 0; channel < NUM_CHANNELS; channel++) |
| for (slot = 0; slot < NUM_SLOTS; slot++) |
| for (rank = 0; rank < NUM_RANKS; rank++) |
| for (lane = 0; lane < 9; lane++) |
| write_500(info, channel, |
| info-> |
| cached_training-> |
| lane_timings[tm] |
| [channel][slot][rank] |
| [lane], |
| get_timing_register_addr |
| (lane, tm, slot, |
| rank), 9, 0); |
| write_1d0(info->cached_training->reg_178, 0x178, 7, 1); |
| write_1d0(info->cached_training->reg_10b, 0x10b, 6, 1); |
| } |
| |
| static void dump_timings(struct raminfo *info) |
| { |
| int channel, slot, rank, lane, i; |
| printk(RAM_SPEW, "Timings:\n"); |
| FOR_POPULATED_RANKS { |
| printk(RAM_SPEW, "channel %d, slot %d, rank %d\n", channel, |
| slot, rank); |
| for (lane = 0; lane < 9; lane++) { |
| printk(RAM_SPEW, "lane %d: ", lane); |
| for (i = 0; i < 4; i++) { |
| printk(RAM_SPEW, "%x (%x) ", |
| read_500(info, channel, |
| get_timing_register_addr |
| (lane, i, slot, rank), |
| 9), |
| info->training. |
| lane_timings[i][channel][slot][rank] |
| [lane]); |
| } |
| printk(RAM_SPEW, "\n"); |
| } |
| } |
| printk(RAM_SPEW, "[178] = %x (%x)\n", read_1d0(0x178, 7), |
| info->training.reg_178); |
| printk(RAM_SPEW, "[10b] = %x (%x)\n", read_1d0(0x10b, 6), |
| info->training.reg_10b); |
| } |
| |
| /* Read timings and other registers that need to be restored verbatim and |
| put them to CBMEM. |
| */ |
| static void save_timings(struct raminfo *info) |
| { |
| struct ram_training train; |
| int channel, slot, rank, lane, i; |
| |
| train = info->training; |
| FOR_POPULATED_RANKS for (lane = 0; lane < 9; lane++) |
| for (i = 0; i < 4; i++) |
| train.lane_timings[i][channel][slot][rank][lane] = |
| read_500(info, channel, |
| get_timing_register_addr(lane, i, slot, |
| rank), 9); |
| train.reg_178 = read_1d0(0x178, 7); |
| train.reg_10b = read_1d0(0x10b, 6); |
| |
| for (channel = 0; channel < NUM_CHANNELS; channel++) { |
| u32 reg32; |
| reg32 = mchbar_read32((channel << 10) + 0x274); |
| train.reg274265[channel][0] = reg32 >> 16; |
| train.reg274265[channel][1] = reg32 & 0xffff; |
| train.reg274265[channel][2] = mchbar_read16((channel << 10) + 0x265) >> 8; |
| } |
| train.reg2ca9_bit0 = mchbar_read8(0x2ca9) & 1; |
| train.reg_6dc = mchbar_read32(0x6dc); |
| train.reg_6e8 = mchbar_read32(0x6e8); |
| |
| printk(RAM_SPEW, "[6dc] = %x\n", train.reg_6dc); |
| printk(RAM_SPEW, "[6e8] = %x\n", train.reg_6e8); |
| |
| /* Save the MRC S3 restore data to cbmem */ |
| mrc_cache_stash_data(MRC_TRAINING_DATA, MRC_CACHE_VERSION, |
| &train, sizeof(train)); |
| } |
| |
| static const struct ram_training *get_cached_training(void) |
| { |
| return mrc_cache_current_mmap_leak(MRC_TRAINING_DATA, |
| MRC_CACHE_VERSION, |
| NULL); |
| } |
| |
| static int have_match_ranks(struct raminfo *info, int channel, int ranks) |
| { |
| int ranks_in_channel; |
| ranks_in_channel = info->populated_ranks[channel][0][0] |
| + info->populated_ranks[channel][0][1] |
| + info->populated_ranks[channel][1][0] |
| + info->populated_ranks[channel][1][1]; |
| |
| /* empty channel */ |
| if (ranks_in_channel == 0) |
| return 1; |
| |
| if (ranks_in_channel != ranks) |
| return 0; |
| /* single slot */ |
| if (info->populated_ranks[channel][0][0] != |
| info->populated_ranks[channel][1][0]) |
| return 1; |
| if (info->populated_ranks[channel][0][1] != |
| info->populated_ranks[channel][1][1]) |
| return 1; |
| if (info->is_x16_module[channel][0] != info->is_x16_module[channel][1]) |
| return 0; |
| if (info->density[channel][0] != info->density[channel][1]) |
| return 0; |
| return 1; |
| } |
| |
| static void read_4090(struct raminfo *info) |
| { |
| int i, channel, slot, rank, lane; |
| for (i = 0; i < 2; i++) |
| for (slot = 0; slot < NUM_SLOTS; slot++) |
| for (rank = 0; rank < NUM_RANKS; rank++) |
| for (lane = 0; lane < 9; lane++) |
| info->training. |
| lane_timings[0][i][slot][rank][lane] |
| = 32; |
| |
| for (i = 1; i < 4; i++) |
| for (channel = 0; channel < NUM_CHANNELS; channel++) |
| for (slot = 0; slot < NUM_SLOTS; slot++) |
| for (rank = 0; rank < NUM_RANKS; rank++) |
| for (lane = 0; lane < 9; lane++) { |
| info->training. |
| lane_timings[i][channel] |
| [slot][rank][lane] = |
| read_500(info, channel, |
| get_timing_register_addr |
| (lane, i, slot, |
| rank), 9) |
| + (i == 1) * 11; // !!!! |
| } |
| |
| } |
| |
| static u32 get_etalon2(int flip, u32 addr) |
| { |
| const u16 invmask[] = { |
| 0xaaaa, 0x6db6, 0x4924, 0xeeee, 0xcccc, 0x8888, 0x7bde, 0x739c, |
| 0x6318, 0x4210, 0xefbe, 0xcf3c, 0x8e38, 0x0c30, 0x0820 |
| }; |
| u32 ret; |
| u32 comp4 = addr / 480; |
| addr %= 480; |
| u32 comp1 = addr & 0xf; |
| u32 comp2 = (addr >> 4) & 1; |
| u32 comp3 = addr >> 5; |
| |
| if (comp4) |
| ret = 0x1010101 << (comp4 - 1); |
| else |
| ret = 0; |
| if (flip ^ (((invmask[comp3] >> comp1) ^ comp2) & 1)) |
| ret = ~ret; |
| |
| return ret; |
| } |
| |
| static void disable_cache_region(void) |
| { |
| msr_t msr = {.lo = 0, .hi = 0 }; |
| |
| wrmsr(MTRR_PHYS_BASE(3), msr); |
| wrmsr(MTRR_PHYS_MASK(3), msr); |
| } |
| |
| static void enable_cache_region(unsigned int base, unsigned int size) |
| { |
| msr_t msr; |
| msr.lo = base | MTRR_TYPE_WRPROT; |
| msr.hi = 0; |
| wrmsr(MTRR_PHYS_BASE(3), msr); |
| msr.lo = ((~(ALIGN_DOWN(size + 4096, 4096) - 1) | MTRR_DEF_TYPE_EN) |
| & 0xffffffff); |
| msr.hi = 0x0000000f; |
| wrmsr(MTRR_PHYS_MASK(3), msr); |
| } |
| |
| static void flush_cache(u32 start, u32 size) |
| { |
| u32 end; |
| u32 addr; |
| |
| end = start + (ALIGN_DOWN(size + 4096, 4096)); |
| for (addr = start; addr < end; addr += 64) |
| clflush((void *)(uintptr_t)addr); |
| } |
| |
| static void clear_errors(void) |
| { |
| pci_write_config8(NORTHBRIDGE, 0xc0, 0x01); |
| } |
| |
| static void write_testing(struct raminfo *info, int totalrank, int flip) |
| { |
| int nwrites = 0; |
| /* in 8-byte units. */ |
| u32 offset; |
| u8 *base; |
| |
| base = (u8 *)(uintptr_t)(totalrank << 28); |
| for (offset = 0; offset < 9 * 480; offset += 2) { |
| write32(base + offset * 8, get_etalon2(flip, offset)); |
| write32(base + offset * 8 + 4, get_etalon2(flip, offset)); |
| write32(base + offset * 8 + 8, get_etalon2(flip, offset + 1)); |
| write32(base + offset * 8 + 12, get_etalon2(flip, offset + 1)); |
| nwrites += 4; |
| if (nwrites >= 320) { |
| clear_errors(); |
| nwrites = 0; |
| } |
| } |
| } |
| |
| static u8 check_testing(struct raminfo *info, u8 total_rank, int flip) |
| { |
| u8 failmask = 0; |
| int i; |
| int comp1, comp2, comp3; |
| u32 failxor[2] = { 0, 0 }; |
| |
| enable_cache_region((total_rank << 28), 1728 * 5 * 4); |
| |
| for (comp3 = 0; comp3 < 9 && failmask != 0xff; comp3++) { |
| for (comp1 = 0; comp1 < 4; comp1++) |
| for (comp2 = 0; comp2 < 60; comp2++) { |
| u32 re[4]; |
| u32 curroffset = |
| comp3 * 8 * 60 + 2 * comp1 + 8 * comp2; |
| read128((total_rank << 28) | (curroffset << 3), |
| (u64 *)re); |
| failxor[0] |= |
| get_etalon2(flip, curroffset) ^ re[0]; |
| failxor[1] |= |
| get_etalon2(flip, curroffset) ^ re[1]; |
| failxor[0] |= |
| get_etalon2(flip, curroffset | 1) ^ re[2]; |
| failxor[1] |= |
| get_etalon2(flip, curroffset | 1) ^ re[3]; |
| } |
| for (i = 0; i < 8; i++) |
| if ((0xff << (8 * (i % 4))) & failxor[i / 4]) |
| failmask |= 1 << i; |
| } |
| disable_cache_region(); |
| flush_cache((total_rank << 28), 1728 * 5 * 4); |
| return failmask; |
| } |
| |
| const u32 seed1[0x18] = { |
| 0x3a9d5ab5, 0x576cb65b, 0x555773b6, 0x2ab772ee, |
| 0x555556ee, 0x3a9d5ab5, 0x576cb65b, 0x555773b6, |
| 0x2ab772ee, 0x555556ee, 0x5155a555, 0x5155a555, |
| 0x5155a555, 0x5155a555, 0x3a9d5ab5, 0x576cb65b, |
| 0x555773b6, 0x2ab772ee, 0x555556ee, 0x55d6b4a5, |
| 0x366d6b3a, 0x2ae5ddbb, 0x3b9ddbb7, 0x55d6b4a5, |
| }; |
| |
| static u32 get_seed2(int a, int b) |
| { |
| const u32 seed2[5] = { |
| 0x55555555, 0x33333333, 0x2e555a55, 0x55555555, |
| 0x5b6db6db, |
| }; |
| u32 r; |
| r = seed2[(a + (a >= 10)) / 5]; |
| return b ? ~r : r; |
| } |
| |
| static int make_shift(int comp2, int comp5, int x) |
| { |
| const u8 seed3[32] = { |
| 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 0x00, 0x00, 0x38, 0x1c, 0x3c, 0x18, 0x38, 0x38, |
| 0x38, 0x38, 0x38, 0x38, 0x0f, 0x0f, 0x0f, 0x0f, |
| 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, |
| }; |
| |
| return (comp2 - ((seed3[comp5] >> (x & 7)) & 1)) & 0x1f; |
| } |
| |
| static u32 get_etalon(int flip, u32 addr) |
| { |
| u32 mask_byte = 0; |
| int comp1 = (addr >> 1) & 1; |
| int comp2 = (addr >> 3) & 0x1f; |
| int comp3 = (addr >> 8) & 0xf; |
| int comp4 = (addr >> 12) & 0xf; |
| int comp5 = (addr >> 16) & 0x1f; |
| u32 mask_bit = ~(0x10001 << comp3); |
| u32 part1; |
| u32 part2; |
| int byte; |
| |
| part2 = |
| ((seed1[comp5] >> |
| make_shift(comp2, comp5, |
| (comp3 >> 3) | (comp1 << 2) | 2)) & 1) ^ flip; |
| part1 = |
| ((seed1[comp5] >> |
| make_shift(comp2, comp5, |
| (comp3 >> 3) | (comp1 << 2) | 0)) & 1) ^ flip; |
| |
| for (byte = 0; byte < 4; byte++) |
| if ((get_seed2(comp5, comp4) >> |
| make_shift(comp2, comp5, (byte | (comp1 << 2)))) & 1) |
| mask_byte |= 0xff << (8 * byte); |
| |
| return (mask_bit & mask_byte) | (part1 << comp3) | (part2 << |
| (comp3 + 16)); |
| } |
| |
| static void |
| write_testing_type2(struct raminfo *info, u8 totalrank, u8 region, u8 block, |
| char flip) |
| { |
| int i; |
| for (i = 0; i < 2048; i++) |
| write32p((totalrank << 28) | (region << 25) | (block << 16) | |
| (i << 2), get_etalon(flip, (block << 16) | (i << 2))); |
| } |
| |
| static u8 |
| check_testing_type2(struct raminfo *info, u8 totalrank, u8 region, u8 block, |
| char flip) |
| { |
| u8 failmask = 0; |
| u32 failxor[2]; |
| int i; |
| int comp1, comp2, comp3; |
| |
| failxor[0] = 0; |
| failxor[1] = 0; |
| |
| enable_cache_region(totalrank << 28, 134217728); |
| for (comp3 = 0; comp3 < 2 && failmask != 0xff; comp3++) { |
| for (comp1 = 0; comp1 < 16; comp1++) |
| for (comp2 = 0; comp2 < 64; comp2++) { |
| u32 addr = |
| (totalrank << 28) | (region << 25) | (block |
| << 16) |
| | (comp3 << 12) | (comp2 << 6) | (comp1 << |
| 2); |
| failxor[comp1 & 1] |= |
| read32p(addr) ^ get_etalon(flip, addr); |
| } |
| for (i = 0; i < 8; i++) |
| if ((0xff << (8 * (i % 4))) & failxor[i / 4]) |
| failmask |= 1 << i; |
| } |
| disable_cache_region(); |
| flush_cache((totalrank << 28) | (region << 25) | (block << 16), 16384); |
| return failmask; |
| } |
| |
| static int check_bounded(unsigned short *vals, u16 bound) |
| { |
| int i; |
| |
| for (i = 0; i < 8; i++) |
| if (vals[i] < bound) |
| return 0; |
| return 1; |
| } |
| |
/* Per-lane search state, advanced by do_fsm(). */
enum state {
	/* No passing value seen yet, or the passing run was interrupted. */
	BEFORE_USABLE = 0,
	/* Inside a run of passing values shorter than the required margin. */
	AT_USABLE = 1,
	/* The run reached the margin; the lower bound has been recorded. */
	AT_MARGIN = 2,
	/* The upper bound has been recorded as well. */
	COMPLETE = 3,
};
| |
| static int validate_state(enum state *in) |
| { |
| int i; |
| for (i = 0; i < 8; i++) |
| if (in[i] != COMPLETE) |
| return 0; |
| return 1; |
| } |
| |
| static void |
| do_fsm(enum state *state, u16 *counter, |
| u8 fail_mask, int margin, int uplimit, |
| u8 *res_low, u8 *res_high, u8 val) |
| { |
| int lane; |
| |
| for (lane = 0; lane < 8; lane++) { |
| int is_fail = (fail_mask >> lane) & 1; |
| switch (state[lane]) { |
| case BEFORE_USABLE: |
| if (!is_fail) { |
| counter[lane] = 1; |
| state[lane] = AT_USABLE; |
| break; |
| } |
| counter[lane] = 0; |
| state[lane] = BEFORE_USABLE; |
| break; |
| case AT_USABLE: |
| if (!is_fail) { |
| ++counter[lane]; |
| if (counter[lane] >= margin) { |
| state[lane] = AT_MARGIN; |
| res_low[lane] = val - margin + 1; |
| break; |
| } |
| state[lane] = 1; |
| break; |
| } |
| counter[lane] = 0; |
| state[lane] = BEFORE_USABLE; |
| break; |
| case AT_MARGIN: |
| if (is_fail) { |
| state[lane] = COMPLETE; |
| res_high[lane] = val - 1; |
| } else { |
| counter[lane]++; |
| state[lane] = AT_MARGIN; |
| if (val == uplimit) { |
| state[lane] = COMPLETE; |
| res_high[lane] = uplimit; |
| } |
| } |
| break; |
| case COMPLETE: |
| break; |
| } |
| } |
| } |
| |
| static void |
| train_ram_at_178(struct raminfo *info, u8 channel, int slot, int rank, |
| u8 total_rank, u8 reg_178, int first_run, int niter, |
| timing_bounds_t * timings) |
| { |
| int lane; |
| enum state state[8]; |
| u16 count[8]; |
| u8 lower_usable[8]; |
| u8 upper_usable[8]; |
| unsigned short num_successfully_checked[8]; |
| u8 reg1b3; |
| int i; |
| |
| for (i = 0; i < 8; i++) |
| state[i] = BEFORE_USABLE; |
| |
| if (!first_run) { |
| int is_all_ok = 1; |
| for (lane = 0; lane < 8; lane++) |
| if (timings[reg_178][channel][slot][rank][lane]. |
| smallest == |
| timings[reg_178][channel][slot][rank][lane]. |
| largest) { |
| timings[reg_178][channel][slot][rank][lane]. |
| smallest = 0; |
| timings[reg_178][channel][slot][rank][lane]. |
| largest = 0; |
| is_all_ok = 0; |
| } |
| if (is_all_ok) { |
| for (i = 0; i < 8; i++) |
| state[i] = COMPLETE; |
| } |
| } |
| |
| for (reg1b3 = 0; reg1b3 < 0x30 && !validate_state(state); reg1b3++) { |
| u8 failmask = 0; |
| write_1d0(reg1b3 ^ 32, 0x1b3, 6, 1); |
| write_1d0(reg1b3 ^ 32, 0x1a3, 6, 1); |
| failmask = check_testing(info, total_rank, 0); |
| mchbar_setbits32(0xfb0, 3 << 16); |
| do_fsm(state, count, failmask, 5, 47, lower_usable, |
| upper_usable, reg1b3); |
| } |
| |
| if (reg1b3) { |
| write_1d0(0, 0x1b3, 6, 1); |
| write_1d0(0, 0x1a3, 6, 1); |
| for (lane = 0; lane < 8; lane++) { |
| if (state[lane] == COMPLETE) { |
| timings[reg_178][channel][slot][rank][lane]. |
| smallest = |
| lower_usable[lane] + |
| (info->training. |
| lane_timings[0][channel][slot][rank][lane] |
| & 0x3F) - 32; |
| timings[reg_178][channel][slot][rank][lane]. |
| largest = |
| upper_usable[lane] + |
| (info->training. |
| lane_timings[0][channel][slot][rank][lane] |
| & 0x3F) - 32; |
| } |
| } |
| } |
| |
| if (!first_run) { |
| for (lane = 0; lane < 8; lane++) |
| if (state[lane] == COMPLETE) { |
| write_500(info, channel, |
| timings[reg_178][channel][slot][rank] |
| [lane].smallest, |
| get_timing_register_addr(lane, 0, |
| slot, rank), |
| 9, 1); |
| write_500(info, channel, |
| timings[reg_178][channel][slot][rank] |
| [lane].smallest + |
| info->training. |
| lane_timings[1][channel][slot][rank] |
| [lane] |
| - |
| info->training. |
| lane_timings[0][channel][slot][rank] |
| [lane], get_timing_register_addr(lane, |
| 1, |
| slot, |
| rank), |
| 9, 1); |
| num_successfully_checked[lane] = 0; |
| } else |
| num_successfully_checked[lane] = -1; |
| |
| do { |
| u8 failmask = 0; |
| for (i = 0; i < niter; i++) { |
| if (failmask == 0xFF) |
| break; |
| failmask |= |
| check_testing_type2(info, total_rank, 2, i, |
| 0); |
| failmask |= |
| check_testing_type2(info, total_rank, 3, i, |
| 1); |
| } |
| mchbar_setbits32(0xfb0, 3 << 16); |
| for (lane = 0; lane < 8; lane++) |
| if (num_successfully_checked[lane] != 0xffff) { |
| if ((1 << lane) & failmask) { |
| if (timings[reg_178][channel] |
| [slot][rank][lane]. |
| largest <= |
| timings[reg_178][channel] |
| [slot][rank][lane].smallest) |
| num_successfully_checked |
| [lane] = -1; |
| else { |
| num_successfully_checked |
| [lane] = 0; |
| timings[reg_178] |
| [channel][slot] |
| [rank][lane]. |
| smallest++; |
| write_500(info, channel, |
| timings |
| [reg_178] |
| [channel] |
| [slot][rank] |
| [lane]. |
| smallest, |
| get_timing_register_addr |
| (lane, 0, |
| slot, rank), |
| 9, 1); |
| write_500(info, channel, |
| timings |
| [reg_178] |
| [channel] |
| [slot][rank] |
| [lane]. |
| smallest + |
| info-> |
| training. |
| lane_timings |
| [1][channel] |
| [slot][rank] |
| [lane] |
| - |
| info-> |
| training. |
| lane_timings |
| [0][channel] |
| [slot][rank] |
| [lane], |
| get_timing_register_addr |
| (lane, 1, |
| slot, rank), |
| 9, 1); |
| } |
| } else |
| num_successfully_checked[lane] |
| ++; |
| } |
| } |
| while (!check_bounded(num_successfully_checked, 2)) |
| ; |
| |
| for (lane = 0; lane < 8; lane++) |
| if (state[lane] == COMPLETE) { |
| write_500(info, channel, |
| timings[reg_178][channel][slot][rank] |
| [lane].largest, |
| get_timing_register_addr(lane, 0, |
| slot, rank), |
| 9, 1); |
| write_500(info, channel, |
| timings[reg_178][channel][slot][rank] |
| [lane].largest + |
| info->training. |
| lane_timings[1][channel][slot][rank] |
| [lane] |
| - |
| info->training. |
| lane_timings[0][channel][slot][rank] |
| [lane], get_timing_register_addr(lane, |
| 1, |
| slot, |
| rank), |
| 9, 1); |
| num_successfully_checked[lane] = 0; |
| } else |
| num_successfully_checked[lane] = -1; |
| |
| do { |
| int failmask = 0; |
| for (i = 0; i < niter; i++) { |
| if (failmask == 0xFF) |
| break; |
| failmask |= |
| check_testing_type2(info, total_rank, 2, i, |
| 0); |
| failmask |= |
| check_testing_type2(info, total_rank, 3, i, |
| 1); |
| } |
| |
| mchbar_setbits32(0xfb0, 3 << 16); |
| for (lane = 0; lane < 8; lane++) { |
| if (num_successfully_checked[lane] != 0xffff) { |
| if ((1 << lane) & failmask) { |
| if (timings[reg_178][channel] |
| [slot][rank][lane]. |
| largest <= |
| timings[reg_178][channel] |
| [slot][rank][lane]. |
| smallest) { |
| num_successfully_checked |
| [lane] = -1; |
| } else { |
| num_successfully_checked |
| [lane] = 0; |
| timings[reg_178] |
| [channel][slot] |
| [rank][lane]. |
| largest--; |
| write_500(info, channel, |
| timings |
| [reg_178] |
| [channel] |
| [slot][rank] |
| [lane]. |
| largest, |
| get_timing_register_addr |
| (lane, 0, |
| slot, rank), |
| 9, 1); |
| write_500(info, channel, |
| timings |
| [reg_178] |
| [channel] |
| [slot][rank] |
| [lane]. |
| largest + |
| info-> |
| training. |
| lane_timings |
| [1][channel] |
| [slot][rank] |
| [lane] |
| - |
| info-> |
| training. |
| lane_timings |
| [0][channel] |
| [slot][rank] |
| [lane], |
| get_timing_register_addr |
| (lane, 1, |
| slot, rank), |
| 9, 1); |
| } |
| } else |
| num_successfully_checked[lane] |
| ++; |
| } |
| } |
| } |
| while (!check_bounded(num_successfully_checked, 3)) |
| ; |
| |
| for (lane = 0; lane < 8; lane++) { |
| write_500(info, channel, |
| info->training. |
| lane_timings[0][channel][slot][rank][lane], |
| get_timing_register_addr(lane, 0, slot, rank), |
| 9, 1); |
| write_500(info, channel, |
| info->training. |
| lane_timings[1][channel][slot][rank][lane], |
| get_timing_register_addr(lane, 1, slot, rank), |
| 9, 1); |
| if (timings[reg_178][channel][slot][rank][lane]. |
| largest <= |
| timings[reg_178][channel][slot][rank][lane]. |
| smallest) { |
| timings[reg_178][channel][slot][rank][lane]. |
| largest = 0; |
| timings[reg_178][channel][slot][rank][lane]. |
| smallest = 0; |
| } |
| } |
| } |
| } |
| |
| static void set_10b(struct raminfo *info, u8 val) |
| { |
| int channel; |
| int slot, rank; |
| int lane; |
| |
| if (read_1d0(0x10b, 6) == val) |
| return; |
| |
| write_1d0(val, 0x10b, 6, 1); |
| |
| FOR_POPULATED_RANKS_BACKWARDS for (lane = 0; lane < 9; lane++) { |
| u16 reg_500; |
| reg_500 = read_500(info, channel, |
| get_timing_register_addr(lane, 0, slot, |
| rank), 9); |
| if (val == 1) { |
| if (lut16[info->clock_speed_index] <= reg_500) |
| reg_500 -= lut16[info->clock_speed_index]; |
| else |
| reg_500 = 0; |
| } else { |
| reg_500 += lut16[info->clock_speed_index]; |
| } |
| write_500(info, channel, reg_500, |
| get_timing_register_addr(lane, 0, slot, rank), 9, 1); |
| } |
| } |
| |
| static void set_ecc(int onoff) |
| { |
| int channel; |
| for (channel = 0; channel < NUM_CHANNELS; channel++) { |
| u8 t; |
| t = mchbar_read8((channel << 10) + 0x5f8); |
| if (onoff) |
| t |= 1; |
| else |
| t &= ~1; |
| mchbar_write8((channel << 10) + 0x5f8, t); |
| } |
| } |
| |
| static void set_178(u8 val) |
| { |
| if (val >= 31) |
| val = val - 31; |
| else |
| val = 63 - val; |
| |
| write_1d0(2 * val, 0x178, 7, 1); |
| } |
| |
| static void |
| write_500_timings_type(struct raminfo *info, int channel, int slot, int rank, |
| int type) |
| { |
| int lane; |
| |
| for (lane = 0; lane < 8; lane++) |
| write_500(info, channel, |
| info->training. |
| lane_timings[type][channel][slot][rank][lane], |
| get_timing_register_addr(lane, type, slot, rank), 9, |
| 0); |
| } |
| |
| static void |
| try_timing_offsets(struct raminfo *info, int channel, |
| int slot, int rank, int totalrank) |
| { |
| u16 count[8]; |
| enum state state[8]; |
| u8 lower_usable[8], upper_usable[8]; |
| int lane; |
| int i; |
| int flip = 1; |
| int timing_offset; |
| |
| for (i = 0; i < 8; i++) |
| state[i] = BEFORE_USABLE; |
| |
| memset(count, 0, sizeof(count)); |
| |
| for (lane = 0; lane < 8; lane++) |
| write_500(info, channel, |
| info->training. |
| lane_timings[2][channel][slot][rank][lane] + 32, |
| get_timing_register_addr(lane, 3, slot, rank), 9, 1); |
| |
| for (timing_offset = 0; !validate_state(state) && timing_offset < 64; |
| timing_offset++) { |
| u8 failmask; |
| write_1d0(timing_offset ^ 32, 0x1bb, 6, 1); |
| failmask = 0; |
| for (i = 0; i < 2 && failmask != 0xff; i++) { |
| flip = !flip; |
| write_testing(info, totalrank, flip); |
| failmask |= check_testing(info, totalrank, flip); |
| } |
| do_fsm(state, count, failmask, 10, 63, lower_usable, |
| upper_usable, timing_offset); |
| } |
| write_1d0(0, 0x1bb, 6, 1); |
| dump_timings(info); |
| if (!validate_state(state)) |
| die("Couldn't discover DRAM timings (1)\n"); |
| |
| for (lane = 0; lane < 8; lane++) { |
| u8 bias = 0; |
| |
| if (info->silicon_revision) { |
| int usable_length; |
| |
| usable_length = upper_usable[lane] - lower_usable[lane]; |
| if (usable_length >= 20) { |
| bias = usable_length / 2 - 10; |
| if (bias >= 2) |
| bias = 2; |
| } |
| } |
| write_500(info, channel, |
| info->training. |
| lane_timings[2][channel][slot][rank][lane] + |
| (upper_usable[lane] + lower_usable[lane]) / 2 - bias, |
| get_timing_register_addr(lane, 3, slot, rank), 9, 1); |
| info->training.timing2_bounds[channel][slot][rank][lane][0] = |
| info->training.lane_timings[2][channel][slot][rank][lane] + |
| lower_usable[lane]; |
| info->training.timing2_bounds[channel][slot][rank][lane][1] = |
| info->training.lane_timings[2][channel][slot][rank][lane] + |
| upper_usable[lane]; |
| info->training.timing2_offset[channel][slot][rank][lane] = |
| info->training.lane_timings[2][channel][slot][rank][lane]; |
| } |
| } |
| |
| static u8 |
| choose_training(struct raminfo *info, int channel, int slot, int rank, |
| int lane, timing_bounds_t * timings, u8 center_178) |
| { |
| u16 central_weight; |
| u16 side_weight; |
| unsigned int sum = 0, count = 0; |
| u8 span; |
| u8 lower_margin, upper_margin; |
| u8 reg_178; |
| u8 result; |
| |
| span = 12; |
| central_weight = 20; |
| side_weight = 20; |
| if (info->silicon_revision == 1 && channel == 1) { |
| central_weight = 5; |
| side_weight = 20; |
| if ((info-> |
| populated_ranks_mask[1] ^ (info-> |
| populated_ranks_mask[1] >> 2)) & |
| 1) |
| span = 18; |
| } |
| if ((info->populated_ranks_mask[0] & 5) == 5) { |
| central_weight = 20; |
| side_weight = 20; |
| } |
| if (info->clock_speed_index >= 2 |
| && (info->populated_ranks_mask[0] & 5) == 5 && slot == 1) { |
| if (info->silicon_revision == 1) { |
| switch (channel) { |
| case 0: |
| if (lane == 1) { |
| central_weight = 10; |
| side_weight = 20; |
| } |
| break; |
| case 1: |
| if (lane == 6) { |
| side_weight = 5; |
| central_weight = 20; |
| } |
| break; |
| } |
| } |
| if (info->silicon_revision == 0 && channel == 0 && lane == 0) { |
| side_weight = 5; |
| central_weight = 20; |
| } |
| } |
| for (reg_178 = center_178 - span; reg_178 <= center_178 + span; |
| reg_178 += span) { |
| u8 smallest; |
| u8 largest; |
| largest = timings[reg_178][channel][slot][rank][lane].largest; |
| smallest = timings[reg_178][channel][slot][rank][lane].smallest; |
| if (largest - smallest + 1 >= 5) { |
| unsigned int weight; |
| if (reg_178 == center_178) |
| weight = central_weight; |
| else |
| weight = side_weight; |
| sum += weight * (largest + smallest); |
| count += weight; |
| } |
| } |
| dump_timings(info); |
| if (count == 0) |
| die("Couldn't discover DRAM timings (2)\n"); |
| result = sum / (2 * count); |
| lower_margin = |
| result - timings[center_178][channel][slot][rank][lane].smallest; |
| upper_margin = |
| timings[center_178][channel][slot][rank][lane].largest - result; |
| if (upper_margin < 10 && lower_margin > 10) |
| result -= MIN(lower_margin - 10, 10 - upper_margin); |
| if (upper_margin > 10 && lower_margin < 10) |
| result += MIN(upper_margin - 10, 10 - lower_margin); |
| return result; |
| } |
| |
| #define STANDARD_MIN_MARGIN 5 |
| |
| static u8 choose_reg178(struct raminfo *info, timing_bounds_t * timings) |
| { |
| u16 margin[64]; |
| int lane, rank, slot, channel; |
| u8 reg178; |
| int count = 0, sum = 0; |
| |
| for (reg178 = reg178_min[info->clock_speed_index]; |
| reg178 < reg178_max[info->clock_speed_index]; |
| reg178 += reg178_step[info->clock_speed_index]) { |
| margin[reg178] = -1; |
| FOR_POPULATED_RANKS_BACKWARDS for (lane = 0; lane < 8; lane++) { |
| int curmargin = |
| timings[reg178][channel][slot][rank][lane].largest - |
| timings[reg178][channel][slot][rank][lane]. |
| smallest + 1; |
| if (curmargin < margin[reg178]) |
| margin[reg178] = curmargin; |
| } |
| if (margin[reg178] >= STANDARD_MIN_MARGIN) { |
| u16 weight; |
| weight = margin[reg178] - STANDARD_MIN_MARGIN; |
| sum += weight * reg178; |
| count += weight; |
| } |
| } |
| dump_timings(info); |
| if (count == 0) |
| die("Couldn't discover DRAM timings (3)\n"); |
| |
| u8 threshold; |
| |
| for (threshold = 30; threshold >= 5; threshold--) { |
| int usable_length = 0; |
| int smallest_fount = 0; |
| for (reg178 = reg178_min[info->clock_speed_index]; |
| reg178 < reg178_max[info->clock_speed_index]; |
| reg178 += reg178_step[info->clock_speed_index]) |
| if (margin[reg178] >= threshold) { |
| usable_length += |
| reg178_step[info->clock_speed_index]; |
| info->training.reg178_largest = |
| reg178 - |
| 2 * reg178_step[info->clock_speed_index]; |
| |
| if (!smallest_fount) { |
| smallest_fount = 1; |
| info->training.reg178_smallest = |
| reg178 + |
| reg178_step[info-> |
| clock_speed_index]; |
| } |
| } |
| if (usable_length >= 0x21) |
| break; |
| } |
| |
| return sum / count; |
| } |
| |
| static int check_cached_sanity(struct raminfo *info) |
| { |
| int lane; |
| int slot, rank; |
| int channel; |
| |
| if (!info->cached_training) |
| return 0; |
| |
| for (channel = 0; channel < NUM_CHANNELS; channel++) |
| for (slot = 0; slot < NUM_SLOTS; slot++) |
| for (rank = 0; rank < NUM_RANKS; rank++) |
| for (lane = 0; lane < 8 + info->use_ecc; lane++) { |
| u16 cached_value, estimation_value; |
| cached_value = |
| info->cached_training-> |
| lane_timings[1][channel][slot][rank] |
| [lane]; |
| if (cached_value >= 0x18 |
| && cached_value <= 0x1E7) { |
| estimation_value = |
| info->training. |
| lane_timings[1][channel] |
| [slot][rank][lane]; |
| if (estimation_value < |
| cached_value - 24) |
| return 0; |
| if (estimation_value > |
| cached_value + 24) |
| return 0; |
| } |
| } |
| return 1; |
| } |
| |
| static int try_cached_training(struct raminfo *info) |
| { |
| u8 saved_243[2]; |
| u8 tm; |
| |
| int channel, slot, rank, lane; |
| int flip = 1; |
| int i, j; |
| |
| if (!check_cached_sanity(info)) |
| return 0; |
| |
| info->training.reg178_center = info->cached_training->reg178_center; |
| info->training.reg178_smallest = info->cached_training->reg178_smallest; |
| info->training.reg178_largest = info->cached_training->reg178_largest; |
| memcpy(&info->training.timing_bounds, |
| &info->cached_training->timing_bounds, |
| sizeof(info->training.timing_bounds)); |
| memcpy(&info->training.timing_offset, |
| &info->cached_training->timing_offset, |
| sizeof(info->training.timing_offset)); |
| |
| write_1d0(2, 0x142, 3, 1); |
| saved_243[0] = mchbar_read8(0x243); |
| saved_243[1] = mchbar_read8(0x643); |
| mchbar_write8(0x243, saved_243[0] | 2); |
| mchbar_write8(0x643, saved_243[1] | 2); |
| set_ecc(0); |
| pci_write_config16(NORTHBRIDGE, 0xc8, 3); |
| if (read_1d0(0x10b, 6) & 1) |
| set_10b(info, 0); |
| for (tm = 0; tm < 2; tm++) { |
| int totalrank; |
| |
| set_178(tm ? info->cached_training->reg178_largest : info-> |
| cached_training->reg178_smallest); |
| |
| totalrank = 0; |
| /* Check timing ranges. With i == 0 we check smallest one and with |
| i == 1 the largest bound. With j == 0 we check that on the bound |
| it still works whereas with j == 1 we check that just outside of |
| bound we fail. |
| */ |
| FOR_POPULATED_RANKS_BACKWARDS { |
| for (i = 0; i < 2; i++) { |
| for (lane = 0; lane < 8; lane++) { |
| write_500(info, channel, |
| info->cached_training-> |
| timing2_bounds[channel][slot] |
| [rank][lane][i], |
| get_timing_register_addr(lane, |
| 3, |
| slot, |
| rank), |
| 9, 1); |
| |
| if (!i) |
| write_500(info, channel, |
| info-> |
| cached_training-> |
| timing2_offset |
| [channel][slot][rank] |
| [lane], |
| get_timing_register_addr |
| (lane, 2, slot, rank), |
| 9, 1); |
| write_500(info, channel, |
| i ? info->cached_training-> |
| timing_bounds[tm][channel] |
| [slot][rank][lane]. |
| largest : info-> |
| cached_training-> |
| timing_bounds[tm][channel] |
| [slot][rank][lane].smallest, |
| get_timing_register_addr(lane, |
| 0, |
| slot, |
| rank), |
| 9, 1); |
| write_500(info, channel, |
| info->cached_training-> |
| timing_offset[channel][slot] |
| [rank][lane] + |
| (i ? info->cached_training-> |
| timing_bounds[tm][channel] |
| [slot][rank][lane]. |
| largest : info-> |
| cached_training-> |
| timing_bounds[tm][channel] |
| [slot][rank][lane]. |
| smallest) - 64, |
| get_timing_register_addr(lane, |
| 1, |
| slot, |
| rank), |
| 9, 1); |
| } |
| for (j = 0; j < 2; j++) { |
| u8 failmask; |
| u8 expected_failmask; |
| char reg1b3; |
| |
| reg1b3 = (j == 1) + 4; |
| reg1b3 = |
| j == i ? reg1b3 : (-reg1b3) & 0x3f; |
| write_1d0(reg1b3, 0x1bb, 6, 1); |
| write_1d0(reg1b3, 0x1b3, 6, 1); |
| write_1d0(reg1b3, 0x1a3, 6, 1); |
| |
| flip = !flip; |
| write_testing(info, totalrank, flip); |
| failmask = |
| check_testing(info, totalrank, |
| flip); |
| expected_failmask = |
| j == 0 ? 0x00 : 0xff; |
| if (failmask != expected_failmask) |
| goto fail; |
| } |
| } |
| totalrank++; |
| } |
| } |
| |
| set_178(info->cached_training->reg178_center); |
| if (info->use_ecc) |
| set_ecc(1); |
| write_training_data(info); |
| write_1d0(0, 322, 3, 1); |
| info->training = *info->cached_training; |
| |
| write_1d0(0, 0x1bb, 6, 1); |
| write_1d0(0, 0x1b3, 6, 1); |
| write_1d0(0, 0x1a3, 6, 1); |
| mchbar_write8(0x243, saved_243[0]); |
| mchbar_write8(0x643, saved_243[1]); |
| |
| return 1; |
| |
| fail: |
| FOR_POPULATED_RANKS { |
| write_500_timings_type(info, channel, slot, rank, 1); |
| write_500_timings_type(info, channel, slot, rank, 2); |
| write_500_timings_type(info, channel, slot, rank, 3); |
| } |
| |
| write_1d0(0, 0x1bb, 6, 1); |
| write_1d0(0, 0x1b3, 6, 1); |
| write_1d0(0, 0x1a3, 6, 1); |
| mchbar_write8(0x243, saved_243[0]); |
| mchbar_write8(0x643, saved_243[1]); |
| |
| return 0; |
| } |
| |
| static void do_ram_training(struct raminfo *info) |
| { |
| u8 saved_243[2]; |
| int totalrank = 0; |
| u8 reg_178; |
| int niter; |
| |
| timing_bounds_t *timings = timings_car; |
| int lane, rank, slot, channel; |
| u8 reg178_center; |
| |
| write_1d0(2, 0x142, 3, 1); |
| saved_243[0] = mchbar_read8(0x243); |
| saved_243[1] = mchbar_read8(0x643); |
| mchbar_write8(0x243, saved_243[0] | 2); |
| mchbar_write8(0x643, saved_243[1] | 2); |
| switch (info->clock_speed_index) { |
| case 0: |
| niter = 5; |
| break; |
| case 1: |
| niter = 10; |
| break; |
| default: |
| niter = 19; |
| break; |
| } |
| set_ecc(0); |
| |
| FOR_POPULATED_RANKS_BACKWARDS { |
| int i; |
| |
| write_500_timings_type(info, channel, slot, rank, 0); |
| |
| write_testing(info, totalrank, 0); |
| for (i = 0; i < niter; i++) { |
| write_testing_type2(info, totalrank, 2, i, 0); |
| write_testing_type2(info, totalrank, 3, i, 1); |
| } |
| pci_write_config8(NORTHBRIDGE, 0xc0, 0x01); |
| totalrank++; |
| } |
| |
| if (reg178_min[info->clock_speed_index] < |
| reg178_max[info->clock_speed_index]) |
| memset(timings[reg178_min[info->clock_speed_index]], 0, |
| sizeof(timings[0]) * |
| (reg178_max[info->clock_speed_index] - |
| reg178_min[info->clock_speed_index])); |
| for (reg_178 = reg178_min[info->clock_speed_index]; |
| reg_178 < reg178_max[info->clock_speed_index]; |
| reg_178 += reg178_step[info->clock_speed_index]) { |
| totalrank = 0; |
| set_178(reg_178); |
| for (channel = NUM_CHANNELS - 1; channel >= 0; channel--) |
| for (slot = 0; slot < NUM_SLOTS; slot++) |
| for (rank = 0; rank < NUM_RANKS; rank++) { |
| memset(&timings[reg_178][channel][slot] |
| [rank][0].smallest, 0, 16); |
| if (info-> |
| populated_ranks[channel][slot] |
| [rank]) { |
| train_ram_at_178(info, channel, |
| slot, rank, |
| totalrank, |
| reg_178, 1, |
| niter, |
| timings); |
| totalrank++; |
| } |
| } |
| } |
| |
| reg178_center = choose_reg178(info, timings); |
| |
| FOR_POPULATED_RANKS_BACKWARDS for (lane = 0; lane < 8; lane++) { |
| info->training.timing_bounds[0][channel][slot][rank][lane]. |
| smallest = |
| timings[info->training. |
| reg178_smallest][channel][slot][rank][lane]. |
| smallest; |
| info->training.timing_bounds[0][channel][slot][rank][lane]. |
| largest = |
| timings[info->training. |
| reg178_smallest][channel][slot][rank][lane].largest; |
| info->training.timing_bounds[1][channel][slot][rank][lane]. |
| smallest = |
| timings[info->training. |
| reg178_largest][channel][slot][rank][lane].smallest; |
| info->training.timing_bounds[1][channel][slot][rank][lane]. |
| largest = |
| timings[info->training. |
| reg178_largest][channel][slot][rank][lane].largest; |
| info->training.timing_offset[channel][slot][rank][lane] = |
| info->training.lane_timings[1][channel][slot][rank][lane] |
| - |
| info->training.lane_timings[0][channel][slot][rank][lane] + |
| 64; |
| } |
| |
| if (info->silicon_revision == 1 |
| && (info-> |
| populated_ranks_mask[1] ^ (info-> |
| populated_ranks_mask[1] >> 2)) & 1) { |
| int ranks_after_channel1; |
| |
| totalrank = 0; |
| for (reg_178 = reg178_center - 18; |
| reg_178 <= reg178_center + 18; reg_178 += 18) { |
| totalrank = 0; |
| set_178(reg_178); |
| for (slot = 0; slot < NUM_SLOTS; slot++) |
| for (rank = 0; rank < NUM_RANKS; rank++) { |
| if (info-> |
| populated_ranks[1][slot][rank]) { |
| train_ram_at_178(info, 1, slot, |
| rank, |
| totalrank, |
| reg_178, 0, |
| niter, |
| timings); |
| totalrank++; |
| } |
| } |
| } |
| ranks_after_channel1 = totalrank; |
| |
| for (reg_178 = reg178_center - 12; |
| reg_178 <= reg178_center + 12; reg_178 += 12) { |
| totalrank = ranks_after_channel1; |
| set_178(reg_178); |
| for (slot = 0; slot < NUM_SLOTS; slot++) |
| for (rank = 0; rank < NUM_RANKS; rank++) |
| if (info-> |
| populated_ranks[0][slot][rank]) { |
| train_ram_at_178(info, 0, slot, |
| rank, |
| totalrank, |
| reg_178, 0, |
| niter, |
| timings); |
| totalrank++; |
| } |
| |
| } |
| } else { |
| for (reg_178 = reg178_center - 12; |
| reg_178 <= reg178_center + 12; reg_178 += 12) { |
| totalrank = 0; |
| set_178(reg_178); |
| FOR_POPULATED_RANKS_BACKWARDS { |
| train_ram_at_178(info, channel, slot, rank, |
| totalrank, reg_178, 0, niter, |
| timings); |
| totalrank++; |
| } |
| } |
| } |
| |
| set_178(reg178_center); |
| FOR_POPULATED_RANKS_BACKWARDS for (lane = 0; lane < 8; lane++) { |
| u16 tm0; |
| |
| tm0 = |
| choose_training(info, channel, slot, rank, lane, timings, |
| reg178_center); |
| write_500(info, channel, tm0, |
| get_timing_register_addr(lane, 0, slot, rank), 9, 1); |
| write_500(info, channel, |
| tm0 + |
| info->training. |
| lane_timings[1][channel][slot][rank][lane] - |
| info->training. |
| lane_timings[0][channel][slot][rank][lane], |
| get_timing_register_addr(lane, 1, slot, rank), 9, 1); |
| } |
| |
| totalrank = 0; |
| FOR_POPULATED_RANKS_BACKWARDS { |
| try_timing_offsets(info, channel, slot, rank, totalrank); |
| totalrank++; |
| } |
| mchbar_write8(0x243, saved_243[0]); |
| mchbar_write8(0x643, saved_243[1]); |
| write_1d0(0, 0x142, 3, 1); |
| info->training.reg178_center = reg178_center; |
| } |
| |
| static void ram_training(struct raminfo *info) |
| { |
| u16 saved_fc4; |
| |
| saved_fc4 = mchbar_read16(0xfc4); |
| mchbar_write16(0xfc4, 0xffff); |
| |
| if (info->revision >= 8) |
| read_4090(info); |
| |
| if (!try_cached_training(info)) |
| do_ram_training(info); |
| if ((info->silicon_revision == 2 || info->silicon_revision == 3) |
| && info->clock_speed_index < 2) |
| set_10b(info, 1); |
| mchbar_write16(0xfc4, saved_fc4); |
| } |
| |
| u16 get_max_timing(struct raminfo *info, int channel) |
| { |
| int slot, rank, lane; |
| u16 ret = 0; |
| |
| if ((mchbar_read8(0x2ca8) >> 2) < 1) |
| return 384; |
| |
| if (info->revision < 8) |
| return 256; |
| |
| for (slot = 0; slot < NUM_SLOTS; slot++) |
| for (rank = 0; rank < NUM_RANKS; rank++) |
| if (info->populated_ranks[channel][slot][rank]) |
| for (lane = 0; lane < 8 + info->use_ecc; lane++) |
| ret = MAX(ret, read_500(info, channel, |
| get_timing_register_addr |
| (lane, 0, slot, |
| rank), 9)); |
| return ret; |
| } |
| |
| static void dmi_setup(void) |
| { |
| gav(dmibar_read8(0x254)); |
| dmibar_write8(0x254, 1 << 0); |
| dmibar_write16(0x1b8, 0x18f2); |
| mchbar_clrsetbits16(0x48, ~0, 1 << 1); |
| |
| dmibar_setbits32(0xd68, 1 << 27); |
| |
| outl((gav(inl(DEFAULT_GPIOBASE | 0x38)) & ~0x140000) | 0x400000, |
| DEFAULT_GPIOBASE | 0x38); |
| gav(inb(DEFAULT_GPIOBASE | 0xe)); // = 0xfdcaff6e |
| } |
| |
| void chipset_init(const int s3resume) |
| { |
| u8 x2ca8; |
| u16 ggc; |
| u8 gfxsize; |
| |
| x2ca8 = mchbar_read8(0x2ca8); |
| if ((x2ca8 & 1) || (x2ca8 == 8 && !s3resume)) { |
| printk(BIOS_DEBUG, "soft reset detected, rebooting properly\n"); |
| mchbar_write8(0x2ca8, 0); |
| system_reset(); |
| } |
| |
| dmi_setup(); |
| |
| mchbar_write16(0x1170, 0xa880); |
| mchbar_write8(0x11c1, 1 << 0); |
| mchbar_write16(0x1170, 0xb880); |
| mchbar_clrsetbits8(0x1210, ~0, 0x84); |
| |
| gfxsize = get_uint_option("gfx_uma_size", 0); /* 0 for 32MB */ |
| |
| ggc = 0xb00 | ((gfxsize + 5) << 4); |
| |
| pci_write_config16(NORTHBRIDGE, GGC, ggc | 2); |
| |
| u16 deven; |
| deven = pci_read_config16(NORTHBRIDGE, DEVEN); // = 0x3 |
| |
| if (deven & 8) { |
| mchbar_write8(0x2c30, 1 << 5); |
| pci_read_config8(NORTHBRIDGE, 0x8); // = 0x18 |
| mchbar_setbits16(0x2c30, 1 << 9); |
| mchbar_write16(0x2c32, 0x434); |
| mchbar_clrsetbits32(0x2c44, ~0, 0x1053687); |
| pci_read_config8(GMA, MSAC); // = 0x2 |
| pci_write_config8(GMA, MSAC, 0x2); |
| RCBA8(0x2318); |
| RCBA8(0x2318) = 0x47; |
| RCBA8(0x2320); |
| RCBA8(0x2320) = 0xfc; |
| } |
| |
| mchbar_clrsetbits32(0x30, ~0, 0x40); |
| |
| pci_write_config16(NORTHBRIDGE, GGC, ggc); |
| gav(RCBA32(0x3428)); |
| RCBA32(0x3428) = 0x1d; |
| } |
| |
/*
 * Return bits 4, 3 and 2 of reg32 in reversed order:
 * bit 4 ends up in bit 0, bit 3 in bit 1 and bit 2 stays in bit 2.
 * All other bits of reg32 are ignored.
 */
static u8 get_bits_420(const u32 reg32)
{
	const u8 from_bit4 = (reg32 >> 4) & 1;
	const u8 from_bit3 = (reg32 >> 3) & 1;
	const u8 from_bit2 = (reg32 >> 2) & 1;

	return from_bit4 | (from_bit3 << 1) | (from_bit2 << 2);
}
| |
| void raminit(const int s3resume, const u8 *spd_addrmap) |
| { |
| unsigned int channel, slot, lane, rank; |
| struct raminfo info; |
| u8 x2ca8; |
| int cbmem_wasnot_inited; |
| |
| x2ca8 = mchbar_read8(0x2ca8); |
| |
| printk(RAM_DEBUG, "Scratchpad MCHBAR8(0x2ca8): 0x%04x\n", x2ca8); |
| |
| memset(&info, 0x5a, sizeof(info)); |
| |
| info.last_500_command[0] = 0; |
| info.last_500_command[1] = 0; |
| |
| info.board_lane_delay[0] = 0x14; |
| info.board_lane_delay[1] = 0x07; |
| info.board_lane_delay[2] = 0x07; |
| info.board_lane_delay[3] = 0x08; |
| info.board_lane_delay[4] = 0x56; |
| info.board_lane_delay[5] = 0x04; |
| info.board_lane_delay[6] = 0x04; |
| info.board_lane_delay[7] = 0x05; |
| info.board_lane_delay[8] = 0x10; |
| |
| info.training.reg_178 = 0; |
| info.training.reg_10b = 0; |
| |
| /* Wait for some bit, maybe TXT clear. */ |
| while (!(read8((u8 *)0xfed40000) & (1 << 7))) |
| ; |
| |
| /* Wait for ME to be ready */ |
| intel_early_me_init(); |
| info.memory_reserved_for_heci_mb = intel_early_me_uma_size(); |
| |
| /* before SPD */ |
| timestamp_add_now(101); |
| |
| if (!s3resume || 1) { // possible error |
| memset(&info.populated_ranks, 0, sizeof(info.populated_ranks)); |
| |
| info.use_ecc = 1; |
| for (channel = 0; channel < NUM_CHANNELS; channel++) |
| for (slot = 0; slot < NUM_SLOTS; slot++) { |
| int v; |
| int try; |
| int addr; |
| const u8 useful_addresses[] = { |
| DEVICE_TYPE, |
| MODULE_TYPE, |
| DENSITY, |
| RANKS_AND_DQ, |
| MEMORY_BUS_WIDTH, |
| TIMEBASE_DIVIDEND, |
| TIMEBASE_DIVISOR, |
| CYCLETIME, |
| CAS_LATENCIES_LSB, |
| CAS_LATENCIES_MSB, |
| CAS_LATENCY_TIME, |
| 0x11, 0x12, 0x13, 0x14, 0x15, |
| 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, |
| 0x1c, 0x1d, |
| THERMAL_AND_REFRESH, |
| 0x20, |
| REFERENCE_RAW_CARD_USED, |
| RANK1_ADDRESS_MAPPING, |
| 0x75, 0x76, 0x77, 0x78, |
| 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, |
| 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, |
| 0x85, 0x86, 0x87, 0x88, |
| 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, |
| 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, |
| 0x95 |
| }; |
| if (!spd_addrmap[2 * channel + slot]) |
| continue; |
| for (try = 0; try < 5; try++) { |
| v = smbus_read_byte(spd_addrmap[2 * channel + slot], |
| DEVICE_TYPE); |
| if (v >= 0) |
| break; |
| } |
| if (v < 0) |
| continue; |
| for (addr = 0; |
| addr < |
| ARRAY_SIZE(useful_addresses); addr++) |
| gav(info. |
| spd[channel][0][useful_addresses |
| [addr]] = |
| smbus_read_byte(spd_addrmap[2 * channel + slot], |
| useful_addresses |
| [addr])); |
| if (info.spd[channel][0][DEVICE_TYPE] != 11) |
| die("Only DDR3 is supported"); |
| |
| v = info.spd[channel][0][RANKS_AND_DQ]; |
| info.populated_ranks[channel][0][0] = 1; |
| info.populated_ranks[channel][0][1] = |
| ((v >> 3) & 7); |
| if (((v >> 3) & 7) > 1) |
| die("At most 2 ranks are supported"); |
| if ((v & 7) == 0 || (v & 7) > 2) |
| die("Only x8 and x16 modules are supported"); |
| if ((info. |
| spd[channel][slot][MODULE_TYPE] & 0xF) != 2 |
| && (info. |
| spd[channel][slot][MODULE_TYPE] & 0xF) |
| != 3) |
| die("Registered memory is not supported"); |
| info.is_x16_module[channel][0] = (v & 7) - 1; |
| info.density[channel][slot] = |
| info.spd[channel][slot][DENSITY] & 0xF; |
| if (! |
| (info. |
| spd[channel][slot][MEMORY_BUS_WIDTH] & |
| 0x18)) |
| info.use_ecc = 0; |
| } |
| |
| gav(0x55); |
| |
| for (channel = 0; channel < NUM_CHANNELS; channel++) { |
| int v = 0; |
| for (slot = 0; slot < NUM_SLOTS; slot++) |
| for (rank = 0; rank < NUM_RANKS; rank++) |
| v |= info. |
| populated_ranks[channel][slot][rank] |
| << (2 * slot + rank); |
| info.populated_ranks_mask[channel] = v; |
| } |
| |
| gav(0x55); |
| |
| gav(pci_read_config32(NORTHBRIDGE, CAPID0 + 4)); |
| } |
| |
| /* after SPD */ |
| timestamp_add_now(102); |
| |
| mchbar_clrbits8(0x2ca8, 1 << 1 | 1 << 0); |
| |
| collect_system_info(&info); |
| calculate_timings(&info); |
| |
| if (!s3resume) { |
| u8 reg8 = pci_read_config8(SOUTHBRIDGE, GEN_PMCON_2); |
| if (x2ca8 == 0 && (reg8 & 0x80)) { |
| /* Don't enable S4-assertion stretch. Makes trouble on roda/rk9. |
| reg8 = pci_read_config8(PCI_DEV(0, 0x1f, 0), 0xa4); |
| pci_write_config8(PCI_DEV(0, 0x1f, 0), 0xa4, reg8 | 0x08); |
| */ |
| |
| /* Clear bit7. */ |
| |
| pci_write_config8(SOUTHBRIDGE, GEN_PMCON_2, |
| (reg8 & ~(1 << 7))); |
| |
| printk(BIOS_INFO, |
| "Interrupted RAM init, reset required.\n"); |
| system_reset(); |
| } |
| } |
| |
| if (!s3resume && x2ca8 == 0) |
| pci_write_config8(SOUTHBRIDGE, GEN_PMCON_2, |
| pci_read_config8(SOUTHBRIDGE, GEN_PMCON_2) | 0x80); |
| |
| compute_derived_timings(&info); |
| |
| early_quickpath_init(&info, x2ca8); |
| |
| info.cached_training = get_cached_training(); |
| |
| if (x2ca8 == 0) |
| late_quickpath_init(&info, s3resume); |
| |
| mchbar_setbits32(0x2c80, 1 << 24); |
| mchbar_write32(0x1804, mchbar_read32(0x1c04) & ~(1 << 27)); |
| |
| mchbar_read8(0x2ca8); // !!!! |
| |
| if (x2ca8 == 0) { |
| mchbar_clrbits8(0x2ca8, 3); |
| mchbar_write8(0x2ca8, mchbar_read8(0x2ca8) + 4); // "+" or "|"? |
| /* This issues a CPU reset without resetting the platform */ |
| printk(BIOS_DEBUG, "Issuing a CPU reset\n"); |
| /* Write back the S3 state to PM1_CNT to let the reset CPU |
| know it also needs to take the s3 path. */ |
| if (s3resume) |
| write_pmbase32(PM1_CNT, read_pmbase32(PM1_CNT) |
| | (SLP_TYP_S3 << 10)); |
| mchbar_setbits32(0x1af0, 1 << 4); |
| halt(); |
| } |
| |
| mchbar_clrbits8(0x2ca8, 0); // !!!! |
| |
| mchbar_clrbits32(0x2c80, 1 << 24); |
| |
| pci_write_config32(QPI_NON_CORE, MAX_RTIDS, 0x20220); |
| |
| { |
| u8 x2c20 = (mchbar_read16(0x2c20) >> 8) & 3; |
| u16 x2c10 = mchbar_read16(0x2c10); |
| u16 value = mchbar_read16(0x2c00); |
| if (x2c20 == 0 && (x2c10 & 0x300) == 0) |
| value |= (1 << 7); |
| else |
| value &= ~(1 << 0); |
| |
| mchbar_write16(0x2c00, value); |
| } |
| |
| udelay(1000); // !!!! |
| |
| write_1d0(0, 0x33d, 0, 0); |
| write_500(&info, 0, 0, 0xb61, 0, 0); |
| write_500(&info, 1, 0, 0xb61, 0, 0); |
| mchbar_write32(0x1a30, 0); |
| mchbar_write32(0x1a34, 0); |
| mchbar_write16(0x614, 0xb5b | (info.populated_ranks[1][0][0] * 0x404) | |
| (info.populated_ranks[0][0][0] * 0xa0)); |
| mchbar_write16(0x616, 0x26a); |
| mchbar_write32(0x134, 0x856000); |
| mchbar_write32(0x160, 0x5ffffff); |
| mchbar_clrsetbits32(0x114, ~0, 0xc2024440); // !!!! |
| mchbar_clrsetbits32(0x118, ~0, 0x4); // !!!! |
| for (channel = 0; channel < NUM_CHANNELS; channel++) |
| mchbar_write32(0x260 + (channel << 10), 0x30809ff | |
| (info.populated_ranks_mask[channel] & 3) << 20); |
| for (channel = 0; channel < NUM_CHANNELS; channel++) { |
| mchbar_write16(0x31c + (channel << 10), 0x101); |
| mchbar_write16(0x360 + (channel << 10), 0x909); |
| mchbar_write16(0x3a4 + (channel << 10), 0x101); |
| mchbar_write16(0x3e8 + (channel << 10), 0x101); |
| mchbar_write32(0x320 + (channel << 10), 0x29002900); |
| mchbar_write32(0x324 + (channel << 10), 0); |
| mchbar_write32(0x368 + (channel << 10), 0x32003200); |
| mchbar_write16(0x352 + (channel << 10), 0x505); |
| mchbar_write16(0x354 + (channel << 10), 0x3c3c); |
| mchbar_write16(0x356 + (channel << 10), 0x1040); |
| mchbar_write16(0x39a + (channel << 10), 0x73e4); |
| mchbar_write16(0x3de + (channel << 10), 0x77ed); |
| mchbar_write16(0x422 + (channel << 10), 0x1040); |
| } |
| |
| write_1d0(0x4, 0x151, 4, 1); |
| write_1d0(0, 0x142, 3, 1); |
| rdmsr(0x1ac); // !!!! |
| write_500(&info, 1, 1, 0x6b3, 4, 1); |
| write_500(&info, 1, 1, 0x6cf, 4, 1); |
| |
| rmw_1d0(0x21c, 0x38, 0, 6); |
| |
| write_1d0(((!info.populated_ranks[1][0][0]) << 1) | ((!info. |
| populated_ranks[0] |
| [0][0]) << 0), |
| 0x1d1, 3, 1); |
| for (channel = 0; channel < NUM_CHANNELS; channel++) { |
| mchbar_write16(0x38e + (channel << 10), 0x5f5f); |
| mchbar_write16(0x3d2 + (channel << 10), 0x5f5f); |
| } |
| |
| set_334(0); |
| |
| program_base_timings(&info); |
| |
| mchbar_setbits8(0x5ff, 1 << 7); |
| |
| write_1d0(0x2, 0x1d5, 2, 1); |
| write_1d0(0x20, 0x166, 7, 1); |
| write_1d0(0x0, 0xeb, 3, 1); |
| write_1d0(0x0, 0xf3, 6, 1); |
| |
| for (channel = 0; channel < NUM_CHANNELS; channel++) { |
| u8 a = 0; |
| if (info.populated_ranks[channel][0][1] && info.clock_speed_index > 1) |
| a = 3; |
| if (info.silicon_revision == 0 || info.silicon_revision == 1) |
| a = 3; |
| |
| for (lane = 0; lane < 9; lane++) { |
| const u16 addr = 0x125 + get_lane_offset(0, 0, lane); |
| rmw_500(&info, channel, addr, 6, 0xf, a); |
| } |
| } |
| |
| if (s3resume) { |
| if (!info.cached_training) { |
| u32 reg32; |
| printk(BIOS_ERR, |
| "Couldn't find training data. Rebooting\n"); |
| reg32 = inl(DEFAULT_PMBASE + 0x04); |
| outl(reg32 & ~(7 << 10), DEFAULT_PMBASE + 0x04); |
| full_reset(); |
| } |
| int tm; |
| info.training = *info.cached_training; |
| for (tm = 0; tm < 4; tm++) |
| for (channel = 0; channel < NUM_CHANNELS; channel++) |
| for (slot = 0; slot < NUM_SLOTS; slot++) |
| for (rank = 0; rank < NUM_RANKS; rank++) |
| for (lane = 0; lane < 9; lane++) |
| write_500(&info, |
| channel, |
| info.training. |
| lane_timings |
| [tm][channel] |
| [slot][rank] |
| [lane], |
| get_timing_register_addr |
| (lane, tm, |
| slot, rank), |
| 9, 0); |
| write_1d0(info.cached_training->reg_178, 0x178, 7, 1); |
| write_1d0(info.cached_training->reg_10b, 0x10b, 6, 1); |
| } |
| |
| mchbar_clrsetbits32(0x1f4, ~0, 1 << 17); // !!!! |
| mchbar_write32(0x1f0, 0x1d000200); |
| mchbar_setbits8(0x1f0, 1 << 0); |
| while (mchbar_read8(0x1f0) & 1) |
| ; |
| |
| program_board_delay(&info); |
| |
| mchbar_write8(0x5ff, 0); |
| mchbar_write8(0x5ff, 1 << 7); |
| mchbar_write8(0x5f4, 1 << 0); |
| |
| mchbar_clrbits32(0x130, 1 << 1); // | 2 when ? |
| while (mchbar_read32(0x130) & 1) |
| ; |
| |
| rmw_1d0(0x14b, 0x47, 0x30, 7); |
| rmw_1d0(0xd6, 0x38, 7, 6); |
| rmw_1d0(0x328, 0x38, 7, 6); |
| |
| for (channel = 0; channel < NUM_CHANNELS; channel++) |
| set_4cf(&info, channel, 1, 0); |
| |
| rmw_1d0(0x116, 0xe, 0, 4); |
| rmw_1d0(0xae, 0x3e, 0, 6); |
| rmw_1d0(0x300, 0x3e, 0, 6); |
| mchbar_clrbits16(0x356, 1 << 15); |
| mchbar_clrbits16(0x756, 1 << 15); |
| mchbar_clrbits32(0x140, 7 << 24); |
| mchbar_clrbits32(0x138, 7 << 24); |
| mchbar_write32(0x130, 0x31111301); |
| /* Wait until REG130b0 is 1. */ |
| while (mchbar_read32(0x130) & 1) |
| ; |
| |
| u8 value_a1; |
| { |
| const u8 val_xa1 = get_bits_420(read_1d0(0xa1, 6)); // = 0x1cf4040 // !!!! |
| const u8 val_2f3 = get_bits_420(read_1d0(0x2f3, 6)); // = 0x10a4040 // !!!! |
| value_a1 = val_xa1; |
| rmw_1d0(0x320, 0x38, val_2f3, 6); |
| rmw_1d0(0x14b, 0x78, val_xa1, 7); |
| rmw_1d0(0xce, 0x38, val_xa1, 6); |
| } |
| |
| for (channel = 0; channel < NUM_CHANNELS; channel++) |
| set_4cf(&info, channel, 1, 1); |
| |
| rmw_1d0(0x116, 0xe, 1, 4); // = 0x4040432 // !!!! |
| { |
| if ((mchbar_read32(0x144) & 0x1f) < 0x13) |
| value_a1 += 2; |
| else |
| value_a1 += 1; |
| |
| if (value_a1 > 7) |
| value_a1 = 7; |
| |
| write_1d0(2, 0xae, 6, 1); |
| write_1d0(2, 0x300, 6, 1); |
| write_1d0(value_a1, 0x121, 3, 1); |
| rmw_1d0(0xd6, 0x38, 4, 6); |
| rmw_1d0(0x328, 0x38, 4, 6); |
| } |
| |
| for (channel = 0; channel < NUM_CHANNELS; channel++) |
| set_4cf(&info, channel, 2, 0); |
| |
| mchbar_write32(0x130, 0x11111301 | info.populated_ranks[1][0][0] << 30 | |
| info.populated_ranks[0][0][0] << 29); |
| while (mchbar_read8(0x130) & 1) |
| ; |
| |
| { |
| const u8 val_xa1 = get_bits_420(read_1d0(0xa1, 6)); |
| read_1d0(0x2f3, 6); // = 0x10a4054 // !!!! |
| rmw_1d0(0x21c, 0x38, 0, 6); |
| rmw_1d0(0x14b, 0x78, val_xa1, 7); |
| } |
| |
| for (channel = 0; channel < NUM_CHANNELS; channel++) |
| set_4cf(&info, channel, 2, 1); |
| |
| set_334(1); |
| |
| mchbar_write8(0x1e8, 1 << 2); |
| |
| for (channel = 0; channel < NUM_CHANNELS; channel++) { |
| write_500(&info, channel, |
| 0x3 & ~(info.populated_ranks_mask[channel]), 0x6b7, 2, |
| 1); |
| write_500(&info, channel, 0x3, 0x69b, 2, 1); |
| } |
| mchbar_clrsetbits32(0x2d0, ~0xff0c01ff, 0x200000); |
| mchbar_write16(0x6c0, 0x14a0); |
| mchbar_clrsetbits32(0x6d0, ~0xff0000ff, 0x8000); |
| mchbar_write16(0x232, 1 << 3); |
| /* 0x40004 or 0 depending on ? */ |
| mchbar_clrsetbits32(0x234, 0x40004, 0x40004); |
| mchbar_clrsetbits32(0x34, 0x7, 5); |
| mchbar_write32(0x128, 0x2150d05); |
| mchbar_write8(0x12c, 0x1f); |
| mchbar_write8(0x12d, 0x56); |
| mchbar_write8(0x12e, 0x31); |
| mchbar_write8(0x12f, 0); |
| mchbar_write8(0x271, 1 << 1); |
| mchbar_write8(0x671, 1 << 1); |
| mchbar_write8(0x1e8, 1 << 2); |
| for (channel = 0; channel < NUM_CHANNELS; channel++) |
| mchbar_write32(0x294 + (channel << 10), |
| (info.populated_ranks_mask[channel] & 3) << 16); |
| mchbar_clrsetbits32(0x134, ~0xfc01ffff, 0x10000); |
| mchbar_clrsetbits32(0x134, ~0xfc85ffff, 0x850000); |
| for (channel = 0; channel < NUM_CHANNELS; channel++) |
| mchbar_clrsetbits32(0x260 + (channel << 10), 0xf << 20, 1 << 27 | |
| (info.populated_ranks_mask[channel] & 3) << 20); |
| |
| if (!s3resume) |
| jedec_init(&info); |
| |
| int totalrank = 0; |
| for (channel = 0; channel < NUM_CHANNELS; channel++) |
| for (slot = 0; slot < NUM_SLOTS; slot++) |
| for (rank = 0; rank < NUM_RANKS; rank++) |
| if (info.populated_ranks[channel][slot][rank]) { |
| jedec_read(&info, channel, slot, rank, |
| totalrank, 0xa, 0x400); |
| totalrank++; |
| } |
| |
| mchbar_write8(0x12c, 0x9f); |
| |
| mchbar_clrsetbits8(0x271, 0x3e, 0x0e); |
| mchbar_clrsetbits8(0x671, 0x3e, 0x0e); |
| |
| if (!s3resume) { |
| for (channel = 0; channel < NUM_CHANNELS; channel++) { |
| mchbar_write32(0x294 + (channel << 10), |
| (info.populated_ranks_mask[channel] & 3) << 16); |
| mchbar_write16(0x298 + (channel << 10), |
| info.populated_ranks[channel][0][0] | |
| info.populated_ranks[channel][0][1] << 5); |
| mchbar_write32(0x29c + (channel << 10), 0x77a); |
| } |
| mchbar_clrsetbits32(0x2c0, ~0, 0x6009cc00); // !!!! |
| |
| { |
| u8 a, b; |
| a = mchbar_read8(0x243); |
| b = mchbar_read8(0x643); |
| mchbar_write8(0x243, a | 2); |
| mchbar_write8(0x643, b | 2); |
| } |
| |
| write_1d0(7, 0x19b, 3, 1); |
| write_1d0(7, 0x1c0, 3, 1); |
| write_1d0(4, 0x1c6, 4, 1); |
| write_1d0(4, 0x1cc, 4, 1); |
| rmw_1d0(0x151, 0xf, 0x4, 4); |
| mchbar_write32(0x584, 0xfffff); |
| mchbar_write32(0x984, 0xfffff); |
| |
| for (channel = 0; channel < NUM_CHANNELS; channel++) |
| for (slot = 0; slot < NUM_SLOTS; slot++) |
| for (rank = 0; rank < NUM_RANKS; rank++) |
| if (info. |
| populated_ranks[channel][slot] |
| [rank]) |
| config_rank(&info, s3resume, |
| channel, slot, |
| rank); |
| |
| mchbar_write8(0x243, 1); |
| mchbar_write8(0x643, 1); |
| } |
| |
| /* was == 1 but is common */ |
| pci_write_config16(NORTHBRIDGE, 0xc8, 3); |
| write_26c(0, 0x820); |
| write_26c(1, 0x820); |
| mchbar_setbits32(0x130, 1 << 1); |
| /* end */ |
| |
| if (s3resume) { |
| for (channel = 0; channel < NUM_CHANNELS; channel++) { |
| mchbar_write32(0x294 + (channel << 10), |
| (info.populated_ranks_mask[channel] & 3) << 16); |
| mchbar_write16(0x298 + (channel << 10), |
| info.populated_ranks[channel][0][0] | |
| info.populated_ranks[channel][0][1] << 5); |
| mchbar_write32(0x29c + (channel << 10), 0x77a); |
| } |
| mchbar_clrsetbits32(0x2c0, ~0, 0x6009cc00); // !!!! |
| } |
| |
| mchbar_clrbits32(0xfa4, 1 << 24 | 1 << 1); |
| mchbar_write32(0xfb0, 0x2000e019); |
| |
| /* Before training. */ |
| timestamp_add_now(103); |
| |
| if (!s3resume) |
| ram_training(&info); |
| |
| /* After training. */ |
| timestamp_add_now(104); |
| |
| dump_timings(&info); |
| |
| program_modules_memory_map(&info, 0); |
| program_total_memory_map(&info); |
| |
| if (info.non_interleaved_part_mb != 0 && info.interleaved_part_mb != 0) |
| mchbar_write8(0x111, 0 << 2 | 1 << 5 | 1 << 6 | 0 << 7); |
| else if (have_match_ranks(&info, 0, 4) && have_match_ranks(&info, 1, 4)) |
| mchbar_write8(0x111, 3 << 2 | 1 << 5 | 0 << 6 | 1 << 7); |
| else if (have_match_ranks(&info, 0, 2) && have_match_ranks(&info, 1, 2)) |
| mchbar_write8(0x111, 3 << 2 | 1 << 5 | 0 << 6 | 0 << 7); |
| else |
| mchbar_write8(0x111, 3 << 2 | 1 << 5 | 1 << 6 | 0 << 7); |
| |
| mchbar_clrbits32(0xfac, 1 << 31); |
| mchbar_write32(0xfb4, 0x4800); |
| mchbar_write32(0xfb8, (info.revision < 8) ? 0x20 : 0x0); |
| mchbar_write32(0xe94, 0x7ffff); |
| mchbar_write32(0xfc0, 0x80002040); |
| mchbar_write32(0xfc4, 0x701246); |
| mchbar_clrbits8(0xfc8, 0x70); |
| mchbar_setbits32(0xe5c, 1 << 24); |
| mchbar_clrsetbits32(0x1a70, 3 << 20, 2 << 20); |
| mchbar_write32(0x50, 0x700b0); |
| mchbar_write32(0x3c, 0x10); |
| mchbar_clrsetbits8(0x1aa8, 0x3f, 0xa); |
| mchbar_setbits8(0xff4, 1 << 1); |
| mchbar_clrsetbits32(0xff8, 0xe008, 0x1020); |
| |
| mchbar_write32(0xd00, IOMMU_BASE2 | 1); |
| mchbar_write32(0xd40, IOMMU_BASE1 | 1); |
| mchbar_write32(0xdc0, IOMMU_BASE4 | 1); |
| |
| write32p(IOMMU_BASE1 | 0xffc, 0x80000000); |
| write32p(IOMMU_BASE2 | 0xffc, 0xc0000000); |
| write32p(IOMMU_BASE4 | 0xffc, 0x80000000); |
| |
| { |
| u32 eax; |
| |
| eax = info.fsb_frequency / 9; |
| mchbar_clrsetbits32(0xfcc, 0x3ffff, |
| (eax * 0x280) | (eax * 0x5000) | eax | 0x40000); |
| mchbar_write32(0x20, 0x33001); |
| } |
| |
| for (channel = 0; channel < NUM_CHANNELS; channel++) { |
| mchbar_clrbits32(0x220 + (channel << 10), 0x7770); |
| if (info.max_slots_used_in_channel == 1) |
| mchbar_setbits16(0x237 + (channel << 10), 0x0201); |
| else |
| mchbar_clrbits16(0x237 + (channel << 10), 0x0201); |
| |
| mchbar_setbits8(0x241 + (channel << 10), 1 << 0); |
| |
| if (info.clock_speed_index <= 1 && (info.silicon_revision == 2 |
| || info.silicon_revision == 3)) |
| mchbar_setbits32(0x248 + (channel << 10), 0x00102000); |
| else |
| mchbar_clrbits32(0x248 + (channel << 10), 0x00102000); |
| } |
| |
| mchbar_setbits32(0x115, 1 << 24); |
| |
| { |
| u8 al; |
| al = 0xd; |
| if (!(info.silicon_revision == 0 || info.silicon_revision == 1)) |
| al += 2; |
| al |= ((1 << (info.max_slots_used_in_channel - 1)) - 1) << 4; |
| mchbar_write32(0x210, al << 16 | 0x20); |
| } |
| |
| for (channel = 0; channel < NUM_CHANNELS; channel++) { |
| mchbar_write32(0x288 + (channel << 10), 0x70605040); |
| mchbar_write32(0x28c + (channel << 10), 0xfffec080); |
| mchbar_write32(0x290 + (channel << 10), 0x282091c | |
| (info.max_slots_used_in_channel - 1) << 0x16); |
| } |
| u32 reg1c; |
| pci_read_config32(NORTHBRIDGE, 0x40); // = DEFAULT_EPBAR | 0x001 // OK |
| reg1c = epbar_read32(EPVC1RCAP); // = 0x8001 // OK |
| pci_read_config32(NORTHBRIDGE, 0x40); // = DEFAULT_EPBAR | 0x001 // OK |
| epbar_write32(EPVC1RCAP, reg1c); // OK |
| mchbar_read8(0xe08); // = 0x0 |
| pci_read_config32(NORTHBRIDGE, 0xe4); // = 0x316126 |
| mchbar_setbits8(0x1210, 1 << 1); |
| mchbar_write32(0x1200, 0x8800440); |
| mchbar_write32(0x1204, 0x53ff0453); |
| mchbar_write32(0x1208, 0x19002043); |
| mchbar_write16(0x1214, 0x320); |
| |
| if (info.revision == 0x10 || info.revision == 0x11) { |
| mchbar_write16(0x1214, 0x220); |
| mchbar_setbits8(0x1210, 1 << 6); |
| } |
| |
| mchbar_setbits8(0x1214, 1 << 2); |
| mchbar_write8(0x120c, 1); |
| mchbar_write8(0x1218, 3); |
| mchbar_write8(0x121a, 3); |
| mchbar_write8(0x121c, 3); |
| mchbar_write16(0xc14, 0); |
| mchbar_write16(0xc20, 0); |
| mchbar_write32(0x1c, 0); |
| |
| /* revision dependent here. */ |
| |
| mchbar_setbits16(0x1230, 0x1f07); |
| |
| if (info.uma_enabled) |
| mchbar_setbits32(0x11f4, 1 << 28); |
| |
| mchbar_setbits16(0x1230, 1 << 15); |
| mchbar_setbits8(0x1214, 1 << 0); |
| |
| u8 bl, ebpb; |
| u16 reg_1020; |
| |
| reg_1020 = mchbar_read32(0x1020); // = 0x6c733c // OK |
| mchbar_write8(0x1070, 1); |
| |
| mchbar_write32(0x1000, 0x100); |
| mchbar_write8(0x1007, 0); |
| |
| if (reg_1020 != 0) { |
| mchbar_write16(0x1018, 0); |
| bl = reg_1020 >> 8; |
| ebpb = reg_1020 & 0xff; |
| } else { |
| ebpb = 0; |
| bl = 8; |
| } |
| |
| rdmsr(0x1a2); |
| |
| mchbar_write32(0x1014, 0xffffffff); |
| |
| mchbar_write32(0x1010, ((((ebpb + 0x7d) << 7) / bl) & 0xff) * !!reg_1020); |
| |
| mchbar_write8(0x101c, 0xb8); |
| |
| mchbar_clrsetbits8(0x123e, 0xf0, 0x60); |
| if (reg_1020 != 0) { |
| mchbar_clrsetbits32(0x123c, 0xf << 20, 0x6 << 20); |
| mchbar_write8(0x101c, 0xb8); |
| } |
| |
| const u64 heci_uma_addr = |
| ((u64) |
| ((((u64)pci_read_config16(NORTHBRIDGE, TOM)) << 6) - |
| info.memory_reserved_for_heci_mb)) << 20; |
| |
| setup_heci_uma(heci_uma_addr, info.memory_reserved_for_heci_mb); |
| |
| if (info.uma_enabled) { |
| u16 ax; |
| mchbar_setbits32(0x11b0, 1 << 14); |
| mchbar_setbits32(0x11b4, 1 << 14); |
| mchbar_setbits16(0x1190, 1 << 14); |
| |
| ax = mchbar_read16(0x1190) & 0xf00; // = 0x480a // OK |
| mchbar_write16(0x1170, ax | (mchbar_read16(0x1170) & 0x107f) | 0x4080); |
| mchbar_setbits16(0x1170, 1 << 12); |
| |
| udelay(1000); |
| |
| u16 ecx; |
| for (ecx = 0xffff; ecx && (mchbar_read16(0x1170) & (1 << 12)); ecx--) |
| ; |
| mchbar_clrbits16(0x1190, 1 << 14); |
| } |
| |
| pci_write_config8(SOUTHBRIDGE, GEN_PMCON_2, |
| pci_read_config8(SOUTHBRIDGE, GEN_PMCON_2) & ~0x80); |
| udelay(10000); |
| mchbar_write16(0x2ca8, 1 << 3); |
| |
| udelay(1000); |
| dump_timings(&info); |
| cbmem_wasnot_inited = cbmem_recovery(s3resume); |
| |
| if (!s3resume) |
| save_timings(&info); |
| if (s3resume && cbmem_wasnot_inited) { |
| printk(BIOS_ERR, "Failed S3 resume.\n"); |
| ram_check_nodie(1 * MiB); |
| |
| /* Failed S3 resume, reset to come up cleanly */ |
| full_reset(); |
| } |
| } |