| /* |
| * This file is part of the coreboot project. |
| * |
| * Copyright (C) 2013 Vladimir Serbinenko. |
| * |
| * This program is free software; you can redistribute it and/or modify |
| * it under the terms of the GNU General Public License as published by |
| * the Free Software Foundation; either version 2 of the License, or |
| * (at your option) any later version. |
| * |
| * This program is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| * GNU General Public License for more details. |
| */ |
| |
/* Please don't remove this. It's needed to do debugging
   and reverse engineering to support more Nehalem variants in the future. */
| #ifndef REAL |
| #define REAL 1 |
| #endif |
| |
| #if REAL |
| #include <stdlib.h> |
| #include <console/console.h> |
| #include <string.h> |
| #include <arch/io.h> |
| #include <cpu/x86/msr.h> |
| #include <cbmem.h> |
| #include <arch/cbfs.h> |
| #include <cbfs.h> |
| #include <ip_checksum.h> |
| #include <pc80/mc146818rtc.h> |
| #include <device/pci_def.h> |
| #include <arch/cpu.h> |
| #include <halt.h> |
| #include <spd.h> |
| #include "raminit.h" |
| #include <timestamp.h> |
| #include <cpu/x86/mtrr.h> |
| #include <cpu/intel/speedstep.h> |
| #include <cpu/intel/turbo.h> |
| #endif |
| |
| #if !REAL |
| typedef unsigned char u8; |
| typedef unsigned short u16; |
| typedef unsigned int u32; |
| typedef u32 device_t; |
| #endif |
| |
| #include "nehalem.h" |
| |
| #include "southbridge/intel/ibexpeak/me.h" |
| |
| #if REAL |
| #include <delay.h> |
| #endif |
| |
| #define NORTHBRIDGE PCI_DEV(0, 0, 0) |
| #define SOUTHBRIDGE PCI_DEV(0, 0x1f, 0) |
| #define GMA PCI_DEV (0, 0x2, 0x0) |
| #define HECIDEV PCI_DEV(0, 0x16, 0) |
| #define HECIBAR 0x10 |
| |
| #define FOR_ALL_RANKS \ |
| for (channel = 0; channel < NUM_CHANNELS; channel++) \ |
| for (slot = 0; slot < NUM_SLOTS; slot++) \ |
| for (rank = 0; rank < NUM_RANKS; rank++) |
| |
| #define FOR_POPULATED_RANKS \ |
| for (channel = 0; channel < NUM_CHANNELS; channel++) \ |
| for (slot = 0; slot < NUM_SLOTS; slot++) \ |
| for (rank = 0; rank < NUM_RANKS; rank++) \ |
| if (info->populated_ranks[channel][slot][rank]) |
| |
| #define FOR_POPULATED_RANKS_BACKWARDS \ |
| for (channel = NUM_CHANNELS - 1; channel >= 0; channel--) \ |
| for (slot = 0; slot < NUM_SLOTS; slot++) \ |
| for (rank = 0; rank < NUM_RANKS; rank++) \ |
| if (info->populated_ranks[channel][slot][rank]) |
| |
/* [REG_178][CHANNEL][2 * SLOT + RANK][LANE] */
typedef struct {
	u8 smallest;	/* lower edge of the passing timing window */
	u8 largest;	/* upper edge of the passing timing window */
} timing_bounds_t[2][2][2][9];
| |
/* Results of DRAM training.  Register names refer to MCHBAR offsets;
   presumably cached across S3 so resume can skip retraining (see
   raminfo::cached_training) — confirm against the caller. */
struct ram_training {
	/* [TM][CHANNEL][SLOT][RANK][LANE] */
	u16 lane_timings[4][2][2][2][9];
	u16 reg_178;
	u16 reg_10b;

	u8 reg178_center;
	u8 reg178_smallest;
	u8 reg178_largest;
	timing_bounds_t timing_bounds[2];
	u16 timing_offset[2][2][2][9];
	u16 timing2_offset[2][2][2][9];
	u16 timing2_bounds[2][2][2][9][2];
	u8 reg274265[2][3];	/* [CHANNEL][REGISTER] */
	u8 reg2ca9_bit0;
	u32 reg_6dc;
	u32 reg_6e8;
};
| |
| #if !REAL |
| #include "raminit_fake.c" |
| #else |
| |
| #include <lib.h> /* Prototypes */ |
| |
/* Thin typed wrappers around the MCHBAR32/16/8 MMIO accessor macros,
   kept so the raminit code reads as explicit function calls. */
static inline void write_mchbar32(u32 addr, u32 val)
{
	MCHBAR32(addr) = val;
}

static inline void write_mchbar16(u32 addr, u16 val)
{
	MCHBAR16(addr) = val;
}

static inline void write_mchbar8(u32 addr, u8 val)
{
	MCHBAR8(addr) = val;
}


static inline u32 read_mchbar32(u32 addr)
{
	return MCHBAR32(addr);
}

static inline u16 read_mchbar16(u32 addr)
{
	return MCHBAR16(addr);
}

static inline u8 read_mchbar8(u32 addr)
{
	return MCHBAR8(addr);
}
| |
/* Flush the cache line containing the (32-bit) address held in addr. */
static void clflush(u32 addr)
{
	asm volatile ("clflush (%0)"::"r" (addr));
}

/* 128-bit value as two 64-bit halves, low half first. */
typedef struct _u128 {
	u64 lo;
	u64 hi;
} u128;

/* Read 16 bytes from addr with a single movdqa (requires 16-byte
   alignment) into out[0]/out[1], spilling and restoring the caller's
   %xmm0 around the access. */
static void read128(u32 addr, u64 * out)
{
	u128 ret;
	u128 stor;
	asm volatile ("movdqu %%xmm0, %0\n"
		      "movdqa (%2), %%xmm0\n"
		      "movdqu %%xmm0, %1\n"
		      "movdqu %0, %%xmm0":"+m" (stor), "=m"(ret):"r"(addr));
	out[0] = ret.lo;
	out[1] = ret.hi;
}
| |
| #endif |
| |
/* OK */
/* Write `val' through the indirect register window at MCHBAR 0x1d0/0x1d4.
   Bit 23 of 0x1d0 is polled as a busy flag; the payload in 0x1d4 is the
   low `bits' of val with a command/flag field packed just above them,
   and 0x40000000 | addr starts the write. */
static void write_1d0(u32 val, u16 addr, int bits, int flag)
{
	write_mchbar32(0x1d0, 0);
	while (read_mchbar32(0x1d0) & 0x800000) ;
	write_mchbar32(0x1d4,
		       (val & ((1 << bits) - 1)) | (2 << bits) | (flag <<
								  bits));
	write_mchbar32(0x1d0, 0x40000000 | addr);
	while (read_mchbar32(0x1d0) & 0x800000) ;
}
| |
/* OK */
/* Read the low `split' bits of an indirect register through the 0x1d0
   window.  The effective index is biased by a 2-bit field from MCHBAR
   0x246; the result appears in 0x1d8.  The two dummy writes to 0x33d
   follow the original (vendor-derived) programming sequence. */
static u16 read_1d0(u16 addr, int split)
{
	u32 val;
	write_mchbar32(0x1d0, 0);
	while (read_mchbar32(0x1d0) & 0x800000) ;
	write_mchbar32(0x1d0,
		       0x80000000 | (((read_mchbar8(0x246) >> 2) & 3) +
				     0x361 - addr));
	while (read_mchbar32(0x1d0) & 0x800000) ;
	val = read_mchbar32(0x1d8);
	write_1d0(0, 0x33d, 0, 0);
	write_1d0(0, 0x33d, 0, 0);
	val &= ((1 << split) - 1);
	// printk (BIOS_ERR, "R1D0C [%x] => %x\n", addr, val);
	return val;
}
| |
/* 32-bit memory write by integer (physical) address. */
static void write32p(uintptr_t addr, uint32_t val)
{
	write32((void *)addr, val);
}

/* 32-bit memory read by integer (physical) address. */
static uint32_t read32p(uintptr_t addr)
{
	return read32((void *)addr);
}

/* Store fence: make preceding stores globally visible before later ones.
   Compiled out in the !REAL (simulation) build. */
static void sfence(void)
{
#if REAL
	asm volatile ("sfence");
#endif
}
| |
| static inline u16 get_lane_offset(int slot, int rank, int lane) |
| { |
| return 0x124 * lane + ((lane & 4) ? 0x23e : 0) + 11 * rank + 22 * slot - |
| 0x452 * (lane == 8); |
| } |
| |
/* Address of timing register set `tm' (0-3) for a lane/slot/rank.
   offs[] is rotated by (tm + 3) % 4, so tm 0 -> +0x5c, 1 -> +0x1d,
   2 -> +0xa8, 3 -> +0xe6 on top of the lane offset. */
static inline u16 get_timing_register_addr(int lane, int tm, int slot, int rank)
{
	const u16 offs[] = { 0x1d, 0xa8, 0xe6, 0x5c };
	return get_lane_offset(slot, rank, lane) + offs[(tm + 3) % 4];
}
| |
| #if REAL |
| static u32 gav_real(int line, u32 in) |
| { |
| // printk (BIOS_DEBUG, "%d: GAV: %x\n", line, in); |
| return in; |
| } |
| |
| #define gav(x) gav_real (__LINE__, (x)) |
| #endif |
/* All state the raminit code carries: decoded SPD data, derived clock
   and CAS settings, training results and the resulting memory layout. */
struct raminfo {
	u16 clock_speed_index;	/* clock_speed (REAL, not DDR) / 133.(3) - 3 */
	u16 fsb_frequency;	/* in 1.(1)/2 MHz.  */
	u8 is_x16_module[2][2];	/* [CHANNEL][SLOT] */
	u8 density[2][2];	/* [CHANNEL][SLOT] */
	u8 populated_ranks[2][2][2];	/* [CHANNEL][SLOT][RANK] */
	int rank_start[2][2][2];
	u8 cas_latency;
	/* Board-specific per-lane delays (source not visible here). */
	u8 board_lane_delay[9];
	u8 use_ecc;
	u8 revision;
	u8 max_supported_clock_speed_index;
	u8 uma_enabled;
	u8 spd[2][2][151];	/* [CHANNEL][SLOT][BYTE] */
	u8 silicon_revision;
	/* Bitmask of populated ranks per channel (bit = slot*2 + rank). */
	u8 populated_ranks_mask[2];
	u8 max_slots_used_in_channel;
	u8 mode4030[2];
	u16 avg4044[2];
	u16 max4048[2];
	unsigned total_memory_mb;
	unsigned interleaved_part_mb;
	unsigned non_interleaved_part_mb;

	u32 heci_bar;
	u64 heci_uma_addr;
	unsigned memory_reserved_for_heci_mb;

	struct ram_training training;
	/* Last command written to the 0x500 window, per channel; used by
	   write_500() to decide whether a flush write is needed. */
	u32 last_500_command[2];

	u32 delay46_ps[2];
	u32 delay54_ps[2];
	u8 revision_flag_1;
	u8 some_delay_1_cycle_floor;
	u8 some_delay_2_halfcycles_ceil;
	u8 some_delay_3_ps_rounded;

	/* Training data recovered from a previous boot, if any. */
	const struct ram_training *cached_training;
};
| |
| static void |
| write_500(struct raminfo *info, int channel, u32 val, u16 addr, int bits, |
| int flag); |
| |
/* OK */
/* Read the low `split' bits of an indirect register through the
   per-channel window at MCHBAR 0x500 + (channel << 10); analogous to
   read_1d0() but with a 0xb88 index bias and result in 0x508. */
static u16
read_500(struct raminfo *info, int channel, u16 addr, int split)
{
	u32 val;
	info->last_500_command[channel] = 0x80000000;
	write_mchbar32(0x500 + (channel << 10), 0);
	while (read_mchbar32(0x500 + (channel << 10)) & 0x800000) ;
	write_mchbar32(0x500 + (channel << 10),
		       0x80000000 |
		       (((read_mchbar8(0x246 + (channel << 10)) >> 2) &
			 3) + 0xb88 - addr));
	while (read_mchbar32(0x500 + (channel << 10)) & 0x800000) ;
	val = read_mchbar32(0x508 + (channel << 10));
	return val & ((1 << split) - 1);
}
| |
/* OK */
/* Write `val' through the per-channel indirect window at MCHBAR
   0x500/0x504; analogous to write_1d0().  If the previous operation on
   this channel was a read (last_500_command == 0x80000000), a dummy
   write to 0xb61 is issued first. */
static void
write_500(struct raminfo *info, int channel, u32 val, u16 addr, int bits,
	  int flag)
{
	if (info->last_500_command[channel] == 0x80000000) {
		info->last_500_command[channel] = 0x40000000;
		write_500(info, channel, 0, 0xb61, 0, 0);
	}
	write_mchbar32(0x500 + (channel << 10), 0);
	while (read_mchbar32(0x500 + (channel << 10)) & 0x800000) ;
	write_mchbar32(0x504 + (channel << 10),
		       (val & ((1 << bits) - 1)) | (2 << bits) | (flag <<
								  bits));
	write_mchbar32(0x500 + (channel << 10), 0x40000000 | addr);
	while (read_mchbar32(0x500 + (channel << 10)) & 0x800000) ;
}
| |
/* Read/write test of the memory rank whose base is rank << 28.  Writes
   a per-byte all-ones/all-zeros pattern derived from `mask' into 32
   8-byte pairs and returns an 8-bit map: bits 0-3 for the byte lanes of
   the low dword, bits 4-7 for the high dword, cleared on mismatch. */
static int rw_test(int rank)
{
	const u32 mask = 0xf00fc33c;
	int ok = 0xff;
	int i;
	/* Clear the region and read it back. */
	for (i = 0; i < 64; i++)
		write32p((rank << 28) | (i << 2), 0);
	sfence();
	for (i = 0; i < 64; i++)
		gav(read32p((rank << 28) | (i << 2)));
	sfence();
	/* Write the pattern: bit i of mask selects 0xffffffff or 0. */
	for (i = 0; i < 32; i++) {
		u32 pat = (((mask >> i) & 1) ? 0xffffffff : 0);
		write32p((rank << 28) | (i << 3), pat);
		write32p((rank << 28) | (i << 3) | 4, pat);
	}
	sfence();
	/* Verify each byte lane of both dwords in every pair. */
	for (i = 0; i < 32; i++) {
		u8 pat = (((mask >> i) & 1) ? 0xff : 0);
		int j;
		u32 val;
		gav(val = read32p((rank << 28) | (i << 3)));
		for (j = 0; j < 4; j++)
			if (((val >> (j * 8)) & 0xff) != pat)
				ok &= ~(1 << j);
		gav(val = read32p((rank << 28) | (i << 3) | 4));
		for (j = 0; j < 4; j++)
			if (((val >> (j * 8)) & 0xff) != pat)
				ok &= ~(16 << j);
	}
	sfence();
	/* Clear the region again before returning. */
	for (i = 0; i < 64; i++)
		write32p((rank << 28) | (i << 2), 0);
	sfence();
	for (i = 0; i < 64; i++)
		gav(read32p((rank << 28) | (i << 2)));

	return ok;
}
| |
/* Program timing register sets 2 and 3 for all 8 data lanes of the
   given slot/rank from the cached training values, each offset by
   `base'. */
static void
program_timings(struct raminfo *info, u16 base, int channel, int slot, int rank)
{
	int lane;
	for (lane = 0; lane < 8; lane++) {
		write_500(info, channel,
			  base +
			  info->training.
			  lane_timings[2][channel][slot][rank][lane],
			  get_timing_register_addr(lane, 2, slot, rank), 9, 0);
		write_500(info, channel,
			  base +
			  info->training.
			  lane_timings[3][channel][slot][rank][lane],
			  get_timing_register_addr(lane, 3, slot, rank), 9, 0);
	}
}
| |
/* Program the per-channel 0x26c/0x268/0x2b9 registers; `si' is packed
   into bits 9+ of 0x268 and written raw to 0x2b9. */
static void write_26c(int channel, u16 si)
{
	write_mchbar32(0x26c + (channel << 10), 0x03243f35);
	write_mchbar32(0x268 + (channel << 10), 0xcfc00000 | (si << 9));
	write_mchbar16(0x2b9 + (channel << 10), si);
}
| |
/* Run a command through the per-channel 0x580 register: write the
   command word (0x8493c012 | addr), pulse bit 0 to start, spin until
   bit 16 signals completion, then clear bit 0.  Returns the completed
   register value. */
static u32 get_580(int channel, u8 addr)
{
	u32 ret;
	gav(read_1d0(0x142, 3));
	write_mchbar8(0x5ff, 0x0);	/* OK */
	write_mchbar8(0x5ff, 0x80);	/* OK */
	write_mchbar32(0x580 + (channel << 10), 0x8493c012 | addr);
	write_mchbar8(0x580 + (channel << 10),
		      read_mchbar8(0x580 + (channel << 10)) | 1);
	while (!((ret = read_mchbar32(0x580 + (channel << 10))) & 0x10000)) ;
	write_mchbar8(0x580 + (channel << 10),
		      read_mchbar8(0x580 + (channel << 10)) & ~1);
	return ret;
}
| |
| const int cached_config = 0; |
| |
| #define NUM_CHANNELS 2 |
| #define NUM_SLOTS 2 |
| #define NUM_RANKS 2 |
| #define RANK_SHIFT 28 |
| #define CHANNEL_SHIFT 10 |
| |
| #include "raminit_tables.c" |
| |
/* Training sequence for one rank: reprogram lane timing sets 1 and 2
   from the cached values, run the 0x580 command for each set, then read
   timing set 2 back through the 0x500 window and derive set 3 as
   set 2 + 0x20. */
static void seq9(struct raminfo *info, int channel, int slot, int rank)
{
	int i, lane;

	for (i = 0; i < 2; i++)
		for (lane = 0; lane < 8; lane++)
			write_500(info, channel,
				  info->training.lane_timings[i +
							      1][channel][slot]
				  [rank][lane], get_timing_register_addr(lane,
									 i + 1,
									 slot,
									 rank),
				  9, 0);

	write_1d0(1, 0x103, 6, 1);
	for (lane = 0; lane < 8; lane++)
		write_500(info, channel,
			  info->training.
			  lane_timings[0][channel][slot][rank][lane],
			  get_timing_register_addr(lane, 0, slot, rank), 9, 0);

	for (i = 0; i < 2; i++) {
		for (lane = 0; lane < 8; lane++)
			write_500(info, channel,
				  info->training.lane_timings[i +
							      1][channel][slot]
				  [rank][lane], get_timing_register_addr(lane,
									 i + 1,
									 slot,
									 rank),
				  9, 0);
		gav(get_580(channel, ((i + 1) << 2) | (rank << 5)));
	}

	gav(read_1d0(0x142, 3));	// = 0x10408118
	write_mchbar8(0x5ff, 0x0);	/* OK */
	write_mchbar8(0x5ff, 0x80);	/* OK */
	write_1d0(0x2, 0x142, 3, 1);
	/* Read back the trained set-2 value and offset set 3 from it. */
	for (lane = 0; lane < 8; lane++) {
		// printk (BIOS_ERR, "before: %x\n", info->training.lane_timings[2][channel][slot][rank][lane]);
		info->training.lane_timings[2][channel][slot][rank][lane] =
		    read_500(info, channel,
			     get_timing_register_addr(lane, 2, slot, rank), 9);
		//printk (BIOS_ERR, "after: %x\n", info->training.lane_timings[2][channel][slot][rank][lane]);
		info->training.lane_timings[3][channel][slot][rank][lane] =
		    info->training.lane_timings[2][channel][slot][rank][lane] +
		    0x20;
	}
}
| |
| static int count_ranks_in_channel(struct raminfo *info, int channel) |
| { |
| int slot, rank; |
| int res = 0; |
| for (slot = 0; slot < NUM_SLOTS; slot++) |
| for (rank = 0; rank < NUM_SLOTS; rank++) |
| res += info->populated_ranks[channel][slot][rank]; |
| return res; |
| } |
| |
/* Per-rank configuration: run the seq9 training sequence, program
   timings with a 0x80 offset, read/write-test the rank (skipped on S3
   resume), then program the final timings and issue the closing 0x580
   command.  Channel-0 ranks are offset in the address map by the number
   of ranks on channel 1. */
static void
config_rank(struct raminfo *info, int s3resume, int channel, int slot, int rank)
{
	int add;

	write_1d0(0, 0x178, 7, 1);
	seq9(info, channel, slot, rank);
	program_timings(info, 0x80, channel, slot, rank);

	if (channel == 0)
		add = count_ranks_in_channel(info, 1);
	else
		add = 0;
	if (!s3resume)
		gav(rw_test(rank + add));
	program_timings(info, 0x00, channel, slot, rank);
	if (!s3resume)
		gav(rw_test(rank + add));
	if (!s3resume)
		gav(rw_test(rank + add));
	write_1d0(0, 0x142, 3, 1);
	write_1d0(0, 0x103, 6, 1);

	gav(get_580(channel, 0xc | (rank << 5)));
	gav(read_1d0(0x142, 3));

	write_mchbar8(0x5ff, 0x0);	/* OK */
	write_mchbar8(0x5ff, 0x80);	/* OK */
}
| |
/* Write `val' to indirect registers 0x4cf, 0x659 and 0x697 on the
   channel, reading each first (observed values noted inline). */
static void set_4cf(struct raminfo *info, int channel, u8 val)
{
	gav(read_500(info, channel, 0x4cf, 4));	// = 0xc2300cf9
	write_500(info, channel, val, 0x4cf, 4, 1);
	gav(read_500(info, channel, 0x659, 4));	// = 0x80300839
	write_500(info, channel, val, 0x659, 4, 1);
	gav(read_500(info, channel, 0x697, 4));	// = 0x80300839
	write_500(info, channel, val, 0x697, 4, 1);
}
| |
/* Program the 0x32c-0x34a register group for all channels, either to
   zero (zero != 0) or to fixed patterns, sampling the 0x138/0x13c
   readback registers for each (channel, j, k) on the way.  Finally set
   bit 0 of 0x130 and wait for the hardware to clear it. */
static void set_334(int zero)
{
	int j, k, channel;
	const u32 val3[] = { 0x2a2b2a2b, 0x26272627, 0x2e2f2e2f, 0x2a2b };
	u32 vd8[2][16];

	for (channel = 0; channel < NUM_CHANNELS; channel++) {
		for (j = 0; j < 4; j++) {
			u32 a = (j == 1) ? 0x29292929 : 0x31313131;
			/* j == 3 addresses a 16-bit-wide group. */
			u32 lmask = (j == 3) ? 0xffff : 0xffffffff;
			u16 c;
			if ((j == 0 || j == 3) && zero)
				c = 0;
			else if (j == 3)
				c = 0x5f;
			else
				c = 0x5f5f;

			for (k = 0; k < 2; k++) {
				write_mchbar32(0x138 + 8 * k,
					       (channel << 26) | (j << 24));
				gav(vd8[1][(channel << 3) | (j << 1) | k] =
				    read_mchbar32(0x138 + 8 * k));
				gav(vd8[0][(channel << 3) | (j << 1) | k] =
				    read_mchbar32(0x13c + 8 * k));
			}

			write_mchbar32(0x334 + (channel << 10) + (j * 0x44),
				       zero ? 0 : val3[j]);
			write_mchbar32(0x32c + (channel << 10) + (j * 0x44),
				       zero ? 0 : (0x18191819 & lmask));
			write_mchbar16(0x34a + (channel << 10) + (j * 0x44), c);
			write_mchbar32(0x33c + (channel << 10) + (j * 0x44),
				       zero ? 0 : (a & lmask));
			write_mchbar32(0x344 + (channel << 10) + (j * 0x44),
				       zero ? 0 : (a & lmask));
		}
	}

	write_mchbar32(0x130, read_mchbar32(0x130) | 1);	/* OK */
	while (read_mchbar8(0x130) & 1) ;	/* OK */
}
| |
| static void rmw_1d0(u16 addr, u32 and, u32 or, int split, int flag) |
| { |
| u32 v; |
| v = read_1d0(addr, split); |
| write_1d0((v & and) | or, addr, split, flag); |
| } |
| |
| static int find_highest_bit_set(u16 val) |
| { |
| int i; |
| for (i = 15; i >= 0; i--) |
| if (val & (1 << i)) |
| return i; |
| return -1; |
| } |
| |
| static int find_lowest_bit_set32(u32 val) |
| { |
| int i; |
| for (i = 0; i < 32; i++) |
| if (val & (1 << i)) |
| return i; |
| return -1; |
| } |
| |
/* Byte offsets into the per-DIMM SPD data (info->spd[ch][slot][BYTE]);
   the values match the DDR3 SPD layout. */
enum {
	DEVICE_TYPE = 2,
	MODULE_TYPE = 3,
	DENSITY = 4,
	RANKS_AND_DQ = 7,
	MEMORY_BUS_WIDTH = 8,
	TIMEBASE_DIVIDEND = 10,
	TIMEBASE_DIVISOR = 11,
	CYCLETIME = 12,

	CAS_LATENCIES_LSB = 14,
	CAS_LATENCIES_MSB = 15,
	CAS_LATENCY_TIME = 16,
	THERMAL_AND_REFRESH = 31,
	REFERENCE_RAW_CARD_USED = 62,
	RANK1_ADDRESS_MAPPING = 63
};
| |
/* Derive clock_speed_index and cas_latency from the SPD data of all
   populated slots: intersect the supported CAS latencies, take the
   slowest cycle/CAS time across modules, then pick the fastest standard
   clock and the lowest CAS latency that satisfy them.  Dies if no
   workable combination exists. */
static void calculate_timings(struct raminfo *info)
{
	unsigned cycletime;
	unsigned cas_latency_time;
	unsigned supported_cas_latencies;
	unsigned channel, slot;
	unsigned clock_speed_index;
	unsigned min_cas_latency;
	unsigned cas_latency;
	unsigned max_clock_index;

	/* Find common CAS latency */
	/* After the <<1, bit n of the mask corresponds to CL n+3. */
	supported_cas_latencies = 0x3fe;
	for (channel = 0; channel < NUM_CHANNELS; channel++)
		for (slot = 0; slot < NUM_SLOTS; slot++)
			if (info->populated_ranks[channel][slot][0])
				supported_cas_latencies &=
				    2 *
				    (info->
				     spd[channel][slot][CAS_LATENCIES_LSB] |
				     (info->
				      spd[channel][slot][CAS_LATENCIES_MSB] <<
				      8));

	max_clock_index = min(3, info->max_supported_clock_speed_index);

	cycletime = min_cycletime[max_clock_index];
	cas_latency_time = min_cas_latency_time[max_clock_index];

	/* Slowest module wins: timebase is in ps (SPD dividend/divisor). */
	for (channel = 0; channel < NUM_CHANNELS; channel++)
		for (slot = 0; slot < NUM_SLOTS; slot++)
			if (info->populated_ranks[channel][slot][0]) {
				unsigned timebase;
				timebase =
				    1000 *
				    info->
				    spd[channel][slot][TIMEBASE_DIVIDEND] /
				    info->spd[channel][slot][TIMEBASE_DIVISOR];
				cycletime =
				    max(cycletime,
					timebase *
					info->spd[channel][slot][CYCLETIME]);
				cas_latency_time =
				    max(cas_latency_time,
					timebase *
					info->
					spd[channel][slot][CAS_LATENCY_TIME]);
			}
	/* Snap to the fastest standard clock not faster than cycletime. */
	for (clock_speed_index = 0; clock_speed_index < 3; clock_speed_index++) {
		if (cycletime == min_cycletime[clock_speed_index])
			break;
		if (cycletime > min_cycletime[clock_speed_index]) {
			clock_speed_index--;
			cycletime = min_cycletime[clock_speed_index];
			break;
		}
	}
	min_cas_latency = CEIL_DIV(cas_latency_time, cycletime);
	cas_latency = 0;
	/* Strip supported latencies from the top until one fits. */
	while (supported_cas_latencies) {
		cas_latency = find_highest_bit_set(supported_cas_latencies) + 3;
		if (cas_latency <= min_cas_latency)
			break;
		supported_cas_latencies &=
		    ~(1 << find_highest_bit_set(supported_cas_latencies));
	}

	if (cas_latency != min_cas_latency && clock_speed_index)
		clock_speed_index--;

	if (cas_latency * min_cycletime[clock_speed_index] > 20000)
		die("Couldn't configure DRAM");
	info->clock_speed_index = clock_speed_index;
	info->cas_latency = cas_latency;
}
| |
/* Program the initial per-lane timing register sets 0-3 for every
   populated rank from lookup tables keyed by (extended) silicon
   revision, clock index and SPD raw-card type, then write the derived
   per-rank values to the addresses in u16_fffd0c50/u16_fffd0c70 and the
   per-channel values to u16_fffd0c68. */
static void program_base_timings(struct raminfo *info)
{
	unsigned channel;
	unsigned slot, rank, lane;
	unsigned extended_silicon_revision;
	int i;

	/* Revision 0 with any SO-DIMM raw-card type 3 is treated as
	   extended revision 4 by the lookup tables. */
	extended_silicon_revision = info->silicon_revision;
	if (info->silicon_revision == 0)
		for (channel = 0; channel < NUM_CHANNELS; channel++)
			for (slot = 0; slot < NUM_SLOTS; slot++)
				if ((info->
				     spd[channel][slot][MODULE_TYPE] & 0xF) ==
				    3)
					extended_silicon_revision = 4;

	for (channel = 0; channel < NUM_CHANNELS; channel++) {
		/* NOTE(review): rank iterates to NUM_SLOTS, not NUM_RANKS;
		   benign while both are 2, but NUM_RANKS is meant. */
		for (slot = 0; slot < NUM_SLOTS; slot++)
			for (rank = 0; rank < NUM_SLOTS; rank++) {
				int card_timing_2;
				if (!info->populated_ranks[channel][slot][rank])
					continue;

				for (lane = 0; lane < 9; lane++) {
					int tm_reg;
					int card_timing;

					/* Raw-card-specific adjustment for
					   module type 3 (cards 3 and 5). */
					card_timing = 0;
					if ((info->
					     spd[channel][slot][MODULE_TYPE] &
					     0xF) == 3) {
						int reference_card;
						reference_card =
						    info->
						    spd[channel][slot]
						    [REFERENCE_RAW_CARD_USED] &
						    0x1f;
						if (reference_card == 3)
							card_timing =
							    u16_ffd1188[0][lane]
							    [info->
							     clock_speed_index];
						if (reference_card == 5)
							card_timing =
							    u16_ffd1188[1][lane]
							    [info->
							     clock_speed_index];
					}

					info->training.
					    lane_timings[0][channel][slot][rank]
					    [lane] =
					    u8_FFFD1218[info->
							clock_speed_index];
					info->training.
					    lane_timings[1][channel][slot][rank]
					    [lane] = 256;

					for (tm_reg = 2; tm_reg < 4; tm_reg++)
						info->training.
						    lane_timings[tm_reg]
						    [channel][slot][rank][lane]
						    =
						    u8_FFFD1240[channel]
						    [extended_silicon_revision]
						    [lane][2 * slot +
							   rank][info->
								 clock_speed_index]
						    + info->max4048[channel]
						    +
						    u8_FFFD0C78[channel]
						    [extended_silicon_revision]
						    [info->
						     mode4030[channel]][slot]
						    [rank][info->
							   clock_speed_index]
						    + card_timing;
					for (tm_reg = 0; tm_reg < 4; tm_reg++)
						write_500(info, channel,
							  info->training.
							  lane_timings[tm_reg]
							  [channel][slot][rank]
							  [lane],
							  get_timing_register_addr
							  (lane, tm_reg, slot,
							   rank), 9, 0);
				}

				card_timing_2 = 0;
				if (!(extended_silicon_revision != 4
				      || (info->
					  populated_ranks_mask[channel] & 5) ==
				      5)) {
					if ((info->
					     spd[channel][slot]
					     [REFERENCE_RAW_CARD_USED] & 0x1F)
					    == 3)
						card_timing_2 =
						    u16_FFFE0EB8[0][info->
								    clock_speed_index];
					if ((info->
					     spd[channel][slot]
					     [REFERENCE_RAW_CARD_USED] & 0x1F)
					    == 5)
						card_timing_2 =
						    u16_FFFE0EB8[1][info->
								    clock_speed_index];
				}

				for (i = 0; i < 3; i++)
					write_500(info, channel,
						  (card_timing_2 +
						   info->max4048[channel]
						   +
						   u8_FFFD0EF8[channel]
						   [extended_silicon_revision]
						   [info->
						    mode4030[channel]][info->
								       clock_speed_index]),
						  u16_fffd0c50[i][slot][rank],
						  8, 1);
				write_500(info, channel,
					  (info->max4048[channel] +
					   u8_FFFD0C78[channel]
					   [extended_silicon_revision][info->
									mode4030
									[channel]]
					   [slot][rank][info->
							clock_speed_index]),
					  u16_fffd0c70[slot][rank], 7, 1);
			}
		if (!info->populated_ranks_mask[channel])
			continue;
		for (i = 0; i < 3; i++)
			write_500(info, channel,
				  (info->max4048[channel] +
				   info->avg4044[channel]
				   +
				   u8_FFFD17E0[channel]
				   [extended_silicon_revision][info->
							       mode4030
							       [channel]][info->
									  clock_speed_index]),
				  u16_fffd0c68[i], 8, 1);
	}
}
| |
/* FSB cycle time in ps (fsb_frequency is in 1.(1)/2 MHz units). */
static unsigned int fsbcycle_ps(struct raminfo *info)
{
	return 900000 / info->fsb_frequency;
}

/* The time of DDR transfer in ps. */
static unsigned int halfcycle_ps(struct raminfo *info)
{
	return 3750 / (info->clock_speed_index + 3);
}

/* The time of clock cycle in ps. */
static unsigned int cycle_ps(struct raminfo *info)
{
	return 2 * halfcycle_ps(info);
}

/* Frequency in 1.(1)=10/9 MHz units. */
static unsigned frequency_11(struct raminfo *info)
{
	return (info->clock_speed_index + 3) * 120;
}

/* Frequency in 0.1 MHz units. */
static unsigned frequency_01(struct raminfo *info)
{
	return 100 * frequency_11(info) / 9;
}

/* Convert picoseconds to DDR half-cycles at the current clock. */
static unsigned ps_to_halfcycles(struct raminfo *info, unsigned int ps)
{
	return (frequency_11(info) * 2) * ps / 900000;
}

/* Convert nanoseconds to full clock cycles at the current clock. */
static unsigned ns_to_cycles(struct raminfo *info, unsigned int ns)
{
	return (frequency_11(info)) * ns / 900;
}
| |
/* Compute the delay-derived per-channel parameters (some_delay_*,
   max_slots_used_in_channel, mode4030, avg4044, max4048) from the chip
   revision / silicon revision, the SPD raw-card data and the lookup
   tables, and program the per-channel 0x244 registers.  The exact
   meaning of the "some_delay"/"unk" quantities is not documented; the
   arithmetic follows the original vendor-derived sequence. */
static void compute_derived_timings(struct raminfo *info)
{
	unsigned channel, slot, rank;
	int extended_silicon_revision;
	int some_delay_1_ps;
	int some_delay_2_ps;
	int some_delay_2_halfcycles_ceil;
	int some_delay_2_halfcycles_floor;
	int some_delay_3_ps;
	int some_delay_3_halfcycles;
	int some_delay_3_ps_rounded;
	int some_delay_1_cycle_ceil;
	int some_delay_1_cycle_floor;

	some_delay_3_halfcycles = 0;
	some_delay_3_ps_rounded = 0;
	/* Same extended-revision promotion as program_base_timings(). */
	extended_silicon_revision = info->silicon_revision;
	if (!info->silicon_revision)
		for (channel = 0; channel < NUM_CHANNELS; channel++)
			for (slot = 0; slot < NUM_SLOTS; slot++)
				if ((info->
				     spd[channel][slot][MODULE_TYPE] & 0xF) ==
				    3)
					extended_silicon_revision = 4;
	if (info->board_lane_delay[7] < 5)
		info->board_lane_delay[7] = 5;
	/* revision_flag_1 ends up nonzero only for revision >= 16 on
	   silicon revisions other than 2/3. */
	info->revision_flag_1 = 2;
	if (info->silicon_revision == 2 || info->silicon_revision == 3)
		info->revision_flag_1 = 0;
	if (info->revision < 16)
		info->revision_flag_1 = 0;

	if (info->revision < 8)
		info->revision_flag_1 = 0;
	if (info->revision >= 8 && (info->silicon_revision == 0
				    || info->silicon_revision == 1))
		some_delay_2_ps = 735;
	else
		some_delay_2_ps = 750;

	if (info->revision >= 0x10 && (info->silicon_revision == 0
				       || info->silicon_revision == 1))
		some_delay_1_ps = 3929;
	else
		some_delay_1_ps = 3490;

	/* Floor/ceil of delay 1 in full cycles; on an exact multiple the
	   floor is decremented instead of the ceil incremented. */
	some_delay_1_cycle_floor = some_delay_1_ps / cycle_ps(info);
	some_delay_1_cycle_ceil = some_delay_1_ps / cycle_ps(info);
	if (some_delay_1_ps % cycle_ps(info))
		some_delay_1_cycle_ceil++;
	else
		some_delay_1_cycle_floor--;
	info->some_delay_1_cycle_floor = some_delay_1_cycle_floor;
	if (info->revision_flag_1)
		some_delay_2_ps = halfcycle_ps(info) >> 6;
	some_delay_2_ps +=
	    max(some_delay_1_ps - 30,
		2 * halfcycle_ps(info) * (some_delay_1_cycle_ceil - 1) + 1000) +
	    375;
	some_delay_3_ps =
	    halfcycle_ps(info) - some_delay_2_ps % halfcycle_ps(info);
	if (info->revision_flag_1) {
		if (some_delay_3_ps < 150)
			some_delay_3_halfcycles = 0;
		else
			some_delay_3_halfcycles =
			    (some_delay_3_ps << 6) / halfcycle_ps(info);
		some_delay_3_ps_rounded =
		    halfcycle_ps(info) * some_delay_3_halfcycles >> 6;
	}
	some_delay_2_halfcycles_ceil =
	    (some_delay_2_ps + halfcycle_ps(info) - 1) / halfcycle_ps(info) -
	    2 * (some_delay_1_cycle_ceil - 1);
	if (info->revision_flag_1 && some_delay_3_ps < 150)
		some_delay_2_halfcycles_ceil++;
	some_delay_2_halfcycles_floor = some_delay_2_halfcycles_ceil;
	if (info->revision < 0x10)
		some_delay_2_halfcycles_floor =
		    some_delay_2_halfcycles_ceil - 1;
	if (!info->revision_flag_1)
		some_delay_2_halfcycles_floor++;
	info->some_delay_2_halfcycles_ceil = some_delay_2_halfcycles_ceil;
	info->some_delay_3_ps_rounded = some_delay_3_ps_rounded;
	/* Two slots are "used" if any channel has both slots populated. */
	if ((info->populated_ranks[0][0][0] && info->populated_ranks[0][1][0])
	    || (info->populated_ranks[1][0][0]
		&& info->populated_ranks[1][1][0]))
		info->max_slots_used_in_channel = 2;
	else
		info->max_slots_used_in_channel = 1;
	for (channel = 0; channel < 2; channel++)
		write_mchbar32(0x244 + (channel << 10),
			       ((info->revision < 8) ? 1 : 0x200)
			       | ((2 - info->max_slots_used_in_channel) << 17) |
			       (channel << 21) | (info->
						  some_delay_1_cycle_floor <<
						  18) | 0x9510);
	if (info->max_slots_used_in_channel == 1) {
		info->mode4030[0] = (count_ranks_in_channel(info, 0) == 2);
		info->mode4030[1] = (count_ranks_in_channel(info, 1) == 2);
	} else {
		info->mode4030[0] = ((count_ranks_in_channel(info, 0) == 1) || (count_ranks_in_channel(info, 0) == 2)) ? 2 : 3;	/* 2 if 1 or 2 ranks */
		info->mode4030[1] = ((count_ranks_in_channel(info, 1) == 1)
				     || (count_ranks_in_channel(info, 1) ==
					 2)) ? 2 : 3;
	}
	/* Per-channel avg4044/max4048 from the "unk" table values. */
	for (channel = 0; channel < NUM_CHANNELS; channel++) {
		int max_of_unk;
		int min_of_unk_2;

		int i, count;
		int sum;

		if (!info->populated_ranks_mask[channel])
			continue;

		max_of_unk = 0;
		min_of_unk_2 = 32767;

		sum = 0;
		count = 0;
		for (i = 0; i < 3; i++) {
			int unk1;
			if (info->revision < 8)
				unk1 =
				    u8_FFFD1891[0][channel][info->
							    clock_speed_index]
				    [i];
			else if (!
				 (info->revision >= 0x10
				  || info->revision_flag_1))
				unk1 =
				    u8_FFFD1891[1][channel][info->
							    clock_speed_index]
				    [i];
			else
				unk1 = 0;
			for (slot = 0; slot < NUM_SLOTS; slot++)
				for (rank = 0; rank < NUM_RANKS; rank++) {
					int a = 0;
					int b = 0;

					if (!info->
					    populated_ranks[channel][slot]
					    [rank])
						continue;
					if (extended_silicon_revision == 4
					    && (info->
						populated_ranks_mask[channel] &
						5) != 5) {
						if ((info->
						     spd[channel][slot]
						     [REFERENCE_RAW_CARD_USED] &
						     0x1F) == 3) {
							a = u16_ffd1178[0]
							    [info->
							     clock_speed_index];
							b = u16_fe0eb8[0][info->
									  clock_speed_index];
						} else
						    if ((info->
							 spd[channel][slot]
							 [REFERENCE_RAW_CARD_USED]
							 & 0x1F) == 5) {
							a = u16_ffd1178[1]
							    [info->
							     clock_speed_index];
							b = u16_fe0eb8[1][info->
									  clock_speed_index];
						}
					}
					min_of_unk_2 = min(min_of_unk_2, a);
					min_of_unk_2 = min(min_of_unk_2, b);
					if (rank == 0) {
						sum += a;
						count++;
					}
					{
						int t;
						t = b +
						    u8_FFFD0EF8[channel]
						    [extended_silicon_revision]
						    [info->
						     mode4030[channel]][info->
									clock_speed_index];
						if (unk1 >= t)
							max_of_unk =
							    max(max_of_unk,
								unk1 - t);
					}
				}
			{
				int t =
				    u8_FFFD17E0[channel]
				    [extended_silicon_revision][info->
								mode4030
								[channel]]
				    [info->clock_speed_index] + min_of_unk_2;
				if (unk1 >= t)
					max_of_unk = max(max_of_unk, unk1 - t);
			}
		}

		info->avg4044[channel] = sum / count;
		info->max4048[channel] = max_of_unk;
	}
}
| |
/* Issue a JEDEC mode-register command to one rank: `addr3' is placed in
   MCHBAR 0x271/0x671, and `value' is presented on the address bus by
   performing a dummy read from a crafted physical address inside the
   rank.  Odd ranks with mirrored address mapping (SPD byte 63 bit 0)
   get the relevant address/bank bit pairs swapped first.  Afterwards
   the registers are restored and a plain read from the rank base is
   issued. */
static void jedec_read(struct raminfo *info,
		       int channel, int slot, int rank,
		       int total_rank, u8 addr3, unsigned int value)
{
	/* Handle mirrored mapping. */
	if ((rank & 1) && (info->spd[channel][slot][RANK1_ADDRESS_MAPPING] & 1))
		addr3 =
		    (addr3 & 0xCF) | ((addr3 & 0x10) << 1) | ((addr3 >> 1) &
							      0x10);
	write_mchbar8(0x271, addr3 | (read_mchbar8(0x271) & 0xC1));
	write_mchbar8(0x671, addr3 | (read_mchbar8(0x671) & 0xC1));

	/* Handle mirrored mapping. */
	if ((rank & 1) && (info->spd[channel][slot][RANK1_ADDRESS_MAPPING] & 1))
		value =
		    (value & ~0x1f8) | ((value >> 1) & 0xa8) | ((value & 0xa8)
								<< 1);

	read32p((value << 3) | (total_rank << 28));

	write_mchbar8(0x271, (read_mchbar8(0x271) & 0xC3) | 2);
	write_mchbar8(0x671, (read_mchbar8(0x671) & 0xC3) | 2);

	read32p(total_rank << 28);
}
| |
/* DDR3 MR1 field encodings: Rtt nominal and output driver strength. */
enum {
	MR1_RZQ12 = 512,
	MR1_RZQ2 = 64,
	MR1_RZQ4 = 4,
	MR1_ODS34OHM = 2
};

/* DDR3 MR0 field encodings. */
enum {
	MR0_BT_INTERLEAVED = 8,
	MR0_DLL_RESET_ON = 256
};

/* DDR3 MR2 field encodings: dynamic ODT (Rtt_WR). */
enum {
	MR2_RTT_WR_DISABLED = 0,
	MR2_RZQ2 = 1 << 10
};
| |
/* JEDEC DRAM initialization: derive write recovery, (auto) self-refresh
   and DLL settings from SPD and board data, pick per-channel Rtt/RZQ
   values by silicon revision and population, program the 0x588-0x590
   registers, then issue the four mode-register commands (addr3 = 0x28,
   0x38, 0x18, 6 — presumably MR2/MR3/MR1/MR0; confirm against JESD79-3)
   to every populated rank in decreasing channel order. */
static void jedec_init(struct raminfo *info)
{
	int write_recovery;
	int channel, slot, rank;
	int total_rank;
	int dll_on;
	int self_refresh_temperature;
	int auto_self_refresh;

	auto_self_refresh = 1;
	self_refresh_temperature = 1;
	if (info->board_lane_delay[3] <= 10) {
		if (info->board_lane_delay[3] <= 8)
			write_recovery = info->board_lane_delay[3] - 4;
		else
			write_recovery = 5;
	} else {
		write_recovery = 6;
	}
	/* Both flags survive only if every populated DIMM supports them
	   (SPD byte 31). */
	FOR_POPULATED_RANKS {
		auto_self_refresh &=
		    (info->spd[channel][slot][THERMAL_AND_REFRESH] >> 2) & 1;
		self_refresh_temperature &=
		    info->spd[channel][slot][THERMAL_AND_REFRESH] & 1;
	}
	if (auto_self_refresh == 1)
		self_refresh_temperature = 0;

	dll_on = ((info->silicon_revision != 2 && info->silicon_revision != 3)
		  || (info->populated_ranks[0][0][0]
		      && info->populated_ranks[0][1][0])
		  || (info->populated_ranks[1][0][0]
		      && info->populated_ranks[1][1][0]));

	total_rank = 0;

	for (channel = NUM_CHANNELS - 1; channel >= 0; channel--) {
		int rtt, rtt_wr = MR2_RTT_WR_DISABLED;
		int rzq_reg58e;

		if (info->silicon_revision == 2 || info->silicon_revision == 3) {
			rzq_reg58e = 64;
			rtt = MR1_RZQ2;
			if (info->clock_speed_index != 0) {
				rzq_reg58e = 4;
				if (info->populated_ranks_mask[channel] == 3)
					rtt = MR1_RZQ4;
			}
		} else {
			if ((info->populated_ranks_mask[channel] & 5) == 5) {
				rtt = MR1_RZQ12;
				rzq_reg58e = 64;
				rtt_wr = MR2_RZQ2;
			} else {
				rzq_reg58e = 4;
				rtt = MR1_RZQ4;
			}
		}

		write_mchbar16(0x588 + (channel << 10), 0x0);
		write_mchbar16(0x58a + (channel << 10), 0x4);
		write_mchbar16(0x58c + (channel << 10), rtt | MR1_ODS34OHM);
		write_mchbar16(0x58e + (channel << 10), rzq_reg58e | 0x82);
		write_mchbar16(0x590 + (channel << 10), 0x1282);

		for (slot = 0; slot < NUM_SLOTS; slot++)
			for (rank = 0; rank < NUM_RANKS; rank++)
				if (info->populated_ranks[channel][slot][rank]) {
					jedec_read(info, channel, slot, rank,
						   total_rank, 0x28,
						   rtt_wr | (info->
							     clock_speed_index
							     << 3)
						   | (auto_self_refresh << 6) |
						   (self_refresh_temperature <<
						    7));
					jedec_read(info, channel, slot, rank,
						   total_rank, 0x38, 0);
					jedec_read(info, channel, slot, rank,
						   total_rank, 0x18,
						   rtt | MR1_ODS34OHM);
					jedec_read(info, channel, slot, rank,
						   total_rank, 6,
						   (dll_on << 12) |
						   (write_recovery << 9)
						   | ((info->cas_latency - 4) <<
						      4) | MR0_BT_INTERLEAVED |
						   MR0_DLL_RESET_ON);
					total_rank++;
				}
	}
}
| |
/*
 * Program the per-rank population/size registers (MCHBAR 0x208) and the
 * cumulative per-channel size registers (0x200), then the channel
 * interleave split (0x100/0x104).  With pre_jedec set, each rank is
 * programmed as a dummy 256MB module (used before JEDEC init).
 */
static void program_modules_memory_map(struct raminfo *info, int pre_jedec)
{
	unsigned channel, slot, rank;
	unsigned int total_mb[2] = { 0, 0 };	/* total memory per channel in MB */
	unsigned int channel_0_non_interleaved;

	FOR_ALL_RANKS {
		if (info->populated_ranks[channel][slot][rank]) {
			/* 256MB << density, halved for x16 modules. */
			total_mb[channel] +=
			    pre_jedec ? 256 : (256 << info->
					       density[channel][slot] >> info->
					       is_x16_module[channel][slot]);
			write_mchbar8(0x208 + rank + 2 * slot + (channel << 10),
				      (pre_jedec ? (1 | ((1 + 1) << 1))
				       : (info->
					  is_x16_module[channel][slot] |
					  ((info->density[channel][slot] +
					    1) << 1))) | 0x80);
		}
		/* Running channel size in 64MB units — written for every
		   rank, populated or not. */
		write_mchbar16(0x200 + (channel << 10) + 4 * slot + 2 * rank,
			       total_mb[channel] >> 6);
	}

	info->total_memory_mb = total_mb[0] + total_mb[1];

	/* The interleaved region is twice the smaller channel. */
	info->interleaved_part_mb =
	    pre_jedec ? 0 : 2 * min(total_mb[0], total_mb[1]);
	info->non_interleaved_part_mb =
	    total_mb[0] + total_mb[1] - info->interleaved_part_mb;
	channel_0_non_interleaved = total_mb[0] - info->interleaved_part_mb / 2;
	write_mchbar32(0x100,
		       channel_0_non_interleaved | (info->
						    non_interleaved_part_mb <<
						    16));
	if (!pre_jedec)
		write_mchbar16(0x104, info->interleaved_part_mb);
}
| |
/*
 * Program board- and frequency-dependent delay/timing registers for
 * both channels (MCHBAR 0x1xx/0x2xx plus selected 0x500/0x600-window
 * registers), set up the preliminary memory map, and mirror it into the
 * QuickPath registers.
 */
static void program_board_delay(struct raminfo *info)
{
	int cas_latency_shift;
	int some_delay_ns;
	int some_delay_3_half_cycles;

	unsigned channel, i;
	int high_multiplier;
	int lane_3_delay;
	int cas_latency_derived;

	high_multiplier = 0;
	some_delay_ns = 200;
	some_delay_3_half_cycles = 4;
	/* Silicon revisions 0/1 get an extra cycle of CAS margin. */
	cas_latency_shift = info->silicon_revision == 0
	    || info->silicon_revision == 1 ? 1 : 0;
	if (info->revision < 8) {
		some_delay_ns = 600;
		cas_latency_shift = 0;
	}
	{
		int speed_bit;
		speed_bit =
		    ((info->clock_speed_index > 1
		      || (info->silicon_revision != 2
			  && info->silicon_revision != 3))) ^ (info->revision >=
							       0x10);
		write_500(info, 0, speed_bit | ((!info->use_ecc) << 1), 0x60e,
			  3, 1);
		write_500(info, 1, speed_bit | ((!info->use_ecc) << 1), 0x60e,
			  3, 1);
		if (info->revision >= 0x10 && info->clock_speed_index <= 1
		    && (info->silicon_revision == 2
			|| info->silicon_revision == 3))
			rmw_1d0(0x116, 5, 2, 4, 1);
	}
	write_mchbar32(0x120,
		       (1 << (info->max_slots_used_in_channel + 28)) |
		       0x188e7f9f);

	write_mchbar8(0x124,
		      info->board_lane_delay[4] +
		      ((frequency_01(info) + 999) / 1000));
	write_mchbar16(0x125, 0x1360);
	write_mchbar8(0x127, 0x40);
	if (info->fsb_frequency < frequency_11(info) / 2) {
		unsigned some_delay_2_half_cycles;
		high_multiplier = 1;
		some_delay_2_half_cycles = ps_to_halfcycles(info,
							    ((3 *
							      fsbcycle_ps(info))
							     >> 1) +
							    (halfcycle_ps(info)
							     *
							     reg178_min[info->
									clock_speed_index]
							     >> 6)
							    +
							    4 *
							    halfcycle_ps(info)
							    + 2230);
		some_delay_3_half_cycles =
		    min((some_delay_2_half_cycles +
			 (frequency_11(info) * 2) * (28 -
						     some_delay_2_half_cycles) /
			 (frequency_11(info) * 2 -
			  4 * (info->fsb_frequency))) >> 3, 7);
	}
	if (read_mchbar8(0x2ca9) & 1)
		some_delay_3_half_cycles = 3;
	for (channel = 0; channel < NUM_CHANNELS; channel++) {
		write_mchbar32(0x220 + (channel << 10),
			       read_mchbar32(0x220 +
					     (channel << 10)) | 0x18001117);
		write_mchbar32(0x224 + (channel << 10),
			       (info->max_slots_used_in_channel - 1)
			       |
			       ((info->cas_latency - 5 -
				 info->clock_speed_index) << 21)
			       |
			       ((info->max_slots_used_in_channel +
				 info->cas_latency - cas_latency_shift -
				 4) << 16)
			       | ((info->cas_latency - cas_latency_shift - 4) <<
				  26)
			       |
			       ((info->cas_latency - info->clock_speed_index +
				 info->max_slots_used_in_channel - 6) << 8));
		write_mchbar32(0x228 + (channel << 10),
			       info->max_slots_used_in_channel);
		write_mchbar8(0x239 + (channel << 10), 32);
		write_mchbar32(0x248 + (channel << 10),
			       (high_multiplier << 24) |
			       (some_delay_3_half_cycles << 25) | 0x840000);
		write_mchbar32(0x278 + (channel << 10), 0xc362042);
		write_mchbar32(0x27c + (channel << 10), 0x8b000062);
		write_mchbar32(0x24c + (channel << 10),
			       ((! !info->
				 clock_speed_index) << 17) | (((2 +
							       info->
							       clock_speed_index
							       -
							       (! !info->
								clock_speed_index)))
							      << 12) | 0x10200);

		write_mchbar8(0x267 + (channel << 10), 0x4);
		write_mchbar16(0x272 + (channel << 10), 0x155);
		write_mchbar32(0x2bc + (channel << 10),
			       (read_mchbar32(0x2bc + (channel << 10)) &
				0xFF000000)
			       | 0x707070);

		/* Bitmask of unpopulated ranks for this channel. */
		write_500(info, channel,
			  ((!info->populated_ranks[channel][1][1])
			   | (!info->populated_ranks[channel][1][0] << 1)
			   | (!info->populated_ranks[channel][0][1] << 2)
			   | (!info->populated_ranks[channel][0][0] << 3)),
			  0x4c9, 4, 1);
	}

	write_mchbar8(0x2c4, ((1 + (info->clock_speed_index != 0)) << 6) | 0xC);
	{
		u8 freq_divisor = 2;
		if (info->fsb_frequency == frequency_11(info))
			freq_divisor = 3;
		else if (2 * info->fsb_frequency < 3 * (frequency_11(info) / 2))
			freq_divisor = 1;
		else
			freq_divisor = 2;
		write_mchbar32(0x2c0, (freq_divisor << 11) | 0x6009c400);
	}

	if (info->board_lane_delay[3] <= 10) {
		if (info->board_lane_delay[3] <= 8)
			lane_3_delay = info->board_lane_delay[3];
		else
			lane_3_delay = 10;
	} else {
		lane_3_delay = 12;
	}
	cas_latency_derived = info->cas_latency - info->clock_speed_index + 2;
	if (info->clock_speed_index > 1)
		cas_latency_derived++;
	for (channel = 0; channel < NUM_CHANNELS; channel++) {
		write_mchbar32(0x240 + (channel << 10),
			       ((info->clock_speed_index ==
				 0) * 0x11000) | 0x1002100 | ((2 +
							       info->
							       clock_speed_index)
							      << 4) | (info->
								       cas_latency
								       - 3));
		write_500(info, channel, (info->clock_speed_index << 1) | 1,
			  0x609, 6, 1);
		write_500(info, channel,
			  info->clock_speed_index + 2 * info->cas_latency - 7,
			  0x601, 6, 1);

		/* Pack the board lane delays into the per-channel delay
		   registers 0x250..0x25c. */
		write_mchbar32(0x250 + (channel << 10),
			       ((lane_3_delay + info->clock_speed_index +
				 9) << 6)
			       | (info->board_lane_delay[7] << 2) | (info->
								     board_lane_delay
								     [4] << 16)
			       | (info->board_lane_delay[1] << 25) | (info->
								      board_lane_delay
								      [1] << 29)
			       | 1);
		write_mchbar32(0x254 + (channel << 10),
			       (info->
				board_lane_delay[1] >> 3) | ((info->
							      board_lane_delay
							      [8] +
							      4 *
							      info->
							      use_ecc) << 6) |
			       0x80 | (info->board_lane_delay[6] << 1) | (info->
									  board_lane_delay
									  [2] <<
									  28) |
			       (cas_latency_derived << 16) | 0x4700000);
		write_mchbar32(0x258 + (channel << 10),
			       ((info->board_lane_delay[5] +
				 info->clock_speed_index +
				 9) << 12) | ((info->clock_speed_index -
					       info->cas_latency + 12) << 8)
			       | (info->board_lane_delay[2] << 17) | (info->
								      board_lane_delay
								      [4] << 24)
			       | 0x47);
		write_mchbar32(0x25c + (channel << 10),
			       (info->board_lane_delay[1] << 1) | (info->
								   board_lane_delay
								   [0] << 8) |
			       0x1da50000);
		write_mchbar8(0x264 + (channel << 10), 0xff);
		write_mchbar8(0x5f8 + (channel << 10),
			      (cas_latency_shift << 3) | info->use_ecc);
	}

	/* Preliminary (pre-JEDEC) memory map with dummy module sizes. */
	program_modules_memory_map(info, 1);

	write_mchbar16(0x610,
		       (min(ns_to_cycles(info, some_delay_ns) / 2, 127) << 9)
		       | (read_mchbar16(0x610) & 0x1C3) | 0x3C);
	write_mchbar16(0x612, read_mchbar16(0x612) | 0x100);
	write_mchbar16(0x214, read_mchbar16(0x214) | 0x3E00);
	/* Mirror the map into the QuickPath registers; entry 0 carries
	   the enable bit (!i). */
	for (i = 0; i < 8; i++) {
		pcie_write_config32(PCI_DEV (QUICKPATH_BUS, 0, 1), 0x80 + 4 * i,
				    (info->total_memory_mb - 64) | !i | 2);
		pcie_write_config32(PCI_DEV (QUICKPATH_BUS, 0, 1), 0xc0 + 4 * i, 0);
	}
}
| |
| #define BETTER_MEMORY_MAP 0 |
| |
/*
 * Compute and program the global memory map: TOM (top of memory),
 * TOLUD (top of low usable DRAM), TOUUD (top of upper usable DRAM),
 * the >4GB remap window, the IGD/GTT UMA bases, TSEG, and the mirror
 * of the map in the QuickPath registers.  All sizes are in MB.
 */
static void program_total_memory_map(struct raminfo *info)
{
	unsigned int TOM, TOLUD, TOUUD;
	unsigned int quickpath_reserved;
	unsigned int REMAPbase;
	unsigned int uma_base_igd;
	unsigned int uma_base_gtt;
	int memory_remap;
	unsigned int memory_map[8];
	int i;
	unsigned int current_limit;
	unsigned int tseg_base;
	int uma_size_igd = 0, uma_size_gtt = 0;

	memset(memory_map, 0, sizeof(memory_map));

#if REAL
	if (info->uma_enabled) {
		/* Decode IGD and GTT stolen-memory sizes (MB) from GGC. */
		u16 t = pcie_read_config16(NORTHBRIDGE, D0F0_GGC);
		gav(t);
		const int uma_sizes_gtt[16] =
		    { 0, 1, 0, 2, 0, 0, 0, 0, 0, 2, 3, 4, 42, 42, 42, 42 };
		/* Igd memory */
		const int uma_sizes_igd[16] = {
			0, 0, 0, 0, 0, 32, 48, 64, 128, 256, 96, 160, 224, 352,
			256, 512
		};

		uma_size_igd = uma_sizes_igd[(t >> 4) & 0xF];
		uma_size_gtt = uma_sizes_gtt[(t >> 8) & 0xF];
	}
#endif

	TOM = info->total_memory_mb;
	if (TOM == 4096)
		TOM = 4032;
	TOUUD = ALIGN_DOWN(TOM - info->memory_reserved_for_heci_mb, 64);
	TOLUD = ALIGN_DOWN(min(3072 + ALIGN_UP(uma_size_igd + uma_size_gtt, 64)
			       , TOUUD), 64);
	memory_remap = 0;
	if (TOUUD - TOLUD > 64) {
		/* Remap DRAM hidden by the PCI hole to above 4GB. */
		memory_remap = 1;
		REMAPbase = max(4096, TOUUD);
		TOUUD = TOUUD - TOLUD + 4096;
	}
	if (TOUUD > 4096)
		memory_map[2] = TOUUD | 1;
	quickpath_reserved = 0;

	{
		u32 t;

		/* QuickPath-reserved region size, if enabled (bit 11). */
		gav(t = pcie_read_config32(PCI_DEV(QUICKPATH_BUS, 0, 1), 0x68));
		if (t & 0x800)
			quickpath_reserved =
			    (1 << find_lowest_bit_set32(t >> 20));
	}
	if (memory_remap)
		TOUUD -= quickpath_reserved;

#if !REAL
	if (info->uma_enabled) {
		u16 t = pcie_read_config16(NORTHBRIDGE, D0F0_GGC);
		gav(t);
		const int uma_sizes_gtt[16] =
		    { 0, 1, 0, 2, 0, 0, 0, 0, 0, 2, 3, 4, 42, 42, 42, 42 };
		/* Igd memory */
		const int uma_sizes_igd[16] = {
			0, 0, 0, 0, 0, 32, 48, 64, 128, 256, 96, 160, 224, 352,
			256, 512
		};

		uma_size_igd = uma_sizes_igd[(t >> 4) & 0xF];
		uma_size_gtt = uma_sizes_gtt[(t >> 8) & 0xF];
	}
#endif

	/* Carve IGD, GTT and TSEG out of the top of low memory. */
	uma_base_igd = TOLUD - uma_size_igd;
	uma_base_gtt = uma_base_igd - uma_size_gtt;
	tseg_base = ALIGN_DOWN(uma_base_gtt, 64) - (CONFIG_SMM_TSEG_SIZE >> 20);
	if (!memory_remap)
		tseg_base -= quickpath_reserved;
	tseg_base = ALIGN_DOWN(tseg_base, 8);

	pcie_write_config16(NORTHBRIDGE, D0F0_TOLUD, TOLUD << 4);
	pcie_write_config16(NORTHBRIDGE, D0F0_TOM, TOM >> 6);
	if (memory_remap) {
		pcie_write_config16(NORTHBRIDGE, D0F0_REMAPBASE, REMAPbase >> 6);
		pcie_write_config16(NORTHBRIDGE, D0F0_REMAPLIMIT, (TOUUD - 64) >> 6);
	}
	pcie_write_config16(NORTHBRIDGE, D0F0_TOUUD, TOUUD);

	if (info->uma_enabled) {
		pcie_write_config32(NORTHBRIDGE, D0F0_IGD_BASE, uma_base_igd << 20);
		pcie_write_config32(NORTHBRIDGE, D0F0_GTT_BASE, uma_base_gtt << 20);
	}
	pcie_write_config32(NORTHBRIDGE, TSEG, tseg_base << 20);

	/* Mirror the map into the QuickPath registers as ascending limit
	   entries; bit 0 of a map entry marks it valid. */
	current_limit = 0;
	memory_map[0] = ALIGN_DOWN(uma_base_gtt, 64) | 1;
	memory_map[1] = 4096;
	for (i = 0; i < ARRAY_SIZE(memory_map); i++) {
		current_limit = max(current_limit, memory_map[i] & ~1);
		pcie_write_config32(PCI_DEV(QUICKPATH_BUS, 0, 1), 4 * i + 0x80,
				    (memory_map[i] & 1) | ALIGN_DOWN(current_limit -
								     1, 64) | 2);
		pcie_write_config32(PCI_DEV(QUICKPATH_BUS, 0, 1), 4 * i + 0xc0, 0);
	}
}
| |
/*
 * Gather chip identification into `info`: the HECI BAR and ME UMA size,
 * the CAPID0 capability words, the PCI revision, the maximum supported
 * memory clock index, whether UMA/IGD is enabled, and the derived
 * silicon_revision used throughout the init path.
 */
static void collect_system_info(struct raminfo *info)
{
	u32 capid0[3];
	int i;
	unsigned channel;

	/* Wait for some bit, maybe TXT clear. */
	while (!(read8((u8 *)0xfed40000) & (1 << 7))) ;

	if (!info->heci_bar)
		gav(info->heci_bar =
		    pcie_read_config32(HECIDEV, HECIBAR) & 0xFFFFFFF8);
	if (!info->memory_reserved_for_heci_mb) {
		/* Wait for ME to be ready */
		intel_early_me_init();
		info->memory_reserved_for_heci_mb = intel_early_me_uma_size();
	}

	/* Capability words at D0F0_CAPID0, +4, +8. */
	for (i = 0; i < 3; i++)
		gav(capid0[i] =
		    pcie_read_config32(NORTHBRIDGE, D0F0_CAPID0 | (i << 2)));
	gav(info->revision = pcie_read_config8(NORTHBRIDGE, PCI_REVISION_ID));
	info->max_supported_clock_speed_index = (~capid0[1] & 7);

	if ((capid0[1] >> 11) & 1)
		info->uma_enabled = 0;
	else
		gav(info->uma_enabled =
		    pcie_read_config8(NORTHBRIDGE, D0F0_DEVEN) & 8);
	/* Unrecognised: [0000:fffd3d2d] 37f81.37f82 ! CPUID: eax: 00000001; ecx: 00000e00 => 00020655.00010800.029ae3ff.bfebfbff */
	info->silicon_revision = 0;

	if (capid0[2] & 2) {
		info->silicon_revision = 0;
		info->max_supported_clock_speed_index = 2;
		/* A module of SPD type 3 on any channel caps the clock
		   index and selects silicon_revision 2. */
		for (channel = 0; channel < NUM_CHANNELS; channel++)
			if (info->populated_ranks[channel][0][0]
			    && (info->spd[channel][0][MODULE_TYPE] & 0xf) ==
			    3) {
				info->silicon_revision = 2;
				info->max_supported_clock_speed_index = 1;
			}
	} else {
		switch (((capid0[2] >> 18) & 1) + 2 * ((capid0[1] >> 3) & 1)) {
		case 1:
		case 2:
			info->silicon_revision = 3;
			break;
		case 3:
			info->silicon_revision = 0;
			break;
		case 0:
			info->silicon_revision = 2;
			break;
		}
		/* Specific device IDs override the capability-derived value. */
		switch (pcie_read_config16(NORTHBRIDGE, PCI_DEVICE_ID)) {
		case 0x40:
			info->silicon_revision = 0;
			break;
		case 0x48:
			info->silicon_revision = 1;
			break;
		}
	}
}
| |
/* Restore cached training values (all four lane-timing sets plus regs
   0x178 and 0x10b) back into the hardware.  Only done on revision >= 8
   parts. */
static void write_training_data(struct raminfo *info)
{
	int tm, channel, slot, rank, lane;
	if (info->revision < 8)
		return;

	for (tm = 0; tm < 4; tm++)
		for (channel = 0; channel < NUM_CHANNELS; channel++)
			for (slot = 0; slot < NUM_SLOTS; slot++)
				for (rank = 0; rank < NUM_RANKS; rank++)
					for (lane = 0; lane < 9; lane++)
						write_500(info, channel,
							  info->
							  cached_training->
							  lane_timings[tm]
							  [channel][slot][rank]
							  [lane],
							  get_timing_register_addr
							  (lane, tm, slot,
							   rank), 9, 0);
	write_1d0(info->cached_training->reg_178, 0x178, 7, 1);
	write_1d0(info->cached_training->reg_10b, 0x10b, 6, 1);
}
| |
/* Debug dump: for every populated rank and lane, print the four timing
   registers as read back from hardware alongside the values recorded
   in info->training. */
static void dump_timings(struct raminfo *info)
{
#if REAL
	int channel, slot, rank, lane, i;
	printk(BIOS_DEBUG, "Timings:\n");
	FOR_POPULATED_RANKS {
		printk(BIOS_DEBUG, "channel %d, slot %d, rank %d\n", channel,
		       slot, rank);
		for (lane = 0; lane < 9; lane++) {
			printk(BIOS_DEBUG, "lane %d: ", lane);
			for (i = 0; i < 4; i++) {
				printk(BIOS_DEBUG, "%x (%x) ",
				       read_500(info, channel,
						get_timing_register_addr
						(lane, i, slot, rank),
						9),
				       info->training.
				       lane_timings[i][channel][slot][rank]
				       [lane]);
			}
			printk(BIOS_DEBUG, "\n");
		}
	}
	printk(BIOS_DEBUG, "[178] = %x (%x)\n", read_1d0(0x178, 7),
	       info->training.reg_178);
	printk(BIOS_DEBUG, "[10b] = %x (%x)\n", read_1d0(0x10b, 6),
	       info->training.reg_10b);
#endif
}
| |
| /* Read timings and other registers that need to be restored verbatim and |
| put them to CBMEM. |
| */ |
/* Capture the final training state (lane timings read back from the
   hardware, regs 0x178/0x10b, and assorted MCHBAR registers) and store
   it in CBMEM as MRC data for S3 resume. */
static void save_timings(struct raminfo *info)
{
	struct ram_training train;
	struct mrc_data_container *mrcdata;
	int output_len = ALIGN(sizeof(train), 16);
	int channel, slot, rank, lane, i;

	/* Snapshot the training struct, then refresh the lane timings
	   with live readbacks. */
	train = info->training;
	FOR_POPULATED_RANKS for (lane = 0; lane < 9; lane++)
		for (i = 0; i < 4; i++)
			train.lane_timings[i][channel][slot][rank][lane] =
			    read_500(info, channel,
				     get_timing_register_addr(lane, i, slot,
							      rank), 9);
	train.reg_178 = read_1d0(0x178, 7);
	train.reg_10b = read_1d0(0x10b, 6);

	for (channel = 0; channel < NUM_CHANNELS; channel++) {
		u32 reg32;
		reg32 = read_mchbar32 ((channel << 10) + 0x274);
		train.reg274265[channel][0] = reg32 >> 16;
		train.reg274265[channel][1] = reg32 & 0xffff;
		train.reg274265[channel][2] = read_mchbar16 ((channel << 10) + 0x265) >> 8;
	}
	train.reg2ca9_bit0 = read_mchbar8(0x2ca9) & 1;
	train.reg_6dc = read_mchbar32 (0x6dc);
	train.reg_6e8 = read_mchbar32 (0x6e8);

	printk (BIOS_SPEW, "[6dc] = %x\n", train.reg_6dc);
	printk (BIOS_SPEW, "[6e8] = %x\n", train.reg_6e8);

	/* Save the MRC S3 restore data to cbmem */
	mrcdata = cbmem_add
	    (CBMEM_ID_MRCDATA, output_len + sizeof(struct mrc_data_container));

	if (mrcdata != NULL) {
		printk(BIOS_DEBUG, "Relocate MRC DATA from %p to %p (%u bytes)\n",
		       &train, mrcdata, output_len);

		mrcdata->mrc_signature = MRC_DATA_SIGNATURE;
		mrcdata->mrc_data_size = output_len;
		mrcdata->reserved = 0;
		memcpy(mrcdata->mrc_data, &train, sizeof(train));

		/* Zero the unused space in aligned buffer. */
		if (output_len > sizeof(train))
			memset(mrcdata->mrc_data + sizeof(train), 0,
			       output_len - sizeof(train));

		mrcdata->mrc_checksum = compute_ip_checksum(mrcdata->mrc_data,
							    mrcdata->mrc_data_size);
	}
}
| |
| #if REAL |
| static const struct ram_training *get_cached_training(void) |
| { |
| struct mrc_data_container *cont; |
| cont = find_current_mrc_cache(); |
| if (!cont) |
| return 0; |
| return (void *)cont->mrc_data; |
| } |
| #endif |
| |
| /* FIXME: add timeout. */ |
/* Spin until the ME side of the HECI link signals readiness (bit 3 of
   the ME CSR at +0xc), then update the host CSR: clear bit 4 and set
   bits 2-3.  NOTE(review): exact CSR bit semantics inferred from
   struct mei_csr usage elsewhere — confirm against ME docs. */
static void wait_heci_ready(void)
{
	while (!(read32(DEFAULT_HECIBAR + 0xc) & 8)) ;	// = 0x8000000c
	write32((DEFAULT_HECIBAR + 0x4),
		(read32(DEFAULT_HECIBAR + 0x4) & ~0x10) | 0xc);
}
| |
| /* FIXME: add timeout. */ |
/* Spin until the host circular buffer has room for `len` more dwords.
   Free space = buffer_depth - (write_ptr - read_ptr). */
static void wait_heci_cb_avail(int len)
{
	union {
		struct mei_csr csr;
		u32 raw;
	} csr;

	/* First wait for the ME-ready bit. */
	while (!(read32(DEFAULT_HECIBAR + 0xc) & 8)) ;

	do
		csr.raw = read32(DEFAULT_HECIBAR + 0x4);
	while (len >
	       csr.csr.buffer_depth - (csr.csr.buffer_write_ptr -
				       csr.csr.buffer_read_ptr));
}
| |
/* Push one MEI header plus its payload into the host circular buffer
   and ring the interrupt-generate doorbell (CSR bit 2). */
static void send_heci_packet(struct mei_header *head, u32 * payload)
{
	int len = (head->length + 3) / 4;	/* payload length in dwords */
	int i;

	wait_heci_cb_avail(len + 1);	/* +1 dword for the header */

	/* FIXME: handle leftovers correctly. */
	write32(DEFAULT_HECIBAR + 0, *(u32 *) head);
	for (i = 0; i < len - 1; i++)
		write32(DEFAULT_HECIBAR + 0, payload[i]);

	/* NOTE(review): the mask on the final dword shifts by 8*len, the
	   total dword count, rather than the number of leftover bytes;
	   for len >= 4 the shift exceeds the type width (undefined).
	   Left untouched — see the FIXME above. */
	write32(DEFAULT_HECIBAR + 0, payload[i] & ((1 << (8 * len)) - 1));
	write32(DEFAULT_HECIBAR + 0x4, read32(DEFAULT_HECIBAR + 0x4) | 0x4);
}
| |
/* Split a message into MEI packets no larger than the circular buffer
   allows and send them; only the final packet carries is_complete = 1. */
static void
send_heci_message(u8 * msg, int len, u8 hostaddress, u8 clientaddress)
{
	struct mei_header head;
	int maxlen;

	wait_heci_ready();
	/* Buffer depth in dwords lives in CSR bits 31:24; reserve one
	   dword for the packet header. */
	maxlen = (read32(DEFAULT_HECIBAR + 0x4) >> 24) * 4 - 4;

	while (len) {
		int cur = len;
		if (cur > maxlen) {
			cur = maxlen;
			head.is_complete = 0;
		} else
			head.is_complete = 1;
		head.length = cur;
		head.reserved = 0;
		head.client_address = clientaddress;
		head.host_address = hostaddress;
		send_heci_packet(&head, (u32 *) msg);
		len -= cur;
		msg += cur;
	}
}
| |
| /* FIXME: Add timeout. */ |
| static int |
| recv_heci_packet(struct raminfo *info, struct mei_header *head, u32 * packet, |
| u32 * packet_size) |
| { |
| union { |
| struct mei_csr csr; |
| u32 raw; |
| } csr; |
| int i = 0; |
| |
| write32(DEFAULT_HECIBAR + 0x4, read32(DEFAULT_HECIBAR + 0x4) | 2); |
| do { |
| csr.raw = read32(DEFAULT_HECIBAR + 0xc); |
| #if !REAL |
| if (i++ > 346) |
| return -1; |
| #endif |
| } |
| while (csr.csr.buffer_write_ptr == csr.csr.buffer_read_ptr); |
| *(u32 *) head = read32(DEFAULT_HECIBAR + 0x8); |
| if (!head->length) { |
| write32(DEFAULT_HECIBAR + 0x4, |
| read32(DEFAULT_HECIBAR + 0x4) | 2); |
| *packet_size = 0; |
| return 0; |
| } |
| if (head->length + 4 > 4 * csr.csr.buffer_depth |
| || head->length > *packet_size) { |
| *packet_size = 0; |
| return -1; |
| } |
| |
| do |
| csr.raw = read32(DEFAULT_HECIBAR + 0xc); |
| while ((head->length + 3) >> 2 > |
| csr.csr.buffer_write_ptr - csr.csr.buffer_read_ptr); |
| |
| for (i = 0; i < (head->length + 3) >> 2; i++) |
| packet[i++] = read32(DEFAULT_HECIBAR + 0x8); |
| *packet_size = head->length; |
| if (!csr.csr.ready) |
| *packet_size = 0; |
| write32(DEFAULT_HECIBAR + 0x4, read32(DEFAULT_HECIBAR + 0x4) | 4); |
| return 0; |
| } |
| |
| /* FIXME: Add timeout. */ |
/* Reassemble a possibly multi-packet HECI message into `message`.
   On entry *message_size is the buffer capacity; on exit it is the
   received length (0 on failure).  Returns 0 on success, -1 on error. */
static int
recv_heci_message(struct raminfo *info, u32 * message, u32 * message_size)
{
	struct mei_header head;
	int current_position;

	current_position = 0;
	while (1) {
		u32 current_size;
		current_size = *message_size - current_position;
		if (recv_heci_packet
		    (info, &head, message + (current_position >> 2),
		     &current_size) == -1)
			break;
		if (!current_size)
			break;
		current_position += current_size;
		/* The last packet of a message has is_complete set. */
		if (head.is_complete) {
			*message_size = current_position;
			return 0;
		}

		if (current_position >= *message_size)
			break;
	}
	*message_size = 0;
	return -1;
}
| |
/* Send the MKHI_SET_UMA command telling the ME where its UMA region
   lives and how large it is, then verify the acknowledgement. */
static void send_heci_uma_message(struct raminfo *info)
{
	struct uma_reply {
		u8 group_id;
		u8 command;
		u8 reserved;
		u8 result;
		u8 field2;
		u8 unk3[0x48 - 4 - 1];
	} __attribute__ ((packed)) reply;
	struct uma_message {
		u8 group_id;
		u8 cmd;
		u8 reserved;
		u8 result;
		u32 c2;
		u64 heci_uma_addr;
		u32 memory_reserved_for_heci_mb;
		u16 c3;
	} __attribute__ ((packed)) msg = {
	0, MKHI_SET_UMA, 0, 0,
		    0x82,
		    info->heci_uma_addr, info->memory_reserved_for_heci_mb, 0};
	u32 reply_size;

	send_heci_message((u8 *) & msg, sizeof(msg), 0, 7);

	reply_size = sizeof(reply);
	if (recv_heci_message(info, (u32 *) & reply, &reply_size) == -1)
		return;

	/* The ME acknowledges a command by echoing it with bit 7 set. */
	if (reply.command != (MKHI_SET_UMA | (1 << 7)))
		die("HECI init failed\n");
}
| |
/*
 * Read from HECI config space how much memory the ME wants reserved,
 * place that UMA region directly below top of memory, perform a
 * DMI/RCBA register handshake, announce the region to the ME via
 * send_heci_uma_message(), and finally disable the HECI PCI function.
 */
static void setup_heci_uma(struct raminfo *info)
{
	u32 reg44;

	reg44 = pcie_read_config32(HECIDEV, 0x44);	// = 0x80010020
	info->memory_reserved_for_heci_mb = 0;
	info->heci_uma_addr = 0;
	/* Proceed only if bit 16 of reg 0x44 is set and bit 5 of reg
	   0x40 is clear.  NOTE(review): bit meanings are undocumented
	   here — derived from the observed values in the comments. */
	if (!((reg44 & 0x10000) && !(pcie_read_config32(HECIDEV, 0x40) & 0x20)))
		return;

	info->heci_bar = pcie_read_config32(HECIDEV, 0x10) & 0xFFFFFFF0;
	info->memory_reserved_for_heci_mb = reg44 & 0x3f;
	/* UMA address = (TOM in MB - reserved MB) converted to bytes. */
	info->heci_uma_addr =
	    ((u64)
	     ((((u64) pcie_read_config16(NORTHBRIDGE, D0F0_TOM)) << 6) -
	      info->memory_reserved_for_heci_mb)) << 20;

	pcie_read_config32(NORTHBRIDGE, DMIBAR);
	if (info->memory_reserved_for_heci_mb) {
		/* Clear bit 7 in a series of paired DMIBAR/RCBA registers,
		   then re-arm two of them and wait for the handshake bits
		   to drop. */
		write32(DEFAULT_DMIBAR + 0x14,
			read32(DEFAULT_DMIBAR + 0x14) & ~0x80);
		write32(DEFAULT_RCBA + 0x14,
			read32(DEFAULT_RCBA + 0x14) & ~0x80);
		write32(DEFAULT_DMIBAR + 0x20,
			read32(DEFAULT_DMIBAR + 0x20) & ~0x80);
		write32(DEFAULT_RCBA + 0x20,
			read32(DEFAULT_RCBA + 0x20) & ~0x80);
		write32(DEFAULT_DMIBAR + 0x2c,
			read32(DEFAULT_DMIBAR + 0x2c) & ~0x80);
		write32(DEFAULT_RCBA + 0x30,
			read32(DEFAULT_RCBA + 0x30) & ~0x80);
		write32(DEFAULT_DMIBAR + 0x38,
			read32(DEFAULT_DMIBAR + 0x38) & ~0x80);
		write32(DEFAULT_RCBA + 0x40,
			read32(DEFAULT_RCBA + 0x40) & ~0x80);

		write32(DEFAULT_RCBA + 0x40, 0x87000080);	// OK
		write32(DEFAULT_DMIBAR + 0x38, 0x87000080);	// OK
		while (read16(DEFAULT_RCBA + 0x46) & 2
		       && read16(DEFAULT_DMIBAR + 0x3e) & 2) ;
	}

	/* Publish the reserved size to the MCH. */
	write_mchbar32(0x24, 0x10000 + info->memory_reserved_for_heci_mb);

	send_heci_uma_message(info);

	/* Clear the HECI BAR and disable the device's command register. */
	pcie_write_config32(HECIDEV, 0x10, 0x0);
	pcie_write_config8(HECIDEV, 0x4, 0x0);

}
| |
| static int have_match_ranks(struct raminfo *info, int channel, int ranks) |
| { |
| int ranks_in_channel; |
| ranks_in_channel = info->populated_ranks[channel][0][0] |
| + info->populated_ranks[channel][0][1] |
| + info->populated_ranks[channel][1][0] |
| + info->populated_ranks[channel][1][1]; |
| |
| /* empty channel */ |
| if (ranks_in_channel == 0) |
| return 1; |
| |
| if (ranks_in_channel != ranks) |
| return 0; |
| /* single slot */ |
| if (info->populated_ranks[channel][0][0] != |
| info->populated_ranks[channel][1][0]) |
| return 1; |
| if (info->populated_ranks[channel][0][1] != |
| info->populated_ranks[channel][1][1]) |
| return 1; |
| if (info->is_x16_module[channel][0] != info->is_x16_module[channel][1]) |
| return 0; |
| if (info->density[channel][0] != info->density[channel][1]) |
| return 0; |
| return 1; |
| } |
| |
/* Seed info->training.lane_timings: timing set 0 is filled with a fixed
   value of 32; sets 1..3 are read back from the 0x500-window timing
   registers, with an empirical +11 offset applied to set 1. */
static void read_4090(struct raminfo *info)
{
	int i, channel, slot, rank, lane;
	/* NOTE(review): this first loop reuses i as the channel index. */
	for (i = 0; i < 2; i++)
		for (slot = 0; slot < NUM_SLOTS; slot++)
			for (rank = 0; rank < NUM_RANKS; rank++)
				for (lane = 0; lane < 9; lane++)
					info->training.
					    lane_timings[0][i][slot][rank][lane]
					    = 32;

	for (i = 1; i < 4; i++)
		for (channel = 0; channel < NUM_CHANNELS; channel++)
			for (slot = 0; slot < NUM_SLOTS; slot++)
				for (rank = 0; rank < NUM_RANKS; rank++)
					for (lane = 0; lane < 9; lane++) {
						info->training.
						    lane_timings[i][channel]
						    [slot][rank][lane] =
						    read_500(info, channel,
							     get_timing_register_addr
							     (lane, i, slot,
							      rank), 9)
						    + (i == 1) * 11;	// !!!!
					}

}
| |
| static u32 get_etalon2(int flip, u32 addr) |
| { |
| const u16 invmask[] = { |
| 0xaaaa, 0x6db6, 0x4924, 0xeeee, 0xcccc, 0x8888, 0x7bde, 0x739c, |
| 0x6318, 0x4210, 0xefbe, 0xcf3c, 0x8e38, 0x0c30, 0x0820 |
| }; |
| u32 ret; |
| u32 comp4 = addr / 480; |
| addr %= 480; |
| u32 comp1 = addr & 0xf; |
| u32 comp2 = (addr >> 4) & 1; |
| u32 comp3 = addr >> 5; |
| |
| if (comp4) |
| ret = 0x1010101 << (comp4 - 1); |
| else |
| ret = 0; |
| if (flip ^ (((invmask[comp3] >> comp1) ^ comp2) & 1)) |
| ret = ~ret; |
| |
| return ret; |
| } |
| |
/* Tear down variable MTRR 3 (the one enable_cache() uses for the
   memory-test window): zero base and mask disables the range. */
static void disable_cache(void)
{
	msr_t msr = {.lo = 0, .hi = 0 };

	wrmsr(MTRR_PHYS_BASE(3), msr);
	wrmsr(MTRR_PHYS_MASK(3), msr);
}
| |
/* Map [base, base+size) as write-protect cacheable via variable MTRR 3
   so the memory-test reads go through the cache.
   NOTE(review): the mask is derived from ALIGN_DOWN(size + 4096, 4096),
   i.e. size rounded up to a 4KB multiple; for a valid MTRR mask that
   value must be a power of two — callers appear to rely on that. */
static void enable_cache(unsigned int base, unsigned int size)
{
	msr_t msr;
	msr.lo = base | MTRR_TYPE_WRPROT;
	msr.hi = 0;
	wrmsr(MTRR_PHYS_BASE(3), msr);
	msr.lo = ((~(ALIGN_DOWN(size + 4096, 4096) - 1) | MTRR_DEF_TYPE_EN)
		  & 0xffffffff);
	msr.hi = 0x0000000f;
	wrmsr(MTRR_PHYS_MASK(3), msr);
}
| |
/* Flush [start, start+size) — size rounded up to a 4KB multiple — from
   the cache, one 64-byte line at a time. */
static void flush_cache(u32 start, u32 size)
{
	u32 end;
	u32 addr;

	end = start + (ALIGN_DOWN(size + 4096, 4096));
	for (addr = start; addr < end; addr += 64)
		clflush(addr);
}
| |
/* Write 1 to northbridge config register 0xc0 — presumably a
   write-one-to-clear error status, judging by its use between test
   bursts; register is undocumented here. */
static void clear_errors(void)
{
	pcie_write_config8(NORTHBRIDGE, 0xc0, 0x01);
}
| |
/* Fill the start of the given rank's 256MB window (totalrank << 28)
   with the etalon test pattern, clearing error status every 320
   writes. */
static void write_testing(struct raminfo *info, int totalrank, int flip)
{
	int nwrites = 0;
	/* in 8-byte units. */
	u32 offset;
	u8 *base;

	base = (u8 *)(totalrank << 28);
	/* Each pattern index covers two dwords; write each dword twice
	   (two consecutive 8-byte units per loop iteration). */
	for (offset = 0; offset < 9 * 480; offset += 2) {
		write32(base + offset * 8, get_etalon2(flip, offset));
		write32(base + offset * 8 + 4, get_etalon2(flip, offset));
		write32(base + offset * 8 + 8, get_etalon2(flip, offset + 1));
		write32(base + offset * 8 + 12, get_etalon2(flip, offset + 1));
		nwrites += 4;
		if (nwrites >= 320) {
			clear_errors();
			nwrites = 0;
		}
	}
}
| |
/* Read back the pattern written by write_testing() and return a
   bitmask of the byte lanes (bits 0..7) that saw any mismatch. */
static u8 check_testing(struct raminfo *info, u8 total_rank, int flip)
{
	u8 failmask = 0;
	int i;
	int comp1, comp2, comp3;
	u32 failxor[2] = { 0, 0 };

	enable_cache((total_rank << 28), 1728 * 5 * 4);

	for (comp3 = 0; comp3 < 9 && failmask != 0xff; comp3++) {
		for (comp1 = 0; comp1 < 4; comp1++)
			for (comp2 = 0; comp2 < 60; comp2++) {
				u32 re[4];
				u32 curroffset =
				    comp3 * 8 * 60 + 2 * comp1 + 8 * comp2;
				/* Accumulate XOR of expected vs. actual per
				   32-bit half of the 64-bit lanes. */
				read128((total_rank << 28) | (curroffset << 3),
					(u64 *) re);
				failxor[0] |=
				    get_etalon2(flip, curroffset) ^ re[0];
				failxor[1] |=
				    get_etalon2(flip, curroffset) ^ re[1];
				failxor[0] |=
				    get_etalon2(flip, curroffset | 1) ^ re[2];
				failxor[1] |=
				    get_etalon2(flip, curroffset | 1) ^ re[3];
			}
		/* Byte lane i failed if its byte in the accumulated XOR
		   has any bit set. */
		for (i = 0; i < 8; i++)
			if ((0xff << (8 * (i % 4))) & failxor[i / 4])
				failmask |= 1 << i;
	}
	disable_cache();
	flush_cache((total_rank << 28), 1728 * 5 * 4);
	return failmask;
}
| |
/* Seed words for the type-2 memory-test pattern (see get_etalon()). */
const u32 seed1[0x18] = {
	0x3a9d5ab5, 0x576cb65b, 0x555773b6, 0x2ab772ee,
	0x555556ee, 0x3a9d5ab5, 0x576cb65b, 0x555773b6,
	0x2ab772ee, 0x555556ee, 0x5155a555, 0x5155a555,
	0x5155a555, 0x5155a555, 0x3a9d5ab5, 0x576cb65b,
	0x555773b6, 0x2ab772ee, 0x555556ee, 0x55d6b4a5,
	0x366d6b3a, 0x2ae5ddbb, 0x3b9ddbb7, 0x55d6b4a5,
};
| |
| static u32 get_seed2(int a, int b) |
| { |
| const u32 seed2[5] = { |
| 0x55555555, 0x33333333, 0x2e555a55, 0x55555555, |
| 0x5b6db6db, |
| }; |
| u32 r; |
| r = seed2[(a + (a >= 10)) / 5]; |
| return b ? ~r : r; |
| } |
| |
| static int make_shift(int comp2, int comp5, int x) |
| { |
| const u8 seed3[32] = { |
| 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 0x00, 0x00, 0x38, 0x1c, 0x3c, 0x18, 0x38, 0x38, |
| 0x38, 0x38, 0x38, 0x38, 0x0f, 0x0f, 0x0f, 0x0f, |
| 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, |
| }; |
| |
| return (comp2 - ((seed3[comp5] >> (x & 7)) & 1)) & 0x1f; |
| } |
| |
| static u32 get_etalon(int flip, u32 addr) |
| { |
| u32 mask_byte = 0; |
| int comp1 = (addr >> 1) & 1; |
| int comp2 = (addr >> 3) & 0x1f; |
| int comp3 = (addr >> 8) & 0xf; |
| int comp4 = (addr >> 12) & 0xf; |
| int comp5 = (addr >> 16) & 0x1f; |
| u32 mask_bit = ~(0x10001 << comp3); |
| u32 part1; |
| u32 part2; |
| int byte; |
| |
| part2 = |
| ((seed1[comp5] >> |
| make_shift(comp2, comp5, |
| (comp3 >> 3) | (comp1 << 2) | 2)) & 1) ^ flip; |
| part1 = |
| ((seed1[comp5] >> |
| make_shift(comp2, comp5, |
| (comp3 >> 3) | (comp1 << 2) | 0)) & 1) ^ flip; |
| |
| for (byte = 0; byte < 4; byte++) |
| if ((get_seed2(comp5, comp4) >> |
| make_shift(comp2, comp5, (byte | (comp1 << 2)))) & 1) |
| mask_byte |= 0xff << (8 * byte); |
| |
| return (mask_bit & mask_byte) | (part1 << comp3) | (part2 << |
| (comp3 + 16)); |
| } |
| |
| static void |
| write_testing_type2(struct raminfo *info, u8 totalrank, u8 region, u8 block, |
| char flip) |
| { |
| int i; |
| for (i = 0; i < 2048; i++) |
| write32p((totalrank << 28) | (region << 25) | (block << 16) | |
| (i << 2), get_etalon(flip, (block << 16) | (i << 2))); |
| } |
| |
/* Verify the 8KB block written by write_testing_type2(); returns a
   bitmask of the byte lanes (bits 0..7) that saw any mismatch. */
static u8
check_testing_type2(struct raminfo *info, u8 totalrank, u8 region, u8 block,
		    char flip)
{
	u8 failmask = 0;
	u32 failxor[2];
	int i;
	int comp1, comp2, comp3;

	failxor[0] = 0;
	failxor[1] = 0;

	/* Cache the whole 128MB rank window while checking. */
	enable_cache(totalrank << 28, 134217728);
	for (comp3 = 0; comp3 < 2 && failmask != 0xff; comp3++) {
		for (comp1 = 0; comp1 < 16; comp1++)
			for (comp2 = 0; comp2 < 64; comp2++) {
				u32 addr =
				    (totalrank << 28) | (region << 25) | (block
									  << 16)
				    | (comp3 << 12) | (comp2 << 6) | (comp1 <<
								      2);
				/* Alternate dwords accumulate into the two
				   XOR words. */
				failxor[comp1 & 1] |=
				    read32p(addr) ^ get_etalon(flip, addr);
			}
		for (i = 0; i < 8; i++)
			if ((0xff << (8 * (i % 4))) & failxor[i / 4])
				failmask |= 1 << i;
	}
	disable_cache();
	flush_cache((totalrank << 28) | (region << 25) | (block << 16), 16384);
	return failmask;
}
| |
| static int check_bounded(unsigned short *vals, u16 bound) |
| { |
| int i; |
| |
| for (i = 0; i < 8; i++) |
| if (vals[i] < bound) |
| return 0; |
| return 1; |
| } |
| |
/* Per-lane margin-search FSM states (see do_fsm()). */
enum state {
	BEFORE_USABLE = 0, AT_USABLE = 1, AT_MARGIN = 2, COMPLETE = 3
};

/* Return 1 iff every one of the 8 per-lane states has reached COMPLETE. */
static int validate_state(enum state *in)
{
	int lane;

	for (lane = 0; lane < 8; lane++) {
		if (in[lane] != COMPLETE)
			return 0;
	}
	return 1;
}
| |
/*
 * Advance the per-lane margin-search state machine by one step at the
 * current setting `val`.  For each of the 8 lanes: count consecutive
 * passes; after `margin` consecutive passes record the window start in
 * res_low and enter AT_MARGIN; on the next failure (or when val hits
 * `uplimit`) record the window end in res_high and mark COMPLETE.
 * fail_mask carries one fail bit per lane for this step.
 */
static void
do_fsm(enum state *state, u16 * counter,
       u8 fail_mask, int margin, int uplimit,
       u8 * res_low, u8 * res_high, u8 val)
{
	int lane;

	for (lane = 0; lane < 8; lane++) {
		int is_fail = (fail_mask >> lane) & 1;
		switch (state[lane]) {
		case BEFORE_USABLE:
			if (!is_fail) {
				counter[lane] = 1;
				state[lane] = AT_USABLE;
				break;
			}
			counter[lane] = 0;
			state[lane] = BEFORE_USABLE;
			break;
		case AT_USABLE:
			if (!is_fail) {
				++counter[lane];
				if (counter[lane] >= margin) {
					state[lane] = AT_MARGIN;
					/* First passing value of the window. */
					res_low[lane] = val - margin + 1;
					break;
				}
				state[lane] = 1;	/* == AT_USABLE */
				break;
			}
			counter[lane] = 0;
			state[lane] = BEFORE_USABLE;
			break;
		case AT_MARGIN:
			if (is_fail) {
				state[lane] = COMPLETE;
				res_high[lane] = val - 1;
			} else {
				counter[lane]++;
				state[lane] = AT_MARGIN;
				if (val == uplimit) {
					state[lane] = COMPLETE;
					res_high[lane] = uplimit;
				}
			}
			break;
		case COMPLETE:
			break;
		}
	}
}
| |
/*
 * For one rank, sweep the 6-bit value written to registers 0x1b3/0x1a3
 * (0..0x2f, with bit 5 inverted on write) and use do_fsm() to find each
 * byte lane's passing window; the window edges, rebased against the
 * trained lane timing, are stored in
 * timings[reg_178][channel][slot][rank][lane].{smallest,largest}.
 *
 * When first_run == 0 the recorded edges are additionally verified in
 * place: the `smallest` edge is programmed and incremented until the
 * type-2 memory test passes twice in a row per lane, then `largest` is
 * programmed and decremented until it passes three times; finally the
 * original lane timings are restored and degenerate (empty) windows are
 * zeroed out.
 *
 * NOTE(review): the exact meaning of regs 0x178/0x1b3/0x1a3 is inferred
 * from this file only — presumably a receive/IO delay margin search;
 * confirm against chipset documentation.
 */
static void
train_ram_at_178(struct raminfo *info, u8 channel, int slot, int rank,
		 u8 total_rank, u8 reg_178, int first_run, int niter,
		 timing_bounds_t * timings)
{
	int lane;
	enum state state[8];
	u16 count[8];
	u8 lower_usable[8];
	u8 upper_usable[8];
	/* Per-lane consecutive-pass counter; 0xffff (-1) = lane excluded. */
	unsigned short num_sucessfully_checked[8];
	u8 secondary_total_rank;
	u8 reg1b3;

	/* Rank count for the secondary channel; zero while testing
	   channel 0 when channel 1 is populated.  (Computed but only
	   consumed via the helpers below, if at all.) */
	if (info->populated_ranks_mask[1]) {
		if (channel == 1)
			secondary_total_rank =
			    info->populated_ranks[1][0][0] +
			    info->populated_ranks[1][0][1]
			    + info->populated_ranks[1][1][0] +
			    info->populated_ranks[1][1][1];
		else
			secondary_total_rank = 0;
	} else
		secondary_total_rank = total_rank;

	{
		int i;
		for (i = 0; i < 8; i++)
			state[i] = BEFORE_USABLE;
	}

	/* On re-runs: lanes whose previous window collapsed to a point are
	   reset and re-trained; if every lane had a real window, skip the
	   sweep entirely by marking all lanes COMPLETE. */
	if (!first_run) {
		int is_all_ok = 1;
		for (lane = 0; lane < 8; lane++)
			if (timings[reg_178][channel][slot][rank][lane].
			    smallest ==
			    timings[reg_178][channel][slot][rank][lane].
			    largest) {
				timings[reg_178][channel][slot][rank][lane].
				    smallest = 0;
				timings[reg_178][channel][slot][rank][lane].
				    largest = 0;
				is_all_ok = 0;
			}
		if (is_all_ok) {
			int i;
			for (i = 0; i < 8; i++)
				state[i] = COMPLETE;
		}
	}

	/* Main sweep: margin of 5 consecutive passes, upper limit 47. */
	for (reg1b3 = 0; reg1b3 < 0x30 && !validate_state(state); reg1b3++) {
		u8 failmask = 0;
		write_1d0(reg1b3 ^ 32, 0x1b3, 6, 1);
		write_1d0(reg1b3 ^ 32, 0x1a3, 6, 1);
		failmask = check_testing(info, total_rank, 0);
		write_mchbar32(0xfb0, read_mchbar32(0xfb0) | 0x00030000);
		do_fsm(state, count, failmask, 5, 47, lower_usable,
		       upper_usable, reg1b3);
	}

	/* If the sweep ran at all, restore the registers to 0 and convert
	   the raw window edges into timing values relative to the trained
	   lane timing (low 6 bits, centered around 32). */
	if (reg1b3) {
		write_1d0(0, 0x1b3, 6, 1);
		write_1d0(0, 0x1a3, 6, 1);
		for (lane = 0; lane < 8; lane++) {
			if (state[lane] == COMPLETE) {
				timings[reg_178][channel][slot][rank][lane].
				    smallest =
				    lower_usable[lane] +
				    (info->training.
				     lane_timings[0][channel][slot][rank][lane]
				     & 0x3F) - 32;
				timings[reg_178][channel][slot][rank][lane].
				    largest =
				    upper_usable[lane] +
				    (info->training.
				     lane_timings[0][channel][slot][rank][lane]
				     & 0x3F) - 32;
			}
		}
	}

	if (!first_run) {
		/* Phase 1: program each completed lane at its `smallest`
		   edge (timing regs 0 and 1 keep their trained offset). */
		for (lane = 0; lane < 8; lane++)
			if (state[lane] == COMPLETE) {
				write_500(info, channel,
					  timings[reg_178][channel][slot][rank]
					  [lane].smallest,
					  get_timing_register_addr(lane, 0,
								   slot, rank),
					  9, 1);
				write_500(info, channel,
					  timings[reg_178][channel][slot][rank]
					  [lane].smallest +
					  info->training.
					  lane_timings[1][channel][slot][rank]
					  [lane]
					  -
					  info->training.
					  lane_timings[0][channel][slot][rank]
					  [lane], get_timing_register_addr(lane,
									   1,
									   slot,
									   rank),
					  9, 1);
				num_sucessfully_checked[lane] = 0;
			} else
				num_sucessfully_checked[lane] = -1;

		/* Walk `smallest` upward on failing lanes until every lane
		   passes the type-2 test twice in a row (check_bounded). */
		do {
			u8 failmask = 0;
			int i;
			for (i = 0; i < niter; i++) {
				if (failmask == 0xFF)
					break;
				failmask |=
				    check_testing_type2(info, total_rank, 2, i,
							0);
				failmask |=
				    check_testing_type2(info, total_rank, 3, i,
							1);
			}
			write_mchbar32(0xfb0,
				       read_mchbar32(0xfb0) | 0x00030000);
			for (lane = 0; lane < 8; lane++)
				if (num_sucessfully_checked[lane] != 0xffff) {
					if ((1 << lane) & failmask) {
						/* Window exhausted: give up
						   on this lane (0xffff). */
						if (timings[reg_178][channel]
						    [slot][rank][lane].
						    largest <=
						    timings[reg_178][channel]
						    [slot][rank][lane].smallest)
							num_sucessfully_checked
							    [lane] = -1;
						else {
							num_sucessfully_checked
							    [lane] = 0;
							timings[reg_178]
							    [channel][slot]
							    [rank][lane].
							    smallest++;
							write_500(info, channel,
								  timings
								  [reg_178]
								  [channel]
								  [slot][rank]
								  [lane].
								  smallest,
								  get_timing_register_addr
								  (lane, 0,
								   slot, rank),
								  9, 1);
							write_500(info, channel,
								  timings
								  [reg_178]
								  [channel]
								  [slot][rank]
								  [lane].
								  smallest +
								  info->
								  training.
								  lane_timings
								  [1][channel]
								  [slot][rank]
								  [lane]
								  -
								  info->
								  training.
								  lane_timings
								  [0][channel]
								  [slot][rank]
								  [lane],
								  get_timing_register_addr
								  (lane, 1,
								   slot, rank),
								  9, 1);
						}
					} else
						num_sucessfully_checked[lane]++;
				}
		}
		while (!check_bounded(num_sucessfully_checked, 2));

		/* Phase 2: program each completed lane at its `largest`
		   edge and repeat the verification from the other side. */
		for (lane = 0; lane < 8; lane++)
			if (state[lane] == COMPLETE) {
				write_500(info, channel,
					  timings[reg_178][channel][slot][rank]
					  [lane].largest,
					  get_timing_register_addr(lane, 0,
								   slot, rank),
					  9, 1);
				write_500(info, channel,
					  timings[reg_178][channel][slot][rank]
					  [lane].largest +
					  info->training.
					  lane_timings[1][channel][slot][rank]
					  [lane]
					  -
					  info->training.
					  lane_timings[0][channel][slot][rank]
					  [lane], get_timing_register_addr(lane,
									   1,
									   slot,
									   rank),
					  9, 1);
				num_sucessfully_checked[lane] = 0;
			} else
				num_sucessfully_checked[lane] = -1;

		/* Walk `largest` downward on failing lanes until every lane
		   passes three times in a row. */
		do {
			int failmask = 0;
			int i;
			for (i = 0; i < niter; i++) {
				if (failmask == 0xFF)
					break;
				failmask |=
				    check_testing_type2(info, total_rank, 2, i,
							0);
				failmask |=
				    check_testing_type2(info, total_rank, 3, i,
							1);
			}

			write_mchbar32(0xfb0,
				       read_mchbar32(0xfb0) | 0x00030000);
			for (lane = 0; lane < 8; lane++) {
				if (num_sucessfully_checked[lane] != 0xffff) {
					if ((1 << lane) & failmask) {
						if (timings[reg_178][channel]
						    [slot][rank][lane].
						    largest <=
						    timings[reg_178][channel]
						    [slot][rank][lane].
						    smallest) {
							num_sucessfully_checked
							    [lane] = -1;
						} else {
							num_sucessfully_checked
							    [lane] = 0;
							timings[reg_178]
							    [channel][slot]
							    [rank][lane].
							    largest--;
							write_500(info, channel,
								  timings
								  [reg_178]
								  [channel]
								  [slot][rank]
								  [lane].
								  largest,
								  get_timing_register_addr
								  (lane, 0,
								   slot, rank),
								  9, 1);
							write_500(info, channel,
								  timings
								  [reg_178]
								  [channel]
								  [slot][rank]
								  [lane].
								  largest +
								  info->
								  training.
								  lane_timings
								  [1][channel]
								  [slot][rank]
								  [lane]
								  -
								  info->
								  training.
								  lane_timings
								  [0][channel]
								  [slot][rank]
								  [lane],
								  get_timing_register_addr
								  (lane, 1,
								   slot, rank),
								  9, 1);
						}
					} else
						num_sucessfully_checked[lane]++;
				}
			}
		}
		while (!check_bounded(num_sucessfully_checked, 3));

		/* Restore the originally trained lane timings; invalidate
		   any window that collapsed during verification. */
		for (lane = 0; lane < 8; lane++) {
			write_500(info, channel,
				  info->training.
				  lane_timings[0][channel][slot][rank][lane],
				  get_timing_register_addr(lane, 0, slot, rank),
				  9, 1);
			write_500(info, channel,
				  info->training.
				  lane_timings[1][channel][slot][rank][lane],
				  get_timing_register_addr(lane, 1, slot, rank),
				  9, 1);
			if (timings[reg_178][channel][slot][rank][lane].
			    largest <=
			    timings[reg_178][channel][slot][rank][lane].
			    smallest) {
				timings[reg_178][channel][slot][rank][lane].
				    largest = 0;
				timings[reg_178][channel][slot][rank][lane].
				    smallest = 0;
			}
		}
	}
}
| |
| static void set_10b(struct raminfo *info, u8 val) |
| { |
| int channel; |
| int slot, rank; |
| int lane; |
| |
| if (read_1d0(0x10b, 6) == val) |
| return; |
| |
| write_1d0(val, 0x10b, 6, 1); |
| |
| FOR_POPULATED_RANKS_BACKWARDS for (lane = 0; lane < 9; lane++) { |
| u16 reg_500; |
| reg_500 = read_500(info, channel, |
| get_timing_register_addr(lane, 0, slot, |
| rank), 9); |
| if (val == 1) { |
| if (lut16[info->clock_speed_index] <= reg_500) |
| reg_500 -= lut16[info->clock_speed_index]; |
| else |
| reg_500 = 0; |
| } else { |
| reg_500 += lut16[info->clock_speed_index]; |
| } |
| write_500(info, channel, reg_500, |
| get_timing_register_addr(lane, 0, slot, rank), 9, 1); |
| } |
| } |
| |
| static void set_ecc(int onoff) |
| { |
| int channel; |
| for (channel = 0; channel < NUM_CHANNELS; channel++) { |
| u8 t; |
| t = read_mchbar8((channel << 10) + 0x5f8); |
| if (onoff) |
| t |= 1; |
| else |
| t &= ~1; |
| write_mchbar8((channel << 10) + 0x5f8, t); |
| |