blob: 4bc95ba27d87f11e8760c989fc6d2fa227a80034 [file] [log] [blame]
/*
* This file is part of the coreboot project.
*
* Copyright (C) 2013 Vladimir Serbinenko.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
/* Please don't remove this. It's needed for debugging and reverse
engineering, in order to support more Nehalem variants in the future. */
#ifndef REAL
#define REAL 1
#endif
#if REAL
#include <stdlib.h>
#include <console/console.h>
#include <string.h>
#include <arch/io.h>
#include <cpu/x86/msr.h>
#include <cbmem.h>
#include <arch/cbfs.h>
#include <cbfs.h>
#include <ip_checksum.h>
#include <pc80/mc146818rtc.h>
#include <device/pci_def.h>
#include <arch/cpu.h>
#include <halt.h>
#include <spd.h>
#include "raminit.h"
#include <timestamp.h>
#include <cpu/x86/mtrr.h>
#include <cpu/intel/speedstep.h>
#include <cpu/intel/turbo.h>
#endif
#if !REAL
typedef unsigned char u8;
typedef unsigned short u16;
typedef unsigned int u32;
typedef u32 device_t;
#endif
#include "nehalem.h"
#include "southbridge/intel/ibexpeak/me.h"
#if REAL
#include <delay.h>
#endif
/* PCI device handles used by this file, built with PCI_DEV(). */
#define NORTHBRIDGE PCI_DEV(0, 0, 0)
#define SOUTHBRIDGE PCI_DEV(0, 0x1f, 0)
#define GMA PCI_DEV (0, 0x2, 0x0)
#define HECIDEV PCI_DEV(0, 0x16, 0)
/* NOTE(review): presumably the config-space offset of the HECI BAR - confirm. */
#define HECIBAR 0x10
/*
 * Loop headers over every (channel, slot, rank) triple. The caller must
 * have variables named `channel`, `slot` and `rank` in scope; the statement
 * following the macro becomes the loop body.
 */
#define FOR_ALL_RANKS \
for (channel = 0; channel < NUM_CHANNELS; channel++) \
for (slot = 0; slot < NUM_SLOTS; slot++) \
for (rank = 0; rank < NUM_RANKS; rank++)
/*
 * Same as FOR_ALL_RANKS, but the body only runs for ranks flagged in
 * info->populated_ranks. Additionally requires `info` in scope.
 */
#define FOR_POPULATED_RANKS \
for (channel = 0; channel < NUM_CHANNELS; channel++) \
for (slot = 0; slot < NUM_SLOTS; slot++) \
for (rank = 0; rank < NUM_RANKS; rank++) \
if (info->populated_ranks[channel][slot][rank])
/*
 * Same as FOR_POPULATED_RANKS, but channels are visited from the highest
 * index down to 0. `channel` must be a signed variable: the loop condition
 * is `channel >= 0`, which never becomes false for an unsigned type.
 */
#define FOR_POPULATED_RANKS_BACKWARDS \
for (channel = NUM_CHANNELS - 1; channel >= 0; channel--) \
for (slot = 0; slot < NUM_SLOTS; slot++) \
for (rank = 0; rank < NUM_RANKS; rank++) \
if (info->populated_ranks[channel][slot][rank])
/*
 * Array of {smallest, largest} pairs with dimensions [2][2][2][9].
 * struct ram_training holds two of these (timing_bounds[2]).
 * Original index note (kept as found; NOTE(review): the declared dimensions
 * are [2][2][2][9], confirm how they map onto this description):
 */
/* [REG_178][CHANNEL][2 * SLOT + RANK][LANE] */
typedef struct {
u8 smallest;
u8 largest;
} timing_bounds_t[2][2][2][9];
/*
 * Training data block. struct raminfo embeds one instance (`training`) and
 * also holds a const pointer to one (`cached_training`).
 * Many field names are raw register numbers from reverse engineering; their
 * exact meaning is not established by this file.
 */
struct ram_training {
/* [TM][CHANNEL][SLOT][RANK][LANE] */
u16 lane_timings[4][2][2][2][9];
u16 reg_178;
u16 reg_10b;
u8 reg178_center;
u8 reg178_smallest;
u8 reg178_largest;
timing_bounds_t timing_bounds[2];
u16 timing_offset[2][2][2][9];
u16 timing2_offset[2][2][2][9];
u16 timing2_bounds[2][2][2][9][2];
u8 reg274265[2][3]; /* [CHANNEL][REGISTER] */
u8 reg2ca9_bit0;
u32 reg_6dc;
u32 reg_6e8;
};
#if !REAL
#include "raminit_fake.c"
#else
#include <lib.h> /* Prototypes */
/* Store the 32-bit value `val` through the MCHBAR32() accessor at offset `addr`. */
static inline void write_mchbar32(u32 addr, u32 val)
{
MCHBAR32(addr) = val;
}
/* Store the 16-bit value `val` through the MCHBAR16() accessor at offset `addr`. */
static inline void write_mchbar16(u32 addr, u16 val)
{
MCHBAR16(addr) = val;
}
/* Store the 8-bit value `val` through the MCHBAR8() accessor at offset `addr`. */
static inline void write_mchbar8(u32 addr, u8 val)
{
MCHBAR8(addr) = val;
}
/* Return the 32-bit value read through the MCHBAR32() accessor at offset `addr`. */
static inline u32 read_mchbar32(u32 addr)
{
return MCHBAR32(addr);
}
/* Return the 16-bit value read through the MCHBAR16() accessor at offset `addr`. */
static inline u16 read_mchbar16(u32 addr)
{
return MCHBAR16(addr);
}
/* Return the 8-bit value read through the MCHBAR8() accessor at offset `addr`. */
static inline u8 read_mchbar8(u32 addr)
{
return MCHBAR8(addr);
}
/* Issue the x86 CLFLUSH instruction on the memory address `addr`. */
static void clflush(u32 addr)
{
/* NOTE(review): no "memory" clobber is declared on this asm statement. */
asm volatile ("clflush (%0)"::"r" (addr));
}
/* 128-bit value stored as two 64-bit halves; used by read128(). */
typedef struct _u128 {
u64 lo;
u64 hi;
} u128;
/*
 * Read 16 bytes from address `addr` using a single SSE load and return them
 * as out[0] (low 64 bits) and out[1] (high 64 bits).
 *
 * The asm sequence saves xmm0 into `stor`, loads xmm0 from (addr) with
 * movdqa, copies it to `ret`, and finally restores xmm0 from `stor`, so the
 * register content is preserved across the call. movdqa is the aligned-load
 * form, so `addr` must be 16-byte aligned.
 */
static void read128(u32 addr, u64 * out)
{
u128 ret;
u128 stor;
asm volatile ("movdqu %%xmm0, %0\n"
"movdqa (%2), %%xmm0\n"
"movdqu %%xmm0, %1\n"
"movdqu %0, %%xmm0":"+m" (stor), "=m"(ret):"r"(addr));
out[0] = ret.lo;
out[1] = ret.hi;
}
#endif
/* OK */
/*
 * Issue a write command through the MCHBAR 0x1d0/0x1d4 register pair.
 *
 * The data word written to 0x1d4 is the low `bits` bits of `val`, OR-ed
 * with (2 << bits) and (flag << bits). The command word written to 0x1d0
 * is 0x40000000 | addr. Before and after the command, bit 23 (0x800000) of
 * 0x1d0 is polled until it reads back as zero.
 */
static void write_1d0(u32 val, u16 addr, int bits, int flag)
{
	u32 payload;

	/* Clear the command register and wait until bit 23 drops. */
	write_mchbar32(0x1d0, 0);
	while (read_mchbar32(0x1d0) & 0x800000)
		;

	payload = val & ((1 << bits) - 1);
	payload |= 2 << bits;
	payload |= flag << bits;
	write_mchbar32(0x1d4, payload);

	/* Kick off the write and wait for bit 23 to drop again. */
	write_mchbar32(0x1d0, 0x40000000 | addr);
	while (read_mchbar32(0x1d0) & 0x800000)
		;
}
/* OK */
/*
 * Issue a read command through the MCHBAR 0x1d0/0x1d8 register pair and
 * return the low `split` bits of the result.
 *
 * The command index is ((MCHBAR8(0x246) >> 2) & 3) + 0x361 - addr, OR-ed
 * with 0x80000000. After fetching the result from 0x1d8, two dummy
 * write_1d0(0, 0x33d, 0, 0) commands are issued.
 */
static u16 read_1d0(u16 addr, int split)
{
	u32 result;
	u32 index;

	write_mchbar32(0x1d0, 0);
	while (read_mchbar32(0x1d0) & 0x800000)
		;

	index = ((read_mchbar8(0x246) >> 2) & 3) + 0x361 - addr;
	write_mchbar32(0x1d0, 0x80000000 | index);
	while (read_mchbar32(0x1d0) & 0x800000)
		;

	result = read_mchbar32(0x1d8);

	write_1d0(0, 0x33d, 0, 0);
	write_1d0(0, 0x33d, 0, 0);

	result &= (1 << split) - 1;
	/* printk (BIOS_ERR, "R1D0C [%x] => %x\n", addr, result); */
	return result;
}
/* write32() wrapper that takes the target address as an integer. */
static void write32p(uintptr_t addr, uint32_t val)
{
write32((void *)addr, val);
}
/* read32() wrapper that takes the source address as an integer. */
static uint32_t read32p(uintptr_t addr)
{
return read32((void *)addr);
}
/* Execute the x86 SFENCE instruction; compiles to a no-op when REAL is 0. */
static void sfence(void)
{
#if REAL
asm volatile ("sfence");
#endif
}
/*
 * Compute the register offset for a given (slot, rank, lane).
 *
 * Each lane contributes 0x124, each rank 11 and each slot 22. Lanes with
 * bit 2 set (4..7) get an extra 0x23e, and lane 8 is moved back by 0x452.
 */
static inline u16 get_lane_offset(int slot, int rank, int lane)
{
	int offset = 0x124 * lane + 11 * rank + 22 * slot;

	if (lane & 4)
		offset += 0x23e;
	if (lane == 8)
		offset -= 0x452;

	return offset;
}
/*
 * Return the address of timing register `tm` for the given lane, slot and
 * rank: the lane offset plus a per-timing base. The base table is indexed
 * with (tm + 3) % 4, i.e. tm 0..3 select 0x5c, 0x1d, 0xa8, 0xe6.
 */
static inline u16 get_timing_register_addr(int lane, int tm, int slot, int rank)
{
	const u16 tm_base[] = { 0x1d, 0xa8, 0xe6, 0x5c };
	const u16 lane_offset = get_lane_offset(slot, rank, lane);

	return lane_offset + tm_base[(tm + 3) % 4];
}
#if REAL
/*
 * Debug hook wrapped around register reads: returns `in` unchanged.
 * `line` is only consumed by the commented-out trace printk below.
 */
static u32 gav_real(int line, u32 in)
{
// printk (BIOS_DEBUG, "%d: GAV: %x\n", line, in);
return in;
}
/* gav(x): pass x through gav_real(), tagging it with the calling source line. */
#define gav(x) gav_real (__LINE__, (x))
#endif
/*
 * State shared by the raminit routines in this file: module information
 * (SPD bytes, populated ranks), selected clock and CAS latency, values
 * derived by compute_derived_timings(), and the training data.
 * Several field names are placeholders from reverse engineering
 * (some_delay_*, mode4030, avg4044, max4048); their hardware meaning is not
 * established here.
 */
struct raminfo {
u16 clock_speed_index; /* clock_speed (REAL, not DDR) / 133.(3) - 3 */
u16 fsb_frequency; /* in 1.(1)/2 MHz. */
u8 is_x16_module[2][2]; /* [CHANNEL][SLOT] */
u8 density[2][2]; /* [CHANNEL][SLOT] */
u8 populated_ranks[2][2][2]; /* [CHANNEL][SLOT][RANK] */
int rank_start[2][2][2];
u8 cas_latency;
u8 board_lane_delay[9];
u8 use_ecc;
u8 revision;
u8 max_supported_clock_speed_index;
u8 uma_enabled;
u8 spd[2][2][151]; /* [CHANNEL][SLOT][BYTE] */
u8 silicon_revision;
u8 populated_ranks_mask[2];
u8 max_slots_used_in_channel;
u8 mode4030[2];
u16 avg4044[2];
u16 max4048[2];
unsigned total_memory_mb;
unsigned interleaved_part_mb;
unsigned non_interleaved_part_mb;
u32 heci_bar;
u64 heci_uma_addr;
unsigned memory_reserved_for_heci_mb;
struct ram_training training;
/* Last command type issued via read_500()/write_500(), per channel. */
u32 last_500_command[2];
u32 delay46_ps[2];
u32 delay54_ps[2];
u8 revision_flag_1;
u8 some_delay_1_cycle_floor;
u8 some_delay_2_halfcycles_ceil;
u8 some_delay_3_ps_rounded;
const struct ram_training *cached_training;
};
/* Forward declaration; write_500() is defined after read_500() below. */
static void
write_500(struct raminfo *info, int channel, u32 val, u16 addr, int bits,
int flag);
/* OK */
/*
 * Per-channel read command through the MCHBAR 0x500/0x508 register pair
 * (each channel's registers are offset by channel << 10).
 *
 * Records 0x80000000 in info->last_500_command[channel], issues the command
 * 0x80000000 | (((MCHBAR8(0x246 + base) >> 2) & 3) + 0xb88 - addr), and
 * returns the low `split` bits of the value read from 0x508.
 */
static u16
read_500(struct raminfo *info, int channel, u16 addr, int split)
{
	const u32 chan_base = channel << 10;
	u32 index;
	u32 result;

	info->last_500_command[channel] = 0x80000000;

	write_mchbar32(0x500 + chan_base, 0);
	while (read_mchbar32(0x500 + chan_base) & 0x800000)
		;

	index = ((read_mchbar8(0x246 + chan_base) >> 2) & 3) + 0xb88 - addr;
	write_mchbar32(0x500 + chan_base, 0x80000000 | index);
	while (read_mchbar32(0x500 + chan_base) & 0x800000)
		;

	result = read_mchbar32(0x508 + chan_base);
	return result & ((1 << split) - 1);
}
/* OK */
/*
 * Per-channel write command through the MCHBAR 0x500/0x504 register pair
 * (each channel's registers are offset by channel << 10).
 *
 * If the previous command on this channel was a read (last_500_command ==
 * 0x80000000), the marker is changed to 0x40000000 and a dummy
 * write_500(info, channel, 0, 0xb61, 0, 0) is issued first.
 * The data word is the low `bits` bits of `val`, OR-ed with (2 << bits) and
 * (flag << bits); the command word is 0x40000000 | addr.
 */
static void
write_500(struct raminfo *info, int channel, u32 val, u16 addr, int bits,
	  int flag)
{
	const u32 chan_base = channel << 10;
	u32 payload;

	if (info->last_500_command[channel] == 0x80000000) {
		info->last_500_command[channel] = 0x40000000;
		write_500(info, channel, 0, 0xb61, 0, 0);
	}

	write_mchbar32(0x500 + chan_base, 0);
	while (read_mchbar32(0x500 + chan_base) & 0x800000)
		;

	payload = val & ((1 << bits) - 1);
	payload |= 2 << bits;
	payload |= flag << bits;
	write_mchbar32(0x504 + chan_base, payload);

	write_mchbar32(0x500 + chan_base, 0x40000000 | addr);
	while (read_mchbar32(0x500 + chan_base) & 0x800000)
		;
}
/*
 * Write/read-back test on the 256 bytes starting at address (rank << 28).
 *
 * The window is zeroed and read back, then filled with 32 64-bit patterns
 * (all ones or all zeros, chosen by successive bits of 0xf00fc33c), read
 * back and compared byte by byte, and finally zeroed and read back again.
 *
 * Returns an 8-bit mask: bit N stays set only if byte position N of every
 * 64-bit word read back with the expected pattern.
 */
static int rw_test(int rank)
{
	const u32 mask = 0xf00fc33c;
	const int base = rank << 28;
	int result = 0xff;
	int word, half, byte;

	for (word = 0; word < 64; word++)
		write32p(base | (word << 2), 0);
	sfence();
	for (word = 0; word < 64; word++)
		gav(read32p(base | (word << 2)));
	sfence();

	for (word = 0; word < 32; word++) {
		u32 fill = ((mask >> word) & 1) ? 0xffffffff : 0;

		write32p(base | (word << 3), fill);
		write32p(base | (word << 3) | 4, fill);
	}
	sfence();

	for (word = 0; word < 32; word++) {
		u8 expected = ((mask >> word) & 1) ? 0xff : 0;

		/* Low dword maps to result bits 0..3, high dword to bits 4..7. */
		for (half = 0; half < 2; half++) {
			u32 val;

			gav(val = read32p(base | (word << 3) | (half * 4)));
			for (byte = 0; byte < 4; byte++)
				if (((val >> (byte * 8)) & 0xff) != expected)
					result &= ~(1 << (4 * half + byte));
		}
	}
	sfence();

	for (word = 0; word < 64; word++)
		write32p(base | (word << 2), 0);
	sfence();
	for (word = 0; word < 64; word++)
		gav(read32p(base | (word << 2)));

	return result;
}
/*
 * For lanes 0..7 of the given rank, write `base` plus the stored
 * lane_timings[2] and lane_timings[3] values to the matching timing
 * registers (timing 2 first, then timing 3, lane by lane).
 */
static void
program_timings(struct raminfo *info, u16 base, int channel, int slot, int rank)
{
	int lane, tm;

	for (lane = 0; lane < 8; lane++) {
		for (tm = 2; tm <= 3; tm++) {
			const u16 stored =
			    info->training.lane_timings[tm][channel][slot][rank][lane];

			write_500(info, channel, base + stored,
				  get_timing_register_addr(lane, tm, slot, rank),
				  9, 0);
		}
	}
}
/*
 * Program the per-channel registers 0x26c, 0x268 and 0x2b9 (in that order);
 * `si` is placed at bit 9 of 0x268 and written verbatim to 0x2b9.
 */
static void write_26c(int channel, u16 si)
{
	const u32 chan_base = channel << 10;

	write_mchbar32(0x26c + chan_base, 0x03243f35);
	write_mchbar32(0x268 + chan_base, 0xcfc00000 | (si << 9));
	write_mchbar16(0x2b9 + chan_base, si);
}
/*
 * Run a command through the per-channel register 0x580 and return the
 * 32-bit value read back once bit 16 (0x10000) is set.
 *
 * Sequence: dummy read_1d0(0x142), toggle 0x5ff (0 then 0x80), write
 * 0x8493c012 | addr to 0x580, set bit 0 of its low byte, poll for bit 16,
 * then clear bit 0 again.
 */
static u32 get_580(int channel, u8 addr)
{
	const u32 reg = 0x580 + (channel << 10);
	u32 status;

	gav(read_1d0(0x142, 3));
	write_mchbar8(0x5ff, 0x0);	/* OK */
	write_mchbar8(0x5ff, 0x80);	/* OK */

	write_mchbar32(reg, 0x8493c012 | addr);
	write_mchbar8(reg, read_mchbar8(reg) | 1);

	do {
		status = read_mchbar32(reg);
	} while (!(status & 0x10000));

	write_mchbar8(reg, read_mchbar8(reg) & ~1);
	return status;
}
/* File-scope flag, fixed to 0 here. */
const int cached_config = 0;
/* Topology limits used by the FOR_*_RANKS macros and the array dimensions. */
#define NUM_CHANNELS 2
#define NUM_SLOTS 2
#define NUM_RANKS 2
/*
 * Shift amounts; their values match the literal `rank << 28` and
 * `channel << 10` expressions used throughout this file.
 */
#define RANK_SHIFT 28
#define CHANNEL_SHIFT 10
#include "raminit_tables.c"
static void seq9(struct raminfo *info, int channel, int slot, int rank)
{
int i, lane;
for (i = 0; i < 2; i++)
for (lane = 0; lane < 8; lane++)
write_500(info, channel,
info->training.lane_timings[i +
1][channel][slot]
[rank][lane], get_timing_register_addr(lane,
i + 1,
slot,
rank),
9, 0);
write_1d0(1, 0x103, 6, 1);
for (lane = 0; lane < 8; lane++)
write_500(info, channel,
info->training.
lane_timings[0][channel][slot][rank][lane],
get_timing_register_addr(lane, 0, slot, rank), 9, 0);
for (i = 0; i < 2; i++) {
for (lane = 0; lane < 8; lane++)
write_500(info, channel,
info->training.lane_timings[i +
1][channel][slot]
[rank][lane], get_timing_register_addr(lane,
i + 1,
slot,
rank),
9, 0);
gav(get_580(channel, ((i + 1) << 2) | (rank << 5)));
}
gav(read_1d0(0x142, 3)); // = 0x10408118
write_mchbar8(0x5ff, 0x0); /* OK */
write_mchbar8(0x5ff, 0x80); /* OK */
write_1d0(0x2, 0x142, 3, 1);
for (lane = 0; lane < 8; lane++) {
// printk (BIOS_ERR, "before: %x\n", info->training.lane_timings[2][channel][slot][rank][lane]);
info->training.lane_timings[2][channel][slot][rank][lane] =
read_500(info, channel,
get_timing_register_addr(lane, 2, slot, rank), 9);
//printk (BIOS_ERR, "after: %x\n", info->training.lane_timings[2][channel][slot][rank][lane]);
info->training.lane_timings[3][channel][slot][rank][lane] =
info->training.lane_timings[2][channel][slot][rank][lane] +
0x20;
}
}
static int count_ranks_in_channel(struct raminfo *info, int channel)
{
int slot, rank;
int res = 0;
for (slot = 0; slot < NUM_SLOTS; slot++)
for (rank = 0; rank < NUM_SLOTS; rank++)
res += info->populated_ranks[channel][slot][rank];
return res;
}
/*
 * Configure one rank: run seq9(), program the lane timings with a 0x80
 * offset and then with no offset, and finish with a fixed register
 * sequence. Unless resuming from S3, rw_test() is run once after the first
 * timing programming and twice after the second.
 *
 * The rank index given to rw_test() is `rank`, plus the number of ranks in
 * channel 1 when configuring channel 0.
 * NOTE(review): `slot` does not enter that index - confirm this is intended.
 */
static void
config_rank(struct raminfo *info, int s3resume, int channel, int slot, int rank)
{
	int test_rank;

	write_1d0(0, 0x178, 7, 1);
	seq9(info, channel, slot, rank);
	program_timings(info, 0x80, channel, slot, rank);

	test_rank = rank;
	if (channel == 0)
		test_rank += count_ranks_in_channel(info, 1);

	if (!s3resume)
		gav(rw_test(test_rank));

	program_timings(info, 0x00, channel, slot, rank);

	if (!s3resume) {
		gav(rw_test(test_rank));
		gav(rw_test(test_rank));
	}

	write_1d0(0, 0x142, 3, 1);
	write_1d0(0, 0x103, 6, 1);

	gav(get_580(channel, 0xc | (rank << 5)));
	gav(read_1d0(0x142, 3));

	write_mchbar8(0x5ff, 0x0);	/* OK */
	write_mchbar8(0x5ff, 0x80);	/* OK */
}
/*
 * Write the 4-bit value `val` (with flag 1) to the per-channel 0x500-space
 * registers 0x4cf, 0x659 and 0x697, reading each one first.
 * Values noted in the original trace comments: 0xc2300cf9 for 0x4cf,
 * 0x80300839 for 0x659 and 0x697.
 */
static void set_4cf(struct raminfo *info, int channel, u8 val)
{
	const u16 regs[] = { 0x4cf, 0x659, 0x697 };
	unsigned int n;

	for (n = 0; n < sizeof(regs) / sizeof(regs[0]); n++) {
		gav(read_500(info, channel, regs[n], 4));
		write_500(info, channel, val, regs[n], 4, 1);
	}
}
/*
 * Program four register groups (stride 0x44, starting around 0x32c) in each
 * channel. With `zero` set, the 32-bit registers are cleared; otherwise
 * they get fixed constants. Before each group, 0x138/0x140 are written with
 * a (channel, group) selector and 0x138..0x144 are read back. Finally bit 0
 * of 0x130 is set and polled until it clears.
 */
static void set_334(int zero)
{
	const u32 reg334_values[] =
	    { 0x2a2b2a2b, 0x26272627, 0x2e2f2e2f, 0x2a2b };
	u32 vd8[2][16];
	int channel, group, half;

	for (channel = 0; channel < NUM_CHANNELS; channel++) {
		for (group = 0; group < 4; group++) {
			const u32 regs = (channel << 10) + (group * 0x44);
			const u32 fill =
			    (group == 1) ? 0x29292929 : 0x31313131;
			const u32 keep = (group == 3) ? 0xffff : 0xffffffff;
			u16 c = 0x5f5f;

			if (group == 3)
				c = 0x5f;
			if (zero && (group == 0 || group == 3))
				c = 0;

			for (half = 0; half < 2; half++) {
				const int idx =
				    (channel << 3) | (group << 1) | half;

				write_mchbar32(0x138 + 8 * half,
					       (channel << 26) | (group << 24));
				gav(vd8[1][idx] =
				    read_mchbar32(0x138 + 8 * half));
				gav(vd8[0][idx] =
				    read_mchbar32(0x13c + 8 * half));
			}

			write_mchbar32(0x334 + regs,
				       zero ? 0 : reg334_values[group]);
			write_mchbar32(0x32c + regs,
				       zero ? 0 : (0x18191819 & keep));
			write_mchbar16(0x34a + regs, c);
			write_mchbar32(0x33c + regs, zero ? 0 : (fill & keep));
			write_mchbar32(0x344 + regs, zero ? 0 : (fill & keep));
		}
	}

	write_mchbar32(0x130, read_mchbar32(0x130) | 1);	/* OK */
	while (read_mchbar8(0x130) & 1)
		;	/* OK */
}
/*
 * Read-modify-write through the 0x1d0 interface: read `split` bits from
 * `addr`, AND with `and`, OR with `or`, and write the result back with the
 * same width and the given `flag`.
 */
static void rmw_1d0(u16 addr, u32 and, u32 or, int split, int flag)
{
	const u32 current = read_1d0(addr, split);
	const u32 updated = (current & and) | or;

	write_1d0(updated, addr, split, flag);
}
/*
 * Return the index (0..15) of the most significant set bit of `val`,
 * or -1 when `val` is zero.
 */
static int find_highest_bit_set(u16 val)
{
	int bit = 15;

	while (bit >= 0 && !(val & (1 << bit)))
		bit--;

	return bit;
}
/*
 * Return the index (0..31) of the least significant set bit of `val`,
 * or -1 when `val` is zero.
 */
static int find_lowest_bit_set32(u32 val)
{
	int i;

	for (i = 0; i < 32; i++) {
		/*
		 * Use an unsigned constant: `1 << 31` on a signed int
		 * overflows, which is undefined behaviour.
		 */
		if (val & ((u32)1 << i))
			return i;
	}

	return -1;
}
/*
 * Byte indices into the per-module SPD array,
 * info->spd[channel][slot][...].
 * NOTE(review): the names and numbers look like DDR3 SPD byte offsets -
 * confirm against the SPD specification.
 */
enum {
DEVICE_TYPE = 2,
MODULE_TYPE = 3,
DENSITY = 4,
RANKS_AND_DQ = 7,
MEMORY_BUS_WIDTH = 8,
TIMEBASE_DIVIDEND = 10,
TIMEBASE_DIVISOR = 11,
CYCLETIME = 12,
CAS_LATENCIES_LSB = 14,
CAS_LATENCIES_MSB = 15,
CAS_LATENCY_TIME = 16,
THERMAL_AND_REFRESH = 31,
REFERENCE_RAW_CARD_USED = 62,
RANK1_ADDRESS_MAPPING = 63
};
/*
 * Choose the memory clock and CAS latency from the SPD data of all
 * populated slots, and store them in info->clock_speed_index and
 * info->cas_latency. Calls die() if no acceptable combination is found.
 *
 * Uses the tables min_cycletime[] and min_cas_latency_time[], which are
 * defined outside this function (their sizes are not visible here).
 */
static void calculate_timings(struct raminfo *info)
{
unsigned cycletime;
unsigned cas_latency_time;
unsigned supported_cas_latencies;
unsigned channel, slot;
unsigned clock_speed_index;
unsigned min_cas_latency;
unsigned cas_latency;
unsigned max_clock_index;
/* Find common CAS latency */
/*
 * Bit N of the mask stands for CAS latency N + 3 (see the "+ 3" below);
 * the SPD bitmap is doubled (shifted left by one) to line up with that.
 */
supported_cas_latencies = 0x3fe;
for (channel = 0; channel < NUM_CHANNELS; channel++)
for (slot = 0; slot < NUM_SLOTS; slot++)
if (info->populated_ranks[channel][slot][0])
supported_cas_latencies &=
2 *
(info->
spd[channel][slot][CAS_LATENCIES_LSB] |
(info->
spd[channel][slot][CAS_LATENCIES_MSB] <<
8));
/*
 * Start from the limits of the fastest allowed clock.
 * NOTE(review): max_clock_index can be 3; confirm both tables have at
 * least four entries.
 */
max_clock_index = min(3, info->max_supported_clock_speed_index);
cycletime = min_cycletime[max_clock_index];
cas_latency_time = min_cas_latency_time[max_clock_index];
/* Raise both limits to the slowest requirement among populated modules. */
for (channel = 0; channel < NUM_CHANNELS; channel++)
for (slot = 0; slot < NUM_SLOTS; slot++)
if (info->populated_ranks[channel][slot][0]) {
unsigned timebase;
/*
 * NOTE(review): divides by an SPD byte; a module reporting
 * TIMEBASE_DIVISOR == 0 would cause a division by zero.
 */
timebase =
1000 *
info->
spd[channel][slot][TIMEBASE_DIVIDEND] /
info->spd[channel][slot][TIMEBASE_DIVISOR];
cycletime =
max(cycletime,
timebase *
info->spd[channel][slot][CYCLETIME]);
cas_latency_time =
max(cas_latency_time,
timebase *
info->
spd[channel][slot][CAS_LATENCY_TIME]);
}
/*
 * Map the required cycle time onto a clock index.
 * NOTE(review): clock_speed_index is unsigned; if cycletime exceeds
 * min_cycletime[0] the decrement below wraps around and the following
 * table access is out of bounds. If the loop ends without a break the
 * index is 3.
 */
for (clock_speed_index = 0; clock_speed_index < 3; clock_speed_index++) {
if (cycletime == min_cycletime[clock_speed_index])
break;
if (cycletime > min_cycletime[clock_speed_index]) {
clock_speed_index--;
cycletime = min_cycletime[clock_speed_index];
break;
}
}
min_cas_latency = CEIL_DIV(cas_latency_time, cycletime);
cas_latency = 0;
/*
 * Walk the supported latencies from highest to lowest and stop at the
 * first one that does not exceed min_cas_latency.
 */
while (supported_cas_latencies) {
cas_latency = find_highest_bit_set(supported_cas_latencies) + 3;
if (cas_latency <= min_cas_latency)
break;
supported_cas_latencies &=
~(1 << find_highest_bit_set(supported_cas_latencies));
}
/* No exact match: drop one clock step (when not already at index 0). */
if (cas_latency != min_cas_latency && clock_speed_index)
clock_speed_index--;
if (cas_latency * min_cycletime[clock_speed_index] > 20000)
die("Couldn't configure DRAM");
info->clock_speed_index = clock_speed_index;
info->cas_latency = cas_latency;
}
static void program_base_timings(struct raminfo *info)
{
unsigned channel;
unsigned slot, rank, lane;
unsigned extended_silicon_revision;
int i;
extended_silicon_revision = info->silicon_revision;
if (info->silicon_revision == 0)
for (channel = 0; channel < NUM_CHANNELS; channel++)
for (slot = 0; slot < NUM_SLOTS; slot++)
if ((info->
spd[channel][slot][MODULE_TYPE] & 0xF) ==
3)
extended_silicon_revision = 4;
for (channel = 0; channel < NUM_CHANNELS; channel++) {
for (slot = 0; slot < NUM_SLOTS; slot++)
for (rank = 0; rank < NUM_SLOTS; rank++) {
int card_timing_2;
if (!info->populated_ranks[channel][slot][rank])
continue;
for (lane = 0; lane < 9; lane++) {
int tm_reg;
int card_timing;
card_timing = 0;
if ((info->
spd[channel][slot][MODULE_TYPE] &
0xF) == 3) {
int reference_card;
reference_card =
info->
spd[channel][slot]
[REFERENCE_RAW_CARD_USED] &
0x1f;
if (reference_card == 3)
card_timing =
u16_ffd1188[0][lane]
[info->
clock_speed_index];
if (reference_card == 5)
card_timing =
u16_ffd1188[1][lane]
[info->
clock_speed_index];
}
info->training.
lane_timings[0][channel][slot][rank]
[lane] =
u8_FFFD1218[info->
clock_speed_index];
info->training.
lane_timings[1][channel][slot][rank]
[lane] = 256;
for (tm_reg = 2; tm_reg < 4; tm_reg++)
info->training.
lane_timings[tm_reg]
[channel][slot][rank][lane]
=
u8_FFFD1240[channel]
[extended_silicon_revision]
[lane][2 * slot +
rank][info->
clock_speed_index]
+ info->max4048[channel]
+
u8_FFFD0C78[channel]
[extended_silicon_revision]
[info->
mode4030[channel]][slot]
[rank][info->
clock_speed_index]
+ card_timing;
for (tm_reg = 0; tm_reg < 4; tm_reg++)
write_500(info, channel,
info->training.
lane_timings[tm_reg]
[channel][slot][rank]
[lane],
get_timing_register_addr
(lane, tm_reg, slot,
rank), 9, 0);
}
card_timing_2 = 0;
if (!(extended_silicon_revision != 4
|| (info->
populated_ranks_mask[channel] & 5) ==
5)) {
if ((info->
spd[channel][slot]
[REFERENCE_RAW_CARD_USED] & 0x1F)
== 3)
card_timing_2 =
u16_FFFE0EB8[0][info->
clock_speed_index];
if ((info->
spd[channel][slot]
[REFERENCE_RAW_CARD_USED] & 0x1F)
== 5)
card_timing_2 =
u16_FFFE0EB8[1][info->
clock_speed_index];
}
for (i = 0; i < 3; i++)
write_500(info, channel,
(card_timing_2 +
info->max4048[channel]
+
u8_FFFD0EF8[channel]
[extended_silicon_revision]
[info->
mode4030[channel]][info->
clock_speed_index]),
u16_fffd0c50[i][slot][rank],
8, 1);
write_500(info, channel,
(info->max4048[channel] +
u8_FFFD0C78[channel]
[extended_silicon_revision][info->
mode4030
[channel]]
[slot][rank][info->
clock_speed_index]),
u16_fffd0c70[slot][rank], 7, 1);
}
if (!info->populated_ranks_mask[channel])
continue;
for (i = 0; i < 3; i++)
write_500(info, channel,
(info->max4048[channel] +
info->avg4044[channel]
+
u8_FFFD17E0[channel]
[extended_silicon_revision][info->
mode4030
[channel]][info->
clock_speed_index]),
u16_fffd0c68[i], 8, 1);
}
}
/*
 * Return 900000 / info->fsb_frequency (per the name, an FSB cycle time in
 * picoseconds). fsb_frequency must be non-zero.
 */
static unsigned int fsbcycle_ps(struct raminfo *info)
{
	const unsigned int fsb = info->fsb_frequency;

	return 900000 / fsb;
}
/* Duration of one DDR transfer (half a clock cycle) in ps. */
static unsigned int halfcycle_ps(struct raminfo *info)
{
	const int divisor = info->clock_speed_index + 3;

	return 3750 / divisor;
}
/* Duration of one full clock cycle in ps: twice halfcycle_ps(). */
static unsigned int cycle_ps(struct raminfo *info)
{
	const unsigned int half = halfcycle_ps(info);

	return 2 * half;
}
/* Memory clock frequency in units of 1.(1) = 10/9 MHz. */
static unsigned frequency_11(struct raminfo *info)
{
	const int multiplier = info->clock_speed_index + 3;

	return multiplier * 120;
}
/* Memory clock frequency in units of 0.1 MHz (frequency_11() * 100 / 9). */
static unsigned frequency_01(struct raminfo *info)
{
	const unsigned scaled = 100 * frequency_11(info);

	return scaled / 9;
}
/* Convert a duration in ps to a whole number of half cycles (rounded down). */
static unsigned ps_to_halfcycles(struct raminfo *info, unsigned int ps)
{
	const unsigned doubled_freq = frequency_11(info) * 2;

	return doubled_freq * ps / 900000;
}
/* Convert a duration in ns to a whole number of clock cycles (rounded down). */
static unsigned ns_to_cycles(struct raminfo *info, unsigned int ns)
{
	const unsigned freq = frequency_11(info);

	return freq * ns / 900;
}
/*
 * Derive several timing parameters from the silicon/board revision and
 * clock speed, and store them in `info`:
 *   revision_flag_1, some_delay_1_cycle_floor, some_delay_2_halfcycles_ceil,
 *   some_delay_3_ps_rounded, max_slots_used_in_channel, mode4030[],
 *   avg4044[] and max4048[].
 * Also clamps info->board_lane_delay[7] to at least 5 and writes the
 * per-channel register 0x244.
 *
 * The "some_delay_*" and "unk" names are placeholders from reverse
 * engineering; their hardware meaning is not established here.
 */
static void compute_derived_timings(struct raminfo *info)
{
unsigned channel, slot, rank;
int extended_silicon_revision;
int some_delay_1_ps;
int some_delay_2_ps;
int some_delay_2_halfcycles_ceil;
int some_delay_2_halfcycles_floor;
int some_delay_3_ps;
int some_delay_3_halfcycles;
int some_delay_3_ps_rounded;
int some_delay_1_cycle_ceil;
int some_delay_1_cycle_floor;
some_delay_3_halfcycles = 0;
some_delay_3_ps_rounded = 0;
/* Silicon revision 0 counts as 4 if any slot's module-type nibble is 3. */
extended_silicon_revision = info->silicon_revision;
if (!info->silicon_revision)
for (channel = 0; channel < NUM_CHANNELS; channel++)
for (slot = 0; slot < NUM_SLOTS; slot++)
if ((info->
spd[channel][slot][MODULE_TYPE] & 0xF) ==
3)
extended_silicon_revision = 4;
if (info->board_lane_delay[7] < 5)
info->board_lane_delay[7] = 5;
/* revision_flag_1 ends up 2 only for silicon revision 0/1 with revision >= 16. */
info->revision_flag_1 = 2;
if (info->silicon_revision == 2 || info->silicon_revision == 3)
info->revision_flag_1 = 0;
if (info->revision < 16)
info->revision_flag_1 = 0;
/* Redundant with the `< 16` check above. */
if (info->revision < 8)
info->revision_flag_1 = 0;
if (info->revision >= 8 && (info->silicon_revision == 0
|| info->silicon_revision == 1))
some_delay_2_ps = 735;
else
some_delay_2_ps = 750;
if (info->revision >= 0x10 && (info->silicon_revision == 0
|| info->silicon_revision == 1))
some_delay_1_ps = 3929;
else
some_delay_1_ps = 3490;
/*
 * Floor and ceiling of some_delay_1 in clock cycles. On an exact multiple
 * the "floor" is decremented instead of the ceiling being incremented, so
 * the two always differ by one.
 */
some_delay_1_cycle_floor = some_delay_1_ps / cycle_ps(info);
some_delay_1_cycle_ceil = some_delay_1_ps / cycle_ps(info);
if (some_delay_1_ps % cycle_ps(info))
some_delay_1_cycle_ceil++;
else
some_delay_1_cycle_floor--;
info->some_delay_1_cycle_floor = some_delay_1_cycle_floor;
if (info->revision_flag_1)
some_delay_2_ps = halfcycle_ps(info) >> 6;
some_delay_2_ps +=
max(some_delay_1_ps - 30,
2 * halfcycle_ps(info) * (some_delay_1_cycle_ceil - 1) + 1000) +
375;
/* Distance from some_delay_2 up to the next half-cycle boundary. */
some_delay_3_ps =
halfcycle_ps(info) - some_delay_2_ps % halfcycle_ps(info);
if (info->revision_flag_1) {
if (some_delay_3_ps < 150)
some_delay_3_halfcycles = 0;
else
some_delay_3_halfcycles =
(some_delay_3_ps << 6) / halfcycle_ps(info);
some_delay_3_ps_rounded =
halfcycle_ps(info) * some_delay_3_halfcycles >> 6;
}
some_delay_2_halfcycles_ceil =
(some_delay_2_ps + halfcycle_ps(info) - 1) / halfcycle_ps(info) -
2 * (some_delay_1_cycle_ceil - 1);
if (info->revision_flag_1 && some_delay_3_ps < 150)
some_delay_2_halfcycles_ceil++;
/* some_delay_2_halfcycles_floor is computed but not used after this point. */
some_delay_2_halfcycles_floor = some_delay_2_halfcycles_ceil;
if (info->revision < 0x10)
some_delay_2_halfcycles_floor =
some_delay_2_halfcycles_ceil - 1;
if (!info->revision_flag_1)
some_delay_2_halfcycles_floor++;
info->some_delay_2_halfcycles_ceil = some_delay_2_halfcycles_ceil;
info->some_delay_3_ps_rounded = some_delay_3_ps_rounded;
/* Two slots are "used" if either channel has rank 0 populated in both slots. */
if ((info->populated_ranks[0][0][0] && info->populated_ranks[0][1][0])
|| (info->populated_ranks[1][0][0]
&& info->populated_ranks[1][1][0]))
info->max_slots_used_in_channel = 2;
else
info->max_slots_used_in_channel = 1;
for (channel = 0; channel < 2; channel++)
write_mchbar32(0x244 + (channel << 10),
((info->revision < 8) ? 1 : 0x200)
| ((2 - info->max_slots_used_in_channel) << 17) |
(channel << 21) | (info->
some_delay_1_cycle_floor <<
18) | 0x9510);
/*
 * mode4030: with one slot in use, 1 if the channel has two ranks, else 0;
 * with two slots in use, 2 if the channel has one or two ranks, else 3.
 */
if (info->max_slots_used_in_channel == 1) {
info->mode4030[0] = (count_ranks_in_channel(info, 0) == 2);
info->mode4030[1] = (count_ranks_in_channel(info, 1) == 2);
} else {
info->mode4030[0] = ((count_ranks_in_channel(info, 0) == 1) || (count_ranks_in_channel(info, 0) == 2)) ? 2 : 3; /* 2 if 1 or 2 ranks */
info->mode4030[1] = ((count_ranks_in_channel(info, 1) == 1)
|| (count_ranks_in_channel(info, 1) ==
2)) ? 2 : 3;
}
/* Per populated channel: derive avg4044[] and max4048[] from the tables. */
for (channel = 0; channel < NUM_CHANNELS; channel++) {
int max_of_unk;
int min_of_unk_2;
int i, count;
int sum;
if (!info->populated_ranks_mask[channel])
continue;
max_of_unk = 0;
min_of_unk_2 = 32767;
sum = 0;
count = 0;
/* Note: min_of_unk_2, sum and count accumulate across all three passes. */
for (i = 0; i < 3; i++) {
int unk1;
if (info->revision < 8)
unk1 =
u8_FFFD1891[0][channel][info->
clock_speed_index]
[i];
else if (!
(info->revision >= 0x10
|| info->revision_flag_1))
unk1 =
u8_FFFD1891[1][channel][info->
clock_speed_index]
[i];
else
unk1 = 0;
for (slot = 0; slot < NUM_SLOTS; slot++)
for (rank = 0; rank < NUM_RANKS; rank++) {
int a = 0;
int b = 0;
if (!info->
populated_ranks[channel][slot]
[rank])
continue;
/* a/b stay 0 unless revision is 4, mask bits 0 and 2 are not both set, and raw card is 3 or 5. */
if (extended_silicon_revision == 4
&& (info->
populated_ranks_mask[channel] &
5) != 5) {
if ((info->
spd[channel][slot]
[REFERENCE_RAW_CARD_USED] &
0x1F) == 3) {
a = u16_ffd1178[0]
[info->
clock_speed_index];
b = u16_fe0eb8[0][info->
clock_speed_index];
} else
if ((info->
spd[channel][slot]
[REFERENCE_RAW_CARD_USED]
& 0x1F) == 5) {
a = u16_ffd1178[1]
[info->
clock_speed_index];
b = u16_fe0eb8[1][info->
clock_speed_index];
}
}
min_of_unk_2 = min(min_of_unk_2, a);
min_of_unk_2 = min(min_of_unk_2, b);
/* Only rank 0 of each slot contributes to the average. */
if (rank == 0) {
sum += a;
count++;
}
{
int t;
t = b +
u8_FFFD0EF8[channel]
[extended_silicon_revision]
[info->
mode4030[channel]][info->
clock_speed_index];
if (unk1 >= t)
max_of_unk =
max(max_of_unk,
unk1 - t);
}
}
{
int t =
u8_FFFD17E0[channel]
[extended_silicon_revision][info->
mode4030
[channel]]
[info->clock_speed_index] + min_of_unk_2;
if (unk1 >= t)
max_of_unk = max(max_of_unk, unk1 - t);
}
}
/*
 * NOTE(review): count is only incremented for populated rank 0; if a
 * channel had a non-zero populated_ranks_mask but no rank 0 populated,
 * this would divide by zero.
 */
info->avg4044[channel] = sum / count;
info->max4048[channel] = max_of_unk;
}
}
/*
 * Issue one command to a rank by programming bits of registers 0x271/0x671
 * with `addr3` and then reading from the address
 * (value << 3) | (total_rank << 28). Afterwards 0x271/0x671 are set to
 * mode 2 and the rank's base address is read once more.
 *
 * For odd ranks on modules whose SPD RANK1_ADDRESS_MAPPING bit 0 is set
 * (mirrored mapping), bit pairs in `addr3` (bits 4/5) and in `value`
 * (bits 3/4, 5/6, 7/8) are swapped first.
 */
static void jedec_read(struct raminfo *info,
		       int channel, int slot, int rank,
		       int total_rank, u8 addr3, unsigned int value)
{
	const int mirrored = (rank & 1)
	    && (info->spd[channel][slot][RANK1_ADDRESS_MAPPING] & 1);
	const int rank_base = total_rank << 28;

	/* Handle mirrored mapping. */
	if (mirrored) {
		addr3 = (addr3 & 0xCF) | ((addr3 & 0x10) << 1)
		    | ((addr3 >> 1) & 0x10);
		value = (value & ~0x1f8) | ((value >> 1) & 0xa8)
		    | ((value & 0xa8) << 1);
	}

	write_mchbar8(0x271, addr3 | (read_mchbar8(0x271) & 0xC1));
	write_mchbar8(0x671, addr3 | (read_mchbar8(0x671) & 0xC1));

	read32p((value << 3) | rank_base);

	write_mchbar8(0x271, (read_mchbar8(0x271) & 0xC3) | 2);
	write_mchbar8(0x671, (read_mchbar8(0x671) & 0xC3) | 2);

	read32p(rank_base);
}
/*
 * Bit values used by jedec_init() when building the values passed to
 * jedec_read() and written to the 0x58c register.
 * NOTE(review): the MR0/MR1/MR2 prefixes presumably refer to the DDR3 mode
 * registers - confirm against the JEDEC DDR3 specification.
 */
/* MR1: termination (RZQ/12, RZQ/2, RZQ/4 per the names) and output drive strength. */
enum {
MR1_RZQ12 = 512,
MR1_RZQ2 = 64,
MR1_RZQ4 = 4,
MR1_ODS34OHM = 2
};
/* MR0: burst type and DLL reset flags. */
enum {
MR0_BT_INTERLEAVED = 8,
MR0_DLL_RESET_ON = 256
};
/* MR2: write termination (RTT_WR) setting. */
enum {
MR2_RTT_WR_DISABLED = 0,
MR2_RZQ2 = 1 << 10
};
/*
 * Send the initialisation commands to every populated rank.
 *
 * Channels are processed from the highest index down to 0. For each channel
 * the termination settings are chosen from the silicon revision, clock
 * speed and populated-rank mask, registers 0x588..0x590 are programmed, and
 * each populated rank then receives four jedec_read() commands (addr3 =
 * 0x28, 0x38, 0x18, 6).
 * NOTE(review): those four presumably target MR2, MR3, MR1 and MR0 in that
 * order - confirm.
 */
static void jedec_init(struct raminfo *info)
{
int write_recovery;
int channel, slot, rank;
int total_rank;
int dll_on;
int self_refresh_temperature;
int auto_self_refresh;
auto_self_refresh = 1;
self_refresh_temperature = 1;
/*
 * write_recovery from board_lane_delay[3]: <= 8 -> delay - 4,
 * 9..10 -> 5, above 10 -> 6.
 * NOTE(review): a delay below 4 would give a negative value.
 */
if (info->board_lane_delay[3] <= 10) {
if (info->board_lane_delay[3] <= 8)
write_recovery = info->board_lane_delay[3] - 4;
else
write_recovery = 5;
} else {
write_recovery = 6;
}
/* Both features are enabled only if every populated module reports them. */
FOR_POPULATED_RANKS {
auto_self_refresh &=
(info->spd[channel][slot][THERMAL_AND_REFRESH] >> 2) & 1;
self_refresh_temperature &=
info->spd[channel][slot][THERMAL_AND_REFRESH] & 1;
}
if (auto_self_refresh == 1)
self_refresh_temperature = 0;
/* dll_on: set unless silicon revision is 2/3 and no channel has rank 0 in both slots. */
dll_on = ((info->silicon_revision != 2 && info->silicon_revision != 3)
|| (info->populated_ranks[0][0][0]
&& info->populated_ranks[0][1][0])
|| (info->populated_ranks[1][0][0]
&& info->populated_ranks[1][1][0]));
/* total_rank counts populated ranks in processing order; it selects the address window (total_rank << 28). */
total_rank = 0;
for (channel = NUM_CHANNELS - 1; channel >= 0; channel--) {
int rtt, rtt_wr = MR2_RTT_WR_DISABLED;
int rzq_reg58e;
if (info->silicon_revision == 2 || info->silicon_revision == 3) {
rzq_reg58e = 64;
rtt = MR1_RZQ2;
if (info->clock_speed_index != 0) {
rzq_reg58e = 4;
if (info->populated_ranks_mask[channel] == 3)
rtt = MR1_RZQ4;
}
} else {
if ((info->populated_ranks_mask[channel] & 5) == 5) {
rtt = MR1_RZQ12;
rzq_reg58e = 64;
rtt_wr = MR2_RZQ2;
} else {
rzq_reg58e = 4;
rtt = MR1_RZQ4;
}
}
write_mchbar16(0x588 + (channel << 10), 0x0);
write_mchbar16(0x58a + (channel << 10), 0x4);
write_mchbar16(0x58c + (channel << 10), rtt | MR1_ODS34OHM);
write_mchbar16(0x58e + (channel << 10), rzq_reg58e | 0x82);
write_mchbar16(0x590 + (channel << 10), 0x1282);
for (slot = 0; slot < NUM_SLOTS; slot++)
for (rank = 0; rank < NUM_RANKS; rank++)
if (info->populated_ranks[channel][slot][rank]) {
/* addr3 0x28: rtt_wr, clock speed index, self-refresh options. */
jedec_read(info, channel, slot, rank,
total_rank, 0x28,
rtt_wr | (info->
clock_speed_index
<< 3)
| (auto_self_refresh << 6) |
(self_refresh_temperature <<
7));
/* addr3 0x38: value 0. */
jedec_read(info, channel, slot, rank,
total_rank, 0x38, 0);
/* addr3 0x18: termination and drive strength. */
jedec_read(info, channel, slot, rank,
total_rank, 0x18,
rtt | MR1_ODS34OHM);
/* addr3 6: DLL flag, write recovery, CAS latency - 4, burst type, DLL reset. */
jedec_read(info, channel, slot, rank,
total_rank, 6,
(dll_on << 12) |
(write_recovery << 9)
| ((info->cas_latency - 4) <<
4) | MR0_BT_INTERLEAVED |
MR0_DLL_RESET_ON);
total_rank++;
}
}
}
/*
 * Program the per-rank size/boundary registers and the channel
 * interleaving split, and record the totals in `info`.
 *
 * With `pre_jedec` set, every populated rank is treated as 256 MB with a
 * fixed attribute byte and no interleaving is configured. Otherwise the
 * rank size is (256 << density) >> is_x16_module, and the interleaved part
 * is twice the smaller channel.
 *
 * Sets info->total_memory_mb, info->interleaved_part_mb and
 * info->non_interleaved_part_mb.
 */
static void program_modules_memory_map(struct raminfo *info, int pre_jedec)
{
unsigned channel, slot, rank;
unsigned int total_mb[2] = { 0, 0 }; /* total memory per channel in MB */
unsigned int channel_0_non_interleaved;
FOR_ALL_RANKS {
if (info->populated_ranks[channel][slot][rank]) {
total_mb[channel] +=
pre_jedec ? 256 : (256 << info->
density[channel][slot] >> info->
is_x16_module[channel][slot]);
/* Attribute byte per rank: x16 flag in bit 0, (density + 1) from bit 1, plus bit 7. */
write_mchbar8(0x208 + rank + 2 * slot + (channel << 10),
(pre_jedec ? (1 | ((1 + 1) << 1))
: (info->
is_x16_module[channel][slot] |
((info->density[channel][slot] +
1) << 1))) | 0x80);
}
/* Written for every rank, populated or not: running channel total in 64 MB units. */
write_mchbar16(0x200 + (channel << 10) + 4 * slot + 2 * rank,
total_mb[channel] >> 6);
}
info->total_memory_mb = total_mb[0] + total_mb[1];
info->interleaved_part_mb =
pre_jedec ? 0 : 2 * min(total_mb[0], total_mb[1]);
info->non_interleaved_part_mb =
total_mb[0] + total_mb[1] - info->interleaved_part_mb;
/* Part of channel 0 that lies outside the interleaved region. */
channel_0_non_interleaved = total_mb[0] - info->interleaved_part_mb / 2;
write_mchbar32(0x100,
channel_0_non_interleaved | (info->
non_interleaved_part_mb <<
16));
if (!pre_jedec)
write_mchbar16(0x104, info->interleaved_part_mb);
}
/*
 * Program a batch of MCHBAR timing/delay registers from the board lane
 * delays, CAS latency, clock speed index and silicon revision held in
 * info, then program the module memory map and eight register pairs on
 * the QuickPath device.
 *
 * Most register meanings are not evident from this code (hence names such
 * as some_delay_ns); the comments below only describe what is computed.
 */
static void program_board_delay(struct raminfo *info)
{
int cas_latency_shift;
int some_delay_ns;
int some_delay_3_half_cycles;
unsigned channel, i;
int high_multiplier;
int lane_3_delay;
int cas_latency_derived;
/* Defaults; overridden below depending on revision and FSB ratio. */
high_multiplier = 0;
some_delay_ns = 200;
some_delay_3_half_cycles = 4;
/* 1 for silicon revision 0 or 1, otherwise 0. */
cas_latency_shift = info->silicon_revision == 0
|| info->silicon_revision == 1 ? 1 : 0;
/* Revisions below 8 use 600 instead of 200 and no CAS latency shift. */
if (info->revision < 8) {
some_delay_ns = 600;
cas_latency_shift = 0;
}
{
int speed_bit;
/*
 * speed_bit = (clock_speed_index > 1 or silicon revision is neither
 * 2 nor 3) XOR (revision >= 0x10). The same value, combined with the
 * inverted ECC flag in bit 1, is written for both channels.
 */
speed_bit =
((info->clock_speed_index > 1
|| (info->silicon_revision != 2
&& info->silicon_revision != 3))) ^ (info->revision >=
0x10);
write_500(info, 0, speed_bit | ((!info->use_ecc) << 1), 0x60e,
3, 1);
write_500(info, 1, speed_bit | ((!info->use_ecc) << 1), 0x60e,
3, 1);
/*
 * This is exactly the case where the left operand of the XOR above
 * is 0 while revision >= 0x10.
 */
if (info->revision >= 0x10 && info->clock_speed_index <= 1
&& (info->silicon_revision == 2
|| info->silicon_revision == 3))
rmw_1d0(0x116, 5, 2, 4, 1);
}
/* One-hot bit at position 28 + max_slots_used_in_channel, plus a fixed value. */
write_mchbar32(0x120,
(1 << (info->max_slots_used_in_channel + 28)) |
0x188e7f9f);
/* board_lane_delay[4] plus frequency_01() / 1000, rounded up. */
write_mchbar8(0x124,
info->board_lane_delay[4] +
((frequency_01(info) + 999) / 1000));
write_mchbar16(0x125, 0x1360);
write_mchbar8(0x127, 0x40);
/*
 * Only when the FSB frequency is below half of frequency_11(): set
 * high_multiplier and derive some_delay_3_half_cycles from the FSB and
 * half-cycle periods; the result is capped at 7.
 */
if (info->fsb_frequency < frequency_11(info) / 2) {
unsigned some_delay_2_half_cycles;
high_multiplier = 1;
some_delay_2_half_cycles = ps_to_halfcycles(info,
((3 *
fsbcycle_ps(info))
>> 1) +
(halfcycle_ps(info)
*
reg178_min[info->
clock_speed_index]
>> 6)
+
4 *
halfcycle_ps(info)
+ 2230);
some_delay_3_half_cycles =
min((some_delay_2_half_cycles +
(frequency_11(info) * 2) * (28 -
some_delay_2_half_cycles) /
(frequency_11(info) * 2 -
4 * (info->fsb_frequency))) >> 3, 7);
}
/* Bit 0 of MCHBAR 0x2ca9 overrides the value computed above with 3. */
if (read_mchbar8(0x2ca9) & 1)
some_delay_3_half_cycles = 3;
/* Per-channel registers are addressed with a stride of 0x400 (channel << 10). */
for (channel = 0; channel < NUM_CHANNELS; channel++) {
write_mchbar32(0x220 + (channel << 10),
read_mchbar32(0x220 +
(channel << 10)) | 0x18001117);
write_mchbar32(0x224 + (channel << 10),
(info->max_slots_used_in_channel - 1)
|
((info->cas_latency - 5 -
info->clock_speed_index) << 21)
|
((info->max_slots_used_in_channel +
info->cas_latency - cas_latency_shift -
4) << 16)
| ((info->cas_latency - cas_latency_shift - 4) <<
26)
|
((info->cas_latency - info->clock_speed_index +
info->max_slots_used_in_channel - 6) << 8));
write_mchbar32(0x228 + (channel << 10),
info->max_slots_used_in_channel);
write_mchbar8(0x239 + (channel << 10), 32);
write_mchbar32(0x248 + (channel << 10),
(high_multiplier << 24) |
(some_delay_3_half_cycles << 25) | 0x840000);
write_mchbar32(0x278 + (channel << 10), 0xc362042);
write_mchbar32(0x27c + (channel << 10), 0x8b000062);
write_mchbar32(0x24c + (channel << 10),
((! !info->
clock_speed_index) << 17) | (((2 +
info->
clock_speed_index
-
(! !info->
clock_speed_index)))
<< 12) | 0x10200);
write_mchbar8(0x267 + (channel << 10), 0x4);
write_mchbar16(0x272 + (channel << 10), 0x155);
write_mchbar32(0x2bc + (channel << 10),
(read_mchbar32(0x2bc + (channel << 10)) &
0xFF000000)
| 0x707070);
/*
 * 4-bit mask with one bit per (slot, rank) of this channel; a set
 * bit means the rank is NOT populated.
 */
write_500(info, channel,
((!info->populated_ranks[channel][1][1])
| (!info->populated_ranks[channel][1][0] << 1)
| (!info->populated_ranks[channel][0][1] << 2)
| (!info->populated_ranks[channel][0][0] << 3)),
0x4c9, 4, 1);
}
/* Bits 7:6 are 1 for clock_speed_index 0 and 2 otherwise. */
write_mchbar8(0x2c4, ((1 + (info->clock_speed_index != 0)) << 6) | 0xC);
{
/*
 * 3 when the FSB frequency equals frequency_11(), 1 when
 * 2 * FSB < 3 * (frequency_11() / 2), otherwise 2.
 */
u8 freq_divisor = 2;
if (info->fsb_frequency == frequency_11(info))
freq_divisor = 3;
else if (2 * info->fsb_frequency < 3 * (frequency_11(info) / 2))
freq_divisor = 1;
else
freq_divisor = 2;
write_mchbar32(0x2c0, (freq_divisor << 11) | 0x6009c400);
}
/*
 * Quantise board_lane_delay[3]: values up to 8 are used unchanged,
 * 9 and 10 become 10, anything larger becomes 12.
 */
if (info->board_lane_delay[3] <= 10) {
if (info->board_lane_delay[3] <= 8)
lane_3_delay = info->board_lane_delay[3];
else
lane_3_delay = 10;
} else {
lane_3_delay = 12;
}
/* cas_latency - clock_speed_index + 2, plus one more for speed index > 1. */
cas_latency_derived = info->cas_latency - info->clock_speed_index + 2;
if (info->clock_speed_index > 1)
cas_latency_derived++;
/* Second per-channel pass: registers built from the board lane delays. */
for (channel = 0; channel < NUM_CHANNELS; channel++) {
write_mchbar32(0x240 + (channel << 10),
((info->clock_speed_index ==
0) * 0x11000) | 0x1002100 | ((2 +
info->
clock_speed_index)
<< 4) | (info->
cas_latency
- 3));
write_500(info, channel, (info->clock_speed_index << 1) | 1,
0x609, 6, 1);
write_500(info, channel,
info->clock_speed_index + 2 * info->cas_latency - 7,
0x601, 6, 1);
write_mchbar32(0x250 + (channel << 10),
((lane_3_delay + info->clock_speed_index +
9) << 6)
| (info->board_lane_delay[7] << 2) | (info->
board_lane_delay
[4] << 16)
| (info->board_lane_delay[1] << 25) | (info->
board_lane_delay
[1] << 29)
| 1);
write_mchbar32(0x254 + (channel << 10),
(info->
board_lane_delay[1] >> 3) | ((info->
board_lane_delay
[8] +
4 *
info->
use_ecc) << 6) |
0x80 | (info->board_lane_delay[6] << 1) | (info->
board_lane_delay
[2] <<
28) |
(cas_latency_derived << 16) | 0x4700000);
write_mchbar32(0x258 + (channel << 10),
((info->board_lane_delay[5] +
info->clock_speed_index +
9) << 12) | ((info->clock_speed_index -
info->cas_latency + 12) << 8)
| (info->board_lane_delay[2] << 17) | (info->
board_lane_delay
[4] << 24)
| 0x47);
write_mchbar32(0x25c + (channel << 10),
(info->board_lane_delay[1] << 1) | (info->
board_lane_delay
[0] << 8) |
0x1da50000);
write_mchbar8(0x264 + (channel << 10), 0xff);
write_mchbar8(0x5f8 + (channel << 10),
(cas_latency_shift << 3) | info->use_ecc);
}
/* Second argument is 1 here, presumably the pre_jedec flag; confirm against the callee. */
program_modules_memory_map(info, 1);
/*
 * Bits 15:9 get ns_to_cycles(some_delay_ns) / 2 capped at 127; the bits
 * selected by 0x1C3 are preserved and 0x3C is set.
 */
write_mchbar16(0x610,
(min(ns_to_cycles(info, some_delay_ns) / 2, 127) << 9)
| (read_mchbar16(0x610) & 0x1C3) | 0x3C);
write_mchbar16(0x612, read_mchbar16(0x612) | 0x100);
write_mchbar16(0x214, read_mchbar16(0x214) | 0x3E00);
/*
 * Eight register pairs on QUICKPATH_BUS device 0 function 1:
 * 0x80 + 4 * i gets (total_memory_mb - 64) with bit 1 set and, for
 * i == 0 only, bit 0 set; 0xc0 + 4 * i is cleared.
 */
for (i = 0; i < 8; i++) {
pcie_write_config32(PCI_DEV (QUICKPATH_BUS, 0, 1), 0x80 + 4 * i,
(info->total_memory_mb - 64) | !i | 2);
pcie_write_config32(PCI_DEV (QUICKPATH_BUS, 0, 1), 0xc0 + 4 * i, 0);
}
}
/* NOTE(review): not referenced by the code visible in this part of the file. */
#define BETTER_MEMORY_MAP 0
/*
 * Compute the top-level memory map (TOM, TOLUD, TOUUD, remap window, UMA
 * and TSEG bases) and write it to the northbridge and QuickPath config
 * registers.
 *
 * Local sizes and bases are in MiB: they are shifted left by 20 where a
 * byte address is written, and CONFIG_SMM_TSEG_SIZE is shifted right by 20
 * before being subtracted.
 */
static void program_total_memory_map(struct raminfo *info)
{
unsigned int TOM, TOLUD, TOUUD;
unsigned int quickpath_reserved;
unsigned int REMAPbase;
unsigned int uma_base_igd;
unsigned int uma_base_gtt;
int memory_remap;
unsigned int memory_map[8];
int i;
unsigned int current_limit;
unsigned int tseg_base;
int uma_size_igd = 0, uma_size_gtt = 0;
memset(memory_map, 0, sizeof(memory_map));
#if REAL
/*
 * Look up the UMA sizes: GGC bits 7:4 index the IGD size table and
 * bits 11:8 index the GTT size table.
 */
if (info->uma_enabled) {
u16 t = pcie_read_config16(NORTHBRIDGE, D0F0_GGC);
gav(t);
const int uma_sizes_gtt[16] =
{ 0, 1, 0, 2, 0, 0, 0, 0, 0, 2, 3, 4, 42, 42, 42, 42 };
/* Igd memory */
const int uma_sizes_igd[16] = {
0, 0, 0, 0, 0, 32, 48, 64, 128, 256, 96, 160, 224, 352,
256, 512
};
uma_size_igd = uma_sizes_igd[(t >> 4) & 0xF];
uma_size_gtt = uma_sizes_gtt[(t >> 8) & 0xF];
}
#endif
/* A total of exactly 4096 is reduced to 4032. */
TOM = info->total_memory_mb;
if (TOM == 4096)
TOM = 4032;
/* TOUUD: TOM minus the HECI reservation, rounded down to a multiple of 64. */
TOUUD = ALIGN_DOWN(TOM - info->memory_reserved_for_heci_mb, 64);
/* TOLUD: the smaller of (3072 + UMA rounded up to 64) and TOUUD, rounded down to 64. */
TOLUD = ALIGN_DOWN(min(3072 + ALIGN_UP(uma_size_igd + uma_size_gtt, 64)
, TOUUD), 64);
/*
 * If more than 64 lies between TOLUD and TOUUD, that range is remapped:
 * REMAPbase is max(4096, TOUUD) and TOUUD becomes 4096 plus the
 * remapped amount. REMAPbase is only assigned (and only read) when
 * memory_remap is set.
 */
memory_remap = 0;
if (TOUUD - TOLUD > 64) {
memory_remap = 1;
REMAPbase = max(4096, TOUUD);
TOUUD = TOUUD - TOLUD + 4096;
}
if (TOUUD > 4096)
memory_map[2] = TOUUD | 1;
/*
 * If bit 11 of QuickPath register 0x68 is set, reserve
 * 1 << (index of the lowest set bit of its bits 31:20).
 */
quickpath_reserved = 0;
{
u32 t;
gav(t = pcie_read_config32(PCI_DEV(QUICKPATH_BUS, 0, 1), 0x68));
if (t & 0x800)
quickpath_reserved =
(1 << find_lowest_bit_set32(t >> 20));
}
/* With remapping the reservation comes off TOUUD; otherwise off tseg_base below. */
if (memory_remap)
TOUUD -= quickpath_reserved;
#if !REAL
/*
 * In the !REAL build the UMA sizes are only read here, i.e. after TOLUD
 * was computed above with both sizes still 0.
 */
if (info->uma_enabled) {
u16 t = pcie_read_config16(NORTHBRIDGE, D0F0_GGC);
gav(t);
const int uma_sizes_gtt[16] =
{ 0, 1, 0, 2, 0, 0, 0, 0, 0, 2, 3, 4, 42, 42, 42, 42 };
/* Igd memory */
const int uma_sizes_igd[16] = {
0, 0, 0, 0, 0, 32, 48, 64, 128, 256, 96, 160, 224, 352,
256, 512
};
uma_size_igd = uma_sizes_igd[(t >> 4) & 0xF];
uma_size_gtt = uma_sizes_gtt[(t >> 8) & 0xF];
}
#endif
/* IGD memory sits directly below TOLUD, the GTT directly below that. */
uma_base_igd = TOLUD - uma_size_igd;
uma_base_gtt = uma_base_igd - uma_size_gtt;
/* TSEG sits below the GTT base rounded down to 64; final base is 8-aligned. */
tseg_base = ALIGN_DOWN(uma_base_gtt, 64) - (CONFIG_SMM_TSEG_SIZE >> 20);
if (!memory_remap)
tseg_base -= quickpath_reserved;
tseg_base = ALIGN_DOWN(tseg_base, 8);
/* TOLUD is written shifted left by 4; TOM and the remap registers in units of 64. */
pcie_write_config16(NORTHBRIDGE, D0F0_TOLUD, TOLUD << 4);
pcie_write_config16(NORTHBRIDGE, D0F0_TOM, TOM >> 6);
if (memory_remap) {
pcie_write_config16(NORTHBRIDGE, D0F0_REMAPBASE, REMAPbase >> 6);
pcie_write_config16(NORTHBRIDGE, D0F0_REMAPLIMIT, (TOUUD - 64) >> 6);
}
pcie_write_config16(NORTHBRIDGE, D0F0_TOUUD, TOUUD);
if (info->uma_enabled) {
pcie_write_config32(NORTHBRIDGE, D0F0_IGD_BASE, uma_base_igd << 20);
pcie_write_config32(NORTHBRIDGE, D0F0_GTT_BASE, uma_base_gtt << 20);
}
pcie_write_config32(NORTHBRIDGE, TSEG, tseg_base << 20);
/*
 * QuickPath map registers: current_limit is the running maximum of the
 * entries seen so far (bit 0 masked off). Each register at 0x80 + 4 * i
 * gets the entry's own bit 0, (current_limit - 1) rounded down to 64,
 * and bit 1; the matching register at 0xc0 + 4 * i is cleared.
 */
current_limit = 0;
memory_map[0] = ALIGN_DOWN(uma_base_gtt, 64) | 1;
memory_map[1] = 4096;
for (i = 0; i < ARRAY_SIZE(memory_map); i++) {
current_limit = max(current_limit, memory_map[i] & ~1);
pcie_write_config32(PCI_DEV(QUICKPATH_BUS, 0, 1), 4 * i + 0x80,
(memory_map[i] & 1) | ALIGN_DOWN(current_limit -
1, 64) | 2);
pcie_write_config32(PCI_DEV(QUICKPATH_BUS, 0, 1), 4 * i + 0xc0, 0);
}
}
/*
 * Gather chipset facts into info: HECI BAR and ME UMA size (only when not
 * already set), northbridge revision, maximum supported clock speed index,
 * whether UMA (integrated graphics) is enabled, and the silicon revision
 * derived from the CAPID0 registers and the PCI device ID.
 */
static void collect_system_info(struct raminfo *info)
{
	/* Indexed by (capid0[2] bit 18) + 2 * (capid0[1] bit 3). */
	const int revision_by_straps[4] = { 2, 3, 3, 0 };
	u32 capid0[3];
	unsigned channel;
	int i;

	/* Wait for some bit, maybe TXT clear. */
	while (!(read8((u8 *)0xfed40000) & (1 << 7)))
		continue;

	if (!info->heci_bar) {
		gav(info->heci_bar =
		    pcie_read_config32(HECIDEV, HECIBAR) & 0xFFFFFFF8);
	}

	if (!info->memory_reserved_for_heci_mb) {
		/* Wait for ME to be ready */
		intel_early_me_init();
		info->memory_reserved_for_heci_mb = intel_early_me_uma_size();
	}

	for (i = 0; i < 3; i++) {
		gav(capid0[i] =
		    pcie_read_config32(NORTHBRIDGE, D0F0_CAPID0 | (i << 2)));
	}
	gav(info->revision = pcie_read_config8(NORTHBRIDGE, PCI_REVISION_ID));

	/* Low three bits of CAPID0[1], inverted. */
	info->max_supported_clock_speed_index = (~capid0[1] & 7);

	/* CAPID0[1] bit 11 set means no UMA; otherwise DEVEN bit 3 decides. */
	if (!((capid0[1] >> 11) & 1)) {
		gav(info->uma_enabled =
		    pcie_read_config8(NORTHBRIDGE, D0F0_DEVEN) & 8);
	} else {
		info->uma_enabled = 0;
	}

	/* Unrecognised: [0000:fffd3d2d] 37f81.37f82 ! CPUID: eax: 00000001; ecx: 00000e00 => 00020655.00010800.029ae3ff.bfebfbff */
	info->silicon_revision = 0;

	if (capid0[2] & 2) {
		info->silicon_revision = 0;
		info->max_supported_clock_speed_index = 2;
		/*
		 * Any channel whose slot 0 / rank 0 is populated with a
		 * module of type 3 (low nibble of the SPD module type byte)
		 * switches to revision 2 and speed index 1.
		 */
		for (channel = 0; channel < NUM_CHANNELS; channel++) {
			if (!info->populated_ranks[channel][0][0])
				continue;
			if ((info->spd[channel][0][MODULE_TYPE] & 0xf) != 3)
				continue;
			info->silicon_revision = 2;
			info->max_supported_clock_speed_index = 1;
		}
		return;
	}

	info->silicon_revision =
	    revision_by_straps[((capid0[2] >> 18) & 1) +
			       2 * ((capid0[1] >> 3) & 1)];

	/* Device IDs 0x40 and 0x48 override the strap-derived value. */
	switch (pcie_read_config16(NORTHBRIDGE, PCI_DEVICE_ID)) {
	case 0x40:
		info->silicon_revision = 0;
		break;
	case 0x48:
		info->silicon_revision = 1;
		break;
	default:
		break;
	}
}
/*
 * Write the cached training results back to the hardware: every lane
 * timing (4 timing types x channel x slot x rank x 9 lanes) through
 * write_500(), followed by registers 0x178 and 0x10b through write_1d0().
 * Does nothing when info->revision is below 8.
 */
static void write_training_data(struct raminfo *info)
{
	int tm, channel, slot, rank, lane;

	if (info->revision < 8)
		return;

	for (tm = 0; tm < 4; tm++) {
		for (channel = 0; channel < NUM_CHANNELS; channel++) {
			for (slot = 0; slot < NUM_SLOTS; slot++) {
				for (rank = 0; rank < NUM_RANKS; rank++) {
					for (lane = 0; lane < 9; lane++) {
						write_500(info, channel,
							  info->cached_training->
							  lane_timings[tm][channel]
							  [slot][rank][lane],
							  get_timing_register_addr
							  (lane, tm, slot, rank),
							  9, 0);
					}
				}
			}
		}
	}

	write_1d0(info->cached_training->reg_178, 0x178, 7, 1);
	write_1d0(info->cached_training->reg_10b, 0x10b, 6, 1);
}
/*
 * Debug aid: for every populated rank print, per lane and per timing type,
 * the value currently read back from the hardware followed in parentheses
 * by the value stored in info->training; then do the same for registers
 * 0x178 and 0x10b. Compiles to an empty function when REAL is 0.
 */
static void dump_timings(struct raminfo *info)
{
#if REAL
	int channel, slot, rank, lane, tm;

	printk(BIOS_DEBUG, "Timings:\n");

	FOR_POPULATED_RANKS {
		printk(BIOS_DEBUG, "channel %d, slot %d, rank %d\n",
		       channel, slot, rank);

		for (lane = 0; lane < 9; lane++) {
			printk(BIOS_DEBUG, "lane %d: ", lane);

			for (tm = 0; tm < 4; tm++)
				printk(BIOS_DEBUG, "%x (%x) ",
				       read_500(info, channel,
						get_timing_register_addr
						(lane, tm, slot, rank), 9),
				       info->training.
				       lane_timings[tm][channel][slot][rank]
				       [lane]);

			printk(BIOS_DEBUG, "\n");
		}
	}

	printk(BIOS_DEBUG, "[178] = %x (%x)\n",
	       read_1d0(0x178, 7), info->training.reg_178);
	printk(BIOS_DEBUG, "[10b] = %x (%x)\n",
	       read_1d0(0x10b, 6), info->training.reg_10b);
#endif
}
/* Read timings and other registers that need to be restored verbatim and
put them to CBMEM.
*/
static void save_timings(struct raminfo *info)
{
struct ram_training train;
struct mrc_data_container *mrcdata;
int output_len = ALIGN(sizeof(train), 16);
int channel, slot, rank, lane, i;
train = info->training;
FOR_POPULATED_RANKS for (lane = 0; lane < 9; lane++)
for (i = 0; i < 4; i++)
train.lane_timings[i][channel][slot][rank][lane] =
read_500(info, channel,
get_timing_register_addr(lane, i, slot,
rank), 9);
train.reg_178 = read_1d0(0x178, 7);
train.reg_10b = read_1d0(0x10b, 6);
for (channel = 0; channel < NUM_CHANNELS; channel++) {
u32 reg32;
reg32 = read_mchbar32 ((channel << 10) + 0x274);
train.reg274265[channel][0] = reg32 >> 16;
train.reg274265[channel][1] = reg32 & 0xffff;
train.reg274265[channel][2] = read_mchbar16 ((channel << 10) + 0x265) >> 8;
}
train.reg2ca9_bit0 = read_mchbar8(0x2ca9) & 1;
train.reg_6dc = read_mchbar32 (0x6dc);
train.reg_6e8 = read_mchbar32 (0x6e8);
printk (BIOS_SPEW, "[6dc] = %x\n", train.reg_6dc);
printk (BIOS_SPEW, "[6e8] = %x\n", train.reg_6e8);
/* Save the MRC S3 restore data to cbmem */
mrcdata = cbmem_add
(CBMEM_ID_MRCDATA, output_len + sizeof(struct mrc_data_container));
if (mrcdata != NULL) {
printk(BIOS_DEBUG, "Relocate MRC DATA from %p to %p (%u bytes)\n",
&train, mrcdata, output_len);
mrcdata->mrc_signature = MRC_DATA_SIGNATURE;
mrcdata->mrc_data_size = output_len;
mrcdata->reserved = 0;
memcpy(mrcdata->mrc_data, &train, sizeof(train));
/* Zero the unused space in aligned buffer. */
if (output_len > sizeof(train))
memset(mrcdata->mrc_data + sizeof(train), 0,
output_len - sizeof(train));
mrcdata->mrc_checksum = compute_ip_checksum(mrcdata->mrc_data,
mrcdata->mrc_data_size);
}
}
#if REAL
/*
 * Return a pointer to the training data held in the current MRC cache
 * entry, or a null pointer when find_current_mrc_cache() finds none.
 */
static const struct ram_training *get_cached_training(void)
{
	struct mrc_data_container *cache = find_current_mrc_cache();

	if (cache)
		return (void *)cache->mrc_data;

	return NULL;
}
#endif
/* FIXME: add timeout. */
static void wait_heci_ready(void)
{
while (!(read32(DEFAULT_HECIBAR + 0xc) & 8)) ; // = 0x8000000c
write32((DEFAULT_HECIBAR + 0x4),
(read32(DEFAULT_HECIBAR + 0x4) & ~0x10) | 0xc);
}
/*
 * Spin until bit 3 of the register at HECIBAR + 0xc is set, then keep
 * re-reading the register at HECIBAR + 0x4 until its circular buffer
 * reports room for at least len entries
 * (buffer_depth - (buffer_write_ptr - buffer_read_ptr) >= len).
 *
 * NOTE(review): the pointer difference is not reduced modulo the pointer
 * width, so a wrapped write pointer would presumably make the buffer look
 * emptier than it is; confirm against struct mei_csr.
 *
 * FIXME: add timeout.
 */
static void wait_heci_cb_avail(int len)
{
	union {
		struct mei_csr csr;
		u32 raw;
	} csr;

	while (!(read32(DEFAULT_HECIBAR + 0xc) & 8))
		continue;

	for (;;) {
		csr.raw = read32(DEFAULT_HECIBAR + 0x4);
		if (len <=
		    csr.csr.buffer_depth - (csr.csr.buffer_write_ptr -
					    csr.csr.buffer_read_ptr))
			break;
	}
}
/*
 * Send one HECI packet: the header dword followed by the payload, all
 * written to the register at HECIBAR + 0, then set bit 2 of the register at
 * HECIBAR + 0x4.
 *
 * head:    packet header; head->length is the payload length in bytes.
 * payload: payload data, read as whole dwords.
 *
 * The bytes of the final dword that lie beyond head->length are masked to
 * zero. The mask is derived from the number of leftover bytes
 * (head->length & 3). The previous code derived it from the dword count,
 * which shifted by 32 or more (undefined behaviour) for payloads of four or
 * more dwords and could zero the last dword of a payload that is a multiple
 * of four bytes. A zero-length packet now sends the header only instead of
 * reading payload[0].
 */
static void send_heci_packet(struct mei_header *head, u32 * payload)
{
	int len = (head->length + 3) / 4;
	int leftover = head->length & 3;
	/* leftover == 0 means the last dword is completely used. */
	u32 last_mask = leftover ? (1U << (8 * leftover)) - 1 : 0xffffffff;
	int i;

	wait_heci_cb_avail(len + 1);

	write32(DEFAULT_HECIBAR + 0, *(u32 *) head);
	if (len > 0) {
		for (i = 0; i < len - 1; i++)
			write32(DEFAULT_HECIBAR + 0, payload[i]);
		write32(DEFAULT_HECIBAR + 0, payload[i] & last_mask);
	}
	write32(DEFAULT_HECIBAR + 0x4, read32(DEFAULT_HECIBAR + 0x4) | 0x4);
}
/*
 * Send a message of len bytes, splitting it into as many packets as needed.
 *
 * The per-packet payload limit is taken from the top byte of the register at
 * HECIBAR + 0x4 (times four, minus four for the header). Every packet but
 * the last has is_complete cleared.
 *
 * msg:           message bytes; advanced packet by packet.
 * len:           message length in bytes.
 * hostaddress:   value for the header's host_address field.
 * clientaddress: value for the header's client_address field.
 */
static void
send_heci_message(u8 * msg, int len, u8 hostaddress, u8 clientaddress)
{
	struct mei_header head;
	int maxlen;
	int chunk;

	wait_heci_ready();
	maxlen = (read32(DEFAULT_HECIBAR + 0x4) >> 24) * 4 - 4;

	for (; len; len -= chunk, msg += chunk) {
		const int is_last = (len <= maxlen);

		chunk = is_last ? len : maxlen;

		head.client_address = clientaddress;
		head.host_address = hostaddress;
		head.length = chunk;
		head.reserved = 0;
		head.is_complete = is_last;

		send_heci_packet(&head, (u32 *) msg);
	}
}
/*
 * Receive one HECI packet.
 *
 * info:        not used by this function.
 * head:        receives the packet header (first dword read from
 *              HECIBAR + 0x8).
 * packet:      destination for the payload dwords.
 * packet_size: on entry the capacity of packet in bytes; on return the
 *              payload length in bytes, or 0 for an empty packet, on
 *              error, or when the ready bit of the last status read is
 *              clear.
 *
 * Returns 0 on success and -1 when the announced length does not fit the
 * circular buffer or the caller's buffer (and, in the !REAL build, when the
 * initial poll gives up).
 *
 * Every payload dword is now read and stored consecutively. The copy loop
 * previously incremented its index twice per iteration, so only half the
 * dwords were taken from the buffer and they landed in every other slot.
 *
 * NOTE(review): the write/read pointer differences below are not reduced
 * modulo the pointer width; confirm against struct mei_csr whether a
 * wrapped write pointer can occur here.
 *
 * FIXME: Add timeout.
 */
static int
recv_heci_packet(struct raminfo *info, struct mei_header *head, u32 * packet,
		 u32 * packet_size)
{
	union {
		struct mei_csr csr;
		u32 raw;
	} csr;
	int i = 0;
	int dwords;

	write32(DEFAULT_HECIBAR + 0x4, read32(DEFAULT_HECIBAR + 0x4) | 2);

	/* Wait until the buffer is non-empty. */
	do {
		csr.raw = read32(DEFAULT_HECIBAR + 0xc);
#if !REAL
		if (i++ > 346)
			return -1;
#endif
	}
	while (csr.csr.buffer_write_ptr == csr.csr.buffer_read_ptr);

	*(u32 *) head = read32(DEFAULT_HECIBAR + 0x8);
	if (!head->length) {
		write32(DEFAULT_HECIBAR + 0x4,
			read32(DEFAULT_HECIBAR + 0x4) | 2);
		*packet_size = 0;
		return 0;
	}

	/* Header plus payload must fit the buffer, payload the caller's space. */
	if (head->length + 4 > 4 * csr.csr.buffer_depth
	    || head->length > *packet_size) {
		*packet_size = 0;
		return -1;
	}

	dwords = (head->length + 3) >> 2;

	/* Wait until the whole payload is in the buffer. */
	do
		csr.raw = read32(DEFAULT_HECIBAR + 0xc);
	while (dwords > csr.csr.buffer_write_ptr - csr.csr.buffer_read_ptr);

	for (i = 0; i < dwords; i++)
		packet[i] = read32(DEFAULT_HECIBAR + 0x8);

	*packet_size = head->length;
	if (!csr.csr.ready)
		*packet_size = 0;

	write32(DEFAULT_HECIBAR + 0x4, read32(DEFAULT_HECIBAR + 0x4) | 4);
	return 0;
}
/*
 * Receive a complete message, assembling it from one or more packets.
 *
 * info:         passed through to recv_heci_packet().
 * message:      destination buffer.
 * message_size: on entry the buffer capacity in bytes; on success the
 *               number of bytes received, on failure 0.
 *
 * Returns 0 once a packet with is_complete set has been received. Returns
 * -1 if a packet fails, a packet is empty, or the buffer fills up before
 * the final packet arrives.
 *
 * FIXME: Add timeout.
 */
static int
recv_heci_message(struct raminfo *info, u32 * message, u32 * message_size)
{
	struct mei_header head;
	int received = 0;

	do {
		/* Space left in the caller's buffer; updated to the packet size. */
		u32 chunk = *message_size - received;

		if (recv_heci_packet(info, &head, message + (received >> 2),
				     &chunk) == -1)
			break;
		if (chunk == 0)
			break;

		received += chunk;
		if (head.is_complete) {
			*message_size = received;
			return 0;
		}
	} while (received < *message_size);

	*message_size = 0;
	return -1;
}
/*
 * Send the MKHI_SET_UMA message carrying info->heci_uma_addr and
 * info->memory_reserved_for_heci_mb (host address 0, client address 7) and
 * read the reply.
 *
 * If no reply can be received the function returns silently. If a reply
 * arrives whose command is not MKHI_SET_UMA with bit 7 set, it dies.
 */
static void send_heci_uma_message(struct raminfo *info)
{
	struct uma_reply {
		u8 group_id;
		u8 command;
		u8 reserved;
		u8 result;
		u8 field2;
		u8 unk3[0x48 - 4 - 1];
	} __attribute__ ((packed));
	struct uma_message {
		u8 group_id;
		u8 cmd;
		u8 reserved;
		u8 result;
		u32 c2;
		u64 heci_uma_addr;
		u32 memory_reserved_for_heci_mb;
		u16 c3;
	} __attribute__ ((packed));

	struct uma_reply reply;
	struct uma_message msg = {
		.group_id = 0,
		.cmd = MKHI_SET_UMA,
		.reserved = 0,
		.result = 0,
		.c2 = 0x82,
		.heci_uma_addr = info->heci_uma_addr,
		.memory_reserved_for_heci_mb =
		    info->memory_reserved_for_heci_mb,
		.c3 = 0,
	};
	u32 reply_size = sizeof(reply);

	send_heci_message((u8 *) & msg, sizeof(msg), 0, 7);

	if (recv_heci_message(info, (u32 *) & reply, &reply_size) == -1)
		return;

	if (reply.command != (MKHI_SET_UMA | (1 << 7)))
		die("HECI init failed\n");
}
/*
 * Work out how much memory is reserved for HECI, place that region directly
 * below the top of memory, prepare the DMIBAR/RCBA registers, and report the
 * region to the ME with send_heci_uma_message().
 *
 * info->memory_reserved_for_heci_mb and info->heci_uma_addr are reset to 0
 * first and stay 0 when the early-exit condition below is hit.
 */
static void setup_heci_uma(struct raminfo *info)
{
u32 reg44;
reg44 = pcie_read_config32(HECIDEV, 0x44); // = 0x80010020
info->memory_reserved_for_heci_mb = 0;
info->heci_uma_addr = 0;
/* Continue only if bit 16 of register 0x44 is set and bit 5 of register 0x40 is clear. */
if (!((reg44 & 0x10000) && !(pcie_read_config32(HECIDEV, 0x40) & 0x20)))
return;
info->heci_bar = pcie_read_config32(HECIDEV, 0x10) & 0xFFFFFFF0;
/* The reserved size is the low six bits of register 0x44. */
info->memory_reserved_for_heci_mb = reg44 & 0x3f;
/*
 * (TOM register << 6) minus the reserved size, shifted left by 20 to
 * form a byte address.
 */
info->heci_uma_addr =
((u64)
((((u64) pcie_read_config16(NORTHBRIDGE, D0F0_TOM)) << 6) -
info->memory_reserved_for_heci_mb)) << 20;
/* The value read here is discarded. */
pcie_read_config32(NORTHBRIDGE, DMIBAR);
if (info->memory_reserved_for_heci_mb) {
/* Clear bit 7 in four DMIBAR/RCBA register pairs, in this exact order. */
write32(DEFAULT_DMIBAR + 0x14,
read32(DEFAULT_DMIBAR + 0x14) & ~0x80);
write32(DEFAULT_RCBA + 0x14,
read32(DEFAULT_RCBA + 0x14) & ~0x80);
write32(DEFAULT_DMIBAR + 0x20,
read32(DEFAULT_DMIBAR + 0x20) & ~0x80);
write32(DEFAULT_RCBA + 0x20,
read32(DEFAULT_RCBA + 0x20) & ~0x80);
write32(DEFAULT_DMIBAR + 0x2c,
read32(DEFAULT_DMIBAR + 0x2c) & ~0x80);
write32(DEFAULT_RCBA + 0x30,
read32(DEFAULT_RCBA + 0x30) & ~0x80);
write32(DEFAULT_DMIBAR + 0x38,
read32(DEFAULT_DMIBAR + 0x38) & ~0x80);
write32(DEFAULT_RCBA + 0x40,
read32(DEFAULT_RCBA + 0x40) & ~0x80);
write32(DEFAULT_RCBA + 0x40, 0x87000080); // OK
write32(DEFAULT_DMIBAR + 0x38, 0x87000080); // OK
/* Spin while bit 1 is set in BOTH status words; no timeout. */
while (read16(DEFAULT_RCBA + 0x46) & 2
&& read16(DEFAULT_DMIBAR + 0x3e) & 2) ;
}
write_mchbar32(0x24, 0x10000 + info->memory_reserved_for_heci_mb);
send_heci_uma_message(info);
/* Afterwards clear HECI config registers 0x10 (the BAR read above) and 0x4. */
pcie_write_config32(HECIDEV, 0x10, 0x0);
pcie_write_config8(HECIDEV, 0x4, 0x0);
}
/*
 * Check whether a channel is compatible with a layout of ranks ranks.
 *
 * Returns 1 for an empty channel. Returns 0 if the number of populated
 * ranks differs from ranks. Otherwise returns 1 if the two slots are
 * populated differently, and for identically populated slots returns 1 only
 * if their x16 flag and density also match.
 */
static int have_match_ranks(struct raminfo *info, int channel, int ranks)
{
	int ranks_in_channel = 0;
	int slot, rank;

	for (slot = 0; slot < 2; slot++)
		for (rank = 0; rank < 2; rank++)
			ranks_in_channel +=
			    info->populated_ranks[channel][slot][rank];

	/* empty channel */
	if (!ranks_in_channel)
		return 1;
	if (ranks_in_channel != ranks)
		return 0;

	/* single slot */
	if (info->populated_ranks[channel][0][0] !=
	    info->populated_ranks[channel][1][0]
	    || info->populated_ranks[channel][0][1] !=
	    info->populated_ranks[channel][1][1])
		return 1;

	/* Both slots populated alike: the modules themselves must match. */
	return info->is_x16_module[channel][0] ==
	    info->is_x16_module[channel][1]
	    && info->density[channel][0] == info->density[channel][1];
}
/*
 * Initialise info->training.lane_timings: timing type 0 is set to 32 for
 * every lane of the first two channels, timing types 1..3 are read back
 * from the hardware, with 11 added to every type 1 value.
 */
static void read_4090(struct raminfo *info)
{
	int tm, channel, slot, rank, lane;

	/* Type 0: fixed starting value (channel bound is the literal 2). */
	for (channel = 0; channel < 2; channel++) {
		for (slot = 0; slot < NUM_SLOTS; slot++) {
			for (rank = 0; rank < NUM_RANKS; rank++) {
				for (lane = 0; lane < 9; lane++) {
					info->training.
					    lane_timings[0][channel][slot]
					    [rank][lane] = 32;
				}
			}
		}
	}

	/* Types 1..3: current hardware values. */
	for (tm = 1; tm < 4; tm++) {
		for (channel = 0; channel < NUM_CHANNELS; channel++) {
			for (slot = 0; slot < NUM_SLOTS; slot++) {
				for (rank = 0; rank < NUM_RANKS; rank++) {
					for (lane = 0; lane < 9; lane++) {
						info->training.
						    lane_timings[tm][channel]
						    [slot][rank][lane] =
						    read_500(info, channel,
							     get_timing_register_addr
							     (lane, tm, slot,
							      rank), 9)
						    + (tm == 1 ? 11 : 0);	// !!!!
					}
				}
			}
		}
	}
}
/*
 * Reference ("etalon") pattern for the first memory test.
 *
 * addr is split into a group number (addr / 480) and a position inside the
 * group (addr % 480). The base pattern is 0 for group 0 and
 * 0x1010101 << (group - 1) for the others. It is inverted when flip differs
 * from a bit taken from the invmask table (row = position / 32,
 * column = position % 16) XORed with bit 4 of the position.
 */
static u32 get_etalon2(int flip, u32 addr)
{
	const u16 invmask[] = {
		0xaaaa, 0x6db6, 0x4924, 0xeeee, 0xcccc, 0x8888, 0x7bde, 0x739c,
		0x6318, 0x4210, 0xefbe, 0xcf3c, 0x8e38, 0x0c30, 0x0820
	};
	const u32 group = addr / 480;
	const u32 pos = addr % 480;
	const u32 column = pos & 0xf;
	const u32 half = (pos >> 4) & 1;
	const u32 row = pos >> 5;
	u32 pattern = group ? 0x1010101 << (group - 1) : 0;

	if (flip ^ (((invmask[row] >> column) ^ half) & 1))
		pattern = ~pattern;

	return pattern;
}
/* Write zero to both the base and the mask MSR of variable MTRR 3. */
static void disable_cache(void)
{
	const msr_t cleared = {.hi = 0, .lo = 0 };

	wrmsr(MTRR_PHYS_BASE(3), cleared);
	wrmsr(MTRR_PHYS_MASK(3), cleared);
}
/*
 * Program variable MTRR 3 with type MTRR_TYPE_WRPROT starting at base.
 *
 * The mask's low word is built from size + 4096 rounded down to a multiple
 * of 4096, with MTRR_DEF_TYPE_EN ORed in; its high word is 0xf.
 */
static void enable_cache(unsigned int base, unsigned int size)
{
	msr_t range_base;
	msr_t range_mask;

	range_base.lo = base | MTRR_TYPE_WRPROT;
	range_base.hi = 0;

	range_mask.lo = ((~(ALIGN_DOWN(size + 4096, 4096) - 1)
			  | MTRR_DEF_TYPE_EN) & 0xffffffff);
	range_mask.hi = 0x0000000f;

	wrmsr(MTRR_PHYS_BASE(3), range_base);
	wrmsr(MTRR_PHYS_MASK(3), range_mask);
}
/*
 * Call clflush() on every 64-byte step from start up to (but excluding)
 * start plus size + 4096 rounded down to a multiple of 4096.
 */
static void flush_cache(u32 start, u32 size)
{
	const u32 end = start + (ALIGN_DOWN(size + 4096, 4096));
	u32 line = start;

	while (line < end) {
		clflush(line);
		line += 64;
	}
}
static void clear_errors(void)
{
pcie_write_config8(NORTHBRIDGE, 0xc0, 0x01);
}
static void write_testing(struct raminfo *info, int totalrank, int flip)
{
int nwrites = 0;
/* in 8-byte units. */
u32 offset;
u8 *base;
base = (u8 *)(totalrank << 28);
for (offset = 0; offset < 9 * 480; offset += 2) {
write32(base + offset * 8, get_etalon2(flip, offset));
write32(base + offset * 8 + 4, get_etalon2(flip, offset));
write32(base + offset * 8 + 8, get_etalon2(flip, offset + 1));
write32(base + offset * 8 + 12, get_etalon2(flip, offset + 1));
nwrites += 4;
if (nwrites >= 320) {
clear_errors();
nwrites = 0;
}
}
}
/*
 * Read back the area written by write_testing() and compare it against
 * get_etalon2().
 *
 * Returns a mask with one bit per byte lane (bit i covers byte i % 4 of the
 * low dword for i < 4, of the high dword otherwise); a set bit means at
 * least one mismatch was seen in that lane. The area is read through a
 * temporary cacheable window that is removed and flushed before returning.
 * Scanning stops early once all eight lanes have failed.
 */
static u8 check_testing(struct raminfo *info, u8 total_rank, int flip)
{
	u32 failxor[2] = { 0, 0 };
	u8 failmask = 0;
	int group, pair, step, lane;

	enable_cache((total_rank << 28), 1728 * 5 * 4);

	for (group = 0; group < 9 && failmask != 0xff; group++) {
		for (pair = 0; pair < 4; pair++) {
			for (step = 0; step < 60; step++) {
				u32 re[4];
				const u32 curroffset =
				    group * 8 * 60 + 2 * pair + 8 * step;
				u32 expect_lo, expect_hi;

				read128((total_rank << 28) | (curroffset << 3),
					(u64 *) re);

				expect_lo = get_etalon2(flip, curroffset);
				expect_hi = get_etalon2(flip, curroffset | 1);

				failxor[0] |= expect_lo ^ re[0];
				failxor[1] |= expect_lo ^ re[1];
				failxor[0] |= expect_hi ^ re[2];
				failxor[1] |= expect_hi ^ re[3];
			}
		}

		/* Fold the accumulated bit differences into per-lane flags. */
		for (lane = 0; lane < 8; lane++) {
			if ((0xff << (8 * (lane % 4))) & failxor[lane / 4])
				failmask |= 1 << lane;
		}
	}

	disable_cache();
	flush_cache((total_rank << 28), 1728 * 5 * 4);

	return failmask;
}
/*
 * Seed words for the second test pattern; get_etalon() indexes this table
 * with bits 20:16 of the address.
 * NOTE(review): the table has 0x18 entries while that index is masked with
 * 0x1f; callers presumably keep it below 0x18, to be confirmed.
 */
const u32 seed1[0x18] = {
0x3a9d5ab5, 0x576cb65b, 0x555773b6, 0x2ab772ee,
0x555556ee, 0x3a9d5ab5, 0x576cb65b, 0x555773b6,
0x2ab772ee, 0x555556ee, 0x5155a555, 0x5155a555,
0x5155a555, 0x5155a555, 0x3a9d5ab5, 0x576cb65b,
0x555773b6, 0x2ab772ee, 0x555556ee, 0x55d6b4a5,
0x366d6b3a, 0x2ae5ddbb, 0x3b9ddbb7, 0x55d6b4a5,
};
/*
 * Pick a secondary seed word: the table index is (a + 1) / 5 for a >= 10
 * and a / 5 otherwise. The word is returned inverted when b is non-zero.
 */
static u32 get_seed2(int a, int b)
{
	const u32 seed2[5] = {
		0x55555555, 0x33333333, 0x2e555a55, 0x55555555,
		0x5b6db6db,
	};
	const int group = (a + (a >= 10 ? 1 : 0)) / 5;
	u32 seed = seed2[group];

	if (b)
		seed = ~seed;

	return seed;
}
/*
 * Compute a 5-bit shift amount: comp2, minus one if bit (x & 7) of
 * seed3[comp5] is set, reduced modulo 32.
 */
static int make_shift(int comp2, int comp5, int x)
{
	const u8 seed3[32] = {
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x38, 0x1c, 0x3c, 0x18, 0x38, 0x38,
		0x38, 0x38, 0x38, 0x38, 0x0f, 0x0f, 0x0f, 0x0f,
		0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
	};
	const int borrow = (seed3[comp5] >> (x & 7)) & 1;

	return (comp2 - borrow) & 0x1f;
}
/*
 * Reference ("etalon") pattern for the second memory test.
 *
 * The address is cut into fields (bit 1, bits 7:3, 11:8, 15:12, 20:16).
 * The result is a byte mask selected through get_seed2()/make_shift(), with
 * bits comp3 and comp3 + 16 replaced by two seed1-derived bits XORed with
 * flip.
 */
static u32 get_etalon(int flip, u32 addr)
{
	const int comp1 = (addr >> 1) & 1;
	const int comp2 = (addr >> 3) & 0x1f;
	const int comp3 = (addr >> 8) & 0xf;
	const int comp4 = (addr >> 12) & 0xf;
	const int comp5 = (addr >> 16) & 0x1f;
	const int selector = (comp3 >> 3) | (comp1 << 2);
	const u32 mask_bit = ~(0x10001 << comp3);
	const u32 byte_seed = get_seed2(comp5, comp4);
	u32 mask_byte = 0;
	u32 part1, part2;
	int byte;

	/* Replacement bits for positions comp3 + 16 and comp3. */
	part2 = ((seed1[comp5] >>
		  make_shift(comp2, comp5, selector | 2)) & 1) ^ flip;
	part1 = ((seed1[comp5] >>
		  make_shift(comp2, comp5, selector | 0)) & 1) ^ flip;

	/* Each of the four bytes is either all ones or all zeroes. */
	for (byte = 0; byte < 4; byte++) {
		if ((byte_seed >>
		     make_shift(comp2, comp5, (byte | (comp1 << 2)))) & 1)
			mask_byte |= 0xff << (8 * byte);
	}

	return (mask_bit & mask_byte)
	    | (part1 << comp3)
	    | (part2 << (comp3 + 16));
}
/*
 * Write 2048 dwords of the get_etalon() pattern into one test block.
 *
 * The target address is assembled from totalrank (bits 28 and up), region
 * (bit 25 and up), block (bit 16 and up) and the dword offset. The pattern
 * depends only on block and the offset.
 */
static void
write_testing_type2(struct raminfo *info, u8 totalrank, u8 region, u8 block,
		    char flip)
{
	const int target = (totalrank << 28) | (region << 25) | (block << 16);
	const int pattern_base = block << 16;
	int word;

	for (word = 0; word < 2048; word++) {
		const int offset = word << 2;

		write32p(target | offset,
			 get_etalon(flip, pattern_base | offset));
	}
}
/*
 * Read back a block written by write_testing_type2() and compare it with
 * get_etalon().
 *
 * Returns a mask with one bit per byte lane (bit i covers byte i % 4 of
 * failxor[i / 4]; even dwords accumulate into failxor[0], odd ones into
 * failxor[1]). A set bit means a mismatch was seen in that lane. The block
 * is read through a temporary cacheable window that is removed and flushed
 * before returning.
 */
static u8
check_testing_type2(struct raminfo *info, u8 totalrank, u8 region, u8 block,
		    char flip)
{
	const int target = (totalrank << 28) | (region << 25) | (block << 16);
	u32 failxor[2] = { 0, 0 };
	u8 failmask = 0;
	int page, dword, line, lane;

	enable_cache(totalrank << 28, 134217728);

	for (page = 0; page < 2 && failmask != 0xff; page++) {
		for (dword = 0; dword < 16; dword++) {
			for (line = 0; line < 64; line++) {
				const u32 addr = target | (page << 12)
				    | (line << 6) | (dword << 2);

				failxor[dword & 1] |=
				    read32p(addr) ^ get_etalon(flip, addr);
			}
		}

		/* Fold the accumulated bit differences into per-lane flags. */
		for (lane = 0; lane < 8; lane++) {
			if ((0xff << (8 * (lane % 4))) & failxor[lane / 4])
				failmask |= 1 << lane;
		}
	}

	disable_cache();
	flush_cache(target, 16384);

	return failmask;
}
/* Return 1 if all eight entries of vals are at least bound, otherwise 0. */
static int check_bounded(unsigned short *vals, u16 bound)
{
	int remaining = 8;

	while (remaining--) {
		if (vals[remaining] < bound)
			return 0;
	}

	return 1;
}
/*
 * Per-lane state of the search performed by do_fsm():
 * BEFORE_USABLE - no passing value seen yet (or the run was broken),
 * AT_USABLE     - inside a run of passing values shorter than the margin,
 * AT_MARGIN     - the run reached the margin; its lower bound is recorded,
 * COMPLETE      - the upper bound is recorded as well.
 */
enum state {
BEFORE_USABLE = 0, AT_USABLE = 1, AT_MARGIN = 2, COMPLETE = 3
};
/* Return 1 if all eight lane states are COMPLETE, otherwise 0. */
static int validate_state(enum state *in)
{
	int lane = 0;

	while (lane < 8 && in[lane] == COMPLETE)
		lane++;

	return lane == 8;
}
/*
 * Advance the per-lane search state machine by one tested value.
 *
 * state:     eight lane states, updated in place.
 * counter:   eight counters of consecutive passes, updated in place.
 * fail_mask: bit n set means lane n failed at val.
 * margin:    number of consecutive passes needed to accept a window.
 * uplimit:   last value of the sweep; an open window is closed there.
 * res_low:   receives the first value of the accepted window per lane.
 * res_high:  receives the last passing value of the window per lane.
 * val:       the value that was just tested.
 */
static void
do_fsm(enum state *state, u16 * counter,
       u8 fail_mask, int margin, int uplimit,
       u8 * res_low, u8 * res_high, u8 val)
{
	int lane;

	for (lane = 0; lane < 8; lane++) {
		const int failed = (fail_mask >> lane) & 1;

		switch (state[lane]) {
		case BEFORE_USABLE:
		case AT_USABLE:
			if (failed) {
				/* Any failure restarts the run. */
				counter[lane] = 0;
				state[lane] = BEFORE_USABLE;
			} else if (state[lane] == BEFORE_USABLE) {
				/* First pass of a new run. */
				counter[lane] = 1;
				state[lane] = AT_USABLE;
			} else if (++counter[lane] >= margin) {
				/* Run is long enough: remember where it began. */
				state[lane] = AT_MARGIN;
				res_low[lane] = val - margin + 1;
			}
			break;

		case AT_MARGIN:
			if (failed) {
				state[lane] = COMPLETE;
				res_high[lane] = val - 1;
				break;
			}
			counter[lane]++;
			if (val == uplimit) {
				state[lane] = COMPLETE;
				res_high[lane] = uplimit;
			}
			break;

		case COMPLETE:
			break;
		}
	}
}
/*
 * Determine, for one rank and one value of reg_178, the usable timing
 * window of each of the eight lanes and store it in
 * timings[reg_178][channel][slot][rank][lane].smallest / .largest.
 *
 * Phase 1 sweeps registers 0x1b3/0x1a3 over up to 0x30 values, runs
 * check_testing() for each and feeds the result to do_fsm() (margin 5,
 * upper limit 47).
 * Phase 2 (only when first_run is 0) programs the window edges into lane
 * timings 0 and 1 and tightens them with check_testing_type2(): the lower
 * edge is raised until each lane passes twice in a row, the upper edge is
 * lowered until each lane passes three times in a row. Finally the trained
 * lane timings 0 and 1 are restored and empty windows are zeroed.
 *
 * niter is the number of blocks tested per check_testing_type2() round.
 */
static void
train_ram_at_178(struct raminfo *info, u8 channel, int slot, int rank,
u8 total_rank, u8 reg_178, int first_run, int niter,
timing_bounds_t * timings)
{
int lane;
enum state state[8];
u16 count[8];
u8 lower_usable[8];
u8 upper_usable[8];
unsigned short num_sucessfully_checked[8];
u8 secondary_total_rank;
u8 reg1b3;
/* NOTE(review): secondary_total_rank is computed here but never read in this function. */
if (info->populated_ranks_mask[1]) {
if (channel == 1)
secondary_total_rank =
info->populated_ranks[1][0][0] +
info->populated_ranks[1][0][1]
+ info->populated_ranks[1][1][0] +
info->populated_ranks[1][1][1];
else
secondary_total_rank = 0;
} else
secondary_total_rank = total_rank;
{
int i;
for (i = 0; i < 8; i++)
state[i] = BEFORE_USABLE;
}
/*
 * On later runs, lanes whose stored window is empty (smallest ==
 * largest) are zeroed and force a new sweep; if every lane already has
 * a window, all states start as COMPLETE and the sweep below is skipped.
 */
if (!first_run) {
int is_all_ok = 1;
for (lane = 0; lane < 8; lane++)
if (timings[reg_178][channel][slot][rank][lane].
smallest ==
timings[reg_178][channel][slot][rank][lane].
largest) {
timings[reg_178][channel][slot][rank][lane].
smallest = 0;
timings[reg_178][channel][slot][rank][lane].
largest = 0;
is_all_ok = 0;
}
if (is_all_ok) {
int i;
for (i = 0; i < 8; i++)
state[i] = COMPLETE;
}
}
/* Phase 1: sweep until all lanes are COMPLETE or 0x30 values were tried. */
for (reg1b3 = 0; reg1b3 < 0x30 && !validate_state(state); reg1b3++) {
u8 failmask = 0;
write_1d0(reg1b3 ^ 32, 0x1b3, 6, 1);
write_1d0(reg1b3 ^ 32, 0x1a3, 6, 1);
failmask = check_testing(info, total_rank, 0);
write_mchbar32(0xfb0, read_mchbar32(0xfb0) | 0x00030000);
do_fsm(state, count, failmask, 5, 47, lower_usable,
upper_usable, reg1b3);
}
/*
 * If the sweep ran at all, reset 0x1b3/0x1a3 and convert the found
 * bounds: the low six bits of the lane's trained timing 0 are added and
 * 32 is subtracted.
 */
if (reg1b3) {
write_1d0(0, 0x1b3, 6, 1);
write_1d0(0, 0x1a3, 6, 1);
for (lane = 0; lane < 8; lane++) {
if (state[lane] == COMPLETE) {
timings[reg_178][channel][slot][rank][lane].
smallest =
lower_usable[lane] +
(info->training.
lane_timings[0][channel][slot][rank][lane]
& 0x3F) - 32;
timings[reg_178][channel][slot][rank][lane].
largest =
upper_usable[lane] +
(info->training.
lane_timings[0][channel][slot][rank][lane]
& 0x3F) - 32;
}
}
}
if (!first_run) {
/*
 * Phase 2a: program the lower edge. Timing 1 keeps its trained
 * distance from timing 0. Lanes without a window get the sentinel
 * -1, which is 0xffff in the unsigned short counter.
 */
for (lane = 0; lane < 8; lane++)
if (state[lane] == COMPLETE) {
write_500(info, channel,
timings[reg_178][channel][slot][rank]
[lane].smallest,
get_timing_register_addr(lane, 0,
slot, rank),
9, 1);
write_500(info, channel,
timings[reg_178][channel][slot][rank]
[lane].smallest +
info->training.
lane_timings[1][channel][slot][rank]
[lane]
-
info->training.
lane_timings[0][channel][slot][rank]
[lane], get_timing_register_addr(lane,
1,
slot,
rank),
9, 1);
num_sucessfully_checked[lane] = 0;
} else
num_sucessfully_checked[lane] = -1;
/*
 * Raise the lower edge of failing lanes by one per round until every
 * lane has at least 2 consecutive passes (the sentinel 0xffff also
 * satisfies the bound). A lane whose window collapses is marked with
 * the sentinel.
 */
do {
u8 failmask = 0;
int i;
for (i = 0; i < niter; i++) {
if (failmask == 0xFF)
break;
failmask |=
check_testing_type2(info, total_rank, 2, i,
0);
failmask |=
check_testing_type2(info, total_rank, 3, i,
1);
}
write_mchbar32(0xfb0,
read_mchbar32(0xfb0) | 0x00030000);
for (lane = 0; lane < 8; lane++)
if (num_sucessfully_checked[lane] != 0xffff) {
if ((1 << lane) & failmask) {
if (timings[reg_178][channel]
[slot][rank][lane].
largest <=
timings[reg_178][channel]
[slot][rank][lane].smallest)
num_sucessfully_checked
[lane] = -1;
else {
num_sucessfully_checked
[lane] = 0;
timings[reg_178]
[channel][slot]
[rank][lane].
smallest++;
write_500(info, channel,
timings
[reg_178]
[channel]
[slot][rank]
[lane].
smallest,
get_timing_register_addr
(lane, 0,
slot, rank),
9, 1);
write_500(info, channel,
timings
[reg_178]
[channel]
[slot][rank]
[lane].
smallest +
info->
training.
lane_timings
[1][channel]
[slot][rank]
[lane]
-
info->
training.
lane_timings
[0][channel]
[slot][rank]
[lane],
get_timing_register_addr
(lane, 1,
slot, rank),
9, 1);
}
} else
num_sucessfully_checked[lane]++;
}
}
while (!check_bounded(num_sucessfully_checked, 2));
/* Phase 2b: the same procedure for the upper edge. */
for (lane = 0; lane < 8; lane++)
if (state[lane] == COMPLETE) {
write_500(info, channel,
timings[reg_178][channel][slot][rank]
[lane].largest,
get_timing_register_addr(lane, 0,
slot, rank),
9, 1);
write_500(info, channel,
timings[reg_178][channel][slot][rank]
[lane].largest +
info->training.
lane_timings[1][channel][slot][rank]
[lane]
-
info->training.
lane_timings[0][channel][slot][rank]
[lane], get_timing_register_addr(lane,
1,
slot,
rank),
9, 1);
num_sucessfully_checked[lane] = 0;
} else
num_sucessfully_checked[lane] = -1;
/*
 * Lower the upper edge of failing lanes by one per round until every
 * lane has at least 3 consecutive passes.
 */
do {
int failmask = 0;
int i;
for (i = 0; i < niter; i++) {
if (failmask == 0xFF)
break;
failmask |=
check_testing_type2(info, total_rank, 2, i,
0);
failmask |=
check_testing_type2(info, total_rank, 3, i,
1);
}
write_mchbar32(0xfb0,
read_mchbar32(0xfb0) | 0x00030000);
for (lane = 0; lane < 8; lane++) {
if (num_sucessfully_checked[lane] != 0xffff) {
if ((1 << lane) & failmask) {
if (timings[reg_178][channel]
[slot][rank][lane].
largest <=
timings[reg_178][channel]
[slot][rank][lane].
smallest) {
num_sucessfully_checked
[lane] = -1;
} else {
num_sucessfully_checked
[lane] = 0;
timings[reg_178]
[channel][slot]
[rank][lane].
largest--;
write_500(info, channel,
timings
[reg_178]
[channel]
[slot][rank]
[lane].
largest,
get_timing_register_addr
(lane, 0,
slot, rank),
9, 1);
write_500(info, channel,
timings
[reg_178]
[channel]
[slot][rank]
[lane].
largest +
info->
training.
lane_timings
[1][channel]
[slot][rank]
[lane]
-
info->
training.
lane_timings
[0][channel]
[slot][rank]
[lane],
get_timing_register_addr
(lane, 1,
slot, rank),
9, 1);
}
} else
num_sucessfully_checked[lane]++;
}
}
}
while (!check_bounded(num_sucessfully_checked, 3));
/*
 * Restore the trained lane timings 0 and 1 and zero any window that
 * ended up empty or inverted.
 */
for (lane = 0; lane < 8; lane++) {
write_500(info, channel,
info->training.
lane_timings[0][channel][slot][rank][lane],
get_timing_register_addr(lane, 0, slot, rank),
9, 1);
write_500(info, channel,
info->training.
lane_timings[1][channel][slot][rank][lane],
get_timing_register_addr(lane, 1, slot, rank),
9, 1);
if (timings[reg_178][channel][slot][rank][lane].
largest <=
timings[reg_178][channel][slot][rank][lane].
smallest) {
timings[reg_178][channel][slot][rank][lane].
largest = 0;
timings[reg_178][channel][slot][rank][lane].
smallest = 0;
}
}
}
}
/*
 * Set register 0x10b to val and shift lane timing 0 of every populated rank
 * to match. Does nothing if the register already holds val.
 *
 * When val is 1 each timing is lowered by lut16[clock_speed_index],
 * clamping at 0; for any other val it is raised by the same amount.
 */
static void set_10b(struct raminfo *info, u8 val)
{
	int channel;
	int slot, rank;
	int lane;

	if (read_1d0(0x10b, 6) == val)
		return;

	write_1d0(val, 0x10b, 6, 1);

	FOR_POPULATED_RANKS_BACKWARDS
		for (lane = 0; lane < 9; lane++) {
			u16 reg_500 = read_500(info, channel,
					       get_timing_register_addr(lane,
									0,
									slot,
									rank),
					       9);

			if (val != 1)
				reg_500 += lut16[info->clock_speed_index];
			else if (lut16[info->clock_speed_index] <= reg_500)
				reg_500 -= lut16[info->clock_speed_index];
			else
				reg_500 = 0;

			write_500(info, channel, reg_500,
				  get_timing_register_addr(lane, 0, slot,
							   rank), 9, 1);
		}
}
static void set_ecc(int onoff)
{
int channel;
for (channel = 0; channel < NUM_CHANNELS; channel++) {
u8 t;
t = read_mchbar8((channel << 10) + 0x5f8);
if (onoff)
t |= 1;
else
t &= ~1;
write_mchbar8((channel << 10) + 0x5f8, t);