nb/intel/x4x: Use SPI flash to cache raminit results

Stores information obtained from decoding dimms and receive enable
results for future use.

Deprecates using rtc nvram to store receive enable settings.

A notable change is that receive enable results are always reused, not
just on a resume from S3.

This requires cbmem to be initialized a bit earlier, right after
raminit has finished, so that the sysinfo struct can be added to cbmem,
which gets cached to the SPI flash in ramstage.

TESTED on Intel DG43GT with W25Q128.V. With 4 DDR2 DIMMs, time spent in
raminit goes from 133,857ms (using i2c block read to fetch SPD) to
21,071ms when using cached results.

Change-Id: I042dc5c52615d40781d9ef7ecd657ad0bf3ed08f
Signed-off-by: Arthur Heymans <arthur@aheymans.xyz>
Reviewed-on: https://review.coreboot.org/21677
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
Reviewed-by: Felix Held <felix-coreboot@felixheld.de>
diff --git a/src/northbridge/intel/x4x/Kconfig b/src/northbridge/intel/x4x/Kconfig
index 9239637..d9dbdc9 100644
--- a/src/northbridge/intel/x4x/Kconfig
+++ b/src/northbridge/intel/x4x/Kconfig
@@ -28,6 +28,7 @@
 	select RELOCATABLE_RAMSTAGE
 	select HAVE_LINEAR_FRAMEBUFFER if MAINBOARD_DO_NATIVE_VGA_INIT
 	select HAVE_VGA_TEXT_FRAMEBUFFER if MAINBOARD_DO_NATIVE_VGA_INIT
+	select CACHE_MRC_SETTINGS
 
 config CBFS_SIZE
 	hex
diff --git a/src/northbridge/intel/x4x/early_init.c b/src/northbridge/intel/x4x/early_init.c
index fbdbcb4..ab4864b 100644
--- a/src/northbridge/intel/x4x/early_init.c
+++ b/src/northbridge/intel/x4x/early_init.c
@@ -242,15 +242,6 @@
 
 static void x4x_prepare_resume(int s3resume)
 {
-	int cbmem_recovered;
-
-	cbmem_recovered = !cbmem_recovery(s3resume);
-	if (!cbmem_recovered && s3resume) {
-		/* Failed S3 resume, reset to come up cleanly */
-		outb(0x6, 0xcf9);
-		halt();
-	}
-
 	romstage_handoff_init(s3resume);
 }
 
diff --git a/src/northbridge/intel/x4x/raminit.c b/src/northbridge/intel/x4x/raminit.c
index 7bb5f2b..37120df 100644
--- a/src/northbridge/intel/x4x/raminit.c
+++ b/src/northbridge/intel/x4x/raminit.c
@@ -33,12 +33,51 @@
 #include <spd.h>
 #include <string.h>
 #include <device/dram/ddr2.h>
+#include <mrc_cache.h>
+
+#define MRC_CACHE_VERSION 0
 
 static inline int spd_read_byte(unsigned int device, unsigned int address)
 {
 	return smbus_read_byte(device, address);
 }
 
+static enum cb_err verify_spds(const u8 *spd_map,
+			const struct sysinfo *ctrl_cached)
+{
+	int i;
+	u8 raw_spd[256] = {};
+	u16 crc;
+
+	for (i = 0; i < TOTAL_DIMMS; i++) {
+		if (!(spd_map[i]))
+			continue;
+		int len = smbus_read_byte(spd_map[i], 0);
+		if (len < 0 && ctrl_cached->dimms[i].card_type
+				== RAW_CARD_UNPOPULATED)
+			continue;
+		if (len > 0 && ctrl_cached->dimms[i].card_type
+				== RAW_CARD_UNPOPULATED)
+			return CB_ERR;
+
+		if (ctrl_cached->spd_type == DDR2) {
+			i2c_block_read(spd_map[i], 64, 9, &raw_spd[64]);
+			i2c_block_read(spd_map[i], 93, 6, &raw_spd[93]);
+			crc = spd_ddr2_calc_unique_crc(raw_spd, len);
+		} else { /*
+			  * DDR3: TODO ddr2.h and ddr3.h
+			  * cannot be included directly
+			  */
+			crc = 0;
+			// i2c_block_read(spd_map[i], 117, 11, &raw_spd[117]);
+			// crc = spd_ddr3_calc_unique_crc(raw_spd, len);
+		}
+		if (crc != ctrl_cached->dimms[i].spd_crc)
+			return CB_ERR;
+	}
+	return CB_SUCCESS;
+}
+
 struct abs_timings {
 	u32 min_tclk;
 	u32 min_tRAS;
@@ -192,6 +231,9 @@
 				MAX(saved_timings->min_tCLK_cas[i],
 					decoded_dimm.cycle_time[i]);
 	}
+
+	s->dimms[dimm_idx].spd_crc = spd_ddr2_calc_unique_crc(raw_spd,
+					spd_decode_spd_size_ddr2(raw_spd[0]));
 	return CB_SUCCESS;
 }
 
@@ -292,10 +334,10 @@
 		if (s->spd_type == DDR2){
 			printk(BIOS_DEBUG,
 				"Reading SPD using i2c block operation.\n");
-			if (i2c_block_read(device, 0, 64, raw_spd) != 64) {
+			if (i2c_block_read(device, 0, 128, raw_spd) != 128) {
 				printk(BIOS_DEBUG, "i2c block operation failed,"
 					" trying smbus byte operation.\n");
-				for (j = 0; j < 64; j++)
+				for (j = 0; j < 128; j++)
 					raw_spd[j] = spd_read_byte(device, j);
 			}
 			if (ddr2_save_dimminfo(i, raw_spd, &saved_timings, s)) {
@@ -385,8 +427,10 @@
  */
 void sdram_initialize(int boot_path, const u8 *spd_map)
 {
-	struct sysinfo s;
+	struct sysinfo s, *ctrl_cached;
 	u8 reg8;
+	int fast_boot, cbmem_was_inited, cache_not_found;
+	struct region_device rdev;
 
 	printk(BIOS_DEBUG, "Setting up RAM controller.\n");
 
@@ -394,28 +438,62 @@
 
 	memset(&s, 0, sizeof(struct sysinfo));
 
-	s.boot_path = boot_path;
-	s.spd_map[0] = spd_map[0];
-	s.spd_map[1] = spd_map[1];
-	s.spd_map[2] = spd_map[2];
-	s.spd_map[3] = spd_map[3];
+	cache_not_found = mrc_cache_get_current(MRC_TRAINING_DATA,
+						MRC_CACHE_VERSION, &rdev);
 
-	checkreset_ddr2(s.boot_path);
+	if (cache_not_found || (region_device_sz(&rdev) < sizeof(s))) {
+		if (boot_path == BOOT_PATH_RESUME) {
+			/* Failed S3 resume, reset to come up cleanly */
+			outb(0x6, 0xcf9);
+			halt();
+		}
+		ctrl_cached = NULL;
+	} else {
+		ctrl_cached = rdev_mmap_full(&rdev);
+	}
 
-	/* Detect dimms per channel */
-	reg8 = pci_read_config8(PCI_DEV(0, 0, 0), 0xe9);
-	printk(BIOS_DEBUG, "Dimms per channel: %d\n", (reg8 & 0x10) ? 1 : 2);
+	/* verify MRC cache for fast boot */
+	if (boot_path != BOOT_PATH_RESUME && ctrl_cached) {
+		/* check SPD checksum to make sure the DIMMs haven't been
+		 * replaced */
+		fast_boot = verify_spds(spd_map, ctrl_cached) == CB_SUCCESS;
+		if (!fast_boot)
+			printk(BIOS_DEBUG, "SPD checksums don't match,"
+				" dimm's have been replaced\n");
+	} else {
+		fast_boot = boot_path == BOOT_PATH_RESUME;
+	}
 
-	mchinfo_ddr2(&s);
+	if (fast_boot) {
+		printk(BIOS_DEBUG, "Using cached raminit settings\n");
+		memcpy(&s, ctrl_cached, sizeof(s));
+		s.boot_path = boot_path;
+		mchinfo_ddr2(&s);
+		print_selected_timings(&s);
+	} else {
+		s.boot_path = boot_path;
+		s.spd_map[0] = spd_map[0];
+		s.spd_map[1] = spd_map[1];
+		s.spd_map[2] = spd_map[2];
+		s.spd_map[3] = spd_map[3];
+		checkreset_ddr2(s.boot_path);
 
-	find_fsb_speed(&s);
-	decode_spd_select_timings(&s);
-	print_selected_timings(&s);
-	find_dimm_config(&s);
+		/* Detect dimms per channel */
+		reg8 = pci_read_config8(PCI_DEV(0, 0, 0), 0xe9);
+		printk(BIOS_DEBUG, "Dimms per channel: %d\n",
+			(reg8 & 0x10) ? 1 : 2);
+
+		mchinfo_ddr2(&s);
+
+		find_fsb_speed(&s);
+		decode_spd_select_timings(&s);
+		print_selected_timings(&s);
+		find_dimm_config(&s);
+	}
 
 	switch (s.spd_type) {
 	case DDR2:
-		raminit_ddr2(&s);
+		raminit_ddr2(&s, fast_boot);
 		break;
 	case DDR3:
 		// FIXME Add: raminit_ddr3(&s);
@@ -431,4 +509,14 @@
 	reg8 = pci_read_config8(PCI_DEV(0, 0, 0), 0xf4);
 	pci_write_config8(PCI_DEV(0, 0, 0), 0xf4, reg8 | 1);
 	printk(BIOS_DEBUG, "RAM initialization finished.\n");
+
+	cbmem_was_inited = !cbmem_recovery(s.boot_path == BOOT_PATH_RESUME);
+	if (!fast_boot)
+		mrc_cache_stash_data(MRC_TRAINING_DATA, MRC_CACHE_VERSION,
+					&s, sizeof(s));
+	if (s.boot_path == BOOT_PATH_RESUME && !cbmem_was_inited) {
+		/* Failed S3 resume, reset to come up cleanly */
+		outb(0x6, 0xcf9);
+		halt();
+	}
 }
diff --git a/src/northbridge/intel/x4x/raminit_ddr2.c b/src/northbridge/intel/x4x/raminit_ddr2.c
index 022cbaa..336f77e 100644
--- a/src/northbridge/intel/x4x/raminit_ddr2.c
+++ b/src/northbridge/intel/x4x/raminit_ddr2.c
@@ -1045,77 +1045,46 @@
 	printk(BIOS_DEBUG, "MRS done\n");
 }
 
-static void sdram_save_receive_enable(void)
+static void sdram_recover_receive_enable(const struct sysinfo *s)
 {
-	int i = 0;
-	u16 reg16;
-	u8 values[18];
-	u8 lane, ch;
-
-	FOR_EACH_CHANNEL(ch) {
-		lane = 0;
-		while (lane < 8) {
-			values[i] = (MCHBAR8(0x400*ch + 0x560 + lane++ * 4) & 0xf);
-			values[i++] |= (MCHBAR8(0x400*ch + 0x560 + lane++ * 4) & 0xf) << 4;
-		}
-		values[i++] = (MCHBAR32(0x400*ch + 0x248) >> 16) & 0xf;
-		reg16 = MCHBAR16(0x400*ch + 0x5fa);
-		values[i++] = reg16 & 0xff;
-		values[i++] = (reg16 >> 8) & 0xff;
-		reg16 = MCHBAR16(0x400*ch + 0x58c);
-		values[i++] = reg16 & 0xff;
-		values[i++] = (reg16 >> 8) & 0xff;
-	}
-
-	for (i = 0; i < ARRAY_SIZE(values); i++)
-		cmos_write(values[i], 128 + i);
-}
-
-static void sdram_recover_receive_enable(void)
-{
-	u8 i;
 	u32 reg32;
-	u16 reg16;
-	u8 values[18];
-	u8 ch, lane;
+	u16 medium, coarse_offset;
+	u8 pi_tap;
+	int lane, channel;
 
-	for (i = 0; i < ARRAY_SIZE(values); i++)
-		values[i] = cmos_read(128 + i);
+	FOR_EACH_POPULATED_CHANNEL(s->dimms, channel) {
+		medium = 0;
+		coarse_offset = 0;
+		reg32 = MCHBAR32(0x400 * channel + 0x248);
+		reg32 &= ~0xf0000;
+		reg32 |= s->rcven_t[channel].min_common_coarse << 16;
+		MCHBAR32(0x400 * channel + 0x248) = reg32;
 
-	i = 0;
-	FOR_EACH_CHANNEL(ch) {
-		lane = 0;
-		while (lane < 8) {
-			MCHBAR8(0x400*ch + 0x560 + lane++ * 4) = 0x70 |
-				(values[i] & 0xf);
-			MCHBAR8(0x400*ch + 0x560 + lane++ * 4) = 0x70 |
-				((values[i++] >> 4) & 0xf);
+		for (lane = 0; lane < 8; lane++) {
+			medium |= s->rcven_t[channel].medium[lane]
+				<< (lane * 2);
+			coarse_offset |=
+				(s->rcven_t[channel].coarse_offset[lane] & 0x3)
+				<< (lane * 2);
+
+			pi_tap = MCHBAR8(0x400 * channel + 0x560 + lane * 4);
+			pi_tap &= ~0x7f;
+			pi_tap |= s->rcven_t[channel].tap[lane];
+			pi_tap |= s->rcven_t[channel].pi[lane] << 4;
+			MCHBAR8(0x400 * channel + 0x560 + lane * 4) = pi_tap;
 		}
-		reg32 = (MCHBAR32(0x400*ch + 0x248) & ~0xf0000)
-		  | ((values[i++] & 0xf) << 16);
-		MCHBAR32(0x400*ch + 0x248) = reg32;
-		reg16 = values[i++];
-		reg16 |= values[i++] << 8;
-		MCHBAR16(0x400*ch + 0x5fa) = reg16;
-		reg16 = values[i++];
-		reg16 |= values[i++] << 8;
-		MCHBAR16(0x400*ch + 0x58c) = reg16;
+		MCHBAR16(0x400 * channel + 0x58c) = medium;
+		MCHBAR16(0x400 * channel + 0x5fa) = coarse_offset;
 	}
 }
 
-static void sdram_program_receive_enable(struct sysinfo *s)
+static void sdram_program_receive_enable(struct sysinfo *s, int fast_boot)
 {
-	/* enable upper CMOS */
-	RCBA32(0x3400) = (1 << 2);
-
 	/* Program Receive Enable Timings */
-	if ((s->boot_path == BOOT_PATH_WARM_RESET)
-		|| (s->boot_path == BOOT_PATH_RESUME)) {
-		sdram_recover_receive_enable();
-	} else {
+	if (fast_boot)
+		sdram_recover_receive_enable(s);
+	else
 		rcven(s);
-		sdram_save_receive_enable();
-	}
 }
 
 static void dradrb_ddr2(struct sysinfo *s)
@@ -1470,7 +1439,7 @@
 		MCHBAR8(0x561 + (lane << 2)) = MCHBAR8(0x561 + (lane << 2)) & ~(1 << 3);
 }
 
-void raminit_ddr2(struct sysinfo *s)
+void raminit_ddr2(struct sysinfo *s, int fast_boot)
 {
 	u8 ch;
 	u8 r, bank;
@@ -1613,7 +1582,7 @@
 	}
 
 	// Receive enable
-	sdram_program_receive_enable(s);
+	sdram_program_receive_enable(s, fast_boot);
 	printk(BIOS_DEBUG, "Done rcven\n");
 
 	// Finish rcven
diff --git a/src/northbridge/intel/x4x/rcven.c b/src/northbridge/intel/x4x/rcven.c
index cc45aa9..30ec4be 100644
--- a/src/northbridge/intel/x4x/rcven.c
+++ b/src/northbridge/intel/x4x/rcven.c
@@ -304,7 +304,7 @@
 	return 0;
 }
 
-void rcven(const struct sysinfo *s)
+void rcven(struct sysinfo *s)
 {
 	int i;
 	u8 channel, lane, reg8;
@@ -354,6 +354,7 @@
 				mincoarse = timing[lane].coarse;
 		}
 		printk(BIOS_DEBUG, "Found min coarse value = %d\n", mincoarse);
+		s->rcven_t[channel].min_common_coarse = mincoarse;
 		printk(BIOS_DEBUG, "Receive enable, final timings:\n");
 		/* Normalise coarse */
 		for (lane = 0; lane < 8; lane++) {
@@ -365,6 +366,10 @@
 				"medium: %d; tap: %d\n",
 				channel, lane, reg8, timing[lane].medium,
 				timing[lane].tap);
+			s->rcven_t[channel].coarse_offset[lane] = reg8;
+			s->rcven_t[channel].medium[lane] = timing[lane].medium;
+			s->rcven_t[channel].tap[lane] = timing[lane].tap;
+			s->rcven_t[channel].pi[lane] = timing[lane].pi;
 			MCHBAR16(0x400 * channel + 0x5fa) =
 				(MCHBAR16(0x400 * channel + 0x5fa) &
 				~(3 << (lane * 2))) | (reg8 << (lane * 2));
diff --git a/src/northbridge/intel/x4x/x4x.h b/src/northbridge/intel/x4x/x4x.h
index cbb1853..70c6525 100644
--- a/src/northbridge/intel/x4x/x4x.h
+++ b/src/northbridge/intel/x4x/x4x.h
@@ -279,6 +279,15 @@
 	unsigned int	ranks;
 	unsigned int	rows;
 	unsigned int	cols;
+	u16             spd_crc;
+};
+
+struct rcven_timings {
+	u8 min_common_coarse;
+	u8 coarse_offset[8];
+	u8 medium[8];
+	u8 tap[8];
+	u8 pi[8];
 };
 
 /* The setup is up to two DIMMs per channel */
@@ -293,6 +302,7 @@
 	struct timings	selected_timings;
 	struct dimminfo	dimms[4];
 	u8		spd_map[4];
+	struct rcven_timings rcven_t[TOTAL_CHANNELS];
 };
 #define BOOT_PATH_NORMAL	0
 #define BOOT_PATH_WARM_RESET	1
@@ -331,8 +341,8 @@
 u32 decode_igd_gtt_size(u32 gsm);
 u8 decode_pciebar(u32 *const base, u32 *const len);
 void sdram_initialize(int boot_path, const u8 *spd_map);
-void raminit_ddr2(struct sysinfo *);
-void rcven(const struct sysinfo *);
+void raminit_ddr2(struct sysinfo *s, int fast_boot);
+void rcven(struct sysinfo *s);
 u32 fsb2mhz(u32 speed);
 u32 ddr2mhz(u32 speed);