soc/intel/apollolake: Implement stage cache to improve resume time

This patch enables the stage cache to save ~40ms during S3 resume.
It saves the relocated ramstage in the stage cache and restores it
on resume so that ramstage does not have to be loaded and relocated
again during the resume flow. Stage cache functionality is added to
the postcar stage since ramstage is loaded from postcar.

BUG=chrome-os-partner:56941
BRANCH=none
TEST=built for Reef and tested ramstage being cached

Change-Id: I1551fd0faca536bd8c8656f0a8ec7f900aae1f72
Signed-off-by: Brandon Breitenstein <brandon.breitenstein@intel.com>
Reviewed-on: https://review.coreboot.org/16833
Tested-by: build bot (Jenkins)
Reviewed-by: Aaron Durbin <adurbin@chromium.org>
Reviewed-by: Paul Menzel <paulepanter@users.sourceforge.net>
diff --git a/src/drivers/intel/fsp2_0/Makefile.inc b/src/drivers/intel/fsp2_0/Makefile.inc
index 3986fe6..beeec7c 100644
--- a/src/drivers/intel/fsp2_0/Makefile.inc
+++ b/src/drivers/intel/fsp2_0/Makefile.inc
@@ -37,6 +37,8 @@
 ramstage-$(CONFIG_DISPLAY_UPD_DATA) += upd_display.c
 ramstage-y += util.c
 
+postcar-$(CONFIG_CACHE_RELOCATED_RAMSTAGE_OUTSIDE_CBMEM) += stage_cache.c
+
 CPPFLAGS_common += -I$(src)/drivers/intel/fsp2_0/include
 
 # Add FSP blobs into cbfs. SoC code may supply  additional options with
diff --git a/src/lib/Makefile.inc b/src/lib/Makefile.inc
index 394491c..67f8364 100644
--- a/src/lib/Makefile.inc
+++ b/src/lib/Makefile.inc
@@ -159,6 +159,7 @@
 ifeq ($(CONFIG_CACHE_RELOCATED_RAMSTAGE_OUTSIDE_CBMEM),y)
 ramstage-y += ext_stage_cache.c
 romstage-y += ext_stage_cache.c
+postcar-y += ext_stage_cache.c
 else
 ramstage-$(CONFIG_RELOCATABLE_RAMSTAGE) += cbmem_stage_cache.c
 romstage-$(CONFIG_RELOCATABLE_RAMSTAGE) += cbmem_stage_cache.c
diff --git a/src/lib/ext_stage_cache.c b/src/lib/ext_stage_cache.c
index 770097f..2a99188 100644
--- a/src/lib/ext_stage_cache.c
+++ b/src/lib/ext_stage_cache.c
@@ -126,3 +126,4 @@
 
 ROMSTAGE_CBMEM_INIT_HOOK(stage_cache_setup)
 RAMSTAGE_CBMEM_INIT_HOOK(stage_cache_setup)
+POSTCAR_CBMEM_INIT_HOOK(stage_cache_setup)
diff --git a/src/soc/intel/apollolake/Kconfig b/src/soc/intel/apollolake/Kconfig
index 3a23dbd..6c178c3 100644
--- a/src/soc/intel/apollolake/Kconfig
+++ b/src/soc/intel/apollolake/Kconfig
@@ -26,6 +26,7 @@
 	select SOC_INTEL_COMMON_NHLT
 	# Misc options
 	select C_ENVIRONMENT_BOOTBLOCK
+	select CACHE_RELOCATED_RAMSTAGE_OUTSIDE_CBMEM if RELOCATABLE_RAMSTAGE
 	select COLLECT_TIMESTAMPS
 	select COMMON_FADT
 	select GENERIC_GPIO_LIB
@@ -34,7 +35,6 @@
 	select MMCONF_SUPPORT
 	select MMCONF_SUPPORT_DEFAULT
 	select NO_FIXED_XIP_ROM_SIZE
-	select NO_STAGE_CACHE
 	select NO_XIP_EARLY_STAGES
 	select PARALLEL_MP
 	select PCIEXP_ASPM
@@ -254,4 +254,8 @@
 	bool
 	default n
 
+config SMM_RESERVED_SIZE
+	hex
+	default 0x100000
+
 endif
diff --git a/src/soc/intel/apollolake/cpu.c b/src/soc/intel/apollolake/cpu.c
index f3cf050..e67842c 100644
--- a/src/soc/intel/apollolake/cpu.c
+++ b/src/soc/intel/apollolake/cpu.c
@@ -102,19 +102,23 @@
 {
 	void *smm_base;
 	size_t smm_size;
+	void *handler_base;
+	size_t handler_size;
 
 	/* All range registers are aligned to 4KiB */
 	const uint32_t rmask = ~((1 << 12) - 1);
 
 	/* Initialize global tracking state. */
 	smm_region(&smm_base, &smm_size);
+	smm_subregion(SMM_SUBREGION_HANDLER, &handler_base, &handler_size);
+
 	relo_attrs.smbase = (uint32_t)smm_base;
 	relo_attrs.smrr_base = relo_attrs.smbase | MTRR_TYPE_WRBACK;
 	relo_attrs.smrr_mask = ~(smm_size - 1) & rmask;
 	relo_attrs.smrr_mask |= MTRR_PHYS_MASK_VALID;
 
-	*perm_smbase = relo_attrs.smbase;
-	*perm_smsize = smm_size - CONFIG_SMM_RESERVED_SIZE;
+	*perm_smbase = (uintptr_t)handler_base;
+	*perm_smsize = handler_size;
 	*smm_save_state_size = sizeof(em64t100_smm_state_save_area_t);
 }
 
diff --git a/src/soc/intel/apollolake/include/soc/smm.h b/src/soc/intel/apollolake/include/soc/smm.h
index 7a9846e..740d02b 100644
--- a/src/soc/intel/apollolake/include/soc/smm.h
+++ b/src/soc/intel/apollolake/include/soc/smm.h
@@ -20,6 +20,7 @@
 
 #include <stdint.h>
 #include <soc/gpio.h>
+#include <fsp/memmap.h>
 
 /* These helpers are for performing SMM relocation. */
 void southbridge_clear_smi_status(void);
@@ -35,7 +36,4 @@
 /* Mainboard handler for GPI SMIs*/
 void mainboard_smi_gpi_handler(const struct gpi_status *sts);
 
-/* Fills in the arguments for the entire SMM region covered by chipset
- * protections. e.g. TSEG. */
-void smm_region(void **start, size_t *size);
 #endif
diff --git a/src/soc/intel/apollolake/memmap.c b/src/soc/intel/apollolake/memmap.c
index ea6f447..cccbffd 100644
--- a/src/soc/intel/apollolake/memmap.c
+++ b/src/soc/intel/apollolake/memmap.c
@@ -24,6 +24,7 @@
 #define __SIMPLE_DEVICE__
 
 #include <arch/io.h>
+#include <assert.h>
 #include <cbmem.h>
 #include <device/pci.h>
 #include <soc/northbridge.h>
@@ -52,3 +53,34 @@
 	*start = (void *)smm_region_start();
 	*size = smm_region_size();
 }
+
+int smm_subregion(int sub, void **start, size_t *size)
+{
+	uintptr_t sub_base;
+	size_t sub_size;
+	const size_t cache_size = CONFIG_SMM_RESERVED_SIZE;
+
+	sub_base = smm_region_start();
+	sub_size = smm_region_size();
+
+	assert(sub_size > CONFIG_SMM_RESERVED_SIZE);
+
+	switch (sub) {
+	case SMM_SUBREGION_HANDLER:
+		/* Handler spans from the TSEG base up to the cache. */
+		sub_size -= cache_size;
+		break;
+	case SMM_SUBREGION_CACHE:
+		/* External cache occupies the top of TSEG. */
+		sub_base += sub_size - cache_size;
+		sub_size = cache_size;
+		break;
+	default:
+		return -1;
+	}
+
+	*start = (void *)sub_base;
+	*size = sub_size;
+
+	return 0;
+}
diff --git a/src/soc/intel/apollolake/romstage.c b/src/soc/intel/apollolake/romstage.c
index b9733de..2384ceb 100644
--- a/src/soc/intel/apollolake/romstage.c
+++ b/src/soc/intel/apollolake/romstage.c
@@ -29,6 +29,7 @@
 #include <device/pci_def.h>
 #include <device/resource.h>
 #include <fsp/api.h>
+#include <fsp/memmap.h>
 #include <fsp/util.h>
 #include <soc/iomap.h>
 #include <soc/northbridge.h>
@@ -105,6 +106,9 @@
 	uintptr_t top_of_ram;
 	bool s3wake;
 	struct chipset_power_state *ps = car_get_var_ptr(&power_state);
+	void *smm_base;
+	size_t smm_size;
+	uintptr_t tseg_base;
 
 	timestamp_add_now(TS_START_ROMSTAGE);
 
@@ -135,6 +139,17 @@
 		postcar_frame_add_mtrr(&pcf, -CONFIG_ROM_SIZE, CONFIG_ROM_SIZE,
 					MTRR_TYPE_WRPROT);
 
+	/*
+	 * Cache the TSEG region at the top of RAM. This region is
+	 * not restricted to SMM mode until SMM has been relocated.
+	 * Setting the region to cacheable provides faster access
+	 * when relocating the SMM handler as well as when using the
+	 * TSEG region for other purposes.
+	 */
+	smm_region(&smm_base, &smm_size);
+	tseg_base = (uintptr_t)smm_base;
+	postcar_frame_add_mtrr(&pcf, tseg_base, smm_size, MTRR_TYPE_WRBACK);
+
 	run_postcar_phase(&pcf);
 }