cpu/x86,soc/amd: Do SMM relocation via MSR

AMD CPUs have a convenient MSR (SMM_BASE) that allows setting the
SMBASE without ever entering SMM (e.g. at the default 0x30000
address). This feature has been present in all AMD CPUs since at
least the AMD K8. It allows doing the relocation in parallel in
ramstage and without setting up a relocation handler, which results
in a speedup. The more cores there are, the higher the speedup, as
relocation used to happen sequentially. On a 4-core AMD Picasso
system this results in a 33 ms boot speedup.
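
In essence, each core can relocate itself with a single MSR write. A
minimal sketch of the idea (SMM_BASE is MSR C001_0111 per the AMD64
APM Vol. 2; relocate_smbase() is an illustrative helper, not part of
this patch):

    #include <stdint.h>
    #include <cpu/x86/msr.h>

    /* AMD SMM_BASE MSR, per AMD64 APM Vol. 2 (SMM) */
    #define SMM_BASE_MSR 0xc0010111

    /*
     * Each core writes its own staggered SMBASE directly, so no
     * relocation handler has to run inside SMM at 0x30000.
     */
    static void relocate_smbase(uintptr_t staggered_smbase)
    {
            msr_t smm_base = {
                    .lo = staggered_smbase,
                    .hi = 0,
            };
            wrmsr(SMM_BASE_MSR, smm_base);
    }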

TESTED on google/vilboz (Picasso) with CONFIG_SMI_DEBUG: verified that
SMM is correctly relocated and that the BSP correctly enters the SMI
handler.

Change-Id: I9729fb94ed5c18cfd57b8098c838c08a04490e4b
Signed-off-by: Arthur Heymans <arthur@aheymans.xyz>
Reviewed-on: https://review.coreboot.org/c/coreboot/+/64872
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
Reviewed-by: Felix Held <felix-coreboot@felixheld.de>
diff --git a/src/cpu/x86/Kconfig b/src/cpu/x86/Kconfig
index 8d76638..bd3be78 100644
--- a/src/cpu/x86/Kconfig
+++ b/src/cpu/x86/Kconfig
@@ -18,6 +18,15 @@
 	 Allow APs to do other work after initialization instead of going
 	 to sleep.
 
+config X86_SMM_SKIP_RELOCATION_HANDLER
+	bool
+	default n
+	depends on PARALLEL_MP && HAVE_SMI_HANDLER
+	help
+	  Skip SMM relocation using a relocation handler running in SMM,
+	  with a stub at 0x30000. This is useful on platforms that have
+	  an alternative way to set SMBASE.
+
 config LEGACY_SMP_INIT
 	bool
 
diff --git a/src/cpu/x86/mp_init.c b/src/cpu/x86/mp_init.c
index 0d9cd41..2255841 100644
--- a/src/cpu/x86/mp_init.c
+++ b/src/cpu/x86/mp_init.c
@@ -755,6 +755,9 @@
 
 static enum cb_err install_relocation_handler(int num_cpus, size_t save_state_size)
 {
+	if (CONFIG(X86_SMM_SKIP_RELOCATION_HANDLER))
+		return CB_SUCCESS;
+
 	struct smm_loader_params smm_params = {
 		.num_cpus = num_cpus,
 		.cpu_save_state_size = save_state_size,
@@ -1136,9 +1139,13 @@
 	}
 
 	/* Sanity check SMM state. */
-	if (mp_state.perm_smsize != 0 && mp_state.smm_save_state_size != 0 &&
-		mp_state.ops.relocation_handler != NULL)
-		smm_enable();
+	smm_enable();
+	if (mp_state.perm_smsize == 0)
+		smm_disable();
+	if (mp_state.smm_save_state_size == 0)
+		smm_disable();
+	if (!CONFIG(X86_SMM_SKIP_RELOCATION_HANDLER) && mp_state.ops.relocation_handler == NULL)
+		smm_disable();
 
 	if (is_smm_enabled())
 		printk(BIOS_INFO, "Will perform SMM setup.\n");
@@ -1151,12 +1158,14 @@
 	mp_params.flight_plan = &mp_steps[0];
 	mp_params.num_records = ARRAY_SIZE(mp_steps);
 
-	/* Perform backup of default SMM area. */
-	default_smm_area = backup_default_smm_area();
+	/* Perform backup of default SMM area when using the SMM relocation handler. */
+	if (!CONFIG(X86_SMM_SKIP_RELOCATION_HANDLER))
+		default_smm_area = backup_default_smm_area();
 
 	ret = mp_init(cpu_bus, &mp_params);
 
-	restore_default_smm_area(default_smm_area);
+	if (!CONFIG(X86_SMM_SKIP_RELOCATION_HANDLER))
+		restore_default_smm_area(default_smm_area);
 
 	/* Signal callback on success if it's provided. */
 	if (ret == CB_SUCCESS && mp_state.ops.post_mp_init != NULL)
diff --git a/src/include/cpu/x86/mp.h b/src/include/cpu/x86/mp.h
index 8ee7d02..343bd9c4 100644
--- a/src/include/cpu/x86/mp.h
+++ b/src/include/cpu/x86/mp.h
@@ -57,6 +57,9 @@
 	/*
 	 * Optional function to use to trigger SMM to perform relocation. If
 	 * not provided, smm_initiate_relocation() is used.
+	 * This function is called on each CPU.
+	 * On platforms that select X86_SMM_SKIP_RELOCATION_HANDLER and thus
+	 * do not relocate in SMM, this function can be used to relocate CPUs.
 	 */
 	void (*per_cpu_smm_trigger)(void);
 	/*
@@ -66,6 +69,7 @@
 	 * running the relocation handler, current SMBASE of relocation handler,
 	 * and the pre-calculated staggered CPU SMBASE address of the permanent
 	 * SMM handler.
+	 * This function is only called when CONFIG(X86_SMM_SKIP_RELOCATION_HANDLER) is not set.
 	 */
 	void (*relocation_handler)(int cpu, uintptr_t curr_smbase,
 		uintptr_t staggered_smbase);
diff --git a/src/soc/amd/common/block/cpu/Kconfig b/src/soc/amd/common/block/cpu/Kconfig
index 0665e7d..cdd5c3a 100644
--- a/src/soc/amd/common/block/cpu/Kconfig
+++ b/src/soc/amd/common/block/cpu/Kconfig
@@ -57,6 +57,7 @@
 
 config SOC_AMD_COMMON_BLOCK_SMM
 	bool
+	select X86_SMM_SKIP_RELOCATION_HANDLER if HAVE_SMI_HANDLER
 	help
 	  Add common SMM relocation, finalization and handler functionality to
 	  the build.
diff --git a/src/soc/amd/common/block/cpu/smm/smm_relocate.c b/src/soc/amd/common/block/cpu/smm/smm_relocate.c
index 87636df..4d33b65 100644
--- a/src/soc/amd/common/block/cpu/smm/smm_relocate.c
+++ b/src/soc/amd/common/block/cpu/smm/smm_relocate.c
@@ -57,10 +57,8 @@
 	wrmsr(SMM_MASK_MSR, mask);
 }
 
-static void smm_relocation_handler(int cpu, uintptr_t curr_smbase, uintptr_t staggered_smbase)
+static void smm_relocation_handler(void)
 {
-	amd64_smm_state_save_area_t *smm_state;
-
 	uintptr_t tseg_base;
 	size_t tseg_size;
 
@@ -76,8 +74,12 @@
 	msr.hi = (1 << (cpu_phys_address_size() - 32)) - 1;
 	wrmsr(SMM_MASK_MSR, msr);
 
-	smm_state = (void *)(SMM_AMD64_SAVE_STATE_OFFSET + curr_smbase);
-	smm_state->smbase = staggered_smbase;
+	uintptr_t smbase = smm_get_cpu_smbase(cpu_index());
+	msr_t smm_base = {
+		.hi = 0,
+		.lo = smbase
+	};
+	wrmsr(SMM_BASE_MSR, smm_base);
 
 	tseg_valid();
 	lock_smm();
@@ -87,6 +89,6 @@
 	.pre_mp_init = pre_mp_init,
 	.get_cpu_count = get_cpu_count,
 	.get_smm_info = get_smm_info,
-	.relocation_handler = smm_relocation_handler,
+	.per_cpu_smm_trigger = smm_relocation_handler,
 	.post_mp_init = global_smi_enable,
 };