cpu/intel/model_2065x: Use parallel MP init

Tested on a ThinkPad X201 with an i7 M620 CPU (dual-core with Hyper-Threading).
Boots ~28ms faster.

Change-Id: I56b352f9d76ee58f5c82cd431a4e0fa206f848a0
Signed-off-by: Arthur Heymans <arthur@aheymans.xyz>
Reviewed-on: https://review.coreboot.org/c/coreboot/+/26297
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
Reviewed-by: Patrick Rudolph <siro@das-labor.org>
diff --git a/src/cpu/intel/model_2065x/model_2065x_init.c b/src/cpu/intel/model_2065x/model_2065x_init.c
index f08a058..d8de7c0 100644
--- a/src/cpu/intel/model_2065x/model_2065x_init.c
+++ b/src/cpu/intel/model_2065x/model_2065x_init.c
@@ -15,6 +15,7 @@
  * GNU General Public License for more details.
  */
 
+#include <assert.h>
 #include <console/console.h>
 #include <device/device.h>
 #include <arch/acpi.h>
@@ -22,6 +23,7 @@
 #include <cpu/x86/mtrr.h>
 #include <cpu/x86/msr.h>
 #include <cpu/x86/lapic.h>
+#include <cpu/x86/mp.h>
 #include <cpu/intel/microcode.h>
 #include <cpu/intel/speedstep.h>
 #include <cpu/intel/turbo.h>
@@ -109,29 +111,6 @@
 	{ 0 }
 };
 
-int cpu_get_apic_id_map(int *apic_id_map)
-{
-	int i;
-	struct cpuid_result result;
-	unsigned int threads_per_package, threads_per_core;
-
-	/* Logical processors (threads) per core */
-	result = cpuid_ext(0xb, 0);
-	threads_per_core = result.ebx & 0xffff;
-
-	/* Logical processors (threads) per package */
-	result = cpuid_ext(0xb, 1);
-	threads_per_package = result.ebx & 0xffff;
-
-	for (i = 0; i < threads_per_package && i < CONFIG_MAX_CPUS; ++i) {
-		apic_id_map[i] = (i % threads_per_core)
-			+ ((i / threads_per_core) << 2);
-	}
-
-	return threads_per_package;
-}
-
-
 int cpu_config_tdp_levels(void)
 {
 	msr_t platform_info;
@@ -250,58 +229,6 @@
 		wrmsr(IA32_MC0_STATUS + (i * 4), msr);
 }
 
-/*
- * Initialize any extra cores/threads in this package.
- */
-static void intel_cores_init(struct device *cpu)
-{
-	struct cpuid_result result;
-	unsigned int threads_per_package, threads_per_core, i;
-
-	/* Logical processors (threads) per core */
-	result = cpuid_ext(0xb, 0);
-	threads_per_core = result.ebx & 0xffff;
-
-	/* Logical processors (threads) per package */
-	result = cpuid_ext(0xb, 1);
-	threads_per_package = result.ebx & 0xffff;
-
-	/* Only initialize extra cores from BSP */
-	if (cpu->path.apic.apic_id)
-		return;
-
-	printk(BIOS_DEBUG, "CPU: %u has %u cores, %u threads per core\n",
-	       cpu->path.apic.apic_id, threads_per_package/threads_per_core,
-	       threads_per_core);
-
-	for (i = 1; i < threads_per_package; ++i) {
-		struct device_path cpu_path;
-		struct device *new;
-
-		/* Build the CPU device path */
-		cpu_path.type = DEVICE_PATH_APIC;
-		cpu_path.apic.apic_id =
-		  cpu->path.apic.apic_id + (i % threads_per_core)
-			+ ((i / threads_per_core) << 2);
-
-		/* Allocate the new CPU device structure */
-		new = alloc_dev(cpu->bus, &cpu_path);
-		if (!new)
-			continue;
-
-		printk(BIOS_DEBUG, "CPU: %u has core %u\n",
-		       cpu->path.apic.apic_id,
-		       new->path.apic.apic_id);
-
-		/* Start the new CPU */
-		if (is_smp_boot() && !start_cpu(new)) {
-			/* Record the error in cpu? */
-			printk(BIOS_ERR, "CPU %u would not start!\n",
-			       new->path.apic.apic_id);
-		}
-	}
-}
-
 static void model_2065x_init(struct device *cpu)
 {
 	char processor_name[49];
@@ -309,8 +236,6 @@
 	/* Turn on caching if we haven't already */
 	x86_enable_cache();
 
-	intel_update_microcode_from_cbfs();
-
 	/* Clear out pending MCEs */
 	configure_mca();
 
@@ -320,10 +245,6 @@
 	printk(BIOS_INFO, "CPU:lapic=%ld, boot_cpu=%d\n", lapicid(),
 		boot_cpu());
 
-	/* Setup MTRRs based on physical address size */
-	x86_setup_mtrrs_with_detect();
-	x86_mtrr_check();
-
 	/* Setup Page Attribute Tables (PAT) */
 	// TODO set up PAT
 
@@ -348,9 +269,75 @@
 
 	/* Enable Turbo */
 	enable_turbo();
+}
 
-	/* Start up extra cores */
-	intel_cores_init(cpu);
+/* MP initialization support. */
+static const void *microcode_patch; /* set by get_microcode_info() */
+
+static void pre_mp_init(void)
+{
+	/* Set up MTRRs based on physical address size, then check them. */
+	x86_setup_mtrrs_with_detect();
+	x86_mtrr_check();
+}
+
+static int get_cpu_count(void)
+{
+	/* Low dword: threads in bits 15:0, cores in bits 31:16. */
+	const msr_t msr = rdmsr(CORE_THREAD_COUNT_MSR);
+	/* Unsigned so the arguments match the %u conversions below. */
+	const unsigned int num_threads = (msr.lo >> 0) & 0xffff;
+	const unsigned int num_cores = (msr.lo >> 16) & 0xffff;
+
+	printk(BIOS_DEBUG, "CPU has %u cores, %u threads enabled.\n",
+	       num_cores, num_threads);
+
+	/* Report the thread count (at most 0xffff, so it fits in an int). */
+	return num_threads;
+}
+
+static void get_microcode_info(const void **microcode, int *parallel)
+{
+	microcode_patch = intel_microcode_find(); /* cached for later reload */
+	*microcode = microcode_patch;
+	*parallel = 1;
+}
+
+static void per_cpu_smm_trigger(void)
+{
+	/* Relocate the SMM handler. */
+	smm_relocate();
+
+	/* SMM relocation requires a 2nd load of the cached microcode patch. */
+	intel_microcode_load_unlocked(microcode_patch);
+}
+
+static void post_mp_init(void)
+{
+	/* Now that all APs, as well as the BSP, have been relocated, let SMIs
+	 * start flowing. */
+	southbridge_smm_init();
+
+	/* Lock down the SMRAM space. */
+	smm_lock();
+}
+
+
+static const struct mp_ops mp_ops = { /* passed to mp_init_with_smm() */
+	.pre_mp_init = pre_mp_init,
+	.get_cpu_count = get_cpu_count,
+	.get_smm_info = smm_info,
+	.get_microcode_info = get_microcode_info,
+	.pre_mp_smm_init = smm_initialize,
+	.per_cpu_smm_trigger = per_cpu_smm_trigger,
+	.relocation_handler = smm_relocation_handler,
+	.post_mp_init = post_mp_init,
+};
+
+void bsp_init_and_start_aps(struct bus *cpu_bus)
+{
+	if (mp_init_with_smm(cpu_bus, &mp_ops)) /* non-zero => log error */
+		printk(BIOS_ERR, "MP initialization failure.\n");
 }
 
 static struct device_operations cpu_dev_ops = {