amd/stoneyridge: Convert MP init to mp_init_with_smm

Change the Stoney Ridge SOC to a more modern method for setting up
the multiple cores.

Add a new cpu.c file for most of the processor initialization.  Build
mp_ops with the necessary callbacks.  Note also that this patch removes
cpu_bus_scan.  Rather than manually find CPUs and add them to the
devicetree, allow this to be done automatically in the generic
mp_init.c file.

SMM information is left blank in mp_ops to avoid having mp_init.c
install a handler at this time.  A later patch will add TSEG SMM
capabilities for the APU.

This patch also contains a hack to mask the behavior of AGESA which
configures the MTRRs and Tom2ForceMemTypeWB coming out of AmdInitPost.
The hack immediately changes all WB variable MTRRs, on the BSP, to UC
so that all writes to memory space will make it to the DRAM.

BUG=b:66200075

Change-Id: Ie54295cb00c6835947456e8818a289b7eb260914
Signed-off-by: Marshall Dawson <marshalldawson3rd@gmail.com>
Reviewed-on: https://review.coreboot.org/21498
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
Reviewed-by: Kyösti Mälkki <kyosti.malkki@gmail.com>
Reviewed-by: Aaron Durbin <adurbin@chromium.org>
diff --git a/src/soc/amd/stoneyridge/Kconfig b/src/soc/amd/stoneyridge/Kconfig
index 1e13cc0..f708750 100644
--- a/src/soc/amd/stoneyridge/Kconfig
+++ b/src/soc/amd/stoneyridge/Kconfig
@@ -49,6 +49,8 @@
 	select SOC_AMD_COMMON_BLOCK_CAR
 	select C_ENVIRONMENT_BOOTBLOCK
 	select BOOTBLOCK_CONSOLE
+	select RELOCATABLE_MODULES
+	select PARALLEL_MP
 
 config VBOOT
 	select AMDFW_OUTSIDE_CBFS
diff --git a/src/soc/amd/stoneyridge/Makefile.inc b/src/soc/amd/stoneyridge/Makefile.inc
index 06d9f58..5d89736 100644
--- a/src/soc/amd/stoneyridge/Makefile.inc
+++ b/src/soc/amd/stoneyridge/Makefile.inc
@@ -64,6 +64,7 @@
 verstage-y += tsc_freq.c
 
 ramstage-y += chip.c
+ramstage-y += cpu.c
 ramstage-$(CONFIG_USBDEBUG) += enable_usbdebug.c
 ramstage-$(CONFIG_HAVE_ACPI_TABLES) += acpi.c
 ramstage-y += fixme.c
diff --git a/src/soc/amd/stoneyridge/chip.c b/src/soc/amd/stoneyridge/chip.c
index 14f76b7..325d2ea 100644
--- a/src/soc/amd/stoneyridge/chip.c
+++ b/src/soc/amd/stoneyridge/chip.c
@@ -18,20 +18,16 @@
 #include <cpu/cpu.h>
 #include <device/device.h>
 #include <device/pci.h>
-#include <soc/southbridge.h>
+#include <soc/cpu.h>
 #include <soc/northbridge.h>
-
-static void cpu_bus_init(device_t dev)
-{
-	initialize_cpus(dev->link_list);
-}
+#include <soc/southbridge.h>
 
 struct device_operations cpu_bus_ops = {
 	.read_resources	  = DEVICE_NOOP,
 	.set_resources	  = DEVICE_NOOP,
 	.enable_resources = DEVICE_NOOP,
-	.init		  = &cpu_bus_init,
-	.scan_bus	  = cpu_bus_scan,
+	.init		  = stoney_init_cpus,
+	.scan_bus	  = NULL,
 	.acpi_fill_ssdt_generator = generate_cpu_entries,
 };
 
diff --git a/src/soc/amd/stoneyridge/cpu.c b/src/soc/amd/stoneyridge/cpu.c
new file mode 100644
index 0000000..0df3e30
--- /dev/null
+++ b/src/soc/amd/stoneyridge/cpu.c
@@ -0,0 +1,54 @@
+/*
+ * This file is part of the coreboot project.
+ *
+ * Copyright (C) 2015-2016 Intel Corp.
+ * Copyright (C) 2017 Advanced Micro Devices, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <cpu/x86/mp.h>
+#include <cpu/x86/mtrr.h>
+#include <device/device.h>
+#include <soc/pci_devs.h>
+#include <soc/cpu.h>
+#include <soc/northbridge.h>
+#include <console/console.h>
+
+/*
+ * Do essential initialization tasks before APs can be fired up -
+ *
+ *  1. Prevent race condition in MTRR solution. Enable MTRRs on the BSP. This
+ *  creates the MTRR solution that the APs will use. Otherwise APs will try to
+ *  apply the incomplete solution as the BSP is calculating it.
+ */
+static void pre_mp_init(void)
+{
+	x86_setup_mtrrs_with_detect();
+	x86_mtrr_check();
+}
+
+static int get_cpu_count(void)
+{
+	device_t nb = dev_find_slot(0, HT_DEVFN);
+	return (pci_read_config16(nb, D18F0_CPU_CNT) & CPU_CNT_MASK) + 1;
+}
+
+static const struct mp_ops mp_ops = {
+	.pre_mp_init = pre_mp_init,
+	.get_cpu_count = get_cpu_count,
+};
+
+void stoney_init_cpus(struct device *dev)
+{
+	/* Clear for take-off */
+	if (mp_init_with_smm(dev->link_list, &mp_ops) < 0)
+		printk(BIOS_ERR, "MP initialization failure.\n");
+}
diff --git a/src/soc/amd/stoneyridge/include/soc/cpu.h b/src/soc/amd/stoneyridge/include/soc/cpu.h
new file mode 100644
index 0000000..d2c412f
--- /dev/null
+++ b/src/soc/amd/stoneyridge/include/soc/cpu.h
@@ -0,0 +1,21 @@
+/*
+ * This file is part of the coreboot project.
+ *
+ * Copyright (C) 2017 Advanced Micro Devices, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __STONEYRIDGE_CPU_H__
+#define __STONEYRIDGE_CPU_H__
+
+void stoney_init_cpus(struct device *dev);
+
+#endif /* __STONEYRIDGE_CPU_H__ */
diff --git a/src/soc/amd/stoneyridge/include/soc/northbridge.h b/src/soc/amd/stoneyridge/include/soc/northbridge.h
index 5984637..7e9b51a 100644
--- a/src/soc/amd/stoneyridge/include/soc/northbridge.h
+++ b/src/soc/amd/stoneyridge/include/soc/northbridge.h
@@ -20,12 +20,16 @@
 #include <arch/io.h>
 #include <device/device.h>
 
+/* D18F0 - HT Configuration Registers */
+#define D18F0_NODE_ID		0x60
+#define D18F0_CPU_CNT		0x62 /* BKDG defines as a field in DWORD 0x60 */
+# define CPU_CNT_MASK		0x1f /*  CpuCnt + 1 = no. CPUs */
+
 /* D18F1 - Address Map Registers */
 #define D18F1_DRAM_HOLE		0xf0
 # define DRAM_HOIST_VALID	(1 << 1)
 # define DRAM_HOLE_VALID	(1 << 0)
 
-void cpu_bus_scan(device_t dev);
 void domain_enable_resources(device_t dev);
 void domain_read_resources(device_t dev);
 void domain_set_resources(device_t dev);
diff --git a/src/soc/amd/stoneyridge/model_15_init.c b/src/soc/amd/stoneyridge/model_15_init.c
index e896718..c3e9bf5 100644
--- a/src/soc/amd/stoneyridge/model_15_init.c
+++ b/src/soc/amd/stoneyridge/model_15_init.c
@@ -22,7 +22,6 @@
 #include <cpu/x86/msr.h>
 #include <cpu/x86/pae.h>
 #include <pc80/mc146818rtc.h>
-#include <cpu/x86/lapic.h>
 
 #include <cpu/cpu.h>
 #include <cpu/x86/cache.h>
@@ -30,36 +29,46 @@
 #include <cpu/amd/amdfam15.h>
 #include <arch/acpi.h>
 
+static void msr_rw_dram(unsigned int reg)
+{
+#define RW_DRAM (MTRR_READ_MEM | MTRR_WRITE_MEM)
+#define ALL_RW_DRAM ((RW_DRAM << 24) | (RW_DRAM << 16) | \
+		     (RW_DRAM << 8)  | (RW_DRAM))
+
+	msr_t mtrr = rdmsr(reg);
+	mtrr.hi |= ALL_RW_DRAM;
+	mtrr.lo |= ALL_RW_DRAM;
+	wrmsr(reg, mtrr);
+}
+
 static void model_15_init(device_t dev)
 {
 	printk(BIOS_DEBUG, "Model 15 Init.\n");
 
-	u8 i;
+	int i;
 	msr_t msr;
-	int msrno;
 
 	disable_cache();
+
 	/* Enable access to AMD RdDram and WrDram extension bits */
 	msr = rdmsr(SYSCFG_MSR);
 	msr.lo |= SYSCFG_MSR_MtrrFixDramModEn;
 	msr.lo &= ~SYSCFG_MSR_MtrrFixDramEn;
 	wrmsr(SYSCFG_MSR, msr);
 
-	// BSP: make a0000-bffff UC, c0000-fffff WB
-	msr.lo = msr.hi = 0;
-	wrmsr(MTRR_FIX_16K_A0000, msr);
-	msr.lo = msr.hi = 0x1e1e1e1e;
-	wrmsr(MTRR_FIX_64K_00000, msr);
-	wrmsr(MTRR_FIX_16K_80000, msr);
-	for (msrno = MTRR_FIX_4K_C0000 ; msrno <= MTRR_FIX_4K_F8000 ; msrno++)
-		wrmsr(msrno, msr);
+	/* Send all but A0000-BFFFF to DRAM */
+	msr_rw_dram(MTRR_FIX_64K_00000);
+	msr_rw_dram(MTRR_FIX_16K_80000);
+	for (i = MTRR_FIX_4K_C0000 ; i <= MTRR_FIX_4K_F8000 ; i++)
+		msr_rw_dram(i);
 
+	/* Hide RdDram and WrDram bits, and clear Tom2ForceMemTypeWB */
 	msr = rdmsr(SYSCFG_MSR);
+	msr.lo &= ~SYSCFG_MSR_TOM2WB;
 	msr.lo &= ~SYSCFG_MSR_MtrrFixDramModEn;
 	msr.lo |= SYSCFG_MSR_MtrrFixDramEn;
 	wrmsr(SYSCFG_MSR, msr);
 
-	x86_mtrr_check();
 	x86_enable_cache();
 
 	/* zero the machine check error status registers */
@@ -68,9 +77,6 @@
 	for (i = 0 ; i < 6 ; i++)
 		wrmsr(MCI_STATUS + (i * 4), msr);
 
-	/* Enable the local CPU APICs */
-	setup_lapic();
-
 	/* Write protect SMM space with SMMLOCK. */
 	msr = rdmsr(HWCR_MSR);
 	msr.lo |= (1 << 0);
diff --git a/src/soc/amd/stoneyridge/northbridge.c b/src/soc/amd/stoneyridge/northbridge.c
index 049bcd0..11fb336 100644
--- a/src/soc/amd/stoneyridge/northbridge.c
+++ b/src/soc/amd/stoneyridge/northbridge.c
@@ -507,80 +507,6 @@
 	reserved_ram_resource(dev, 0xc0000, 0xc0000 / KiB, 0x40000 / KiB);
 }
 
-void cpu_bus_scan(device_t dev)
-{
-	struct bus *cpu_bus;
-	device_t cpu;
-	device_t cdb_dev;
-	device_t dev_mc;
-	int j;
-	int core_max;
-	int core_nums;
-	int siblings;
-	int family;
-	int enable_node;
-	u32 lapicid_start;
-	u32 apic_id;
-	u32 pccount;
-
-
-	dev_mc = dev_find_slot(CONFIG_CBB, PCI_DEVFN(CONFIG_CDB, 0));
-	if (!dev_mc) {
-		printk(BIOS_ERR, "%02x:%02x.0 not found", CONFIG_CBB,
-				CONFIG_CDB);
-		die("");
-	}
-
-	/* Get max and actual number of cores */
-	pccount = cpuid_ecx(0x80000008);
-	core_max = 1 << ((pccount >> 12) & 0xf);
-	core_nums = (pccount & 0xF);
-
-	family = (cpuid_eax(1) >> 20) & 0xff;
-
-	cdb_dev = dev_find_slot(CONFIG_CBB, PCI_DEVFN(CONFIG_CDB, 5));
-	siblings = pci_read_config32(cdb_dev, 0x84) & 0xff;
-
-	printk(BIOS_SPEW, "%s family%xh, core_max=%d, core_nums=%d,"
-			" siblings=%d\n", dev_path(cdb_dev), 0x0f + family,
-			core_max, core_nums, siblings);
-
-	/*
-	 * APIC ID calucation is tightly coupled with AGESA v5 code.
-	 * This calculation MUST match the assignment calculation done
-	 * in LocalApicInitializationAtEarly() function.
-	 * And reference GetLocalApicIdForCore()
-	 *
-	 * Apply apic enumeration rules
-	 * For systems with >= 16 APICs, put the IO-APICs at 0..n and
-	 * put the local-APICs at m..z
-	 *
-	 * This is needed because many IO-APIC devices only have 4 bits
-	 * for their APIC id and therefore must reside at 0..15
-	 */
-
-	 /*
-	  * While the above statement is true, we know some things about
-	  * this silicon. It is an SOC and can't have  >= 16 APICs, but
-	  * we will start numbering at 0x10. We also know there is only
-	  * on physical node (module in AMD speak).
-	  */
-
-	lapicid_start = 0x10; /* Get this from devicetree? see comment above. */
-	enable_node = cdb_dev->enabled;
-	cpu_bus = dev->link_list;
-
-	for (j = 0 ; j <= siblings ; j++) {
-		apic_id = lapicid_start + j;
-		printk(BIOS_SPEW, "lapicid_start 0x%x, core 0x%x,"
-				"  apicid=0x%x\n", lapicid_start, j, apic_id);
-
-		cpu = add_cpu_device(cpu_bus, apic_id, enable_node);
-		if (cpu)
-			amd_cpu_topology(cpu, 1, j);
-	}
-}
-
 /*********************************************************************
  * Change the vendor / device IDs to match the generic VBIOS header. *
  *********************************************************************/
diff --git a/src/soc/amd/stoneyridge/romstage.c b/src/soc/amd/stoneyridge/romstage.c
index c69bbf6..b062c6c 100644
--- a/src/soc/amd/stoneyridge/romstage.c
+++ b/src/soc/amd/stoneyridge/romstage.c
@@ -13,6 +13,9 @@
  * GNU General Public License for more details.
  */
 
+#include <cpu/x86/msr.h>
+#include <cpu/x86/mtrr.h>
+#include <cpu/amd/mtrr.h>
 #include <cbmem.h>
 #include <console/console.h>
 #include <program_loading.h>
@@ -24,15 +27,47 @@
 
 asmlinkage void car_stage_entry(void)
 {
+	msr_t base, mask;
+	msr_t mtrr_cap = rdmsr(MTRR_CAP_MSR);
+	int vmtrrs = mtrr_cap.lo & MTRR_CAP_VCNT;
+	int i;
+
 	console_init();
 
 	post_code(0x40);
 	AGESAWRAPPER(amdinitpost);
 
 	post_code(0x41);
-	psp_notify_dram();
+	/*
+	 * TODO: This is a hack to work around current AGESA behavior.  AGESA
+	 *       needs to change to reflect that coreboot owns the MTRRs.
+	 *
+	 * After setting up DRAM, AGESA also completes the configuration of the
+	 * MTRRs, setting regions to WB.  Anything written to memory between
+	 * now and when CAR is dismantled will be in cache and lost.  For
+	 * now, set the regions UC to ensure the writes get to DRAM.
+	 */
+	for (i = 0 ; i < vmtrrs ; i++) {
+		base = rdmsr(MTRR_PHYS_BASE(i));
+		mask = rdmsr(MTRR_PHYS_MASK(i));
+		if (!(mask.lo & MTRR_PHYS_MASK_VALID))
+			continue;
+
+		if ((base.lo & 0x7) == MTRR_TYPE_WRBACK) {
+			base.lo &= ~0x7;
+			base.lo |= MTRR_TYPE_UNCACHEABLE;
+			wrmsr(MTRR_PHYS_BASE(i), base);
+		}
+	}
+	/* Disable forced WB for the region from 4GB to TOM2. */
+	msr_t sys_cfg = rdmsr(SYSCFG_MSR);
+	sys_cfg.lo &= ~SYSCFG_MSR_TOM2WB;
+	wrmsr(SYSCFG_MSR, sys_cfg);
 
 	post_code(0x42);
+	psp_notify_dram();
+
+	post_code(0x43);
 	cbmem_initialize_empty();
 
 	/*
@@ -42,7 +77,7 @@
 	 */
 	chipset_teardown_car();
 
-	post_code(0x43);
+	post_code(0x44);
 	AGESAWRAPPER(amdinitenv);
 
 	post_code(0x50);