src/cpu/power9: add file structure for power9, implement SCOM access

Change-Id: Ib555ce51294c94b22d9a7c0db84d38d7928f7015
Signed-off-by: Igor Bagnucki <igor.bagnucki@3mdeb.com>
Signed-off-by: Krystian Hebel <krystian.hebel@3mdeb.com>
Signed-off-by: Sergii Dmytruk <sergii.dmytruk@3mdeb.com>
Reviewed-on: https://review.coreboot.org/c/coreboot/+/57078
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
Reviewed-by: Michał Żygowski <michal.zygowski@3mdeb.com>
diff --git a/src/arch/ppc64/include/arch/byteorder.h b/src/arch/ppc64/include/arch/byteorder.h
index 79f15b1..8ff8576 100644
--- a/src/arch/ppc64/include/arch/byteorder.h
+++ b/src/arch/ppc64/include/arch/byteorder.h
@@ -5,4 +5,16 @@
 
 #define __BIG_ENDIAN 4321
 
+#define PPC_BIT(bit)		(0x8000000000000000UL >> (bit))	/* bit 0 is the MSB */
+#define PPC_BITMASK(bs, be)	((PPC_BIT(bs) - PPC_BIT(be)) | PPC_BIT(bs)) /* bits bs..be */
+
+#ifndef __ASSEMBLER__
+
+#include <types.h>
+#define PPC_SHIFT(val, lsb)	(((uint64_t)(val)) << (63 - (lsb))) /* val's LSB -> bit lsb */
+
+#else
+#define PPC_SHIFT(val, lsb)	((val) << (63 - (lsb)))	/* same, without the C cast */
+#endif
+
 #endif /* _BYTEORDER_H */
diff --git a/src/arch/ppc64/include/arch/io.h b/src/arch/ppc64/include/arch/io.h
index 132a5ce..6dc0a84 100644
--- a/src/arch/ppc64/include/arch/io.h
+++ b/src/arch/ppc64/include/arch/io.h
@@ -9,6 +9,7 @@
 #define MMIO_GROUP0_CHIP0_LPC_BASE_ADDR 0x8006030000000000
 #define LPCHC_IO_SPACE 0xD0010000
 #define LPC_BASE_ADDR (MMIO_GROUP0_CHIP0_LPC_BASE_ADDR + LPCHC_IO_SPACE)
+#define MMIO_GROUP0_CHIP0_SCOM_BASE_ADDR 0x800603FC00000000
 
 /* Enforce In-order Execution of I/O */
 static inline void eieio(void)
diff --git a/src/cpu/Makefile.inc b/src/cpu/Makefile.inc
index 9f1e6c4..2d90638 100644
--- a/src/cpu/Makefile.inc
+++ b/src/cpu/Makefile.inc
@@ -7,6 +7,7 @@
 subdirs-y += ti
 subdirs-$(CONFIG_ARCH_X86) += x86
 subdirs-$(CONFIG_CPU_QEMU_X86) += qemu-x86
+subdirs-$(CONFIG_CPU_POWER9) += power9
 
 $(eval $(call create_class_compiler,cpu_microcode,x86_32))
 ################################################################################
diff --git a/src/cpu/power9/Kconfig b/src/cpu/power9/Kconfig
new file mode 100644
index 0000000..c3a628c
--- /dev/null
+++ b/src/cpu/power9/Kconfig
@@ -0,0 +1,8 @@
+## SPDX-License-Identifier: GPL-2.0-only
+
+config CPU_POWER9
+	bool
+	select ARCH_BOOTBLOCK_PPC64
+	select ARCH_VERSTAGE_PPC64
+	select ARCH_ROMSTAGE_PPC64
+	select ARCH_RAMSTAGE_PPC64
diff --git a/src/cpu/power9/Makefile.inc b/src/cpu/power9/Makefile.inc
new file mode 100644
index 0000000..2fe9e57
--- /dev/null
+++ b/src/cpu/power9/Makefile.inc
@@ -0,0 +1,6 @@
+## SPDX-License-Identifier: GPL-2.0-or-later
+
+ramstage-y += power9.c
+
+bootblock-y += scom.c
+romstage-y += scom.c
diff --git a/src/cpu/power9/power9.c b/src/cpu/power9/power9.c
new file mode 100644
index 0000000..fd33ff2
--- /dev/null
+++ b/src/cpu/power9/power9.c
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include <cpu/cpu.h>
+#include <device/device.h>
+
+static void power9_cpu_init(struct device *dev)
+{	/* Empty body: no per-device CPU initialization is performed here. */
+}
+
+static struct device_operations cpu_dev_ops = {
+	.init = power9_cpu_init,	/* only hook set; remaining members stay zeroed */
+};
+
+static const struct cpu_driver driver __cpu_driver = {
+	.ops      = &cpu_dev_ops,	/* NOTE(review): only .ops is set — confirm that is enough */
+};
+
+struct chip_operations cpu_power8_qemu_ops = {
+	CHIP_NAME("POWER9 CPU")	/* NOTE(review): symbol says power8_qemu; confirm intended */
+};
diff --git a/src/cpu/power9/scom.c b/src/cpu/power9/scom.c
new file mode 100644
index 0000000..e55d149b
--- /dev/null
+++ b/src/cpu/power9/scom.c
@@ -0,0 +1,135 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include <cpu/power/scom.h>
+#include <cpu/power/spr.h>		// HMER
+#include <console/console.h>
+
+#define XSCOM_DATA_IND_READ			PPC_BIT(0)	/* request: do a read */
+#define XSCOM_DATA_IND_COMPLETE			PPC_BIT(32)	/* status: access done */
+#define XSCOM_DATA_IND_ERR			PPC_BITMASK(33, 35)	/* status: error field */
+#define XSCOM_DATA_IND_DATA			PPC_BITMASK(48, 63)	/* 16-bit data field */
+#define XSCOM_DATA_IND_FORM1_DATA		PPC_BITMASK(12, 63) /* unused in this file */
+#define XSCOM_IND_MAX_RETRIES			10	/* status polls per indirect access */
+
+#define XSCOM_RCVED_STAT_REG			0x00090018	/* zeroed by reset_scom_engine() */
+#define XSCOM_LOG_REG				0x00090012	/* zeroed by reset_scom_engine() */
+#define XSCOM_ERR_REG				0x00090013	/* zeroed by reset_scom_engine() */
+
+uint64_t read_scom_direct(uint64_t reg_address)
+{
+	uint64_t val;		/* loaded register value; not returned on error */
+	uint64_t hmer = 0;	/* starts at 0 so the first pass skips clear_hmer() */
+	do {
+		/*
+		 * Clearing HMER on every SCOM access seems to slow down CCS up
+		 * to a point where it starts hitting timeout on "less ideal"
+		 * DIMMs for write centering. Clear it only if this do...while
+		 * executes more than once.
+		 */
+		if ((hmer & SPR_HMER_XSCOM_STATUS) == SPR_HMER_XSCOM_OCCUPIED)
+			clear_hmer();
+
+		eieio();
+		asm volatile(
+			"ldcix %0, %1, %2" :
+			"=r"(val) :
+			"b"(MMIO_GROUP0_CHIP0_SCOM_BASE_ADDR),
+			"r"(reg_address << 3));	/* offset from base = address * 8 */
+		eieio();
+		hmer = read_hmer();
+	} while ((hmer & SPR_HMER_XSCOM_STATUS) == SPR_HMER_XSCOM_OCCUPIED); /* retry */
+
+	if (hmer & SPR_HMER_XSCOM_STATUS) {	/* any other non-zero status */
+		reset_scom_engine();
+		/*
+		 * All F's are returned in case of error, but code polls for a set bit
+		 * after changes that can make such error appear (e.g. clock settings).
+		 * Return 0 so caller won't have to test for all F's in that case.
+		 */
+		return 0;
+	}
+	return val;
+}
+
+void write_scom_direct(uint64_t reg_address, uint64_t data)
+{
+	uint64_t hmer = 0;	/* starts at 0 so the first pass skips clear_hmer() */
+	do {
+		/* See comment in read_scom_direct() */
+		if ((hmer & SPR_HMER_XSCOM_STATUS) == SPR_HMER_XSCOM_OCCUPIED)
+			clear_hmer();
+
+		eieio();
+		asm volatile(
+			"stdcix %0, %1, %2"::
+			"r"(data),
+			"b"(MMIO_GROUP0_CHIP0_SCOM_BASE_ADDR),
+			"r"(reg_address << 3));	/* offset from base = address * 8 */
+		eieio();
+		hmer = read_hmer();
+	} while ((hmer & SPR_HMER_XSCOM_STATUS) == SPR_HMER_XSCOM_OCCUPIED); /* retry */
+
+	if (hmer & SPR_HMER_XSCOM_STATUS)	/* any other non-zero status: reset engine */
+		reset_scom_engine();
+}
+
+void write_scom_indirect(uint64_t reg_address, uint64_t value)
+{
+	/* Direct accesses below use only the low 31 bits of the address */
+	const uint64_t direct_reg = reg_address & 0x7FFFFFFF;
+	/* Request word: indirect address field plus the data field of 'value' */
+	const uint64_t request = (reg_address & XSCOM_ADDR_IND_ADDR) |
+				 (value & XSCOM_ADDR_IND_DATA);
+
+	write_scom_direct(direct_reg, request);
+
+	for (int attempt = 0; attempt < XSCOM_IND_MAX_RETRIES; attempt++) {
+		const uint64_t status = read_scom_direct(direct_reg);
+
+		if (!(status & XSCOM_DATA_IND_COMPLETE))
+			continue;	// TODO: delay?
+		if ((status & XSCOM_DATA_IND_ERR) == 0)
+			return;
+		printk(BIOS_EMERG, "SCOM WR error  %16.16llx = %16.16llx : %16.16llx\n",
+		       reg_address, value, status);
+	}
+}
+
+uint64_t read_scom_indirect(uint64_t reg_address)
+{
+	/* Direct accesses below use only the low 31 bits of the address */
+	const uint64_t direct_reg = reg_address & 0x7FFFFFFF;
+	/* Request word: read flag plus the indirect address field */
+	uint64_t word = XSCOM_DATA_IND_READ | (reg_address & XSCOM_ADDR_IND_ADDR);
+
+	write_scom_direct(direct_reg, word);
+
+	for (int attempt = 0; attempt < XSCOM_IND_MAX_RETRIES; attempt++) {
+		word = read_scom_direct(direct_reg);
+		if (!(word & XSCOM_DATA_IND_COMPLETE))
+			continue;	// TODO: delay?
+		if ((word & XSCOM_DATA_IND_ERR) == 0)
+			break;
+		printk(BIOS_EMERG, "SCOM RD error  %16.16llx : %16.16llx\n",
+		       reg_address, word);
+	}
+
+	/* Data field of the last status word read, even if no clean completion was seen */
+	return word & XSCOM_DATA_IND_DATA;
+}
+
+/* Zero three XSCOM registers and HMER. Rarely called, so don't make it inlined. */
+void reset_scom_engine(void)
+{
+	/*
+	 * With cross-CPU SCOM accesses, first register should be cleared on the
+	 * executing CPU, the other two on target CPU. In that case it may be
+	 * necessary to do the remote writes in assembly directly to skip checking
+	 * HMER and possibly end in a loop.
+	 */
+	write_scom_direct(XSCOM_RCVED_STAT_REG, 0);
+	write_scom_direct(XSCOM_LOG_REG, 0);
+	write_scom_direct(XSCOM_ERR_REG, 0);
+	clear_hmer();
+	eieio();
+}
diff --git a/src/include/cpu/power/scom.h b/src/include/cpu/power/scom.h
new file mode 100644
index 0000000..ef5796c
--- /dev/null
+++ b/src/include/cpu/power/scom.h
@@ -0,0 +1,161 @@
+#ifndef CPU_PPC64_SCOM_H
+#define CPU_PPC64_SCOM_H
+
+#include <arch/byteorder.h>	// PPC_BIT(), PPC_BITMASK()
+
+// 32b SCOM address:
+//
+//      8         7         6         5         4         3         2         1
+//
+//  |       | |       | |    1 1| |1 1 1 1| |1 1 1 1| |2 2 2 2| |2 2 2 2| |2 2 3 3|
+//  |0 1 2 3| |4 5 6 7| |8 9 0 1| |2 3 4 5| |6 7 8 9| |0 1 2 3| |4 5 6 7| |8 9 0 1|
+//    {A}{     B      }           {   C   }     {    D    }{   E   }{      F      }
+//
+// A - Is multiCast if bit 1 = 0x1
+// B - Contains Chiplet ID (6 bits) [2:7]
+// C - Contains Port Number (4 bits) [12:15]
+// D - Ring (4 bits) [18:21]
+// E - Sat ID (4 bits) [22:25]
+// F - Sat Offset (6 bits) [26:31]
+//
+// For 64b SCOM address all of the fields are shifted 32b to the right:
+// A - Is multiCast if bit 33 = 0x1
+// B - Contains Chiplet ID (6 bits) [34:39]
+// C - Contains Port Number (4 bits) [44:47]
+// D - Ring (4 bits) [50:53]
+// E - Sat ID (4 bits) [54:57]
+// F - Sat Offset (6 bits) [58:63]
+// Higher bits specify indirect address
+
+#define XSCOM_ADDR_IND_FLAG		PPC_BIT(0)
+#define XSCOM_ADDR_IND_ADDR		PPC_BITMASK(11, 31)
+#define XSCOM_ADDR_IND_DATA		PPC_BITMASK(48, 63)
+
+#ifndef __ASSEMBLER__
+#include <types.h>
+#include <arch/io.h>
+#include <cpu/power/spr.h>
+
+// Chiplet IDs (SCOM address field B). TODO: these are probably specific to POWER9
+typedef enum {
+	PIB_CHIPLET_ID  = 0x00,     ///< PIB chiplet
+	PERV_CHIPLET_ID = 0x01,     ///< TP chiplet
+	N0_CHIPLET_ID   = 0x02,     ///< Nest0 (North) chiplet
+	N1_CHIPLET_ID   = 0x03,     ///< Nest1 (East) chiplet
+	N2_CHIPLET_ID   = 0x04,     ///< Nest2 (South) chiplet
+	N3_CHIPLET_ID   = 0x05,     ///< Nest3 (West) chiplet
+	XB_CHIPLET_ID   = 0x06,     ///< XBus chiplet
+	MC01_CHIPLET_ID = 0x07,     ///< MC01 (West) chiplet
+	MC23_CHIPLET_ID = 0x08,     ///< MC23 (East) chiplet
+	OB0_CHIPLET_ID  = 0x09,     ///< OBus0 chiplet
+	OB1_CHIPLET_ID  = 0x0A,     ///< OBus1 chiplet (Cumulus only)
+	OB2_CHIPLET_ID  = 0x0B,     ///< OBus2 chiplet (Cumulus only)
+	OB3_CHIPLET_ID  = 0x0C,     ///< OBus3 chiplet
+	PCI0_CHIPLET_ID = 0x0D,     ///< PCIe0 chiplet
+	PCI1_CHIPLET_ID = 0x0E,     ///< PCIe1 chiplet
+	PCI2_CHIPLET_ID = 0x0F,     ///< PCIe2 chiplet
+	EP00_CHIPLET_ID = 0x10,     ///< Quad0 chiplet (EX0/1)
+	EP01_CHIPLET_ID = 0x11,     ///< Quad1 chiplet (EX2/3)
+	EP02_CHIPLET_ID = 0x12,     ///< Quad2 chiplet (EX4/5)
+	EP03_CHIPLET_ID = 0x13,     ///< Quad3 chiplet (EX6/7)
+	EP04_CHIPLET_ID = 0x14,     ///< Quad4 chiplet (EX8/9)
+	EP05_CHIPLET_ID = 0x15,     ///< Quad5 chiplet (EX10/11)
+	EC00_CHIPLET_ID = 0x20,     ///< Core0 chiplet (Quad0, EX0, C0)
+	EC01_CHIPLET_ID = 0x21,     ///< Core1 chiplet (Quad0, EX0, C1)
+	EC02_CHIPLET_ID = 0x22,     ///< Core2 chiplet (Quad0, EX1, C0)
+	EC03_CHIPLET_ID = 0x23,     ///< Core3 chiplet (Quad0, EX1, C1)
+	EC04_CHIPLET_ID = 0x24,     ///< Core4 chiplet (Quad1, EX2, C0)
+	EC05_CHIPLET_ID = 0x25,     ///< Core5 chiplet (Quad1, EX2, C1)
+	EC06_CHIPLET_ID = 0x26,     ///< Core6 chiplet (Quad1, EX3, C0)
+	EC07_CHIPLET_ID = 0x27,     ///< Core7 chiplet (Quad1, EX3, C1)
+	EC08_CHIPLET_ID = 0x28,     ///< Core8 chiplet (Quad2, EX4, C0)
+	EC09_CHIPLET_ID = 0x29,     ///< Core9 chiplet (Quad2, EX4, C1)
+	EC10_CHIPLET_ID = 0x2A,     ///< Core10 chiplet (Quad2, EX5, C0)
+	EC11_CHIPLET_ID = 0x2B,     ///< Core11 chiplet (Quad2, EX5, C1)
+	EC12_CHIPLET_ID = 0x2C,     ///< Core12 chiplet (Quad3, EX6, C0)
+	EC13_CHIPLET_ID = 0x2D,     ///< Core13 chiplet (Quad3, EX6, C1)
+	EC14_CHIPLET_ID = 0x2E,     ///< Core14 chiplet (Quad3, EX7, C0)
+	EC15_CHIPLET_ID = 0x2F,     ///< Core15 chiplet (Quad3, EX7, C1)
+	EC16_CHIPLET_ID = 0x30,     ///< Core16 chiplet (Quad4, EX8, C0)
+	EC17_CHIPLET_ID = 0x31,     ///< Core17 chiplet (Quad4, EX8, C1)
+	EC18_CHIPLET_ID = 0x32,     ///< Core18 chiplet (Quad4, EX9, C0)
+	EC19_CHIPLET_ID = 0x33,     ///< Core19 chiplet (Quad4, EX9, C1)
+	EC20_CHIPLET_ID = 0x34,     ///< Core20 chiplet (Quad5, EX10, C0)
+	EC21_CHIPLET_ID = 0x35,     ///< Core21 chiplet (Quad5, EX10, C1)
+	EC22_CHIPLET_ID = 0x36,     ///< Core22 chiplet (Quad5, EX11, C0)
+	EC23_CHIPLET_ID = 0x37      ///< Core23 chiplet (Quad5, EX11, C1)
+} chiplet_id_t;
+
+void reset_scom_engine(void);
+
+uint64_t read_scom_direct(uint64_t reg_address);
+void write_scom_direct(uint64_t reg_address, uint64_t data);
+
+uint64_t read_scom_indirect(uint64_t reg_address);
+void write_scom_indirect(uint64_t reg_address, uint64_t data);
+
+static inline void write_scom(uint64_t addr, uint64_t data)
+{
+	if ((addr & XSCOM_ADDR_IND_FLAG) == 0)	/* flag clear: plain register access */
+		write_scom_direct(addr, data);
+	else
+		write_scom_indirect(addr, data);
+}
+
+static inline uint64_t read_scom(uint64_t addr)
+{
+	/* The indirect flag in the address selects which access routine is used */
+	const int indirect = (addr & XSCOM_ADDR_IND_FLAG) != 0;
+
+	return indirect ? read_scom_indirect(addr) : read_scom_direct(addr);
+}
+
+static inline void scom_and_or(uint64_t addr, uint64_t and, uint64_t or)
+{
+	/* Read-modify-write: keep the bits set in 'and', then set the bits in 'or' */
+	write_scom(addr, (read_scom(addr) & and) | or);
+}
+
+static inline void scom_and(uint64_t addr, uint64_t and)
+{
+	scom_and_or(addr, and, 0);	/* OR mask of 0: no bits are set */
+}
+
+static inline void scom_or(uint64_t addr, uint64_t or)
+{
+	scom_and_or(addr, ~0, or);	/* AND mask of all ones: no bits are cleared */
+}
+
+static inline void write_scom_for_chiplet(chiplet_id_t chiplet, uint64_t addr, uint64_t data)
+{
+	/* Replace the chiplet ID field (bits 34:39) of the address with 'chiplet' */
+	const uint64_t target = (addr & ~PPC_BITMASK(34, 39)) | ((chiplet & 0x3F) << 24);
+	write_scom(target, data);
+}
+
+static inline uint64_t read_scom_for_chiplet(chiplet_id_t chiplet, uint64_t addr)
+{
+	/* Replace the chiplet ID field (bits 34:39) of the address with 'chiplet' */
+	const uint64_t target = (addr & ~PPC_BITMASK(34, 39)) | ((chiplet & 0x3F) << 24);
+	return read_scom(target);
+}
+
+static inline void scom_and_or_for_chiplet(chiplet_id_t chiplet, uint64_t addr,
+					   uint64_t and, uint64_t or)
+{
+	const uint64_t kept = read_scom_for_chiplet(chiplet, addr) & and;
+	write_scom_for_chiplet(chiplet, addr, kept | or);	/* write back merged value */
+}
+
+static inline void scom_and_for_chiplet(chiplet_id_t chiplet, uint64_t addr, uint64_t and)
+{
+	scom_and_or_for_chiplet(chiplet, addr, and, 0);	/* OR mask 0: sets no bits */
+}
+
+static inline void scom_or_for_chiplet(chiplet_id_t chiplet, uint64_t addr, uint64_t or)
+{
+	scom_and_or_for_chiplet(chiplet, addr, ~0, or);	/* AND mask all ones: clears no bits */
+}
+
+#endif /* __ASSEMBLER__ */
+#endif /* CPU_PPC64_SCOM_H */
diff --git a/src/include/cpu/power/spr.h b/src/include/cpu/power/spr.h
new file mode 100644
index 0000000..f22a6ca
--- /dev/null
+++ b/src/include/cpu/power/spr.h
@@ -0,0 +1,70 @@
+#ifndef CPU_PPC64_SPR_H
+#define CPU_PPC64_SPR_H
+
+#include <arch/byteorder.h>	// PPC_BIT()
+
+#define SPR_TB					0x10C
+
+#define SPR_PVR					0x11F
+#define SPR_PVR_REV_MASK			(PPC_BITMASK(52, 55) | PPC_BITMASK(60, 63))
+#define SPR_PVR_REV(maj, min)			(PPC_SHIFT((maj), 55) | PPC_SHIFT((min), 63))
+
+#define SPR_HRMOR				0x139
+
+#define SPR_HMER				0x150
+/* Bits in HMER/HMEER */
+#define SPR_HMER_MALFUNCTION_ALERT		PPC_BIT(0)
+#define SPR_HMER_PROC_RECV_DONE			PPC_BIT(2)
+#define SPR_HMER_PROC_RECV_ERROR_MASKED		PPC_BIT(3)
+#define SPR_HMER_TFAC_ERROR			PPC_BIT(4)
+#define SPR_HMER_TFMR_PARITY_ERROR		PPC_BIT(5)
+#define SPR_HMER_XSCOM_FAIL			PPC_BIT(8)
+#define SPR_HMER_XSCOM_DONE			PPC_BIT(9)
+#define SPR_HMER_PROC_RECV_AGAIN		PPC_BIT(11)
+#define SPR_HMER_WARN_RISE			PPC_BIT(14)
+#define SPR_HMER_WARN_FALL			PPC_BIT(15)
+#define SPR_HMER_SCOM_FIR_HMI			PPC_BIT(16)
+#define SPR_HMER_TRIG_FIR_HMI			PPC_BIT(17)
+#define SPR_HMER_HYP_RESOURCE_ERR		PPC_BIT(20)
+#define SPR_HMER_XSCOM_STATUS			PPC_BITMASK(21, 23)
+#define SPR_HMER_XSCOM_OCCUPIED			PPC_BIT(23)
+
+#ifndef __ASSEMBLER__
+#include <types.h>
+
+static inline uint64_t read_spr(int spr)
+{
+	uint64_t val;	/* spr is an "i" (immediate) operand below: pass a constant */
+	asm volatile("mfspr %0,%1" : "=r"(val) : "i"(spr) : "memory");
+	return val;
+}
+
+static inline void write_spr(int spr, uint64_t val)
+{
+	asm volatile("mtspr %0, %1" :: "i"(spr), "r"(val) : "memory"); /* spr: constant */
+}
+
+static inline uint64_t read_hmer(void)
+{
+	return read_spr(SPR_HMER);	/* raw HMER value, no bits masked */
+}
+
+static inline void clear_hmer(void)
+{
+	write_spr(SPR_HMER, 0);	/* writes the value 0 to HMER */
+}
+
+static inline uint64_t read_msr(void)
+{
+	uint64_t val;	/* receives the MSR contents from mfmsr */
+	asm volatile("mfmsr %0" : "=r"(val) :: "memory");
+	return val;
+}
+
+static inline uint64_t pvr_revision(void)
+{
+	return read_spr(SPR_PVR) & SPR_PVR_REV_MASK;	/* keeps PVR bits 52:55 and 60:63 */
+}
+
+#endif /* __ASSEMBLER__ */
+#endif /* CPU_PPC64_SPR_H */