src/intel/xeon_sp: add hardware error support (HEST)

This patch adds ACPI hardware error source table (HEST) support.
This involves two parts: (1) the ACPI HEST table, which is filled
with the appropriate fields, and (2) reserved memory, which is used by
runtime SW to provide error information. The OS will not accept a HEST
table with this memory set to 0.

The ASL code to enable APEI bit will be submitted in a separate patch.

Tested on the DeltaLake mainboard with the following option enabled:
SOC_INTEL_XEON_RAS

After boot to Linux, the following will show in dmesg:
HEST: Table parsing has been initialized

Change-Id: If76b2af153616182cc053ca878f30fe056e9c8bd
Signed-off-by: Rocky Phagura <rphagura@fb.com>
Reviewed-on: https://review.coreboot.org/c/coreboot/+/52090
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
Reviewed-by: Arthur Heymans <arthur@aheymans.xyz>
diff --git a/src/commonlib/include/commonlib/cbmem_id.h b/src/commonlib/include/commonlib/cbmem_id.h
index ae644de..84d0a31 100644
--- a/src/commonlib/include/commonlib/cbmem_id.h
+++ b/src/commonlib/include/commonlib/cbmem_id.h
@@ -6,6 +6,7 @@
 #define CBMEM_ID_ACPI		0x41435049
 #define CBMEM_ID_ACPI_BERT      0x42455254
 #define CBMEM_ID_ACPI_GNVS	0x474e5653
+#define CMBMEM_ID_ACPI_HEST	0x48455354 /* ASCII "HEST"; NOTE(review): CMBMEM looks like a typo of CBMEM */
 #define CBMEM_ID_ACPI_UCSI	0x55435349
 #define CBMEM_ID_AFTER_CAR	0xc4787a93
 #define CBMEM_ID_AGESA_RUNTIME	0x41474553
@@ -81,6 +82,7 @@
 	{ CBMEM_ID_ACPI,		"ACPI       " }, \
 	{ CBMEM_ID_ACPI_BERT,		"ACPI BERT  " }, \
 	{ CBMEM_ID_ACPI_GNVS,		"ACPI GNVS  " }, \
+	{ CMBMEM_ID_ACPI_HEST,		"ACPI HEST  " }, \
 	{ CBMEM_ID_ACPI_UCSI,		"ACPI UCSI  " }, \
 	{ CBMEM_ID_AGESA_RUNTIME,	"AGESA RSVD " }, \
 	{ CBMEM_ID_AFTER_CAR,		"AFTER CAR  " }, \
diff --git a/src/soc/intel/common/block/acpi/acpi/globalnvs.asl b/src/soc/intel/common/block/acpi/acpi/globalnvs.asl
index 161381f..75215f8 100644
--- a/src/soc/intel/common/block/acpi/acpi/globalnvs.asl
+++ b/src/soc/intel/common/block/acpi/acpi/globalnvs.asl
@@ -24,4 +24,5 @@
 	UIOR,	8,	// 0x2f - UART debug controller init on S3 resume
 	A4GB,	64,	// 0x30 - 0x37 Base of above 4GB MMIO Resource
 	A4GS,	64,	// 0x38 - 0x3f Length of above 4GB MMIO Resource
+	,	64,	// 0x40 - 0x47 Hest log buffer (used in SMM, not ASL code)
 }
diff --git a/src/soc/intel/common/block/include/intelblocks/nvs.h b/src/soc/intel/common/block/include/intelblocks/nvs.h
index c98fa01..89b682e 100644
--- a/src/soc/intel/common/block/include/intelblocks/nvs.h
+++ b/src/soc/intel/common/block/include/intelblocks/nvs.h
@@ -26,6 +26,7 @@
 	u8	uior; /* 0x2f - UART debug controller init on S3 resume */
 	u64	a4gb; /* 0x30 - 0x37 Base of above 4GB MMIO Resource */
 	u64	a4gs; /* 0x38 - 0x3f Length of above 4GB MMIO Resource */
+	u64	hest_log_addr; /* 0x40 - 0x47 err log addr (used in SMM, not ASL code) */
 };
 
 #endif
diff --git a/src/soc/intel/xeon_sp/Kconfig b/src/soc/intel/xeon_sp/Kconfig
index 09f72a3..a2876b5 100644
--- a/src/soc/intel/xeon_sp/Kconfig
+++ b/src/soc/intel/xeon_sp/Kconfig
@@ -2,6 +2,7 @@
 
 source "src/soc/intel/xeon_sp/skx/Kconfig"
 source "src/soc/intel/xeon_sp/cpx/Kconfig"
+source "src/soc/intel/xeon_sp/ras/Kconfig"
 
 config XEON_SP_COMMON_BASE
 	bool
@@ -114,4 +115,9 @@
 	hex
 	default 0x80000
 
+config SOC_INTEL_XEON_RAS
+	bool
+	select SOC_ACPI_HEST
+	select SOC_RAS_ELOG
+
 endif ## SOC_INTEL_XEON_SP
diff --git a/src/soc/intel/xeon_sp/Makefile.inc b/src/soc/intel/xeon_sp/Makefile.inc
index 89e43fc..4c351a4 100644
--- a/src/soc/intel/xeon_sp/Makefile.inc
+++ b/src/soc/intel/xeon_sp/Makefile.inc
@@ -16,6 +16,8 @@
 smm-y += smihandler.c pmutil.c
 postcar-y += spi.c
 
+subdirs-$(CONFIG_SOC_INTEL_XEON_RAS) += ras
+
 CPPFLAGS_common += -I$(src)/soc/intel/xeon_sp/include
 CPPFLAGS_common += -I$(CONFIG_FSP_HEADER_PATH)
 
diff --git a/src/soc/intel/xeon_sp/include/soc/hest.h b/src/soc/intel/xeon_sp/include/soc/hest.h
new file mode 100644
index 0000000..ad79d45
--- /dev/null
+++ b/src/soc/intel/xeon_sp/include/soc/hest.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef _HEST_H_
+#define _HEST_H_
+#include <acpi/acpi.h>
+
+#define MCE_ERR_POLL_MS_INTERVAL	1000
+#define HEST_PCIE_RP_AER_DESC_TYPE	6
+#define HEST_GHES_DESC_TYPE		9	/* stored in acpi_ghes_esd.type by hest.c */
+#define GHES_MAX_RAW_DATA_LENGTH	(((CONFIG_ERROR_LOG_BUFFER_SIZE) >> 1) - 8)
+#define GHEST_ERROR_STATUS_BLOCK_LENGTH	((CONFIG_ERROR_LOG_BUFFER_SIZE) >> 1)
+#define GHEST_ASSIST			(1 << 2)
+#define FIRMWARE_FIRST			(1 << 0)
+#define MEM_VALID_BITS			0x66ff
+#define PCIE_VALID_BITS			0xef
+#define QWORD_ACCESS			4	/* stored in sts_addr.access_size by hest.c */
+#define NOTIFY_TYPE_SCI			3	/* stored in notify.type by hest.c */
+
+/* Generic Error Source Descriptor: the leading fields of ghes_record_t */
+typedef struct acpi_ghes_esd {
+	u16 type; /* hest.c writes HEST_GHES_DESC_TYPE here */
+	u16 source_id;
+	u16 related_src_id;
+	u8 flags; /* hest.c leaves this at 0 (only the memset touches it) */
+	u8 enabled;
+	u32 prealloc_erecords;
+	u32 max_section_per_record;
+} __packed acpi_ghes_esd_t;
+
+typedef struct ghes_record { /* one GHES entry, written right after acpi_hest_t */
+	acpi_ghes_esd_t esd;
+	u32 max_raw_data_length; /* hest.c writes GHES_MAX_RAW_DATA_LENGTH here */
+	acpi_addr64_t sts_addr; /* error status address; addr comes from GNVS */
+	acpi_hest_hen_t notify; /* notification structure */
+	u32 err_sts_blk_len; /* hest.c writes GHEST_ERROR_STATUS_BLOCK_LENGTH here */
+} __packed ghes_record_t;
+
+unsigned long hest_create(unsigned long current, struct acpi_rsdp *rsdp);
+
+#endif
diff --git a/src/soc/intel/xeon_sp/nb_acpi.c b/src/soc/intel/xeon_sp/nb_acpi.c
index 19c3921..0c1c5ab 100644
--- a/src/soc/intel/xeon_sp/nb_acpi.c
+++ b/src/soc/intel/xeon_sp/nb_acpi.c
@@ -7,6 +7,7 @@
 #include <device/pci.h>
 #include <soc/acpi.h>
 #include <soc/cpu.h>
+#include <soc/hest.h>
 #include <soc/iomap.h>
 #include <soc/pci_devs.h>
 #include <soc/soc_util.h>
@@ -449,5 +450,8 @@
 		acpi_add_table(rsdp, dmar);
 	}
 
+	if (CONFIG(SOC_ACPI_HEST))
+		current = hest_create(current, rsdp);
+
 	return current;
 }
diff --git a/src/soc/intel/xeon_sp/ras/Kconfig b/src/soc/intel/xeon_sp/ras/Kconfig
new file mode 100644
index 0000000..79c599c
--- /dev/null
+++ b/src/soc/intel/xeon_sp/ras/Kconfig
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+config SOC_ACPI_HEST
+	def_bool n
+	depends on HAVE_ACPI_TABLES
+	help
+	  This variable provides control for ACPI hardware error source table (HEST)
+
+config SOC_RAS_ELOG
+	def_bool n
+	depends on SOC_ACPI_HEST
+	help
+	  This variable provides enhanced error logging support used with HEST
+
+config ERROR_LOG_BUFFER_SIZE
+	hex
+	default 0x4000
+	depends on SOC_RAS_ELOG
+	help
+	  This variable allows a configurable error log based on system requirements
diff --git a/src/soc/intel/xeon_sp/ras/Makefile.inc b/src/soc/intel/xeon_sp/ras/Makefile.inc
new file mode 100644
index 0000000..93c8705
--- /dev/null
+++ b/src/soc/intel/xeon_sp/ras/Makefile.inc
@@ -0,0 +1,3 @@
+## SPDX-License-Identifier: GPL-2.0-or-later
+
+ramstage-$(CONFIG_SOC_ACPI_HEST) += hest.c
diff --git a/src/soc/intel/xeon_sp/ras/hest.c b/src/soc/intel/xeon_sp/ras/hest.c
new file mode 100644
index 0000000..6164edf
--- /dev/null
+++ b/src/soc/intel/xeon_sp/ras/hest.c
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include <soc/acpi.h>
+#include <acpi/acpi_gnvs.h>
+#include <cbmem.h>
+#include <console/console.h>
+#include <soc/hest.h>
+#include <intelblocks/nvs.h>
+
+static u64 hest_get_elog_addr(void)
+{
+	/* Returns gnvs->hest_log_addr (set by hest_create()), or 0 without GNVS */
+	struct global_nvs *gnvs;
+	gnvs = acpi_get_gnvs();
+	if (!gnvs) {
+		printk(BIOS_ERR, "Unable to get gnvs\n");
+		return 0; /* the caller stores this value without checking it */
+	}
+
+	/* Log the start address and the configured size of the error log buffer */
+	printk(BIOS_DEBUG, "\t status blk start addr = %llx\n", gnvs->hest_log_addr);
+	printk(BIOS_DEBUG, "\t size = %x\n", CONFIG_ERROR_LOG_BUFFER_SIZE);
+	return gnvs->hest_log_addr;
+}
+
+static u32 acpi_hest_add_ghes(void *current)
+{
+	ghes_record_t *rec = (ghes_record_t *)current;
+	u32 size = sizeof(ghes_record_t);
+
+	/* Zero one ghes_record_t at current, then fill it in; its size is returned */
+	memset(rec, 0, size); /* NOTE(review): hest.c does not include <string.h> itself */
+	rec->esd.type = HEST_GHES_DESC_TYPE;
+	rec->esd.source_id = 0; /* 0 for machine check exception (MCE) source */
+	rec->esd.enabled = 1;
+	rec->esd.related_src_id = 0xffff; /* presumably "no related source" - confirm */
+	rec->esd.prealloc_erecords = 1;
+	rec->esd.max_section_per_record = 0xf;
+	rec->max_raw_data_length = GHES_MAX_RAW_DATA_LENGTH;
+
+	/* Describe how the error status address is accessed; addr is set below */
+	rec->sts_addr.space_id = 0;
+	rec->sts_addr.bit_width = 0x40;
+	rec->sts_addr.bit_offset = 0;
+	rec->sts_addr.access_size = QWORD_ACCESS;
+
+	/* Fill the notification structure and the error status block length */
+	rec->notify.type = NOTIFY_TYPE_SCI;
+	rec->notify.length = sizeof(acpi_hest_hen_t);
+	rec->err_sts_blk_len = GHEST_ERROR_STATUS_BLOCK_LENGTH;
+
+	/* Error status block start address, taken from GNVS (0 if GNVS is missing) */
+	if (CONFIG(SOC_ACPI_HEST)) /* NOTE(review): hest.c is only built with this set */
+		rec->sts_addr.addr = hest_get_elog_addr();
+
+	return size;
+}
+
+static unsigned long acpi_fill_hest(acpi_hest_t *hest)
+{
+	acpi_header_t *header = &(hest->header);
+	void *current;
+	current = (void *)(hest);
+	void *next = current;
+	next = hest + 1; /* the GHES entry is written right after acpi_hest_t */
+	next += acpi_hest_add_ghes(next); /* void * arithmetic (GNU C extension) */
+	hest->error_source_count += 1;
+	header->length += next - current; /* NOTE(review): also counts sizeof(*hest) */
+	return header->length;
+}
+
+unsigned long hest_create(unsigned long current, struct acpi_rsdp *rsdp)
+{
+	struct global_nvs *gnvs;
+	acpi_hest_t *hest;
+
+	/* Reserve CBMEM for the error log; on failure no HEST table is added */
+	void *mem = cbmem_add(CMBMEM_ID_ACPI_HEST, CONFIG_ERROR_LOG_BUFFER_SIZE);
+	if (!mem) {
+		printk(BIOS_ERR, "Unable to allocate HEST memory\n");
+		return current; /* current is handed back unchanged */
+	}
+
+	printk(BIOS_DEBUG, "HEST memory created: %p\n", mem);
+	gnvs = acpi_get_gnvs();
+	if (!gnvs) {
+		printk(BIOS_ERR, "Unable to get gnvs\n");
+		return current; /* the CBMEM entry stays allocated on this path */
+	}
+	gnvs->hest_log_addr = (uintptr_t)mem; /* read back by hest_get_elog_addr() */
+	printk(BIOS_DEBUG, "elog_addr: %llx, size:%x\n", gnvs->hest_log_addr,
+		CONFIG_ERROR_LOG_BUFFER_SIZE);
+
+	current = ALIGN(current, 8); /* align the table start to 8 bytes */
+	hest = (acpi_hest_t *)current;
+	acpi_write_hest(hest, acpi_fill_hest); /* acpi_fill_hest appends the GHES entry */
+	acpi_add_table(rsdp, (void *)current);
+	current += hest->header.length; /* advance by the length recorded in the header */
+	return current;
+}