drivers/ipmi/ocp: add PCIe SEL support

Add Kconfig SOC_RAS_BMC_SEL and corresponding support for
generating PCIe error SEL records and sending them to BMC.

Add PCIe error definitions.

This is needed for SMM, so build the ipmi kcs driver in SMM.

Signed-off-by: Tim Chu <Tim.Chu@quantatw.com>
Signed-off-by: Rocky Phagura <rphagura@fb.com>
Signed-off-by: Jonathan Zhang <jonzhang@meta.com>
Change-Id: I1ee46c8da7dbccbe1e2cc00bfe62e5df2f072d65
Reviewed-on: https://review.coreboot.org/c/coreboot/+/68758
Reviewed-by: Christian Walter <christian.walter@9elements.com>
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
diff --git a/src/drivers/ipmi/Makefile.inc b/src/drivers/ipmi/Makefile.inc
index 85f3dde..28ff78c 100644
--- a/src/drivers/ipmi/Makefile.inc
+++ b/src/drivers/ipmi/Makefile.inc
@@ -8,3 +8,4 @@
 romstage-$(CONFIG_IPMI_KCS_ROMSTAGE) += ipmi_ops_premem.c
 romstage-$(CONFIG_IPMI_KCS_ROMSTAGE) += ipmi_kcs.c
 romstage-$(CONFIG_IPMI_KCS_ROMSTAGE) += ipmi_ops.c
+smm-$(CONFIG_SOC_RAS_BMC_SEL) += ipmi_kcs.c
diff --git a/src/drivers/ipmi/ocp/Kconfig b/src/drivers/ipmi/ocp/Kconfig
index 7899e69..c048e20 100644
--- a/src/drivers/ipmi/ocp/Kconfig
+++ b/src/drivers/ipmi/ocp/Kconfig
@@ -3,3 +3,17 @@
 	default n
 	help
 	  This implements OCP specific IPMI command
+
+config IPMI_BMC_SEL
+	bool
+	depends on IPMI_OCP
+	default n
+	help
+	  This implements OCP specific command to generate/send SEL record
+
+config RAS_SEL_VENDOR_ID
+	hex
+	depends on IPMI_BMC_SEL
+	default 0xff
+	help
+	  This option specifies a vendor ID for BMC SEL messages
diff --git a/src/drivers/ipmi/ocp/Makefile.inc b/src/drivers/ipmi/ocp/Makefile.inc
index c77ee4a..fc56364 100644
--- a/src/drivers/ipmi/ocp/Makefile.inc
+++ b/src/drivers/ipmi/ocp/Makefile.inc
@@ -1,4 +1,5 @@
 ramstage-$(CONFIG_IPMI_OCP) += ipmi_ocp.c
 ifeq ($(CONFIG_IPMI_OCP),y)
 romstage-$(CONFIG_IPMI_KCS_ROMSTAGE) += ipmi_ocp_romstage.c
+smm-$(CONFIG_IPMI_BMC_SEL) += ipmi_sel.c
 endif
diff --git a/src/drivers/ipmi/ocp/ipmi_ocp.h b/src/drivers/ipmi/ocp/ipmi_ocp.h
index bd15aa3..c479e52 100644
--- a/src/drivers/ipmi/ocp/ipmi_ocp.h
+++ b/src/drivers/ipmi/ocp/ipmi_ocp.h
@@ -4,6 +4,7 @@
 #define __IPMI_OCP_H
 
 #include <commonlib/bsd/cb_err.h>
+#include <device/pci_type.h>
 
 #define IPMI_NETFN_OEM				0x30
 #define  IPMI_OEM_SET_PPIN			0x77
@@ -33,6 +34,137 @@
 	uint8_t boot_dev4;
 } __packed;
 
+struct pci_dev_fn {
+	u32 func:15;	/* 15 bits; shifting right by 12 leaves the 3-bit function number */
+	u32 dev:5;	/* device number */
+	u32 bus:12;	/* bus number; wider than the 8-bit bus field of ipmi_pci_dev_fn */
+};
+
+struct ipmi_pci_dev_fn {	/* 16-bit BDF embedded in struct ipmi_sel_pcie_dev_err */
+	uint16_t func:3;	/* PCI function */
+	uint16_t dev:5;		/* PCI device */
+	uint16_t bus:8;		/* PCI bus */
+};
+
+struct ipmi_sel_pcie_dev_err {
+	uint16_t record_id;		/* filled with SEL_RECORD_ID */
+	uint8_t record_type;		/* filled with CONFIG_RAS_SEL_VENDOR_ID */
+	uint8_t general_info;		/* filled with SEL_PCIE_DEV_ERR */
+	uint32_t timestamp;		/* sent as 0; BMC will apply timestamp */
+	uint16_t aux_loc;		/* sent as 0 */
+	struct ipmi_pci_dev_fn bdf;	/* bus/device/function from the caller's pci_devfn_t */
+	uint16_t primary_err_count;
+	uint8_t secondary_id;
+	uint8_t primary_id;
+} __packed;
+
+struct iio_port_location {	/* 4 + 2 + 2 bits, i.e. 8 bits in total */
+	uint8_t socket:4;
+	uint8_t sled:2;
+	uint8_t rsvd:2;		/* reserved */
+};
+
+struct ipmi_sel_iio_err {
+	uint16_t record_id;		/* filled with SEL_RECORD_ID */
+	uint8_t record_type;		/* filled with CONFIG_RAS_SEL_VENDOR_ID */
+	uint8_t general_info;		/* filled with SEL_PCIE_IIO_ERR */
+	uint32_t timestamp;		/* not set by the sender, so sent as 0 */
+	struct iio_port_location loc;	/* not set by the sender, so sent as 0 */
+	uint8_t iio_stack_num;
+	uint8_t rsvd0;
+	uint8_t rsvd1;
+	uint8_t iio_err_id;
+	uint8_t rsvd2;
+	uint8_t rsvd3;
+	uint8_t rsvd4;
+} __packed;
+
+enum fail_type {	/* value stored in ipmi_sel_pcie_dev_fail.type */
+	PCIE_DPC_EVNT = 0,	/* failure_details1/2 then hold DPC status and source ID */
+	PCIE_LER_EVNT = 1,
+	PCIE_LRTRN_REC = 2,
+	PCIE_CRC_RETRY = 3,
+	PCIE_CRPT_DATA_CONTMT = 4,
+	PCIE_ECRC_EVNT = 5,
+};
+
+struct ipmi_sel_pcie_dev_fail {
+	uint16_t record_id;
+	uint8_t record_type;
+	uint8_t general_info;
+	uint32_t timestamp;
+	uint8_t type; /* enum fail_type value; one byte keeps the record at 16 bytes */
+	uint8_t rsvd0;
+	uint16_t failure_details1; /* if DPC, DPC sts reg of root port */
+	uint16_t failure_details2; /* if DPC, source ID of root port */
+	uint8_t rsvd1;
+	uint8_t rsvd2;
+} __packed;
+
+#define SEL_RECORD_ID			0x01	/* record_id used by all senders */
+#define SEL_PCIE_DEV_ERR		0x20	/* general_info: PCIe device error */
+#define SEL_PCIE_IIO_ERR		0x23	/* general_info: IIO error */
+#define SEL_PCIE_DEV_FAIL_ID		0x29	/* general_info: PCIe device failure */
+
+/* PCIE Unified Messages */
+
+/* PCIE CE */
+#define RECEIVER_ERROR			0x00
+#define BAD_TLP				0x01
+#define BAD_DLLP			0x02
+#define REPLAY_TIME_OUT			0x03
+#define REPLAY_NUMBER_ROLLOVER		0x04
+#define ADVISORY_NONFATAL_ERROR_STATUS	0x05
+#define CORRECTED_INTERNAL_ERROR_STATUS	0x06
+#define HEADER_LOG_OVERFLOW_STATUS	0x07
+
+/* PCIE UCE */
+#define PCI_EXPRESS_DATA_LINK_PROTOCOL_ERROR						0x20
+#define SURPRISE_DOWN_ERROR								0x21
+#define RECEIVED_PCI_EXPRESS_POISONED_TLP						0x22
+#define PCI_EXPRESS_FLOW_CONTROL_PROTOCOL_ERROR						0x23
+#define COMPLETION_TIMEOUT_ON_NP_TRANSACTIONS_OUTSTANDING_ON_PCI_EXPRESS_DMI		0x24
+#define RECEIVED_A_REQUEST_FROM_A_DOWNSTREAM_COMPONENT_THAT_IS_TO_BE_COMPLETER_ABORTED	0x25
+#define RECEIVED_PCI_EXPRESS_UNEXPECTED_COMPLETION					0x26
+#define PCI_EXPRESS_RECEIVER_OVERFLOW							0x27
+#define PCI_EXPRESS_MALFORMED_TLP							0x28
+#define ECRC_ERROR_STATUS								0x29
+#define RECEIVED_A_REQUEST_FROM_A_DOWNSTREAM_COMPONENT_THAT_IS_UNSUPPORTED		0x2A
+#define ACS_VIOLATION									0x2B
+#define UNCORRECTABLE_INTERNAL_ERROR_STATUS						0x2C
+#define MC_BLOCKED_TLP									0x2D
+#define ATOMICOP_EGRESS_BLOCKED_STATUS							0x2E
+#define TLP_PREFIX_BLOCKED_ERROR_STATUS							0x2F
+#define POISONED_TLP_EGRESS_BLOCKED							0x30
+
+/* Root error status (from PCIE spec) */
+#define RECEIVED_ERR_COR_MESSAGE_FROM_DOWNSTREAM_DEVICE					0x50
+#define RECEIVED_ERR_NONFATAL_MESSAGE_FROM_DOWNSTREAM_DEVICE				0x51
+#define RECEIVED_ERR_FATAL_MESSAGE_FROM_DOWNSTREAM_DEVICE				0x52
+
+/* DPC Trigger Reason */
+#define DPC_WAS_TRIGGERED_DUE_TO_AN_UNMASKED_UNCORRECTABLE_ERROR			0x53
+#define DPC_WAS_TRIGGERED_DUE_TO_RECEIVING_AN_ERR_NONFATAL				0x54
+#define DPC_WAS_TRIGGERED_DUE_TO_RECEIVING_AN_ERR_FATAL					0x55
+#define DPC_WAS_TRIGGERED_DUE_TO_RP_PIO_ERROR						0x56
+#define DPC_WAS_TRIGGERED_DUE_TO_THE_DPC_SOFTWARE_TRIGGER_BIT				0x57
+
+#define OUTBOUND_SWITCH_FIFO_DATA_PARITY_ERROR_DETECTED					0x80
+#define SENT_A_PCI_EXPRESS_COMPLETER_ABORT						0x81
+#define SENT_A_PCI_EXPRESS_UNSUPPORTED_REQUEST						0x82
+#define RECEIVED_COMPLETER_ABORT							0x83
+#define RECEIVED_UNSUPPORTED_REQUEST_COMPLETION_STATUS_FROM_DOWNSTREAM_DEVICE		0x84
+#define RECEIVED_MSI_WRITES_GREATER_THAN_A_DWORD					0x85
+#define OUTBOUND_POISONED_DATA								0x86
+#define PERR_NON_AER									0xA0
+#define SERR_NON_AER									0xA1
+
 enum cb_err ipmi_set_post_start(const int port);
 enum cb_err ipmi_set_cmos_clear(void);
+
+void ipmi_send_to_bmc(unsigned char *data, size_t size);
+void ipmi_send_sel_pcie_dev_err(pci_devfn_t bdf, uint16_t prmry_cnt, uint8_t sec_id,
+	uint8_t prmry_id);
+void ipmi_send_sel_pcie_dev_fail(uint16_t sts_reg, uint16_t src_id, enum fail_type code);
+void ipmi_send_sel_iio_err(uint8_t iio_stack_num, uint8_t err_id);
 #endif
diff --git a/src/drivers/ipmi/ocp/ipmi_sel.c b/src/drivers/ipmi/ocp/ipmi_sel.c
new file mode 100644
index 0000000..55c0c3e
--- /dev/null
+++ b/src/drivers/ipmi/ocp/ipmi_sel.c
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include <console/console.h>
+#include <drivers/ipmi/ipmi_if.h>
+#include <string.h>
+
+#include "ipmi_ocp.h"
+
+static int ipmi_add_sel_entry(int port, unsigned char *data, int size)
+{
+	return ipmi_message(port, IPMI_NETFN_STORAGE, 0, IPMI_ADD_SEL_ENTRY, data, size,
+				    NULL, 0); /* presumably "no response buffer" - confirm */
+}
+
+/* Hand a SEL record to the BMC at port BMC_KCS_BASE; a no-op unless IPMI_KCS is enabled. */
+void ipmi_send_to_bmc(unsigned char *data, size_t size)
+{
+	if (!CONFIG(IPMI_KCS))
+		return;
+	_Static_assert(CONFIG_BMC_KCS_BASE != 0,
+		"\tBMC_ERROR: Unable to send record: Port #:0\n");
+	ipmi_add_sel_entry(CONFIG_BMC_KCS_BASE, data, size);
+}
+
+void ipmi_send_sel_iio_err(uint8_t iio_stack_num, uint8_t err_id)
+{
+	struct ipmi_sel_iio_err rec = {
+		.record_id = SEL_RECORD_ID,
+		.record_type = CONFIG_RAS_SEL_VENDOR_ID,
+		.general_info = SEL_PCIE_IIO_ERR,
+		.iio_stack_num = iio_stack_num,
+		.iio_err_id = err_id,
+	}; /* every member not named above is sent as zero */
+
+	ipmi_send_to_bmc((unsigned char *)&rec, sizeof(rec));
+	printk(BIOS_DEBUG, "\tsending PCIE IIO device error record to BMC\n");
+	printk(BIOS_DEBUG, "\tstack # = %x\n", iio_stack_num);
+}
+
+/* Build a PCIe device error SEL record for the device at bdf and send it to the BMC. */
+void ipmi_send_sel_pcie_dev_err(pci_devfn_t bdf, uint16_t prmry_cnt, uint8_t sec_id,
+				uint8_t prmry_id)
+{
+	struct ipmi_sel_pcie_dev_err ubslp = {
+		.record_id = SEL_RECORD_ID,
+		.record_type = CONFIG_RAS_SEL_VENDOR_ID,
+		.general_info = SEL_PCIE_DEV_ERR,
+		.timestamp = 0, /* BMC will apply timestamp */
+		.aux_loc = 0,
+		.bdf.bus = (bdf >> 20) & 0xff,	/* bits 27:20; shifts avoid a bit-field overlay */
+		.bdf.dev = (bdf >> 15) & 0x1f,	/* bits 19:15 */
+		.bdf.func = (bdf >> 12) & 0x7,	/* bits 14:12 */
+		.primary_err_count = prmry_cnt,
+		.primary_id = prmry_id,
+		.secondary_id = sec_id,
+	};
+
+	ipmi_send_to_bmc((unsigned char *)&ubslp, sizeof(ubslp));
+	printk(BIOS_DEBUG, "\tsending PCIE device error record to BMC\n");
+	printk(BIOS_DEBUG, "\tbdf = %x:%x:%x\n", ubslp.bdf.bus, ubslp.bdf.dev, ubslp.bdf.func);
+	printk(BIOS_DEBUG, "\tubslp.primary_id = %x\n", ubslp.primary_id);
+	printk(BIOS_DEBUG, "\tsecondary_id = %x\n", ubslp.secondary_id);
+}
+
+/* Build a PCIe device failure SEL record from the given status/source ID and send it. */
+void ipmi_send_sel_pcie_dev_fail(uint16_t sts_reg, uint16_t src_id, enum fail_type code)
+{
+	struct ipmi_sel_pcie_dev_fail rec = {
+		.record_id = SEL_RECORD_ID,
+		.record_type = CONFIG_RAS_SEL_VENDOR_ID,
+		.general_info = SEL_PCIE_DEV_FAIL_ID,
+		.timestamp = 0, /* BMC will apply timestamp */
+		.type = code,
+		.failure_details1 = sts_reg,
+		.failure_details2 = src_id,
+	};
+
+	ipmi_send_to_bmc((unsigned char *)&rec, sizeof(rec));
+	printk(BIOS_DEBUG, "\tsending PCI device FAILURE record to BMC\n");
+	printk(BIOS_DEBUG, "\terror_code = %x, src_id = %x\n", rec.type, rec.failure_details2);
+}