soc/intel/cmn/cse: Handle EOP completion asynchronously

coreboot supports three instances of sending EOP:
1. At CSE `.final' device operation
2. Early as with Alder Lake in chip_operations.init if
   `SOC_INTEL_CSE_SEND_EOP_EARLY' is selected
3. At BS_PAYLOAD_BOOT as designed for Meteor Lake if
   `SOC_INTEL_CSE_SEND_EOP_LATE' is selected

Currently, Alder Lake uses #3 as it results in better and more stable
boot time. However, what would deliver even better result is to not
actively wait for CSE completion.

This patch introduces a new `SOC_INTEL_CSE_SEND_EOP_ASYNC' Kconfig
which split the action of sending EOP request and receiving EOP
completion response from the CSE.

This patch used in conjunction with #1 can significantly
improves the overall boot time on a Raptor Lake design. For example
`SOC_INTEL_CSE_SEND_EOP_ASYNC' on a skolas board can deliver up to 36
ms boot time improvement as illustrated below.

   |    #     | Late EOP | Async EOP |
   |----------+----------+-----------|
   |    1     | 1020.052 |   971.272 |
   |    2     | 1015.911 |   971.821 |
   |    3     | 1038.415 |  1021.841 |
   |    4     | 1020.657 |   993.751 |
   |    5     | 1065.128 |  1020.951 |
   |    6     | 1037.859 |  1023.326 |
   |    7     | 1042.010 |   984.412 |
   |----------+----------+-----------|
   | Mean     |  1034.29 |    998.20 |
   | Variance |   4.76 % |    5.21 % |

The improvement is not stable but comparing coreboot and FSP
performance timestamps demonstrate that the slowness is caused by a
lower memory frequency (SaGv point) at early boot which is not an
issue addressed by this patch.

We also observe some improvement on an Alder Lake design. For example,
the same configuration on a kano board can deliver up to 10 ms boot time
improvement as illustrated below.

   |        # | Late EOP | Async EOP |
   |----------+----------+-----------|
   |        0 | 1067.719 |  1050.106 |
   |        1 | 1058.263 |  1056.836 |
   |        2 | 1064.091 |  1056.709 |
   |        3 | 1068.614 |  1055.042 |
   |        4 | 1065.749 |  1056.732 |
   |        5 | 1069.838 |  1057.846 |
   |        6 | 1066.897 |  1053.548 |
   |        7 | 1060.850 |  1051.911 |
   |----------+----------+-----------|
   |     Mean |  1065.25 |   1054.84 |

The improvement is more limited on kano because a longer PCIe
initialization delays EOP in the Late EOP configuration which make it
faster to complete.

CSME team confirms that:
1. End-Of-Post is a blocking command in the sense that BIOS is
   requested to wait for the command completion before loading the OS or
   second stage bootloader.
2. The BIOS is not required to actively wait for completion of the
   command and can perform other operations in the meantime as long as
   they do not involve HECI commands.

On Raptor Lake, coreboot does not send any HECI command after
End-Of-Post.  FSP-s code review did not reveal any HECI command being
sent as part of the `AFTER_PCI_ENUM', `READY_TO_BOOT' or
`END_OF_FIRMWARE' notifications.

If any HECI send and receive command has been sent the extra code
added in `cse_receive_eop()' should catch it.

According to commit 387ec919d9f7 ("soc/intel/alderlake: Select
SOC_INTEL_CSE_SEND_EOP_LATE"), FSP-silicon can sometimes (on the first
boot after flashing of a Marasov board for instance) request coreboot
to perform a global request out of AFTER_PCI_ENUM notification. Global
request relies on a HECI command. Even though, we tested that it does
not create any issue, `SOC_INTEL_CSE_SEND_EOP_ASYNC' flag should not
be associated to the `SOC_INTEL_CSE_SEND_EOP_EARLY' flag to prevent
potential a global reset command to "conflict" with the EOP command.

BUG=b:276339544
BRANCH=firmware-brya-14505.B
TEST=Tests on brya0 with and `SOC_INTEL_CSE_SEND_EOP_ASYNC' show
     End-Of-Post sent soon after FSP-s and EOP message receive at
     `BS_PAYLOAD_BOOT'.  Verify robustness by injecting a
     `GET_BOOT_STATE' HECI command with or without `heci_reset'. The
     implementation always successfully completed the EOP before
     moving to the payload. As expected, the boot time benefit of the
     asynchronous solution was under some injection scenario
     undermined by this unexpected HECI command.

Change-Id: Ib09dcf9140eb8a00807a09e2af711021df4b416f
Signed-off-by: Jeremy Compostella <jeremy.compostella@intel.com>
Reviewed-on: https://review.coreboot.org/c/coreboot/+/73619
Reviewed-by: Tarun Tuli <taruntuli@google.com>
Reviewed-by: Subrata Banik <subratabanik@google.com>
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
Reviewed-by: Nick Vaccaro <nvaccaro@google.com>
diff --git a/src/soc/intel/common/block/cse/Kconfig b/src/soc/intel/common/block/cse/Kconfig
index 72e7a8e..55c9e79 100644
--- a/src/soc/intel/common/block/cse/Kconfig
+++ b/src/soc/intel/common/block/cse/Kconfig
@@ -67,6 +67,25 @@
 	  Starting with Jasper Lake, coreboot sends EOP before loading payload hence, this
 	  config is applicable for those platforms.
 
+config SOC_INTEL_CSE_SEND_EOP_ASYNC
+	bool
+	depends on SOC_INTEL_COMMON_BLOCK_CSE
+	depends on !SOC_INTEL_CSE_SEND_EOP_LATE
+	depends on !SOC_INTEL_CSE_SEND_EOP_EARLY
+	help
+	  Use this config to handle End Of Post (EOP) completion
+	  asynchronously. The EOP command is sent first and the result
+	  is checked later leaving time to CSE to complete the
+	  operation while coreboot perform other activities.
+	  Performing EOP asynchronously reduces the time spent
+	  actively waiting for command completion which can have a
+	  significant impact on boot time.
+
+	  Using this asynchronous approach comes with the limitation
+	  that no HECI command should be sent between the time the EOP
+	  request is posted (at CSE .final device operation) and the
+	  time coreboot check for its completion (BS_PAYLOAD_LOAD).
+
 config SOC_INTEL_CSE_LITE_SKU
 	bool
 	default n
diff --git a/src/soc/intel/common/block/cse/cse.c b/src/soc/intel/common/block/cse/cse.c
index 9902094..6c4bb73 100644
--- a/src/soc/intel/common/block/cse/cse.c
+++ b/src/soc/intel/common/block/cse/cse.c
@@ -1407,7 +1407,8 @@
  */
 void cse_late_finalize(void)
 {
-	if (!CONFIG(SOC_INTEL_CSE_SEND_EOP_LATE))
+	if (!CONFIG(SOC_INTEL_CSE_SEND_EOP_LATE) &&
+	    !CONFIG(SOC_INTEL_CSE_SEND_EOP_ASYNC))
 		return;
 
 	if (!CONFIG(USE_FSP_NOTIFY_PHASE_READY_TO_BOOT))
@@ -1431,6 +1432,14 @@
 	if (CONFIG(SOC_INTEL_CSE_SET_EOP))
 		cse_send_end_of_post();
 
+	/*
+	 * In asynchronous mode, the EOP command has most likely not been
+	 * completed yet. Finalization steps will be run once the EOP command
+	 * has successfully been completed.
+	 */
+	if (CONFIG(SOC_INTEL_CSE_SEND_EOP_ASYNC))
+		return;
+
 	if (!CONFIG(USE_FSP_NOTIFY_PHASE_READY_TO_BOOT))
 		cse_final_ready_to_boot();
 
diff --git a/src/soc/intel/common/block/cse/cse_eop.c b/src/soc/intel/common/block/cse/cse_eop.c
index 53a4fa5..16cf523 100644
--- a/src/soc/intel/common/block/cse/cse_eop.c
+++ b/src/soc/intel/common/block/cse/cse_eop.c
@@ -67,13 +67,54 @@
 	return CSE_CMD_RESULT_SUCCESS;
 }
 
-static enum cse_cmd_result cse_send_eop(void)
+static enum cse_cmd_result cse_receive_eop(void)
 {
-	enum cse_tx_rx_status ret;
 	enum {
 		EOP_REQUESTED_ACTION_CONTINUE = 0,
 		EOP_REQUESTED_ACTION_GLOBAL_RESET = 1,
 	};
+	enum cse_tx_rx_status ret;
+	struct end_of_post_resp {
+		struct mkhi_hdr hdr;
+		uint32_t requested_actions;
+	} __packed resp = {};
+	size_t resp_size = sizeof(resp);
+
+	ret = heci_receive(&resp, &resp_size);
+	if (ret)
+		return decode_heci_send_receive_error(ret);
+
+	if (resp.hdr.group_id != MKHI_GROUP_ID_GEN ||
+	    resp.hdr.command != MKHI_END_OF_POST) {
+		printk(BIOS_ERR, "HECI: EOP Unexpected response group or command.\n");
+		if (CONFIG(SOC_INTEL_CSE_SEND_EOP_ASYNC))
+			printk(BIOS_ERR, "HECI: It could be a HECI command conflict.\n");
+		return CSE_CMD_RESULT_ERROR;
+	}
+
+	if (resp.hdr.result) {
+		printk(BIOS_ERR, "HECI: EOP Resp Failed: %u\n", resp.hdr.result);
+		return CSE_CMD_RESULT_ERROR;
+	}
+
+	printk(BIOS_INFO, "CSE: EOP requested action: ");
+
+	switch (resp.requested_actions) {
+	case EOP_REQUESTED_ACTION_GLOBAL_RESET:
+		printk(BIOS_INFO, "global reset\n");
+		return CSE_CMD_RESULT_GLOBAL_RESET_REQUESTED;
+	case EOP_REQUESTED_ACTION_CONTINUE:
+		printk(BIOS_INFO, "continue boot\n");
+		return CSE_CMD_RESULT_SUCCESS;
+	default:
+		printk(BIOS_INFO, "unknown %u\n", resp.requested_actions);
+		return CSE_CMD_RESULT_ERROR;
+	}
+}
+
+static enum cse_cmd_result cse_send_eop(void)
+{
+	enum cse_tx_rx_status ret;
 	struct end_of_post_msg {
 		struct mkhi_hdr hdr;
 	} __packed msg = {
@@ -82,11 +123,6 @@
 			.command = MKHI_END_OF_POST,
 		},
 	};
-	struct end_of_post_resp {
-		struct mkhi_hdr hdr;
-		uint32_t requested_actions;
-	} __packed resp = {};
-	size_t resp_size = sizeof(resp);
 
 	/* For a CSE-Lite SKU, if the CSE is running RO FW and the board is
 	   running vboot in recovery mode, the CSE is expected to be in SOFT
@@ -119,28 +155,22 @@
 
 	printk(BIOS_INFO, "HECI: Sending End-of-Post\n");
 
-	ret = heci_send_receive(&msg, sizeof(msg), &resp, &resp_size, HECI_MKHI_ADDR);
+	ret = heci_send(&msg, sizeof(msg), BIOS_HOST_ADDR, HECI_MKHI_ADDR);
 	if (ret)
 		return decode_heci_send_receive_error(ret);
 
-	if (resp.hdr.result) {
-		printk(BIOS_ERR, "HECI: EOP Resp Failed: %u\n", resp.hdr.result);
-		return CSE_CMD_RESULT_ERROR;
-	}
+	return CSE_CMD_RESULT_SUCCESS;
+}
 
-	printk(BIOS_INFO, "CSE: EOP requested action: ");
+static enum cse_cmd_result cse_send_and_receive_eop(void)
+{
+	enum cse_cmd_result ret;
 
-	switch (resp.requested_actions) {
-	case EOP_REQUESTED_ACTION_GLOBAL_RESET:
-		printk(BIOS_INFO, "global reset\n");
-		return CSE_CMD_RESULT_GLOBAL_RESET_REQUESTED;
-	case EOP_REQUESTED_ACTION_CONTINUE:
-		printk(BIOS_INFO, "continue boot\n");
-		return CSE_CMD_RESULT_SUCCESS;
-	default:
-		printk(BIOS_INFO, "unknown %u\n", resp.requested_actions);
-		return CSE_CMD_RESULT_ERROR;
-	}
+	ret = cse_send_eop();
+	if (ret != CSE_CMD_RESULT_SUCCESS)
+		return ret;
+
+	return cse_receive_eop();
 }
 
 static enum cse_cmd_result cse_send_cmd_retries(enum cse_cmd_result (*cse_send_command)(void))
@@ -199,11 +229,12 @@
 	}
 }
 
-static void do_send_end_of_post(void)
+static void do_send_end_of_post(bool wait_for_completion)
 {
-	static bool eop_sent = false;
+	static bool eop_sent = false, eop_complete = false;
+	enum cse_cmd_result ret;
 
-	if (eop_sent) {
+	if (eop_complete) {
 		printk(BIOS_WARNING, "EOP already sent\n");
 		return;
 	}
@@ -222,26 +253,40 @@
 		return;
 	}
 
-	set_cse_device_state(PCH_DEVFN_CSE, DEV_ACTIVE);
+	if (!eop_sent) {
+		set_cse_device_state(PCH_DEVFN_CSE, DEV_ACTIVE);
+		timestamp_add_now(TS_ME_END_OF_POST_START);
+		ret = cse_send_cmd_retries(cse_send_eop);
+		if (ret == CSE_CMD_RESULT_SUCCESS)
+			eop_sent = true;
+	}
 
-	timestamp_add_now(TS_ME_END_OF_POST_START);
-	handle_cse_eop_result(cse_send_cmd_retries(cse_send_eop));
+	if (!wait_for_completion)
+		return;
+
+	set_cse_device_state(PCH_DEVFN_CSE, DEV_ACTIVE);
+	ret = cse_receive_eop();
+	if (ret != CSE_CMD_RESULT_SUCCESS) {
+		ret = cse_send_cmd_retries(cse_send_and_receive_eop);
+		handle_cse_eop_result(ret);
+	}
 	timestamp_add_now(TS_ME_END_OF_POST_END);
 
 	set_cse_device_state(PCH_DEVFN_CSE, DEV_IDLE);
 
-	eop_sent = true;
+	eop_complete = true;
 }
 
 void cse_send_end_of_post(void)
 {
-	return do_send_end_of_post();
+	return do_send_end_of_post(!CONFIG(SOC_INTEL_CSE_SEND_EOP_ASYNC));
 }
 
 static void send_cse_eop_with_late_finalize(void *unused)
 {
-	do_send_end_of_post();
-	if (CONFIG(SOC_INTEL_CSE_SEND_EOP_LATE))
+	do_send_end_of_post(true);
+	if (CONFIG(SOC_INTEL_CSE_SEND_EOP_LATE) ||
+	    CONFIG(SOC_INTEL_CSE_SEND_EOP_ASYNC))
 		cse_late_finalize();
 }