libpayload: Add x86_64 (64-bit) support

This patch introduces x86_64 (64-bit) support to the payload, building
upon the existing x86 (32-bit) architecture. Files necessary for 64-bit
compilation are now guarded by the `CONFIG_LP_ARCH_X86_64` Kconfig
option.

BUG=b:242829490
TEST=Verified all valid coreboot and payload combinations with this
patch.

Payload entry point behavior with this patch:

+----------------+--------------------+----------------------------+
| LP_ARCH_X86_64 | Payload Entry Mode | Description                |
+----------------+--------------------+----------------------------+
| No             | 32-bit             | Direct protected mode init |
+----------------+--------------------+----------------------------+
| Yes            | 32-bit             | Protected to long mode     |
+----------------+--------------------+----------------------------+
| Yes            | 64-bit             | Long mode initialization   |
+----------------+--------------------+----------------------------+

Change-Id: I69fda47bedf1a14807b1515c4aed6e3a1d5b8585
Signed-off-by: Subrata Banik <subratabanik@google.com>
Reviewed-on: https://review.coreboot.org/c/coreboot/+/81968
Reviewed-by: Julius Werner <jwerner@chromium.org>
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
diff --git a/payloads/libpayload/Kconfig b/payloads/libpayload/Kconfig
index 39d316b..0c07ecd 100644
--- a/payloads/libpayload/Kconfig
+++ b/payloads/libpayload/Kconfig
@@ -106,17 +106,22 @@
 
 choice
         prompt "Target Architecture"
-        default ARCH_X86
+        default ARCH_X86_32
 
 config ARCH_ARM
         bool "ARM"
         help
           Support the ARM architecture
 
-config ARCH_X86
-        bool "x86"
+config ARCH_X86_32
+        bool "x86_32"
         help
-          Support the x86 architecture
+          Support the x86_32 architecture
+
+config ARCH_X86_64
+        bool "x86_64"
+        help
+          Support the x86_64 architecture
 
 config ARCH_ARM64
         bool "ARM64"
@@ -133,6 +138,12 @@
 
 endchoice
 
+config ARCH_X86
+	bool
+	default y if ARCH_X86_32 || ARCH_X86_64
+	help
+	  Support the x86 architecture
+
 config MULTIBOOT
 	bool "Multiboot header support"
 	depends on ARCH_X86
diff --git a/payloads/libpayload/Makefile b/payloads/libpayload/Makefile
index 71d60bc..afe38dc 100644
--- a/payloads/libpayload/Makefile
+++ b/payloads/libpayload/Makefile
@@ -118,7 +118,8 @@
 # override here.
 ARCH-$(CONFIG_LP_ARCH_ARM)     := arm
 ARCH-$(CONFIG_LP_ARCH_ARM64)   := arm64
-ARCH-$(CONFIG_LP_ARCH_X86)     := x86_32
+ARCH-$(CONFIG_LP_ARCH_X86_32)  := x86_32
+ARCH-$(CONFIG_LP_ARCH_X86_64)  := x86_64
 ARCH-$(CONFIG_LP_ARCH_MOCK)    := mock
 
 # Five cases where we don't need fully populated $(obj) lists:
diff --git a/payloads/libpayload/Makefile.mk b/payloads/libpayload/Makefile.mk
index d654995..d5a9a81 100644
--- a/payloads/libpayload/Makefile.mk
+++ b/payloads/libpayload/Makefile.mk
@@ -96,7 +96,7 @@
 	cmp $@ $< 2>/dev/null || cp $< $@
 
 library-targets = $(addsuffix .a,$(addprefix $(obj)/,$(libraries))) $(obj)/libpayload.a
-lib: $$(library-targets)
+lib: $$(library-targets) $(obj)/libpayload.ldscript
 
 extract_nth=$(word $(1), $(subst |, ,$(2)))
 
@@ -115,11 +115,15 @@
 	printf "    AR         $(subst $(CURDIR)/,,$(@))\n"
 	printf "create $@\n$(foreach objc,$(filter-out %.a,$^),addmod $(objc)\n)$(foreach lib,$(filter %.a,$^),addlib $(lib)\n)save\nend\n" | $(AR) -M
 
+$(obj)/libpayload.ldscript: arch/$(ARCHDIR-y)/libpayload.ldscript
+	printf "    LDSCRIPT   $(subst $(CURDIR)/,,$(@))\n"
+	$(CC) $(CFLAGS) $(EXTRA_CFLAGS) -E -P -x assembler-with-cpp -undef -o $@ $<
+
 install: real-target
 	printf "    INSTALL    $(DESTDIR)/libpayload/lib\n"
 	install -m 755 -d $(DESTDIR)/libpayload/lib
 	install -m 644 $(library-targets) $(DESTDIR)/libpayload/lib/
-	install -m 644 arch/$(ARCHDIR-y)/libpayload.ldscript $(DESTDIR)/libpayload/lib/
+	install -m 644 $(obj)/libpayload.ldscript $(DESTDIR)/libpayload/lib/
 	install -m 755 -d $(DESTDIR)/libpayload/lib/$(ARCHDIR-y)
 	printf "    INSTALL    $(DESTDIR)/libpayload/include\n"
 	install -m 755 -d $(DESTDIR)/libpayload/include
diff --git a/payloads/libpayload/arch/x86/Makefile.mk b/payloads/libpayload/arch/x86/Makefile.mk
index d2c33b4..35e69cb 100644
--- a/payloads/libpayload/arch/x86/Makefile.mk
+++ b/payloads/libpayload/arch/x86/Makefile.mk
@@ -27,16 +27,26 @@
 ##
 
 ifneq ($(CONFIG_LP_COMPILER_LLVM_CLANG),y)
+ifeq ($(CONFIG_LP_ARCH_X86_64),y)
+CFLAGS += -mpreferred-stack-boundary=4
+else
 CFLAGS += -mpreferred-stack-boundary=2
 endif
+endif
 
-libc-y += head.S
+libc-$(CONFIG_LP_ARCH_X86_32)  += head.S
+libc-$(CONFIG_LP_ARCH_X86_64)  += head_64.S
+libc-$(CONFIG_LP_ARCH_X86_64) += pt.S
 libc-y += main.c sysinfo.c
 libc-y += timer.c coreboot.c util.S
-libc-y += exec.S virtual.c
+libc-y += virtual.c
 libc-y += selfboot.c cache.c
-libc-y += exception_asm.S exception.c
+libc-y += exception.c
 libc-y += delay.c
+libc-$(CONFIG_LP_ARCH_X86_32) += exec.c
+libc-$(CONFIG_LP_ARCH_X86_32) += exec.S
+libc-$(CONFIG_LP_ARCH_X86_32) += exception_asm.S
+libc-$(CONFIG_LP_ARCH_X86_64) += exception_asm_64.S
 
 # Will fall back to default_memXXX() in libc/memory.c if GPL not allowed.
 libc-$(CONFIG_LP_GPL) += string.c
diff --git a/payloads/libpayload/arch/x86/exception.c b/payloads/libpayload/arch/x86/exception.c
index 23d1b05..a43a993 100644
--- a/payloads/libpayload/arch/x86/exception.c
+++ b/payloads/libpayload/arch/x86/exception.c
@@ -34,7 +34,13 @@
 
 #define IF_FLAG				(1 << 9)
 
-u32 exception_stack[0x400] __attribute__((aligned(8)));
+#if CONFIG(LP_ARCH_X86_64)
+#define REGISTER_FMT "0x%016zx"
+#else
+#define REGISTER_FMT "0x%08zx"
+#endif
+
+u8 exception_stack[0x400 * sizeof(u32)] __aligned(16);	/* 4 KiB, same size as the former u32[0x400] */
 
 static interrupt_handler handlers[256];
 
@@ -143,17 +149,27 @@
 		break;
 	}
 	printf("\n");
-	printf("EIP:    0x%08x\n", exception_state->regs.eip);
+	printf("REG_IP:    " REGISTER_FMT "\n", exception_state->regs.reg_ip);
+	printf("REG_FLAGS: " REGISTER_FMT "\n", exception_state->regs.reg_flags);
+	printf("REG_AX:    " REGISTER_FMT "\n", exception_state->regs.reg_ax);
+	printf("REG_BX:    " REGISTER_FMT "\n", exception_state->regs.reg_bx);
+	printf("REG_CX:    " REGISTER_FMT "\n", exception_state->regs.reg_cx);
+	printf("REG_DX:    " REGISTER_FMT "\n", exception_state->regs.reg_dx);
+	printf("REG_SP:    " REGISTER_FMT "\n", exception_state->regs.reg_sp);
+	printf("REG_BP:    " REGISTER_FMT "\n", exception_state->regs.reg_bp);
+	printf("REG_SI:    " REGISTER_FMT "\n", exception_state->regs.reg_si);
+	printf("REG_DI:    " REGISTER_FMT "\n", exception_state->regs.reg_di);
+#if CONFIG(LP_ARCH_X86_64)
+	printf("REG_R8:    " REGISTER_FMT "\n", exception_state->regs.reg_r8);
+	printf("REG_R9:    " REGISTER_FMT "\n", exception_state->regs.reg_r9);
+	printf("REG_R10:   " REGISTER_FMT "\n", exception_state->regs.reg_r10);
+	printf("REG_R11:   " REGISTER_FMT "\n", exception_state->regs.reg_r11);
+	printf("REG_R12:   " REGISTER_FMT "\n", exception_state->regs.reg_r12);
+	printf("REG_R13:   " REGISTER_FMT "\n", exception_state->regs.reg_r13);
+	printf("REG_R14:   " REGISTER_FMT "\n", exception_state->regs.reg_r14);
+	printf("REG_R15:   " REGISTER_FMT "\n", exception_state->regs.reg_r15);
+#endif
 	printf("CS:     0x%04x\n", exception_state->regs.cs);
-	printf("EFLAGS: 0x%08x\n", exception_state->regs.eflags);
-	printf("EAX:    0x%08x\n", exception_state->regs.eax);
-	printf("ECX:    0x%08x\n", exception_state->regs.ecx);
-	printf("EDX:    0x%08x\n", exception_state->regs.edx);
-	printf("EBX:    0x%08x\n", exception_state->regs.ebx);
-	printf("ESP:    0x%08x\n", exception_state->regs.esp);
-	printf("EBP:    0x%08x\n", exception_state->regs.ebp);
-	printf("ESI:    0x%08x\n", exception_state->regs.esi);
-	printf("EDI:    0x%08x\n", exception_state->regs.edi);
 	printf("DS:     0x%04x\n", exception_state->regs.ds);
 	printf("ES:     0x%04x\n", exception_state->regs.es);
 	printf("SS:     0x%04x\n", exception_state->regs.ss);
@@ -164,7 +180,7 @@
 void exception_dispatch(void)
 {
 	die_if(exception_state->vector >= ARRAY_SIZE(handlers),
-	       "Invalid vector %u\n", exception_state->vector);
+	       "Invalid vector %zu\n", exception_state->vector);
 
 	u8 vec = exception_state->vector;
 
@@ -184,7 +200,7 @@
 	       vec);
 
 	dump_exception_state();
-	dump_stack(exception_state->regs.esp, 512);
+	dump_stack(exception_state->regs.reg_sp, 512);
 	/* We don't call apic_eoi because we don't want to ack the interrupt and
 	   allow another interrupt to wake the processor. */
 	halt();
@@ -197,6 +213,10 @@
 
 void exception_init(void)
 {
+	/* TODO: Add exception init code for x64, currently only supporting 32-bit code */
+	if (CONFIG(LP_ARCH_X86_64))
+		return;
+
 	exception_stack_end = exception_stack + ARRAY_SIZE(exception_stack);
 	exception_init_asm();
 }
@@ -206,6 +226,17 @@
 	handlers[vector] = handler;
 }
 
+#if CONFIG(LP_ARCH_X86_64)
+static uint64_t eflags(void)
+{
+	uint64_t eflags;
+	asm volatile(
+		"pushfq\n\t"
+		"popq %0\n\t"
+	: "=rm" (eflags));
+	return eflags;
+}
+#else
 static uint32_t eflags(void)
 {
 	uint32_t eflags;
@@ -215,6 +246,7 @@
 	: "=rm" (eflags));
 	return eflags;
 }
+#endif
 
 void enable_interrupts(void)
 {
diff --git a/payloads/libpayload/arch/x86/exception_asm_64.S b/payloads/libpayload/arch/x86/exception_asm_64.S
new file mode 100644
index 0000000..ee4e9e5
--- /dev/null
+++ b/payloads/libpayload/arch/x86/exception_asm_64.S
@@ -0,0 +1,221 @@
+/*
+ *
+ * Copyright 2024 Google Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+	.align 16
+	.global exception_stack_end
+exception_stack_end:
+	.quad 0
+	.global exception_state
+exception_state:
+	.quad 0
+
+/* Some temporary variables which are used while saving exception state. */
+vector:
+	.quad 0
+error_code:
+	.quad 0
+old_rsp:
+	.quad 0
+old_rax:
+	.quad 0
+
+	.align 16
+
+/*
+ * Each exception vector has a small stub associated with it which sets aside
+ * the error code, if any, records which vector we entered from, and calls
+ * the common exception entry point. Some exceptions have error codes and some
+ * don't, so we have a macro for each type.
+ */
+
+	.macro stub num
+exception_stub_\num:
+	movq	$0, error_code
+	movq	$\num, vector
+	jmp	exception_common
+	.endm
+
+	.macro stub_err num
+exception_stub_\num:
+	pop	error_code
+	movq	$\num, vector
+	jmp	exception_common
+	.endm
+
+	.altmacro
+	.macro	user_defined_stubs from, to
+	stub	\from
+	.if	\to-\from
+	user_defined_stubs	%(from+1),\to
+	.endif
+	.endm
+
+	stub 0
+	stub 1
+	stub 2
+	stub 3
+	stub 4
+	stub 5
+	stub 6
+	stub 7
+	stub_err 8
+	stub 9
+	stub_err 10
+	stub_err 11
+	stub_err 12
+	stub_err 13
+	stub_err 14
+	stub 15
+	stub 16
+	stub_err 17
+	stub 18
+	stub 19
+	stub 20
+	stub 21
+	stub 22
+	stub 23
+	stub 24
+	stub 25
+	stub 26
+	stub 27
+	stub 28
+	stub 29
+	stub_err 30
+	stub 31
+	/* Split the macro so we avoid a stack overflow. */
+	user_defined_stubs 32, 63
+	user_defined_stubs 64, 127
+	user_defined_stubs 128, 191
+	user_defined_stubs 192, 255
+
+exception_common:
+
+	/* Return from the exception; long mode requires the 64-bit iretq. */
+	iretq
+
+/*
+ * We need segment selectors for the IDT, so we need to know where things are
+ * in the GDT. We set one up here which is pretty standard and largely copied
+ * from coreboot.
+ */
+	.align 16
+gdt:
+	/* selgdt 0, unused */
+	.word 0x0000, 0x0000
+	.byte 0x00, 0x00, 0x00, 0x00
+
+	/* selgdt 8, unused */
+	.word 0x0000, 0x0000
+	.byte 0x00, 0x00, 0x00, 0x00
+
+	/* selgdt 0x10, flat 4GB code segment */
+	.word 0xffff, 0x0000
+	.byte 0x00, 0x9b, 0xcf, 0x00
+
+	/* selgdt 0x18, flat 4GB data segment */
+	.word 0xffff, 0x0000
+	.byte	0x00, 0x92, 0xcf, 0x00
+
+	/* selgdt 0x20, flat x64 code segment */
+	.word	0xffff, 0x0000
+	.byte	0x00, 0x9b, 0xaf, 0x00
+gdt_end:
+
+/* GDT pointer for use with lgdt */
+.global gdt_ptr
+gdt_ptr:
+	.word	gdt_end - gdt - 1
+	.quad	gdt
+
+	/*
+	 * Record the target and construct the actual entry at init time. This
+	 * is necessary because the linker doesn't want to construct the entry
+	 * for us.
+	 */
+	.macro interrupt_gate target
+	.quad \target
+	.quad \target
+	.endm
+
+	.altmacro
+	.macro	user_defined_gates from, to
+	interrupt_gate	exception_stub_\from
+	.if	\to-\from
+	user_defined_gates	%(from+1),\to
+	.endif
+	.endm
+
+	.align 16
+	.global	idt
+idt:
+	interrupt_gate exception_stub_0
+	interrupt_gate exception_stub_1
+	interrupt_gate exception_stub_2
+	interrupt_gate exception_stub_3
+	interrupt_gate exception_stub_4
+	interrupt_gate exception_stub_5
+	interrupt_gate exception_stub_6
+	interrupt_gate exception_stub_7
+	interrupt_gate exception_stub_8
+	interrupt_gate exception_stub_9
+	interrupt_gate exception_stub_10
+	interrupt_gate exception_stub_11
+	interrupt_gate exception_stub_12
+	interrupt_gate exception_stub_13
+	interrupt_gate exception_stub_14
+	interrupt_gate exception_stub_15
+	interrupt_gate exception_stub_16
+	interrupt_gate exception_stub_17
+	interrupt_gate exception_stub_18
+	interrupt_gate exception_stub_19
+	interrupt_gate exception_stub_20
+	interrupt_gate exception_stub_21
+	interrupt_gate exception_stub_22
+	interrupt_gate exception_stub_23
+	interrupt_gate exception_stub_24
+	interrupt_gate exception_stub_25
+	interrupt_gate exception_stub_26
+	interrupt_gate exception_stub_27
+	interrupt_gate exception_stub_28
+	interrupt_gate exception_stub_29
+	interrupt_gate exception_stub_30
+	interrupt_gate exception_stub_31
+	user_defined_gates 32, 63
+	user_defined_gates 64, 127
+	user_defined_gates 128, 191
+	user_defined_gates 192, 255
+idt_end:
+
+/* IDT pointer for use with lidt */
+idt_ptr:
+	.word idt_end - idt - 1
+	.quad idt
+
+	.global exception_init_asm
+exception_init_asm:
+	ret
diff --git a/payloads/libpayload/libc/exec.c b/payloads/libpayload/arch/x86/exec.c
similarity index 94%
rename from payloads/libpayload/libc/exec.c
rename to payloads/libpayload/arch/x86/exec.c
index ae75a3e..06a5ed2 100644
--- a/payloads/libpayload/libc/exec.c
+++ b/payloads/libpayload/arch/x86/exec.c
@@ -29,8 +29,10 @@
 #include <libpayload-config.h>
 #include <libpayload.h>
 
-#if CONFIG(LP_ARCH_X86)
+#if CONFIG(LP_ARCH_X86_32)
 extern void i386_do_exec(long addr, int argc, char **argv, int *ret);
+#else
+#error "exec does not currently support x86_64."
 #endif
 
 /**
@@ -45,7 +47,7 @@
 {
 	int val = -1;
 
-#if CONFIG(LP_ARCH_X86)
+#if CONFIG(LP_ARCH_X86_32)
 	i386_do_exec(addr, argc, argv, &val);
 #endif
 	return val;
diff --git a/payloads/libpayload/arch/x86/gdb.c b/payloads/libpayload/arch/x86/gdb.c
index 7d29512..ad249b8 100644
--- a/payloads/libpayload/arch/x86/gdb.c
+++ b/payloads/libpayload/arch/x86/gdb.c
@@ -15,6 +15,7 @@
 #include <exception.h>
 #include <gdb.h>
 #include <libpayload.h>
+#include <stddef.h>
 
 static const u8 type_to_signal[] = {
 	[EXC_DE]  = GDB_SIGFPE,
@@ -53,12 +54,15 @@
 
 void gdb_arch_enter(void)
 {
-	u32 *esp;
-
-	asm volatile ("mov %%esp, %0" : "=r"(esp) );
+	u8 *stack_pointer;
+#if CONFIG(LP_ARCH_X86_64)
+	asm volatile ("movq %%rsp, %0" : "=r"(stack_pointer));
+#else
+	asm volatile ("mov %%esp, %0" : "=r"(stack_pointer));
+#endif
 
 	/* Avoid reentrant exceptions, just call the hook if in one already. */
-	if (esp >= exception_stack && esp <= exception_stack_end)
+	if (stack_pointer >= exception_stack && stack_pointer <= exception_stack_end)
 		gdb_exception_hook(EXC_BP);
 	else
 		asm volatile ("int3");
@@ -66,12 +70,12 @@
 
 int gdb_arch_set_single_step(int on)
 {
-	const u32 tf_bit = 1 << 8;
+	const size_t tf_bit = 1 << 8;
 
 	if (on)
-		exception_state->regs.eflags |= tf_bit;
+		exception_state->regs.reg_flags |= tf_bit;
 	else
-		exception_state->regs.eflags &= ~tf_bit;
+		exception_state->regs.reg_flags &= ~tf_bit;
 
 	return 0;
 }
diff --git a/payloads/libpayload/arch/x86/head_64.S b/payloads/libpayload/arch/x86/head_64.S
new file mode 100644
index 0000000..6524274
--- /dev/null
+++ b/payloads/libpayload/arch/x86/head_64.S
@@ -0,0 +1,141 @@
+/*
+ *
+ * Copyright (C) 2024 Google Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#define IA32_EFER	0xC0000080
+#define  EFER_LME	(1 << 8)
+
+	.code32
+	.global _entry
+	.section .text._entry
+	.align 4
+
+/*
+ * WARNING: Critical Code Section - 32/64-bit Compatibility
+ * This code between `_entry` and `jnz _init64` is executed during system initialization.
+ * It MUST function correctly regardless of whether the system is booting in:
+ *   - 32-bit protected mode
+ *   - 64-bit long mode
+ * To achieve this, ONLY use instructions that produce identical binary output in both modes.
+ * Thoroughly test ALL modifications to this section in BOTH 32-bit and 64-bit boot
+ * environments.
+ */
+
+_entry:
+
+	/* Add multiboot header and jump around it when building with multiboot support. */
+#if CONFIG(LP_MULTIBOOT)
+	#include "multiboot_header.inc"
+#endif
+	/* No interrupts, please. */
+	cli
+
+	movl $IA32_EFER, %ecx
+	rdmsr
+	testl $EFER_LME, %eax
+	jnz _init64
+
+	lgdt %cs:gdt_ptr
+
+	/* save pointer to coreboot tables */
+	movl 4(%esp), %eax
+	/*
+	 * NOTE: in protected mode the coreboot table pointer is passed on
+	 * the stack and is read from 4(%esp) above.
+	 */
+	movl %eax, cb_header_ptr
+
+	call init_page_table
+	movl $pm4le, %eax
+
+	/* load identity mapped page tables */
+	movl %eax, %cr3
+
+	/* enable PAE */
+	movl %cr4, %eax
+	btsl $5, %eax
+	movl %eax, %cr4
+
+	/* enable long mode */
+	movl $(IA32_EFER), %ecx
+	rdmsr
+	btsl $8, %eax
+	wrmsr
+
+	/* enable paging */
+	movl %cr0, %eax
+	btsl $31, %eax
+	movl %eax, %cr0
+
+	/* Jump to selgdt 0x20, flat x64 code segment */
+	ljmp $0x20, $_entry64
+
+.code64
+.align 16
+_init64:
+	movabs	$gdt_ptr, %rax
+	lgdt	(%rax)
+
+	/*
+	 * Note: `cb_header_ptr` is passed as the first argument (%rdi),
+	 * following the x86-64 calling convention.
+	 */
+	movq %rdi, cb_header_ptr
+
+	call init_page_table
+	movq $pm4le, %rax
+
+	/* load identity mapped page tables */
+	movq %rax, %cr3
+
+_entry64:
+	/* Store current stack pointer and set up new stack. */
+	movq %rsp, %rax
+	movabs	$_estack, %rsp
+
+	push %rax
+
+	fninit
+	movq %cr0, %rax
+	andq $0xFFFFFFFFFFFFFFFB, %rax	/* clear EM */
+	orq $0x00000022, %rax	/* set MP, NE */
+	movq %rax, %cr0
+
+	movq %cr4, %rax
+	orq $0x00000600, %rax	/* set OSFXSR, OSXMMEXCPT */
+	movq %rax, %cr4
+
+	/* Let's rock. */
+	call start_main
+
+	/* %rax has the return value - pass it on unmolested */
+_leave:
+	/* Restore old stack. */
+	pop %rsp
+
+	/* Return to the original context. */
+	ret
diff --git a/payloads/libpayload/arch/x86/libpayload.ldscript b/payloads/libpayload/arch/x86/libpayload.ldscript
index 0f27ed9..49c7e37 100644
--- a/payloads/libpayload/arch/x86/libpayload.ldscript
+++ b/payloads/libpayload/arch/x86/libpayload.ldscript
@@ -26,8 +26,13 @@
  * SUCH DAMAGE.
  */
 
+#if CONFIG(LP_ARCH_X86_64)
+OUTPUT_FORMAT(elf64-x86-64)
+OUTPUT_ARCH(x86_64)
+#else
 OUTPUT_FORMAT(elf32-i386)
 OUTPUT_ARCH(i386)
+#endif
 
 ENTRY(_entry)
 
diff --git a/payloads/libpayload/arch/x86/pt.S b/payloads/libpayload/arch/x86/pt.S
new file mode 100644
index 0000000..9b085e5
--- /dev/null
+++ b/payloads/libpayload/arch/x86/pt.S
@@ -0,0 +1,149 @@
+/*
+ *
+ * Copyright 2024 Google Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * For reference see "AMD64 Architecture Programmer's Manual Volume 2",
+ * Document 24593-Rev. 3.31-July 2019 Chapter 5.3.4
+ *
+ * Page table attributes: WB, User+Supervisor, Present, Writeable, Accessed, Dirty
+ */
+
+.section .bss
+#define _PRES (1ULL << 0)
+#define _RW   (1ULL << 1)
+#define _US   (1ULL << 2)
+#define _A    (1ULL << 5)
+#define _D    (1ULL << 6)
+#define _PS   (1ULL << 7)
+
+.section .bss.pm4le
+.global pm4le
+.align 4096
+pm4le:
+.skip 8
+
+.section .bss.main_page_table
+.global main_page_table
+.align 4096
+main_page_table:
+.skip 16384	/* setup_2mb fills 2048 entries of 8 bytes each */
+
+.section .bss.extra_page_table
+.global extra_page_table
+.align 4096
+extra_page_table:
+.skip 32
+
+/*
+ * WARNING: 32-bit/64-bit Mode Compatibility for Page Table Initialization
+ * This `init_page_table` function is designed to work in both 32-bit protected
+ * mode AND 64-bit long mode.
+ *
+ * Key Considerations:
+ * - Assembly Instructions:  Use ONLY instructions that have the SAME binary representation
+ *                           in both 32-bit and 64-bit modes.
+ * - `.code64` Directive:  We're compiling with `.code64` to ensure the assembler uses
+ *                         the correct 64-bit version of instructions (e.g., `inc`).
+ * - Register Notation:
+ *     - Use 64-bit register names (like `%rsi`) for register-indirect addressing to avoid
+ *       incorrect address size prefixes.
+ *     - It's safe to use `%esi` with `mov` instructions, as the high 32 bits are zeroed
+ *       in 64-bit mode.
+ *
+ * IMPORTANT:
+ * Thoroughly test ANY changes to this function in BOTH 32-bit and 64-bit boot environments.
+ */
+
+.code64
+.section .text.init_page_table
+.globl init_page_table
+.type init_page_table, @function
+
+init_page_table:
+	mov $0x80000001, %eax
+	cpuid
+	test $(1 << 26), %edx	/* bit set: take the 1GB-page path */
+	jnz setup_1gb
+
+setup_2mb:
+	mov $2048, %edi	/* number of 2MB entries to write */
+	mov $(_PRES + _RW + _US + _PS + _A + _D), %eax
+	mov $0, %ecx
+	mov $main_page_table, %esi
+
+loop_2mb:
+	mov %eax, (%rsi, %rcx, 8)
+	movl $0, 4(%rsi, %rcx, 8)	/* high dword; explicit size, no register operand */
+	add $0x200000, %eax
+	inc %ecx
+	cmp %edi, %ecx
+	jb loop_2mb
+
+	mov $4, %edi	/* four entries, one per 4096 bytes of main_page_table */
+	mov $main_page_table, %eax
+	add $(_PRES + _RW + _US + _A), %eax
+	mov $0, %ecx
+	mov $extra_page_table, %esi
+
+fill_extra_page_table:
+	mov %eax, (%rsi, %rcx, 8)
+	movl $0, 4(%rsi, %rcx, 8)	/* high dword; explicit size, no register operand */
+	add $4096, %eax
+	inc %ecx
+	cmp %edi, %ecx
+	jb fill_extra_page_table
+
+	mov $extra_page_table, %eax
+	jmp leave
+
+setup_1gb:
+	mov $512, %edi	/* number of 1GB entries to write */
+	mov $(_PRES + _RW + _US + _PS + _A + _D), %eax
+	mov $0, %ebx	/* high dword of the entry */
+	mov $0, %ecx
+	mov $main_page_table, %esi
+
+loop_1gb:
+	mov %eax, (%rsi, %rcx, 8)
+	mov %ebx, 4(%rsi, %rcx, 8)
+	add $0x40000000, %eax
+	cmp $0x40000000, %eax
+	ja no_overflow_1gb
+	inc %ebx	/* low dword wrapped: carry into the high dword */
+no_overflow_1gb:
+	inc %ecx
+	cmp %edi, %ecx
+	jb loop_1gb
+
+	mov $main_page_table, %eax
+
+leave:
+	or $(_PRES + _RW + _US + _A), %eax
+	mov %eax, pm4le	/* store the top-level entry pointing at the table in %eax */
+
+	ret
diff --git a/payloads/libpayload/arch/x86/string.c b/payloads/libpayload/arch/x86/string.c
index 836d049..11bcae7 100644
--- a/payloads/libpayload/arch/x86/string.c
+++ b/payloads/libpayload/arch/x86/string.c
@@ -81,6 +81,16 @@
 {
 	unsigned long d0, d1, d2;
 
+#if CONFIG(LP_ARCH_X86_64)
+	asm volatile(
+		"rep ; movsq\n\t"
+		"mov %4,%%rcx\n\t"
+		"rep ; movsb\n\t"
+		: "=&c" (d0), "=&D" (d1), "=&S" (d2)
+		: "0" (n >> 3), "g" (n & 7), "1" (dest), "2" (src)
+		: "memory"
+	);
+#else
 	asm volatile(
 		"rep ; movsl\n\t"
 		"movl %4,%%ecx\n\t"
@@ -89,6 +99,7 @@
 		: "0" (n >> 2), "g" (n & 3), "1" (dest), "2" (src)
 		: "memory"
 	);
+#endif
 
 	return dest;
 }
diff --git a/payloads/libpayload/bin/lpgcc b/payloads/libpayload/bin/lpgcc
index a954d4f..08fcc29 100755
--- a/payloads/libpayload/bin/lpgcc
+++ b/payloads/libpayload/bin/lpgcc
@@ -84,7 +84,11 @@
 fi
 if [ "$CONFIG_LP_ARCH_X86" = "y" ]; then
   _ARCHDIR=x86
-  _ARCHEXTRA="-m32 "
+  if [ "$CONFIG_LP_ARCH_X86_32" = "y" ]; then
+    _ARCHEXTRA="-m32 "
+  else
+    _ARCHEXTRA="-m64 "
+  fi
 fi
 if [ "$CONFIG_LP_ARCH_MOCK" = "y" ]; then
   _ARCHDIR=mock
@@ -122,13 +126,10 @@
 
 while [ $# -gt 0 ]; do
 	case $1 in
-	    -m32|-fno-stack-protector)
+	    -m32|-fno-stack-protector|-m64)
 		shift
 		continue
 		;;
-	    -m64)
-		error "Invalid option --64 - only 32 bit architectures are supported"
-		;;
 	    -c)
 		DOLINK=0
 		;;
diff --git a/payloads/libpayload/drivers/storage/ahci_common.c b/payloads/libpayload/drivers/storage/ahci_common.c
index f3abc5f..abc67c3 100644
--- a/payloads/libpayload/drivers/storage/ahci_common.c
+++ b/payloads/libpayload/drivers/storage/ahci_common.c
@@ -66,7 +66,7 @@
 			    u8 *const user_buf, const size_t len,
 			    const int out)
 {
-	if ((u32)user_buf & 1) {
+	if ((uintptr_t)user_buf & 1) {
 		printf("ahci: Odd buffer pointer (%p).\n", user_buf);
 		if (dev->buf) /* orphaned buffer */
 			free(dev->buf - *(dev->buf - 1));
@@ -76,7 +76,7 @@
 		dev->user_buf = user_buf;
 		dev->write_back = !out;
 		dev->buflen = len;
-		if ((u32)dev->buf & 1) {
+		if ((uintptr_t)dev->buf & 1) {
 			dev->buf[0] = 1;
 			dev->buf += 1;
 		} else {
diff --git a/payloads/libpayload/drivers/usb/uhci.c b/payloads/libpayload/drivers/usb/uhci.c
index eb252cd..7590ab3 100644
--- a/payloads/libpayload/drivers/usb/uhci.c
+++ b/payloads/libpayload/drivers/usb/uhci.c
@@ -274,7 +274,7 @@
 
 #define UHCI_SLEEP_TIME_US 30
 #define UHCI_TIMEOUT (USB_MAX_PROCESSING_TIME_US / UHCI_SLEEP_TIME_US)
-#define GET_TD(x) ((void*)(((unsigned int)(x))&~0xf))
+#define GET_TD(x) ((void *)(((unsigned long)(x))&~0xf))
 
 static td_t *
 wait_for_completed_qh(hci_t *controller, qh_t *qh)
diff --git a/payloads/libpayload/include/x86/arch/exception.h b/payloads/libpayload/include/x86/arch/exception.h
index d88029b..76099f2 100644
--- a/payloads/libpayload/include/x86/arch/exception.h
+++ b/payloads/libpayload/include/x86/arch/exception.h
@@ -29,6 +29,7 @@
 #ifndef _ARCH_EXCEPTION_H
 #define _ARCH_EXCEPTION_H
 
+#include <stddef.h>
 #include <stdint.h>
 
 void exception_init_asm(void);
@@ -38,20 +39,28 @@
 /** Returns 1 if interrupts are enabled. */
 int interrupts_enabled(void);
 
-struct exception_state
-{
+#if CONFIG(LP_ARCH_X86_64)
+struct exception_state {
 	/* Careful: x86/gdb.c currently relies on the size and order of regs. */
 	struct {
-		u32 eax;
-		u32 ecx;
-		u32 edx;
-		u32 ebx;
-		u32 esp;
-		u32 ebp;
-		u32 esi;
-		u32 edi;
-		u32 eip;
-		u32 eflags;
+		size_t reg_ax;
+		size_t reg_bx;
+		size_t reg_cx;
+		size_t reg_dx;
+		size_t reg_si;
+		size_t reg_di;
+		size_t reg_bp;
+		size_t reg_sp;
+		size_t reg_r8;
+		size_t reg_r9;
+		size_t reg_r10;
+		size_t reg_r11;
+		size_t reg_r12;
+		size_t reg_r13;
+		size_t reg_r14;
+		size_t reg_r15;
+		size_t reg_ip;
+		size_t reg_flags;
 		u32 cs;
 		u32 ss;
 		u32 ds;
@@ -59,13 +68,39 @@
 		u32 fs;
 		u32 gs;
 	} regs;
-	u32 error_code;
-	u32 vector;
+	size_t error_code;
+	size_t vector;
 } __packed;
+#else
+struct exception_state {
+	/* Careful: x86/gdb.c currently relies on the size and order of regs. */
+	struct {
+		size_t reg_ax;
+		size_t reg_cx;
+		size_t reg_dx;
+		size_t reg_bx;
+		size_t reg_sp;
+		size_t reg_bp;
+		size_t reg_si;
+		size_t reg_di;
+		size_t reg_ip;
+		size_t reg_flags;
+		u32 cs;
+		u32 ss;
+		u32 ds;
+		u32 es;
+		u32 fs;
+		u32 gs;
+	} regs;
+	size_t error_code;
+	size_t vector;
+} __packed;
+#endif
+
 extern struct exception_state *exception_state;
 
-extern u32 exception_stack[];
-extern u32 *exception_stack_end;
+extern u8 exception_stack[];
+extern u8 *exception_stack_end;
 
 enum {
 	EXC_DE = 0, /* Divide by zero */
diff --git a/payloads/libpayload/libc/Makefile.mk b/payloads/libpayload/libc/Makefile.mk
index 2840476..c9fc17a 100644
--- a/payloads/libpayload/libc/Makefile.mk
+++ b/payloads/libpayload/libc/Makefile.mk
@@ -29,7 +29,7 @@
 
 libc-$(CONFIG_LP_LIBC) += malloc.c printf.c console.c string.c
 libc-$(CONFIG_LP_LIBC) += memory.c ctype.c lib.c libgcc.c
-libc-$(CONFIG_LP_LIBC) += rand.c time.c exec.c
+libc-$(CONFIG_LP_LIBC) += rand.c time.c
 libc-$(CONFIG_LP_LIBC) += readline.c getopt_long.c sysinfo.c
 libc-$(CONFIG_LP_LIBC) += args.c
 libc-$(CONFIG_LP_LIBC) += strlcpy.c
diff --git a/payloads/libpayload/vboot/Makefile.mk b/payloads/libpayload/vboot/Makefile.mk
index 554fec8..15af980 100644
--- a/payloads/libpayload/vboot/Makefile.mk
+++ b/payloads/libpayload/vboot/Makefile.mk
@@ -15,7 +15,8 @@
 		       $(filter-out -I$(coreboottop)/%,$(1)))))
 
 VBOOT_FIRMWARE_ARCH-$(CONFIG_LP_ARCH_ARM) := arm
-VBOOT_FIRMWARE_ARCH-$(CONFIG_LP_ARCH_X86) := x86
+VBOOT_FIRMWARE_ARCH-$(CONFIG_LP_ARCH_X86_32) := x86
+VBOOT_FIRMWARE_ARCH-$(CONFIG_LP_ARCH_X86_64) := x86_64
 VBOOT_FIRMWARE_ARCH-$(CONFIG_LP_ARCH_ARM64) := arm64
 
 ifneq ($(CONFIG_LP_ARCH_MOCK),)