libpayload/x86: Extend exception handling to x86_64 architecture

Adds 64-bit (x86_64) support to libpayload's exception handler,
previously limited to x86_32.

Ensures `exception_init_asm` is called when building with the
LP_ARCH_X86_64 Kconfig option.

BUG=b:336265399
TEST=Successful build and boot of google/rex and google/rex64 on
ChromeOS.

Verified correct x86_64 exception handling by triggering a "Debug
Exception" via the firmware shell, as shown below:

firmware-shell: mm.l -0
Debug Exception
Error code: n/a
REG_IP:    0x0000000030023e9f
REG_FLAGS: 0x0000000000000046
REG_AX:    0x0000000000000009
REG_BX:    0x0000000000000000
REG_CX:    0x0000002000000000
REG_DX:    0x0000000000000001
REG_SP:    0x0000000034072ec0
REG_BP:    0x0000000000000009
REG_SI:    0x0000000000000029
REG_DI:    0x0000000034072eef
REG_R8:    0x0000000000000009
REG_R9:    0x0000000000000000
REG_R10:   0x0000000000000000
REG_R11:   0x0000000034072d70
REG_R12:   0x0000000000000004
REG_R13:   0x0000000000000001
REG_R14:   0x0000000034072ee6
REG_R15:   0x0000000000000004
CS:     0x0020
DS:     0x0000
ES:     0x0000
SS:     0x0018
FS:     0x0018
GS:     0x0050
Dumping stack:
0x340730c0: 3003c32e 00000000 ... 00000000 00000000
0x340730a0: 30034bc6 00000000 ... 0000002a 00000000
0x34073080: 34073234 00000000 ... 00002e65 00000000
...
...
0x34072ee0: 340730ed 30300000 ... 34073000 00000000
0x34072ec0: 34072ed8 00000000 ... 00000000 00000008
Ready for GDB connection.

Change-Id: I8f0aa1da8d179a760e8d49c3764dfd5a69d06887
Signed-off-by: Subrata Banik <subratabanik@google.com>
Reviewed-on: https://review.coreboot.org/c/coreboot/+/83036
Reviewed-by: Kapil Porwal <kapilporwal@google.com>
Reviewed-by: Arthur Heymans <arthur@aheymans.xyz>
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
diff --git a/payloads/libpayload/arch/x86/exception.c b/payloads/libpayload/arch/x86/exception.c
index fdab042..9811be3 100644
--- a/payloads/libpayload/arch/x86/exception.c
+++ b/payloads/libpayload/arch/x86/exception.c
@@ -113,13 +113,15 @@
 {
 	int i, j;
 	const int line = 8;
-	uint32_t *ptr = (uint32_t *)(addr & ~(line * sizeof(*ptr) - 1));
+	uint32_t *ptr = (uint32_t *)((uintptr_t)addr & ~(line * sizeof(*ptr) - 1));
 
 	printf("Dumping stack:\n");
 	for (i = bytes / sizeof(*ptr); i >= 0; i -= line) {
 		printf("%p: ", ptr + i);
-		for (j = i; j < i + line; j++)
-			printf("%08x ", *(ptr + j));
+		for (j = i; j < i + line; j++) {
+			if ((uintptr_t)(ptr + j) >= addr && (uintptr_t)(ptr + j) < addr + bytes)
+				printf("%08x ", *(ptr + j));
+		}
 		printf("\n");
 	}
 }
@@ -213,10 +215,6 @@
 
 void exception_init(void)
 {
-	/* TODO: Add exception init code for x64, currently only supporting 32-bit code */
-	if (CONFIG(LP_ARCH_X86_64))
-		return;
-
 	exception_stack_end = exception_stack + ARRAY_SIZE(exception_stack);
 	exception_init_asm();
 }
diff --git a/payloads/libpayload/arch/x86/exception_asm_64.S b/payloads/libpayload/arch/x86/exception_asm_64.S
index ee4e9e5..6d91b93 100644
--- a/payloads/libpayload/arch/x86/exception_asm_64.S
+++ b/payloads/libpayload/arch/x86/exception_asm_64.S
@@ -39,10 +39,10 @@
 	.quad 0
 error_code:
 	.quad 0
-old_rsp:
-	.quad 0
 old_rax:
 	.quad 0
+old_rcx:
+	.quad 0
 
 	.align 16
 
@@ -55,23 +55,23 @@
 
 	.macro stub num
 exception_stub_\num:
-	movq	$0, error_code
-	movq	$\num, vector
-	jmp	exception_common
+	movq $0, error_code
+	movq $\num, vector
+	jmp exception_common
 	.endm
 
 	.macro stub_err num
 exception_stub_\num:
-	pop	error_code
-	movq	$\num, vector
-	jmp	exception_common
+	pop error_code
+	movq $\num, vector
+	jmp exception_common
 	.endm
 
 	.altmacro
-	.macro	user_defined_stubs from, to
-	stub	\from
-	.if	\to-\from
-	user_defined_stubs	%(from+1),\to
+	.macro user_defined_stubs from, to
+	stub \from
+	.if \to-\from
+	user_defined_stubs %(from+1),\to
 	.endif
 	.endm
 
@@ -114,9 +114,117 @@
 	user_defined_stubs 192, 255
 
 exception_common:
+	/*
+	 * At this point, on x86-64, on the stack there is:
+	 * 0(%rsp) rip
+	 * 8(%rsp) cs
+	 * 16(%rsp) rflags
+	 * 24(%rsp) rsp
+	 * 32(%rsp) ss
+	 *
+	 * This section sets up the exception stack.
+	 * It saves the old stack pointer (rsp) to preserve RIP, CS, RFLAGS and SS.
+	 * Then sets up the new stack pointer to point to the exception stack area.
+	 */
+	movq %rax, old_rax
+	movq %rcx, old_rcx
 
-	/* Return from the exception. */
-	iretl
+	mov %rsp, %rax
+	movq exception_stack_end, %rsp
+	/*
+	 * The `exception_state` struct is not 16-byte aligned.
+	 * Push an extra 8 bytes to ensure the stack pointer
+	 * is 16-byte aligned before calling exception_dispatch.
+	 */
+	push $0
+
+	/*
+	 * Push values onto the top of the exception stack to form an
+	 * exception state structure.
+	 */
+	push vector
+	push error_code
+
+	/* push of the gs, fs, es, ds, ss and cs */
+	mov %gs, %rcx
+	movl %ecx, -4(%rsp)   /* gs */
+	mov %fs, %rcx
+	movl %ecx, -8(%rsp)   /* fs */
+	movl $0, -12(%rsp)    /* es */
+	movl $0, -16(%rsp)    /* ds */
+	movq 32(%rax), %rcx
+	movl %ecx, -20(%rsp)  /* ss */
+	movq 8(%rax), %rcx
+	movl %ecx, -24(%rsp)  /* cs */
+	sub $24, %rsp
+
+	push 16(%rax)         /* rflags */
+	push (%rax)           /* rip */
+	push %r15
+	push %r14
+	push %r13
+	push %r12
+	push %r11
+	push %r10
+	push %r9
+	push %r8
+	push 24(%rax)         /* rsp */
+	push %rbp
+	push %rdi
+	push %rsi
+	push %rdx
+	push old_rcx          /* rcx */
+	push %rbx
+	push old_rax          /* rax */
+
+	/*
+	 * Call the C exception handler. It will find the exception state
+	 * using the exception_state global pointer. Not
+	 * passing parameters means we don't have to worry about what ABI
+	 * is being used.
+	 */
+	mov %rsp, exception_state
+	call exception_dispatch
+
+	/*
+	 * Restore state from the exception state structure, including any
+	 * changes that might have been made.
+	 */
+	pop old_rax
+	pop %rbx
+	pop old_rcx
+	pop %rdx
+	pop %rsi
+	pop %rdi
+	pop %rbp
+	lea exception_stack, %rax
+	pop 24(%rax)          /* rsp */
+	pop %r8
+	pop %r9
+	pop %r10
+	pop %r11
+	pop %r12
+	pop %r13
+	pop %r14
+	pop %r15
+	pop (%rax)            /* rip */
+	pop 16(%rax)          /* rflags */
+
+	/* pop of the gs, fs, es, ds, ss and cs */
+	movl (%rsp), %ecx
+	movq %rcx, 8(%rax)    /* cs */
+	movl 4(%rsp), %ecx
+	movq %rcx, 32(%rax)   /* ss */
+	movl 16(%rsp), %ecx
+	mov %rcx, %fs         /* fs */
+	movl 20(%rsp), %ecx
+	mov %rcx, %gs         /* gs */
+
+	mov %rax, %rsp
+	movq old_rax, %rax
+	movq old_rcx, %rcx
+
+	iretq
 
 /*
  * We need segment selectors for the IDT, so we need to know where things are
@@ -139,18 +247,18 @@
 
 	/* selgdt 0x18, flat 4GB data segment */
 	.word 0xffff, 0x0000
-	.byte	0x00, 0x92, 0xcf, 0x00
+	.byte 0x00, 0x92, 0xcf, 0x00
 
 	/* selgdt 0x20, flat x64 code segment */
-	.word	0xffff, 0x0000
-	.byte	0x00, 0x9b, 0xaf, 0x00
+	.word 0xffff, 0x0000
+	.byte 0x00, 0x9b, 0xaf, 0x00
 gdt_end:
 
 /* GDT pointer for use with lgdt */
 .global gdt_ptr
 gdt_ptr:
-	.word	gdt_end - gdt - 1
-	.quad	gdt
+	.word gdt_end - gdt - 1
+	.quad gdt
 
 	/*
 	 * Record the target and construct the actual entry at init time. This
@@ -158,8 +266,11 @@
 	 * for us.
 	 */
 	.macro interrupt_gate target
-	.quad \target
-	.quad \target
+	.word 0       /* patchable */
+	.word 0x20    /* Target code segment selector */
+	.word 0xee00  /* Present, Type 64-bit Interrupt Gate */
+	.word 0       /* patchable */
+	.quad \target /* patchable */
 	.endm
 
 	.altmacro
@@ -171,7 +282,7 @@
 	.endm
 
 	.align 16
-	.global	idt
+	.global idt
 idt:
 	interrupt_gate exception_stub_0
 	interrupt_gate exception_stub_1
@@ -216,6 +327,25 @@
 	.word idt_end - idt - 1
 	.quad idt
 
-	.global exception_init_asm
+.section .text.exception_init_asm
+.globl exception_init_asm
+.type exception_init_asm, @function
+
 exception_init_asm:
+	/* Set up IDT entries */
+	mov $idt, %rax
+1:
+	movq 8(%rax), %rdi
+	movw %di, (%rax)   /* procedure entry point offset bits 0..15 */
+	shr $16, %rdi
+	movw %di, 6(%rax)  /* procedure entry point offset bits 16..31 */
+	shr $16, %rdi
+	movl %edi, 8(%rax) /* procedure entry point offset bits 32..63 */
+	movl $0, 12(%rax)  /* reserved */
+	add $16, %rax
+	cmp $idt_end, %rax
+	jne 1b
+
+	/* Load the IDT */
+	lidt idt_ptr
 	ret