treewide: Disable R_AMD64_32S relocation support

This fixes a hard-to-debug hang that could occur in any stage. In the
end the problem follows simple rules and is easy to fix.

In long mode the 32-bit displacement addressing used by 'mov' and 'lea'
instructions is sign-extended. Affected instructions can be found by
running readelf on the stage and searching for the relocation type
R_X86_64_32S.
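
For example, to list the offending relocations (the exact path to the
stage binary depends on the build; ramstage is shown here purely as an
illustration):

	readelf -r build/cbfs/fallback/ramstage.debug | grep R_X86_64_32S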

The sign extension is no issue when running in protected mode, or when
the code module, and thus the address, is below 2 GiB. If the address
is greater than 2 GiB, as is usually the case for code in TSEG, the
upper address bits [63:32] are all set to 1 and the effective address
points to memory that is not paged. Accessing this memory causes a
page fault, which isn't handled either.
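
As a minimal sketch, assume a hypothetical symbol 'foo' linked at
0xfff00000, i.e. above 2 GiB:

	mov	foo, %rax	# disp32 form (R_X86_64_32S): the address
				# is sign-extended to 0xfffffffffff00000
	movabs	foo, %rax	# moffs64 form (R_X86_64_64): the full
				# 64-bit address 0x00000000fff00000 is used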

To prevent such problems:
- disable R_AMD64_32S relocations in rmodtool
- add a comment explaining why they are not allowed
- use the movabs pseudo-op, which doesn't use 32-bit displacement
  addressing (see the sketch after this list)
- print a useful error message if such a relocation is present in the
  code
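
The rewrite pattern is the same everywhere: first load the full 64-bit
address into a register with movabs, then use register-indirect
addressing. Sketch of the lgdt case from gdt_init.S:

	/* before: disp32 is sign-extended in long mode */
	lgdt	gdtptr64
	/* after: force a 64-bit absolute address */
	movabs	$gdtptr64, %rax
	lgdt	(%rax)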

This fixes a crash in TSEG when running in long mode, as seen on Intel
Sandybridge.

Change-Id: Ia5f5a9cde7c325f67b12e3a8e9a76283cc3870a3
Signed-off-by: Patrick Rudolph <patrick.rudolph@9elements.com>
Reviewed-on: https://review.coreboot.org/c/coreboot/+/55448
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
Reviewed-by: Arthur Heymans <arthur@aheymans.xyz>
diff --git a/src/arch/x86/c_start.S b/src/arch/x86/c_start.S
index f55ab6e..b7ffddc 100644
--- a/src/arch/x86/c_start.S
+++ b/src/arch/x86/c_start.S
@@ -32,8 +32,11 @@
 	.globl _start
 _start:
 	cli
+#ifdef __x86_64__
+	movabs	$gdtaddr, %rax
+	lgdt	(%rax)
+#else
 	lgdt	%cs:gdtaddr
-#ifndef __x86_64__
 	ljmp	$RAM_CODE_SEG, $1f
 #endif
 1:	movl	$RAM_DATA_SEG, %eax
@@ -52,11 +55,14 @@
 	cld
 
 #ifdef	__x86_64__
-	mov	%rdi, _cbmem_top_ptr
+	mov	%rdi, %rax
+	movabs	%rax, _cbmem_top_ptr
+	movabs	$_stack, %rdi
 #else
 	/* The return argument is at 0(%esp), the calling argument at 4(%esp) */
 	movl	4(%esp), %eax
 	movl	%eax, _cbmem_top_ptr
+	leal	_stack, %edi
 #endif
 
 	/** poison the stack. Code should not count on the
@@ -64,7 +70,6 @@
 	 * recently uncovered a bug in the broadcast SIPI
 	 * code.
 	 */
-	leal	_stack, %edi
 	movl	$_estack, %ecx
 	subl	%edi, %ecx
 	shrl	$2, %ecx   /* it is 32 bit aligned, right? */
@@ -226,7 +231,7 @@
 	push	%rsp
 	pushfq
 	push	%rcx		# cx is code segment selector from caller
-	mov	$setCodeSelectorLongJump, %rax
+	movabs	$setCodeSelectorLongJump, %rax
 	push	%rax
 
 	# the iret will continue at next instruction, with the new cs value
diff --git a/src/arch/x86/exit_car.S b/src/arch/x86/exit_car.S
index fae7899..d1b1a53 100644
--- a/src/arch/x86/exit_car.S
+++ b/src/arch/x86/exit_car.S
@@ -49,7 +49,8 @@
 #endif
 
 #ifdef	__x86_64__
-	mov	%rdi, _cbmem_top_ptr
+	mov	%rdi, %rax
+	movabs	%rax, _cbmem_top_ptr
 #else
 	/* The return argument is at 0(%esp), the calling argument at 4(%esp) */
 	movl	4(%esp), %eax
@@ -60,7 +61,12 @@
 	cpuid
 	btl	$CPUID_FEATURE_CLFLUSH_BIT, %edx
 	jnc	skip_clflush
+#ifdef	__x86_64__
+	movabs	_cbmem_top_ptr, %rax
+	clflush	(%rax)
+#else
 	clflush	_cbmem_top_ptr
+#endif
 
 skip_clflush:
 	/* chipset_teardown_car() is expected to disable cache-as-ram. */
@@ -71,17 +77,24 @@
 	mov	%cr0, %rax
 	and     $(~(CR0_CD | CR0_NW)), %eax
 	mov	%rax, %cr0
+
+	/* Ensure cache is clean. */
+	invd
+
+	/* Set up new stack. */
+	movabs	post_car_stack_top, %rax
+	mov	%rax, %rsp
 #else
 	mov	%cr0, %eax
 	and	$(~(CR0_CD | CR0_NW)), %eax
 	mov	%eax, %cr0
-#endif
+
 	/* Ensure cache is clean. */
 	invd
 
 	/* Set up new stack. */
 	mov	post_car_stack_top, %esp
-
+#endif
 	/*
 	 * Honor variable MTRR information pushed on the stack with the
 	 * following layout:
diff --git a/src/arch/x86/gdt_init.S b/src/arch/x86/gdt_init.S
index f33a151..30b3965 100644
--- a/src/arch/x86/gdt_init.S
+++ b/src/arch/x86/gdt_init.S
@@ -23,18 +23,6 @@
 .section .init._gdt64_, "ax", @progbits
 	.globl gdt_init64
 gdt_init64:
-	/* Workaround a bug in the assembler.
-	 * The following code doesn't work:
-	 * 	lgdt gdtptr64
-	 *
-	 * The assembler tries to save memory by using 32bit displacement addressing mode.
-	 * Displacements are using signed integers.
-	 * This is fine in protected mode, as the negative address points to the correct
-	 * address > 2GiB, but in long mode this doesn't work at all.
-	 * Tests showed that QEMU can gracefully handle it, but real CPUs can't.
-	 *
-	 * Use the movabs pseudo instruction to force using a 64bit absolute address.
-	 */
 	movabs	$gdtptr64, %rax
 	lgdt	(%rax)
 	ret
diff --git a/src/cpu/x86/lapic/secondary.S b/src/cpu/x86/lapic/secondary.S
index 073e6b4..d36bc9a 100644
--- a/src/cpu/x86/lapic/secondary.S
+++ b/src/cpu/x86/lapic/secondary.S
@@ -61,9 +61,11 @@
 #if ENV_X86_64
 	/* entry64.inc preserves ebx. */
 	#include <cpu/x86/64bit/entry64.inc>
-	mov	secondary_stack, %rsp
+	movabs	secondary_stack, %rax
+	mov	%rax, %rsp
 	andl	$0xfffffff0, %esp
-	mov	secondary_cpu_index, %rdi
+	movabs	secondary_cpu_index, %rax
+	mov	%rax, %rdi
 #else
 	/* Set the stack pointer, and flag that we are done */
 	xorl	%eax, %eax
diff --git a/src/cpu/x86/sipi_vector.S b/src/cpu/x86/sipi_vector.S
index f9b2957..d8156b8 100644
--- a/src/cpu/x86/sipi_vector.S
+++ b/src/cpu/x86/sipi_vector.S
@@ -220,7 +220,7 @@
 
 	mov	%rsi, %rdi	/* cpu_num */
 
-	movl	c_handler, %eax
+	movabs	c_handler, %eax
 	call	*%rax
 #else
 	/* c_handler(cpu_num), preserve proper stack alignment */
diff --git a/src/cpu/x86/smm/smm_stub.S b/src/cpu/x86/smm/smm_stub.S
index 0c690da..07be047 100644
--- a/src/cpu/x86/smm/smm_stub.S
+++ b/src/cpu/x86/smm/smm_stub.S
@@ -210,7 +210,7 @@
 
 	mov	%rsp, %rdi	/* *arg */
 
-	movl	c_handler, %eax
+	movabs	c_handler, %eax
 	call	*%rax
 
 	/*
diff --git a/util/cbfstool/rmodule.c b/util/cbfstool/rmodule.c
index 4ac2951..ce00510 100644
--- a/util/cbfstool/rmodule.c
+++ b/util/cbfstool/rmodule.c
@@ -38,10 +38,16 @@
 
 	type = ELF64_R_TYPE(rel->r_info);
 
-	/* Only these 6 relocations are expected to be found. */
+	/*
+	 * Relocation R_AMD64_32S is not allowed. It can only be used safely in protected
+	 * mode, or in long mode when the address pointed to is below 2 GiB.
+	 * Using it in assembly code breaks rmodule creation with the error:
+	 * E: Invalid reloc type: 11
+	 */
+
+	/* Only these 5 relocations are expected to be found. */
 	return (type == R_AMD64_64 ||
 		type == R_AMD64_PC64 ||
-		type == R_AMD64_32S ||
 		type == R_AMD64_32 ||
 		type == R_AMD64_PC32 ||
 	/*
@@ -60,7 +66,6 @@
 
 	/* Only emit absolute relocations */
 	return (type == R_AMD64_64 ||
-		type == R_AMD64_32S ||
 		type == R_AMD64_32);
 }
 
@@ -182,6 +187,10 @@
 			if (!ctx->ops->valid_type(r)) {
 				ERROR("Invalid reloc type: %u\n",
 				      (unsigned int)ELF64_R_TYPE(r->r_info));
+				if ((ctx->ops->arch == EM_X86_64) &&
+				    (ELF64_R_TYPE(r->r_info) == R_AMD64_32S))
+					ERROR("Illegal use of 32bit sign extended addressing at offset 0x%x\n",
+					      (unsigned int)r->r_offset);
 				return -1;
 			}