arch/x86/gdt: Work around assembler bug

GDT loading worked fine on x86_64 a few months ago, but today it only
works in QEMU, not on real hardware or KVM-enabled QEMU. This might be
related to toolchain changes.

Use 64-bit GDT loading on x86_64 and force the assembler to generate a
64-bit absolute address load for the GDT pointer. This ensures that no
32-bit (signed) displacement operand is generated, which would point to
the wrong address in long mode.

Verified with readelf that no R_X86_64_32S relocation is emitted.
Disassembled the romstage ELF and confirmed that the GDT address is
64 bits in size.

Tested on QEMU and KVM-enabled QEMU: Doesn't crash any more on KVM.

Signed-off-by: Patrick Rudolph <siro@das-labor.org>
Change-Id: Ia824f90d9611e6e8db09bd62a05e6f990581f09a
Reviewed-on: https://review.coreboot.org/c/coreboot/+/43136
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
Reviewed-by: Angel Pons <th3fanbus@gmail.com>
diff --git a/src/arch/x86/assembly_entry.S b/src/arch/x86/assembly_entry.S
index 0d8307b..31670c2 100644
--- a/src/arch/x86/assembly_entry.S
+++ b/src/arch/x86/assembly_entry.S
@@ -15,12 +15,22 @@
 	#define _STACK_TOP _ecar_stack
 #endif
 
+#ifdef __x86_64__
+.code64
+#else
+.code32
+#endif
+
 .section ".text._start", "ax", @progbits
 .global _start
 _start:
 
 	/* Migrate GDT to this text segment */
+#ifdef __x86_64__
+	call	gdt_init64
+#else
 	call	gdt_init
+#endif
 
 	/* reset stack pointer to CAR/EARLYRAM stack */
 	mov	$_STACK_TOP, %esp
diff --git a/src/arch/x86/gdt_init.S b/src/arch/x86/gdt_init.S
index 7dd4b94..1558ac6 100644
--- a/src/arch/x86/gdt_init.S
+++ b/src/arch/x86/gdt_init.S
@@ -20,7 +20,20 @@
 .section ".text._gdt64_", "ax", @progbits
 	.globl gdt_init64
 gdt_init64:
-	lgdt	gdtptr64
+	/* Work around a bug in the assembler.
+	 * The following code doesn't work:
+	 * 	lgdt gdtptr64
+	 *
+	 * The assembler tries to save memory by using 32bit displacement addressing mode.
+	 * Displacements are using signed integers.
+	 * This is fine in protected mode, as the negative address points to the correct
+	 * address > 2GiB, but in long mode this doesn't work at all.
+	 * Tests showed that QEMU can gracefully handle it, but real CPUs can't.
+	 *
+	 * Use the movabs pseudo instruction to force using a 64bit absolute address.
+	 */
+	movabs	$gdtptr64, %rax
+	lgdt	(%rax)
 	ret
 
 .previous