arch/x86,cpu/x86: Introduce new method for accessing cpu_info

There is a fundamental flaw in the current cpu_info() implementation. It
assumes that the current stack is CONFIG_STACK_SIZE aligned. This
assumption breaks down when performing SMM relocation.

The first step in performing SMM relocation is changing the SMBASE. This
is accomplished by installing the smmstub at 0x00038000, which is the
default SMM entry point. The stub is configured to set up a new stack
with a size of 1 KiB (CONFIG_SMM_STUB_STACK_SIZE), and an entry point
of smm_do_relocation located in RAMSTAGE RAM.

This means that when smm_do_relocation is executed, it is running in SMM
with a differently sized stack. When cpu_info() gets called, it uses
CONFIG_STACK_SIZE to calculate the location of the cpu_info struct. This
results in reading random memory. Since cpu_info() has to run in
multiple environments, we can't use a compile time constant to locate
the cpu_info struct.

This CL introduces a new way of locating cpu_info. It uses a per-cpu
segment descriptor that points to a per-cpu segment that is allocated on
the stack. By using a segment descriptor to point to the per-cpu data,
we no longer need to calculate the location of the cpu_info struct. This
has the following advantages:
* Stacks no longer need to be CONFIG_STACK_SIZE aligned.
* Accessing an unconfigured segment will result in an exception. This
  ensures no one can call cpu_info() from an unsupported environment.
* Segment selectors are cleared when entering SMM and restored when
  leaving SMM.
* There is a 1:1 mapping between cpu and cpu_info. When using
  COOP_MULTITASKING, a new cpu_info is currently allocated at the top of
  each thread's stack. This no longer needs to happen.

This CL guards most of the code with CONFIG(CPU_INFO_V2). I did this so
reviewers can feel more comfortable knowing most of the CL is a no-op. I
would eventually like to remove most of the guards though.

This CL does not touch the LEGACY_SMP_INIT code path. I don't have any
way of testing it.

The %gs segment was chosen over the %fs segment because it's what the
Linux kernel uses for per-cpu data in x86_64 mode.

BUG=b:194391185, b:179699789
TEST=Boot guybrush with CPU_INFO_V2 and verify BSP and APs have correct
%gs segment. Verify cpu_info looks sane. Verify booting to the OS
works correctly with COOP_MULTITASKING enabled.

Signed-off-by: Raul E Rangel <rrangel@chromium.org>
Change-Id: I79dce9597cb784acb39a96897fb3c2f2973bfd98
Reviewed-on: https://review.coreboot.org/c/coreboot/+/57627
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
Reviewed-by: Eric Peers <epeers@google.com>
Reviewed-by: Karthik Ramasubramanian <kramasub@google.com>
diff --git a/src/arch/x86/c_start.S b/src/arch/x86/c_start.S
index cb7d504..9e718fc 100644
--- a/src/arch/x86/c_start.S
+++ b/src/arch/x86/c_start.S
@@ -80,6 +80,20 @@
 
 	push_cpu_info
 
+#if CONFIG(CPU_INFO_V2)
+	/* Allocate the per_cpu_segment_data on the stack */
+	push_per_cpu_segment_data
+
+	/*
+	 * Update the BSP's per_cpu_segment_descriptor to point to the
+	 * per_cpu_segment_data that was allocated on the stack.
+	 */
+	set_segment_descriptor_base $per_cpu_segment_descriptors, %esp
+
+	mov	per_cpu_segment_selector, %eax
+	mov	%eax, %gs
+#endif
+
 	/*
 	 *	Now we are finished. Memory is up, data is copied and
 	 *	bss is cleared.   Now we call the main routine and
@@ -127,6 +141,7 @@
 #endif
 
 	.globl gdt, gdt_end
+	.global per_cpu_segment_descriptors, per_cpu_segment_selector
 
 gdtaddr:
 	.word	gdt_end - gdt - 1
@@ -136,7 +151,7 @@
 	.long	gdt		/* we know the offset */
 #endif
 
-	 .data
+	.data
 
 	/* This is the gdt for GCC part of coreboot.
 	 * It is different from the gdt in ASM part of coreboot
@@ -206,8 +221,26 @@
 	.word	0xffff, 0x0000
 	.byte	0x00, 0x9b, 0xaf, 0x00
 #endif
+#if CONFIG(CPU_INFO_V2)
+per_cpu_segment_descriptors:
+	.rept CONFIG_MAX_CPUS
+	/* flat data segment */
+	.word	0xffff, 0x0000
+#if ENV_X86_64
+	.byte	0x00, 0x92, 0xcf, 0x00
+#else
+	.byte	0x00, 0x93, 0xcf, 0x00
+#endif
+	.endr
+#endif /* CPU_INFO_V2 */
 gdt_end:
 
+#if CONFIG(CPU_INFO_V2)
+/* Segment selector pointing to the first per_cpu_segment_descriptor. */
+per_cpu_segment_selector:
+	.long	per_cpu_segment_descriptors - gdt
+#endif /* CPU_INFO_V2 */
+
 	.section ".text._start", "ax", @progbits
 #if ENV_X86_64
 SetCodeSelector: