cpu/x86: Set up a separate stack for APs

APs use a lot less stack than the BSP, so set up a separate stack area
for the APs in .bss.

Now that CPU_INFO_V2 is the only code path that is used, there is no
need to align stacks to CONFIG_STACK_SIZE in c_start.S; 16-byte
alignment is sufficient.

Change-Id: I7a681a2e3003da0400843daa5d6d6180d952abf5
Signed-off-by: Arthur Heymans <arthur@aheymans.xyz>
Reviewed-on: https://review.coreboot.org/c/coreboot/+/69123
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
Reviewed-by: Angel Pons <th3fanbus@gmail.com>
Reviewed-by: Werner Zeh <werner.zeh@siemens.com>
diff --git a/src/arch/x86/c_start.S b/src/arch/x86/c_start.S
index 5b7052e..52aeb19 100644
--- a/src/arch/x86/c_start.S
+++ b/src/arch/x86/c_start.S
@@ -10,11 +10,9 @@
 .global _estack
 .global _stack_size
 
-/* Stack alignment is not enforced with rmodule loader, reserve one
- * extra CPU such that alignment can be enforced on entry. */
-.align CONFIG_STACK_SIZE
+.align 16
 _stack:
-.space (CONFIG_MAX_CPUS+1)*CONFIG_STACK_SIZE
+.space CONFIG_STACK_SIZE
 _estack:
 .set _stack_size, _estack - _stack
 
@@ -75,7 +73,7 @@
 
 	/* Set new stack with enforced alignment. */
 	movl	$_estack, %esp
-	andl	$(~(CONFIG_STACK_SIZE-1)), %esp
+	andl	$(0xfffffff0), %esp
 
 	/*
 	 *	Now we are finished. Memory is up, data is copied and
diff --git a/src/cpu/x86/Kconfig b/src/cpu/x86/Kconfig
index 2253e18..7e58175 100644
--- a/src/cpu/x86/Kconfig
+++ b/src/cpu/x86/Kconfig
@@ -202,4 +202,11 @@
 	  However, modern OSes use PAT to control cacheability instead of
 	  using MTRRs.
 
+config AP_STACK_SIZE
+	hex
+	default 0x800
+	help
+	  This is the size in bytes of the stack reserved for each AP. The BSP
+	  stack size can be larger and is set with STACK_SIZE.
+
 endif # ARCH_X86
diff --git a/src/cpu/x86/mp_init.c b/src/cpu/x86/mp_init.c
index f004185..507da7d 100644
--- a/src/cpu/x86/mp_init.c
+++ b/src/cpu/x86/mp_init.c
@@ -215,6 +215,8 @@
 	park_this_cpu(NULL);
 }
 
+static __aligned(16) uint8_t ap_stack[CONFIG_AP_STACK_SIZE * CONFIG_MAX_CPUS];
+
 static void setup_default_sipi_vector_params(struct sipi_params *sp)
 {
 	sp->gdt = (uintptr_t)&gdt;
@@ -222,8 +224,8 @@
 	sp->idt_ptr = (uintptr_t)&idtarg;
 	sp->per_cpu_segment_descriptors = (uintptr_t)&per_cpu_segment_descriptors;
 	sp->per_cpu_segment_selector = per_cpu_segment_selector;
-	sp->stack_size = CONFIG_STACK_SIZE;
-	sp->stack_top = ALIGN_DOWN((uintptr_t)&_estack, CONFIG_STACK_SIZE);
+	sp->stack_size = CONFIG_AP_STACK_SIZE;
+	sp->stack_top = (uintptr_t)ap_stack + ARRAY_SIZE(ap_stack);
 }
 
 static const unsigned int fixed_mtrrs[NUM_FIXED_MTRRS] = {