soc/intel/common/cpu: Update COS mask calculation for NEM enhanced mode

Update the COS mask calculation to accommodate the RW data as per SoC
configuration. Currently only one way is allocated for RW data and
configured for non-eviction. For earlier platforms this served fine,
and could accommodate RW data up to 256 KiB. Starting with TGL and JSL,
the DCACHE_RAM_SIZE is configured for 512 KiB, which cannot be mapped to
a single way. Hence update the number of ways to be configured for
non-eviction as per total LLC size.

The total LLC size / number of ways gives the way size. DCACHE_RAM_SIZE /
way size gives the number of ways that need to be configured for
non-eviction, instead of hardcoding it to 1.

TGL uses MSR IA32_CR_SF_QOS_MASK_1(0x1891) and IA32_CR_SF_QOS_MASK_2(0x1892)
as COS mask selection registers and hence needs to be programmed accordingly.

Also, on JSL and TGL platforms the COS mask selection is mapped to bits
33:32 of MSR IA32_PQR_ASSOC(0xC8F) and needs to be updated in edx (maps
63:32) before the MSR write instead of eax (maps 31:0). This
implementation corrects that as well.

BUG=b:149273819
TEST=Boot waddledoo(JSL), hatch(CML), Volteer(TGL) with NEM enhanced
     CAR configuration.

Signed-off-by: Aamir Bohra <aamir.bohra@intel.com>
Change-Id: I54e047161853bfc70516c1d607aa479e68836d04
Reviewed-on: https://review.coreboot.org/c/coreboot/+/43494
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
Reviewed-by: Shreesh Chhabbi <shreesh.chhabbi@intel.corp-partner.google.com>
Reviewed-by: Tim Wawrzynczak <twawrzynczak@chromium.org>
diff --git a/src/include/cpu/x86/msr.h b/src/include/cpu/x86/msr.h
index c761bc0..1573eef 100644
--- a/src/include/cpu/x86/msr.h
+++ b/src/include/cpu/x86/msr.h
@@ -90,6 +90,9 @@
 #define IA32_L3_MASK_1			0xc91
 #define IA32_L3_MASK_2			0xc92
 
+#define IA32_CR_SF_QOS_MASK_1           0x1891
+#define IA32_CR_SF_QOS_MASK_2           0x1892
+
 #ifndef __ASSEMBLER__
 #include <types.h>
 
diff --git a/src/soc/intel/cannonlake/Kconfig b/src/soc/intel/cannonlake/Kconfig
index 9ff2827..596de41 100644
--- a/src/soc/intel/cannonlake/Kconfig
+++ b/src/soc/intel/cannonlake/Kconfig
@@ -310,7 +310,7 @@
 config USE_CANNONLAKE_CAR_NEM_ENHANCED
 	bool "Enhanced Non-evict mode"
 	select SOC_INTEL_COMMON_BLOCK_CAR
-	select INTEL_CAR_NEM_ENHANCED
+	select USE_CAR_NEM_ENHANCED_V1
 	help
 	  A current limitation of NEM (Non-Evict mode) is that code and data
 	  sizes are derived from the requirement to not write out any modified
diff --git a/src/soc/intel/common/block/cpu/Kconfig b/src/soc/intel/common/block/cpu/Kconfig
index 3c29b24..1351cb8 100644
--- a/src/soc/intel/common/block/cpu/Kconfig
+++ b/src/soc/intel/common/block/cpu/Kconfig
@@ -51,6 +51,28 @@
 	  ENHANCED NEM guarantees that modified data is always
 	  kept in cache while clean data is replaced.
 
+config USE_CAR_NEM_ENHANCED_V1
+	bool
+	select INTEL_CAR_NEM_ENHANCED
+	help
+	  This config supports INTEL_CAR_NEM_ENHANCED mode on
+	  SKL, KBL, CNL, WHL, CML, ICL and JSL platforms.
+
+config USE_CAR_NEM_ENHANCED_V2
+	bool
+	select INTEL_CAR_NEM_ENHANCED
+	select COS_MAPPED_TO_MSB
+	help
+	  This config supports INTEL_CAR_NEM_ENHANCED mode on
+	  the TGL platform.
+
+config COS_MAPPED_TO_MSB
+	bool
+	depends on INTEL_CAR_NEM_ENHANCED
+	help
+	  On TGL and JSL platforms the class of service configuration
+	  is mapped to the MSB of MSR IA32_PQR_ASSOC.
+
 config USE_INTEL_FSP_MP_INIT
 	bool "Perform MP Initialization by FSP"
 	default n
diff --git a/src/soc/intel/common/block/cpu/car/cache_as_ram.S b/src/soc/intel/common/block/cpu/car/cache_as_ram.S
index 69ed174..5af1fc65 100644
--- a/src/soc/intel/common/block/cpu/car/cache_as_ram.S
+++ b/src/soc/intel/common/block/cpu/car/cache_as_ram.S
@@ -351,54 +351,96 @@
 	jnz	find_llc_subleaf
 
 	/*
-	 * Set MSR 0xC91 IA32_L3_MASK_1 = 0xE/0xFE/0xFFE/0xFFFE
-	 * for 4/8/16 way of LLC
-	*/
+	 * Calculate the total LLC size
+	 * (Line_Size + 1) * (Sets + 1) * (Partitions + 1) * (Ways + 1)
+	 * (EBX[11:0] + 1) * (ECX + 1) * (EBX[21:12] + 1) * (EBX[31:22] + 1)
+	 */
+
+	mov	%ebx, %eax
+	and	$0xFFF, %eax
+	inc	%eax
+	inc	%ecx
+	mul	%ecx
+	mov	%eax, %ecx
+	mov	%ebx, %eax
+	shr	$12, %eax
+	and	$0x3FF, %eax
+	inc	%eax
+	mul	%ecx
 	shr	$22, %ebx
 	inc	%ebx
-	/* Calculate n-way associativity of LLC */
-	mov	%bl, %cl
+	mov	%ebx, %edx
+	mul	%ebx /* eax now holds total LLC size */
 
 	/*
-	 * Maximizing RO cacheability while locking in the CAR to a
-	 * single way since that particular way won't be victim candidate
-	 * for evictions.
-	 * This has been done after programming LLC_WAY_MASK_1 MSR
-	 * with desired LLC way as mentioned below.
-	 *
-	 * Hence create Code and Data Size as per request
-	 * Code Size (RO) : Up to 16M
-	 * Data Size (RW) : Up to 256K
+	 * The number of the ways that we want to protect from eviction
+	 * can be calculated as RW data stack size / way size where way
+	 * size is Total LLC size / Total number of LLC ways.
 	 */
-	movl	$0x01, %eax
+	div	%ebx /* way size */
+	mov	%eax, %ecx
+
 	/*
-	 * LLC Ways -> LLC_WAY_MASK_1:
-	 *  4: 0x000E
-	 *  8: 0x00FE
-	 * 12: 0x0FFE
-	 * 16: 0xFFFE
-	 *
-	 * These MSRs contain one bit per each way of LLC
+	 * Check if the way size is at least the cache RAM size.
+	 * Then we need to allocate just one way for non-eviction
+	 * of RW data.
+	 */
+       movl    $0x01, %eax
+       cmp     $CONFIG_DCACHE_RAM_SIZE, %ecx
+       jnc     set_eviction_mask
+
+	/*
+	 * RW data size / way size is equal to number of
+	 * ways to be configured for non-eviction
+	 */
+	mov     $CONFIG_DCACHE_RAM_SIZE, %eax
+	div	%ecx
+	mov	%eax, %ecx
+	movl	$0x01, %eax
+	shl	%cl, %eax
+	subl	$0x01, %eax
+
+set_eviction_mask:
+	mov	%ebx, %ecx /* back up the number of ways */
+	mov	%eax, %ebx /* back up the non-eviction mask*/
+	/*
+	 * Set MSR 0xC91 IA32_L3_MASK_1 or MSR 0x1891 IA32_CR_SF_QOS_MASK_1.
+	 * This MSR contains one bit per way of LLC:
 	 * - If this bit is '0' - the way is protected from eviction
 	 * - If this bit is '1' - the way is not protected from eviction
 	 */
-	shl	%cl, %eax
-	subl	$0x02, %eax
+	mov     $0x1, %eax
+        shl     %cl, %eax
+        subl    $0x01, %eax
+        mov     %eax, %ecx
+        mov     %ebx, %eax
+
+	xor	$~0, %eax	/* invert 32 bits */
+	and	%ecx, %eax
+#if CONFIG(USE_CAR_NEM_ENHANCED_V1)
 	movl	$IA32_L3_MASK_1, %ecx
+#elif CONFIG(USE_CAR_NEM_ENHANCED_V2)
+	movl	$IA32_CR_SF_QOS_MASK_1, %ecx
+#endif
+	xorl	%edx, %edx
+	wrmsr
+
+	/*
+	 * Set MSR 0xC92 IA32_L3_MASK_2 or MSR 0x1892 IA32_CR_SF_QOS_MASK_2.
+	 * This MSR contains one bit per way of LLC:
+	 * - If this bit is '0' - the way is protected from eviction
+	 * - If this bit is '1' - the way is not protected from eviction
+	 */
+	mov	%ebx, %eax
+#if CONFIG(USE_CAR_NEM_ENHANCED_V1)
+	movl	$IA32_L3_MASK_2, %ecx
+#elif CONFIG(USE_CAR_NEM_ENHANCED_V2)
+	movl	$IA32_CR_SF_QOS_MASK_2, %ecx
+#endif
 	xorl	%edx, %edx
 	wrmsr
 	/*
-	 * Set MSR 0xC92 IA32_L3_MASK_2 = 0x1
-	 *
-	 * For SKL SOC, data size remains 256K consistently.
-	 * Hence, creating 1-way associative cache for Data
-	*/
-	mov	$IA32_L3_MASK_2, %ecx
-	mov	$0x01, %eax
-	xorl	%edx, %edx
-	wrmsr
-	/*
-	 * Set IA32_PQR_ASSOC = 0x02
+	 * Set IA32_PQR_ASSOC
 	 *
 	 * Possible values:
 	 * 0: Default value, no way mask should be applied
@@ -407,8 +449,13 @@
 	 * 3: Shouldn't be use in NEM Mode
 	 */
 	movl	$IA32_PQR_ASSOC, %ecx
-	movl	$0x02, %eax
+	xorl	%eax, %eax
 	xorl	%edx, %edx
+#if CONFIG(COS_MAPPED_TO_MSB)
+	movl	$0x02, %edx
+#else
+	movl	$0x02, %eax
+#endif
 	wrmsr
 
 	movl	$CONFIG_DCACHE_RAM_BASE, %edi
@@ -418,13 +465,17 @@
 	cld
 	rep	stosl
 	/*
-	 * Set IA32_PQR_ASSOC = 0x01
+	 * Set IA32_PQR_ASSOC
 	 * At this stage we apply LLC_WAY_MASK_1 to the cache.
-	 * i.e. way 0 is protected from eviction.
 	*/
 	movl	$IA32_PQR_ASSOC, %ecx
-	movl	$0x01, %eax
+	xorl	%eax, %eax
 	xorl	%edx, %edx
+#if CONFIG(COS_MAPPED_TO_MSB)
+	movl	$0x01, %edx
+#else
+	movl	$0x01, %eax
+#endif
 	wrmsr
 
 	post_code(0x27)
diff --git a/src/soc/intel/denverton_ns/Kconfig b/src/soc/intel/denverton_ns/Kconfig
index 33635b3..91b45dc 100644
--- a/src/soc/intel/denverton_ns/Kconfig
+++ b/src/soc/intel/denverton_ns/Kconfig
@@ -161,7 +161,7 @@
 	depends on !FSP_CAR
 	default y
 	select SOC_INTEL_COMMON_BLOCK_CAR
-	select INTEL_CAR_NEM_ENHANCED
+	select USE_CAR_NEM_ENHANCED_V1
 	help
 	  A current limitation of NEM (Non-Evict mode) is that code and data sizes
 	  are derived from the requirement to not write out any modified cache line.
diff --git a/src/soc/intel/icelake/Kconfig b/src/soc/intel/icelake/Kconfig
index 1230675..1e66e97 100644
--- a/src/soc/intel/icelake/Kconfig
+++ b/src/soc/intel/icelake/Kconfig
@@ -52,7 +52,6 @@
 	select SOC_INTEL_COMMON_PCH_BASE
 	select SOC_INTEL_COMMON_RESET
 	select SOC_INTEL_COMMON_BLOCK_CAR
-	select INTEL_CAR_NEM_ENHANCED
 	select SSE2
 	select SUPPORT_CPU_UCODE_IN_CBFS
 	select TSC_MONOTONIC_TIMER
@@ -61,6 +60,7 @@
 	select DISPLAY_FSP_VERSION_INFO
 	select HECI_DISABLE_USING_SMM
 	select USE_INTEL_FSP_TO_CALL_COREBOOT_PUBLISH_MP_PPI
+	select USE_CAR_NEM_ENHANCED_V1
 
 config DCACHE_RAM_BASE
 	default 0xfef00000
diff --git a/src/soc/intel/skylake/Kconfig b/src/soc/intel/skylake/Kconfig
index 9f9cb18..db0f688 100644
--- a/src/soc/intel/skylake/Kconfig
+++ b/src/soc/intel/skylake/Kconfig
@@ -35,7 +35,6 @@
 	select HAVE_FSP_LOGO_SUPPORT
 	select INTEL_DESCRIPTOR_MODE_CAPABLE
 	select HAVE_SMI_HANDLER
-	select INTEL_CAR_NEM_ENHANCED
 	select INTEL_GMA_ACPI
 	select INTEL_GMA_ADD_VBT if RUN_FSP_GOP
 	select HAVE_INTEL_FSP_REPO
@@ -79,6 +78,7 @@
 	select TSC_SYNC_MFENCE
 	select UDELAY_TSC
 	select UDK_2015_BINDING
+	select USE_CAR_NEM_ENHANCED_V1
 
 config FSP_HYPERTHREADING
 	bool "Enable Hyper-Threading"