soc/intel/common/cpu: Update COS mask calculation for NEM enhanced mode

Update the COS mask calculation to accommodate the RW data as per SoC
configuration. Currently only one way is allocated for RW data and
configured for non-eviction. For earlier platforms this worked fine,
and could accommodate RW data up to 256KiB. Starting with TGL and JSL,
DCACHE_RAM_SIZE is configured as 512KiB, which cannot be mapped to a
single way. Hence update the number of ways to be configured for
non-eviction as per the total LLC size.

The total LLC size / number of ways gives the way size. DCACHE_RAM_SIZE /
way size gives the number of ways that need to be configured for
non-eviction, instead of hardcoding it to 1.

TGL uses MSR IA32_CR_SF_QOS_MASK_1 (0x1891) and IA32_CR_SF_QOS_MASK_2
(0x1892) as the COS mask selection registers, and hence these need to be
programmed accordingly.

Also, on JSL and TGL platforms the COS mask selection is mapped to bits
33:32 of MSR IA32_PQR_ASSOC (0xC8F), so it needs to be set in edx (maps
63:32) before the MSR write instead of eax (maps 31:0). This
implementation corrects that as well.

BUG=b:149273819
TEST=Boot waddledoo (JSL), hatch (CML), volteer (TGL) with NEM enhanced
     CAR configuration.

Signed-off-by: Aamir Bohra <aamir.bohra@intel.com>
Change-Id: I54e047161853bfc70516c1d607aa479e68836d04
Reviewed-on: https://review.coreboot.org/c/coreboot/+/43494
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
Reviewed-by: Shreesh Chhabbi <shreesh.chhabbi@intel.corp-partner.google.com>
Reviewed-by: Tim Wawrzynczak <twawrzynczak@chromium.org>
diff --git a/src/soc/intel/common/block/cpu/car/cache_as_ram.S b/src/soc/intel/common/block/cpu/car/cache_as_ram.S
index 69ed174..5af1fc65 100644
--- a/src/soc/intel/common/block/cpu/car/cache_as_ram.S
+++ b/src/soc/intel/common/block/cpu/car/cache_as_ram.S
@@ -351,54 +351,96 @@
 	jnz	find_llc_subleaf
 
 	/*
-	 * Set MSR 0xC91 IA32_L3_MASK_1 = 0xE/0xFE/0xFFE/0xFFFE
-	 * for 4/8/16 way of LLC
-	*/
+	 * Calculate the total LLC size
+	 * (Line_Size + 1) * (Sets + 1) * (Partitions + 1) * (Ways + 1)
+	 * (EBX[11:0] + 1) * (ECX + 1) * (EBX[21:12] + 1) * (EBX[31:22] + 1)
+	 */
+
+	mov	%ebx, %eax
+	and	$0xFFF, %eax
+	inc	%eax
+	inc	%ecx
+	mul	%ecx
+	mov	%eax, %ecx
+	mov	%ebx, %eax
+	shr	$12, %eax
+	and	$0x3FF, %eax
+	inc	%eax
+	mul	%ecx
 	shr	$22, %ebx
 	inc	%ebx
-	/* Calculate n-way associativity of LLC */
-	mov	%bl, %cl
+	mov	%ebx, %edx
+	mul	%ebx /* eax now holds total LLC size */
 
 	/*
-	 * Maximizing RO cacheability while locking in the CAR to a
-	 * single way since that particular way won't be victim candidate
-	 * for evictions.
-	 * This has been done after programming LLC_WAY_MASK_1 MSR
-	 * with desired LLC way as mentioned below.
-	 *
-	 * Hence create Code and Data Size as per request
-	 * Code Size (RO) : Up to 16M
-	 * Data Size (RW) : Up to 256K
+	 * The number of the ways that we want to protect from eviction
+	 * can be calculated as RW data stack size / way size where way
+	 * size is Total LLC size / Total number of LLC ways.
 	 */
-	movl	$0x01, %eax
+	div	%ebx /* way size */
+	mov	%eax, %ecx
+
 	/*
-	 * LLC Ways -> LLC_WAY_MASK_1:
-	 *  4: 0x000E
-	 *  8: 0x00FE
-	 * 12: 0x0FFE
-	 * 16: 0xFFFE
-	 *
-	 * These MSRs contain one bit per each way of LLC
+	 * Check if the way size is bigger than or equal to the cache
+	 * RAM size. If so, we need to allocate just one way for
+	 * non-eviction of RW data.
+	 */
+       movl    $0x01, %eax
+       cmp     $CONFIG_DCACHE_RAM_SIZE, %ecx
+       jnc     set_eviction_mask
+
+	/*
+	 * RW data size / way size is equal to number of
+	 * ways to be configured for non-eviction
+	 */
+	mov     $CONFIG_DCACHE_RAM_SIZE, %eax
+	div	%ecx
+	mov	%eax, %ecx
+	movl	$0x01, %eax
+	shl	%cl, %eax
+	subl	$0x01, %eax
+
+set_eviction_mask:
+	mov	%ebx, %ecx /* back up the number of ways */
+	mov	%eax, %ebx /* back up the non-eviction mask*/
+	/*
+	 * Set MSR 0xC91 IA32_L3_MASK_1 or MSR 0x1891 IA32_CR_SF_QOS_MASK_1
+	 * This MSR contains one bit per way of LLC
 	 * - If this bit is '0' - the way is protected from eviction
 	 * - If this bit is '1' - the way is not protected from eviction
 	 */
-	shl	%cl, %eax
-	subl	$0x02, %eax
+	mov     $0x1, %eax
+        shl     %cl, %eax
+        subl    $0x01, %eax
+        mov     %eax, %ecx
+        mov     %ebx, %eax
+
+	xor	$~0, %eax	/* invert 32 bits */
+	and	%ecx, %eax
+#if CONFIG(USE_CAR_NEM_ENHANCED_V1)
 	movl	$IA32_L3_MASK_1, %ecx
+#elif CONFIG(USE_CAR_NEM_ENHANCED_V2)
+	movl	$IA32_CR_SF_QOS_MASK_1, %ecx
+#endif
+	xorl	%edx, %edx
+	wrmsr
+
+	/*
+	 * Set MSR 0xC92 IA32_L3_MASK_2 or MSR 0x1892 IA32_CR_SF_QOS_MASK_2
+	 * This MSR contains one bit per way of LLC
+	 * - If this bit is '0' - the way is protected from eviction
+	 * - If this bit is '1' - the way is not protected from eviction
+	 */
+	mov	%ebx, %eax
+#if CONFIG(USE_CAR_NEM_ENHANCED_V1)
+	movl	$IA32_L3_MASK_2, %ecx
+#elif CONFIG(USE_CAR_NEM_ENHANCED_V2)
+	movl	$IA32_CR_SF_QOS_MASK_2, %ecx
+#endif
 	xorl	%edx, %edx
 	wrmsr
 	/*
-	 * Set MSR 0xC92 IA32_L3_MASK_2 = 0x1
-	 *
-	 * For SKL SOC, data size remains 256K consistently.
-	 * Hence, creating 1-way associative cache for Data
-	*/
-	mov	$IA32_L3_MASK_2, %ecx
-	mov	$0x01, %eax
-	xorl	%edx, %edx
-	wrmsr
-	/*
-	 * Set IA32_PQR_ASSOC = 0x02
+	 * Set IA32_PQR_ASSOC
 	 *
 	 * Possible values:
 	 * 0: Default value, no way mask should be applied
@@ -407,8 +449,13 @@
 	 * 3: Shouldn't be use in NEM Mode
 	 */
 	movl	$IA32_PQR_ASSOC, %ecx
-	movl	$0x02, %eax
+	xorl	%eax, %eax
 	xorl	%edx, %edx
+#if CONFIG(COS_MAPPED_TO_MSB)
+	movl	$0x02, %edx
+#else
+	movl	$0x02, %eax
+#endif
 	wrmsr
 
 	movl	$CONFIG_DCACHE_RAM_BASE, %edi
@@ -418,13 +465,17 @@
 	cld
 	rep	stosl
 	/*
-	 * Set IA32_PQR_ASSOC = 0x01
+	 * Set IA32_PQR_ASSOC
 	 * At this stage we apply LLC_WAY_MASK_1 to the cache.
-	 * i.e. way 0 is protected from eviction.
 	*/
 	movl	$IA32_PQR_ASSOC, %ecx
-	movl	$0x01, %eax
+	xorl	%eax, %eax
 	xorl	%edx, %edx
+#if CONFIG(COS_MAPPED_TO_MSB)
+	movl	$0x01, %edx
+#else
+	movl	$0x01, %eax
+#endif
 	wrmsr
 
 	post_code(0x27)