Patrick Georgi | ac95903 | 2020-05-05 22:49:26 +0200 | [diff] [blame] | 1 | /* SPDX-License-Identifier: GPL-2.0-or-later */ |
Keith Hui | 1ac19e2 | 2011-07-27 23:06:16 -0400 | [diff] [blame] | 2 | |
| 3 | /* |
| 4 | * Intel Pentium L2 Cache initialization. |
| 5 | * This code was developed by reverse engineering |
| 6 | * the BIOS. Where the code accesses documented |
| 7 | * registers I have added comments as best I can. |
| 8 | * Some undocumented registers on the Pentium II are |
| 9 | * used so some of the documentation is incomplete. |
| 10 | * |
| 11 | * References: |
| 12 | * Intel Architecture Software Developer's Manual |
| 13 | * Volume 3B: System Programming Guide, Part 2 (#253669) |
| 14 | * Appendix B.9 |
| 15 | */ |
| 16 | |
/* This code is ported from coreboot v1.
 * The L2 cache initialization sequence here only applies to SECC/SECC2 P6
 * family CPUs with Klamath (63x), Deschutes (65x) and Katmai (67x) cores.
 * It is not required for Coppermine (68x) and Tualatin (6bx) cores.
 * It is currently not known whether Celerons with the Mendocino (66x) core
 * require this special initialization.
 * Covington-core Celerons do not have an L2 cache.
 */
| 25 | |
| 26 | #include <stdint.h> |
Felix Held | 1fb2e1e | 2021-09-16 22:05:52 +0200 | [diff] [blame] | 27 | #include <arch/cpu.h> |
Keith Hui | 1ac19e2 | 2011-07-27 23:06:16 -0400 | [diff] [blame] | 28 | #include <console/console.h> |
Keith Hui | 1ac19e2 | 2011-07-27 23:06:16 -0400 | [diff] [blame] | 29 | #include <cpu/intel/l2_cache.h> |
| 30 | #include <cpu/x86/cache.h> |
| 31 | #include <cpu/x86/msr.h> |
| 32 | |
| 33 | /* Latency Tables */ |
| 34 | struct latency_entry { |
| 35 | u8 key; |
| 36 | u8 value; |
| 37 | }; |
| 38 | /* |
| 39 | Latency maps for Deschutes and Katmai. |
| 40 | No such mapping is available for Klamath. |
| 41 | |
| 42 | Cache latency to |
| 43 | be written to L2 -----++++ |
| 44 | control register |||| |
| 45 | 0000 xx 00 -----> 000 cccc 0 |
| 46 | |||| 00 66MHz |
| 47 | |||| 10 100MHz |
| 48 | |||| 01 133MHz (Katmai "B" only) |
| 49 | ++++------ CPU frequency multiplier |
| 50 | |
| 51 | 0000 2x |
| 52 | 0001 3x |
| 53 | 0010 4x |
| 54 | 0011 5x |
| 55 | 0100 2.5x |
| 56 | 0101 3.5x |
| 57 | 0110 4.5x |
| 58 | 0111 5.5x |
| 59 | 1000 6x |
| 60 | 1001 7x |
| 61 | 1010 8x |
| 62 | 1011 Reserved |
| 63 | 1100 6.5x |
| 64 | 1101 7.5x |
| 65 | 1110 1.5x |
| 66 | 1111 2x |
| 67 | |
| 68 | */ |
| 69 | static const struct latency_entry latency_650_t0[] = { |
| 70 | {0x10, 0x02}, {0x50, 0x02}, {0x20, 0x04}, {0x60, 0x06}, |
| 71 | {0x00, 0x08}, {0x40, 0x0C}, {0x12, 0x06}, {0x52, 0x0A}, |
| 72 | {0x22, 0x0E}, {0x62, 0x10}, {0x02, 0x10}, {0xFF, 0x00} |
| 73 | }; |
| 74 | |
| 75 | static const struct latency_entry latency_650_t1[] = { |
| 76 | {0x12, 0x14}, {0x52, 0x16}, {0x22, 0x16}, {0x62, 0x16}, |
| 77 | {0xFF, 0x00} |
| 78 | }; |
| 79 | |
| 80 | static const struct latency_entry latency_670_t0[] = { |
| 81 | {0x60, 0x06}, {0x00, 0x08}, {0x12, 0x06}, {0x52, 0x0A}, |
| 82 | {0x22, 0x0E}, {0x62, 0x10}, {0x02, 0x10}, {0x42, 0x02}, |
| 83 | {0x11, 0x0E}, {0x51, 0x0C}, {0x21, 0x02}, {0x61, 0x10}, |
| 84 | {0x01, 0x10}, {0x41, 0x02}, {0xFF, 0x00} |
| 85 | }; |
| 86 | |
| 87 | static const struct latency_entry latency_670_t1[] = { |
| 88 | {0x22, 0x18}, {0x62, 0x18}, {0x02, 0x1A}, {0x11, 0x18}, |
| 89 | {0xFF, 0x00} |
| 90 | }; |
| 91 | |
| 92 | static const struct latency_entry latency_670_t2[] = { |
| 93 | {0x22, 0x12}, {0x62, 0x14}, {0x02, 0x16}, {0x42, 0x1E}, |
| 94 | {0x11, 0x12}, {0x51, 0x16}, {0x21, 0x1E}, {0x61, 0x14}, |
| 95 | {0x01, 0x16}, {0x41, 0x1E}, {0xFF, 0x00} |
| 96 | }; |
| 97 | |
| 98 | /* Latency tables for 650 model/type */ |
| 99 | static const struct latency_entry *latency_650[] = { |
| 100 | latency_650_t0, latency_650_t1, latency_650_t1 |
| 101 | }; |
| 102 | |
| 103 | /* Latency tables for 670 model/type */ |
| 104 | static const struct latency_entry *latency_670[] = { |
| 105 | latency_670_t0, latency_670_t1, latency_670_t2 |
| 106 | }; |
| 107 | |
| 108 | int calculate_l2_latency(void) |
| 109 | { |
| 110 | u32 eax, l, signature; |
| 111 | const struct latency_entry *latency_table, *le; |
| 112 | msr_t msr; |
| 113 | |
| 114 | /* First, attempt to get cache latency value from |
| 115 | IA32_PLATFORM_ID[56:53]. (L2 Cache Latency Read) |
| 116 | */ |
| 117 | msr = rdmsr(IA32_PLATFORM_ID); |
| 118 | |
Lee Leahy | 9d62e7e | 2017-03-15 17:40:50 -0700 | [diff] [blame] | 119 | printk(BIOS_DEBUG, "rdmsr(IA32_PLATFORM_ID) = %x:%x\n", msr.hi, msr.lo); |
Keith Hui | 1ac19e2 | 2011-07-27 23:06:16 -0400 | [diff] [blame] | 120 | |
| 121 | l = (msr.hi >> 20) & 0x1e; |
| 122 | |
| 123 | if (l == 0) { |
| 124 | /* If latency value isn't available from |
| 125 | IA32_PLATFORM_ID[56:53], read it from |
| 126 | L2 control register 0 for lookup from |
| 127 | tables. */ |
| 128 | int t, a; |
| 129 | |
| 130 | /* The raw code is read from L2 register 0, bits [7:4]. */ |
| 131 | a = read_l2(0); |
| 132 | if (a < 0) |
| 133 | return -1; |
| 134 | |
| 135 | a &= 0xf0; |
| 136 | |
| 137 | if ((a & 0x20) == 0) |
| 138 | t = 0; |
| 139 | else if (a == 0x20) |
| 140 | t = 1; |
| 141 | else if (a == 0x30) |
| 142 | t = 2; |
| 143 | else |
| 144 | return -1; |
| 145 | |
Lee Leahy | 9d62e7e | 2017-03-15 17:40:50 -0700 | [diff] [blame] | 146 | printk(BIOS_DEBUG, "L2 latency type = %x\n", t); |
Keith Hui | 1ac19e2 | 2011-07-27 23:06:16 -0400 | [diff] [blame] | 147 | |
| 148 | /* Get CPUID family/model */ |
| 149 | signature = cpuid_eax(1) & 0xfff0; |
| 150 | |
| 151 | /* Read EBL_CR_POWERON */ |
| 152 | msr = rdmsr(EBL_CR_POWERON); |
| 153 | /* Get clock multiplier and FSB frequency. |
| 154 | * Multiplier is in [25:22]. |
Lee Leahy | cdc5048 | 2017-03-15 18:26:18 -0700 | [diff] [blame] | 155 | * FSB is in [19:18] in Katmai, [19] in Deschutes ([18] is zero |
| 156 | * for them). |
Keith Hui | 1ac19e2 | 2011-07-27 23:06:16 -0400 | [diff] [blame] | 157 | */ |
| 158 | eax = msr.lo >> 18; |
| 159 | if (signature == 0x650) { |
| 160 | eax &= ~0xf2; |
| 161 | latency_table = latency_650[t]; |
| 162 | } else if (signature == 0x670) { |
| 163 | eax &= ~0xf3; |
| 164 | latency_table = latency_670[t]; |
| 165 | } else |
| 166 | return -1; |
| 167 | |
| 168 | /* Search table for matching entry */ |
| 169 | for (le = latency_table; le->key != eax; le++) { |
| 170 | /* Fail if we get to the end of the table */ |
| 171 | if (le->key == 0xff) { |
Lee Leahy | cdc5048 | 2017-03-15 18:26:18 -0700 | [diff] [blame] | 172 | printk(BIOS_DEBUG, |
| 173 | "Could not find key %02x in latency table\n", |
| 174 | eax); |
Keith Hui | 1ac19e2 | 2011-07-27 23:06:16 -0400 | [diff] [blame] | 175 | return -1; |
| 176 | } |
| 177 | } |
| 178 | |
| 179 | l = le->value; |
| 180 | } |
| 181 | |
Lee Leahy | 9d62e7e | 2017-03-15 17:40:50 -0700 | [diff] [blame] | 182 | printk(BIOS_DEBUG, "L2 Cache latency is %d\n", l / 2); |
Keith Hui | 1ac19e2 | 2011-07-27 23:06:16 -0400 | [diff] [blame] | 183 | |
| 184 | /* Writes the calculated latency in BBL_CR_CTL3[4:1]. */ |
| 185 | msr = rdmsr(BBL_CR_CTL3); |
| 186 | msr.lo &= 0xffffffe1; |
| 187 | msr.lo |= l; |
| 188 | wrmsr(BBL_CR_CTL3, msr); |
| 189 | |
| 190 | return 0; |
| 191 | } |
| 192 | |
Keith Hui | 1ac19e2 | 2011-07-27 23:06:16 -0400 | [diff] [blame] | 193 | /* Setup address, data_high:data_low into the L2 |
| 194 | * control registers and then issue command with correct cache way |
| 195 | */ |
| 196 | int signal_l2(u32 address, u32 data_high, u32 data_low, int way, u8 command) |
| 197 | { |
| 198 | int i; |
| 199 | msr_t msr; |
| 200 | |
| 201 | /* Write L2 Address to BBL_CR_ADDR */ |
| 202 | msr.lo = address; |
| 203 | msr.hi = 0; |
| 204 | wrmsr(BBL_CR_ADDR, msr); |
| 205 | |
| 206 | /* Write data to BBL_CR_D{0..3} */ |
| 207 | msr.lo = data_low; |
| 208 | msr.hi = data_high; |
Lee Leahy | 26eeb0f | 2017-03-15 18:08:50 -0700 | [diff] [blame] | 209 | for (i = BBL_CR_D0; i <= BBL_CR_D3; i++) |
Keith Hui | 1ac19e2 | 2011-07-27 23:06:16 -0400 | [diff] [blame] | 210 | wrmsr(i, msr); |
Keith Hui | 1ac19e2 | 2011-07-27 23:06:16 -0400 | [diff] [blame] | 211 | |
| 212 | /* Put the command and way into BBL_CR_CTL */ |
| 213 | msr = rdmsr(BBL_CR_CTL); |
| 214 | msr.lo = (msr.lo & 0xfffffce0) | command | (way << 8); |
| 215 | wrmsr(BBL_CR_CTL, msr); |
| 216 | |
| 217 | /* Trigger L2 controller */ |
| 218 | msr.lo = 0; |
| 219 | msr.hi = 0; |
| 220 | wrmsr(BBL_CR_TRIG, msr); |
| 221 | |
| 222 | /* Poll the controller to see when done */ |
| 223 | for (i = 0; i < 0x100; i++) { |
| 224 | /* Read BBL_CR_BUSY */ |
| 225 | msr = rdmsr(BBL_CR_BUSY); |
| 226 | /* If not busy then return */ |
| 227 | if ((msr.lo & 1) == 0) |
| 228 | return 0; |
| 229 | } |
| 230 | |
| 231 | /* Return timeout code */ |
| 232 | return -1; |
| 233 | } |
| 234 | |
| 235 | /* Read the L2 Cache controller register at given address */ |
| 236 | int read_l2(u32 address) |
| 237 | { |
| 238 | msr_t msr; |
| 239 | |
| 240 | /* Send a L2 Control Register Read to L2 controller */ |
| 241 | if (signal_l2(address << 5, 0, 0, 0, L2CMD_CR) != 0) |
| 242 | return -1; |
| 243 | |
| 244 | /* If OK then get the result from BBL_CR_ADDR */ |
| 245 | msr = rdmsr(BBL_CR_ADDR); |
| 246 | return (msr.lo >> 0x15); |
Keith Hui | 1ac19e2 | 2011-07-27 23:06:16 -0400 | [diff] [blame] | 247 | } |
| 248 | |
| 249 | /* Write data into the L2 controller register at address */ |
| 250 | int write_l2(u32 address, u32 data) |
| 251 | { |
| 252 | int v1, v2, i; |
| 253 | |
| 254 | v1 = read_l2(0); |
| 255 | if (v1 < 0) |
| 256 | return -1; |
| 257 | |
| 258 | v2 = read_l2(2); |
| 259 | if (v2 < 0) |
| 260 | return -1; |
| 261 | |
| 262 | if ((v1 & 0x20) == 0) { |
| 263 | v2 &= 0x3; |
| 264 | v2++; |
| 265 | } else |
| 266 | v2 &= 0x7; |
| 267 | |
Lee Leahy | cdc5048 | 2017-03-15 18:26:18 -0700 | [diff] [blame] | 268 | /* This write has to be replicated to a number of places. Not sure what. |
| 269 | */ |
Keith Hui | 1ac19e2 | 2011-07-27 23:06:16 -0400 | [diff] [blame] | 270 | |
| 271 | for (i = 0; i < v2; i++) { |
Keith Hui | 1ac19e2 | 2011-07-27 23:06:16 -0400 | [diff] [blame] | 272 | u32 data1, data2; |
| 273 | // Bits legend |
| 274 | // data1 = ffffffff |
| 275 | // data2 = 000000dc |
| 276 | // address = 00aaaaaa |
Martin Roth | 4c3ab73 | 2013-07-08 16:23:54 -0600 | [diff] [blame] | 277 | // Final address signaled: |
Keith Hui | 1ac19e2 | 2011-07-27 23:06:16 -0400 | [diff] [blame] | 278 | // 000fffff fff000c0 000dcaaa aaa00000 |
| 279 | data1 = data & 0xff; |
| 280 | data1 = data1 << 21; |
| 281 | data2 = (i << 11) & 0x1800; |
| 282 | data1 |= data2; |
| 283 | data2 <<= 6; |
| 284 | data2 &= 0x20000; |
| 285 | data1 |= data2; |
| 286 | |
| 287 | /* Signal L2 controller */ |
| 288 | if (signal_l2((address << 5) | data1, 0, 0, 0, 3)) |
| 289 | return -1; |
| 290 | } |
| 291 | return 0; |
| 292 | } |
| 293 | |
| 294 | /* Write data_high:data_low into the cache at address1. Test address2 |
| 295 | * to see if the same data is returned. Return 0 if the data matches. |
| 296 | * return lower 16 bits if mismatched data if mismatch. Return -1 |
| 297 | * on error |
| 298 | */ |
| 299 | int test_l2_address_alias(u32 address1, u32 address2, |
| 300 | u32 data_high, u32 data_low) |
| 301 | { |
| 302 | int d; |
| 303 | msr_t msr; |
| 304 | |
| 305 | /* Tag Write with Data Write for L2 */ |
| 306 | if (signal_l2(address1, data_high, data_low, 0, L2CMD_TWW)) |
| 307 | return -1; |
| 308 | |
| 309 | /* Tag Read with Data Read for L2 */ |
| 310 | if (signal_l2(address2, 0, 0, 0, L2CMD_TRR)) |
| 311 | return -1; |
| 312 | |
| 313 | /* Read data from BBL_CR_D[0-3] */ |
| 314 | for (d = BBL_CR_D0; d <= BBL_CR_D3; d++) { |
| 315 | msr = rdmsr(d); |
| 316 | if (msr.lo != data_low || msr.hi != data_high) |
| 317 | return (msr.lo & 0xffff); |
| 318 | } |
| 319 | |
| 320 | return 0; |
| 321 | } |
| 322 | |
| 323 | /* Calculates the L2 cache size. |
| 324 | * |
Martin Roth | 0cd338e | 2016-07-29 14:07:30 -0600 | [diff] [blame] | 325 | * Reference: Intel(R) 64 and IA-32 Architectures Software Developer's Manual |
Lee Leahy | cdc5048 | 2017-03-15 18:26:18 -0700 | [diff] [blame] | 326 | * Volume 3B: System Programming Guide, Part 2, Intel pub. 253669, |
| 327 | * pg. B-172. |
Keith Hui | 1ac19e2 | 2011-07-27 23:06:16 -0400 | [diff] [blame] | 328 | * |
| 329 | */ |
| 330 | int calculate_l2_cache_size(void) |
| 331 | { |
| 332 | int v; |
| 333 | msr_t msr; |
| 334 | u32 cache_setting; |
| 335 | u32 address, size, eax, bblcr3; |
| 336 | |
| 337 | v = read_l2(0); |
| 338 | if (v < 0) |
| 339 | return -1; |
| 340 | if ((v & 0x20) == 0) { |
| 341 | msr = rdmsr(BBL_CR_CTL3); |
| 342 | bblcr3 = msr.lo & ~BBLCR3_L2_SIZE; |
| 343 | /* |
| 344 | * Successively write in all the possible cache size per bank |
Lee Leahy | cdc5048 | 2017-03-15 18:26:18 -0700 | [diff] [blame] | 345 | * into BBL_CR_CTL3[17:13], starting from 256KB (00001) to 4MB |
| 346 | * (10000), and read the last value written and accepted by the |
| 347 | * cache. |
Keith Hui | 1ac19e2 | 2011-07-27 23:06:16 -0400 | [diff] [blame] | 348 | * |
| 349 | * No idea why these bits are writable at all. |
| 350 | */ |
| 351 | for (cache_setting = BBLCR3_L2_SIZE_256K; |
| 352 | cache_setting <= BBLCR3_L2_SIZE_4M; cache_setting <<= 1) { |
Keith Hui | 1ac19e2 | 2011-07-27 23:06:16 -0400 | [diff] [blame] | 353 | eax = bblcr3 | cache_setting; |
| 354 | msr.lo = eax; |
| 355 | wrmsr(BBL_CR_CTL3, msr); |
| 356 | msr = rdmsr(BBL_CR_CTL3); |
| 357 | |
| 358 | /* Value not accepted */ |
| 359 | if (msr.lo != eax) |
| 360 | break; |
| 361 | } |
| 362 | |
| 363 | /* Backtrack to the last value that worked... */ |
| 364 | cache_setting >>= 1; |
| 365 | |
| 366 | /* and write it into BBL_CR_CTL3 */ |
| 367 | msr.lo &= ~BBLCR3_L2_SIZE; |
| 368 | msr.lo |= (cache_setting & BBLCR3_L2_SIZE); |
| 369 | |
| 370 | wrmsr(BBL_CR_CTL3, msr); |
| 371 | |
Lee Leahy | 9d62e7e | 2017-03-15 17:40:50 -0700 | [diff] [blame] | 372 | printk(BIOS_DEBUG, "Maximum cache mask is %x\n", cache_setting); |
Keith Hui | 1ac19e2 | 2011-07-27 23:06:16 -0400 | [diff] [blame] | 373 | |
Lee Leahy | cdc5048 | 2017-03-15 18:26:18 -0700 | [diff] [blame] | 374 | /* For now, BBL_CR_CTL3 has the highest cache "size" that |
| 375 | * register will accept. Now we'll ping the cache and see where |
| 376 | * it wraps. |
Keith Hui | 1ac19e2 | 2011-07-27 23:06:16 -0400 | [diff] [blame] | 377 | */ |
| 378 | |
| 379 | /* Write aaaaaaaa:aaaaaaaa to address 0 in the l2 cache. |
| 380 | * If this "alias test" returns an "address", it means the |
| 381 | * cache cannot be written to properly, and we have a problem. |
| 382 | */ |
| 383 | v = test_l2_address_alias(0, 0, 0xaaaaaaaa, 0xaaaaaaaa); |
| 384 | if (v != 0) |
| 385 | return -1; |
| 386 | |
| 387 | /* Start with 32K wrap point (256KB actually) */ |
| 388 | size = 1; |
| 389 | address = 0x8000; |
| 390 | |
| 391 | while (1) { |
| 392 | v = test_l2_address_alias(address, 0, 0x55555555, |
| 393 | 0x55555555); |
| 394 | // Write failed. |
| 395 | if (v < 0) |
| 396 | return -1; |
| 397 | // It wraps here. |
| 398 | else if (v == 0) |
| 399 | break; |
| 400 | |
| 401 | size <<= 1; |
| 402 | address <<= 1; |
| 403 | |
| 404 | if (address > 0x200000) |
| 405 | return -1; |
| 406 | } |
| 407 | |
| 408 | /* Mask size */ |
| 409 | size &= 0x3e; |
| 410 | |
| 411 | /* Shift to [17:13] */ |
| 412 | size <<= 12; |
| 413 | |
| 414 | /* Set this into BBL_CR_CTL3 */ |
| 415 | msr = rdmsr(BBL_CR_CTL3); |
| 416 | msr.lo &= ~BBLCR3_L2_SIZE; |
| 417 | msr.lo |= size; |
| 418 | wrmsr(BBL_CR_CTL3, msr); |
| 419 | |
Lee Leahy | 9d62e7e | 2017-03-15 17:40:50 -0700 | [diff] [blame] | 420 | printk(BIOS_DEBUG, "L2 Cache Mask is %x\n", size); |
Keith Hui | 1ac19e2 | 2011-07-27 23:06:16 -0400 | [diff] [blame] | 421 | |
| 422 | /* Shift to [6:2] */ |
| 423 | size >>= 11; |
| 424 | |
| 425 | v = read_l2(2); |
| 426 | |
| 427 | if (v < 0) |
| 428 | return -1; |
| 429 | |
Lee Leahy | 9d62e7e | 2017-03-15 17:40:50 -0700 | [diff] [blame] | 430 | printk(BIOS_DEBUG, "L2(2): %x ", v); |
Keith Hui | 1ac19e2 | 2011-07-27 23:06:16 -0400 | [diff] [blame] | 431 | |
| 432 | v &= 0x3; |
| 433 | |
| 434 | /* Shift size right by v */ |
| 435 | size >>= v; |
| 436 | |
| 437 | /* Or in this size */ |
| 438 | v |= size; |
| 439 | |
Lee Leahy | 9d62e7e | 2017-03-15 17:40:50 -0700 | [diff] [blame] | 440 | printk(BIOS_DEBUG, "-> %x\n", v); |
Keith Hui | 1ac19e2 | 2011-07-27 23:06:16 -0400 | [diff] [blame] | 441 | |
| 442 | if (write_l2(2, v) != 0) |
| 443 | return -1; |
| 444 | } else { |
| 445 | // Some cache size information is available from L2 registers. |
| 446 | // Work from there. |
| 447 | int b, c; |
| 448 | |
| 449 | v = read_l2(2); |
| 450 | |
Lee Leahy | 9d62e7e | 2017-03-15 17:40:50 -0700 | [diff] [blame] | 451 | printk(BIOS_DEBUG, "L2(2) = %x\n", v); |
Keith Hui | 1ac19e2 | 2011-07-27 23:06:16 -0400 | [diff] [blame] | 452 | |
| 453 | if (v < 0) |
| 454 | return -1; |
| 455 | |
| 456 | // L2 register 2 bitmap: cc---bbb |
| 457 | b = v & 0x7; |
| 458 | c = v >> 6; |
| 459 | |
| 460 | v = 1 << c * b; |
| 461 | |
| 462 | v &= 0xf; |
| 463 | |
Lee Leahy | 9d62e7e | 2017-03-15 17:40:50 -0700 | [diff] [blame] | 464 | printk(BIOS_DEBUG, "Calculated a = %x\n", v); |
Keith Hui | 1ac19e2 | 2011-07-27 23:06:16 -0400 | [diff] [blame] | 465 | |
| 466 | if (v == 0) |
| 467 | return -1; |
| 468 | |
| 469 | /* Shift to 17:14 */ |
| 470 | v <<= 14; |
| 471 | |
| 472 | /* Write this size into BBL_CR_CTL3 */ |
| 473 | msr = rdmsr(BBL_CR_CTL3); |
| 474 | msr.lo &= ~BBLCR3_L2_SIZE; |
| 475 | msr.lo |= v; |
| 476 | wrmsr(BBL_CR_CTL3, msr); |
| 477 | } |
| 478 | |
| 479 | return 0; |
| 480 | } |
| 481 | |
Lee Leahy | cdc5048 | 2017-03-15 18:26:18 -0700 | [diff] [blame] | 482 | // L2 physical address range can be found from L2 control register 3, |
| 483 | // bits [2:0]. |
Keith Hui | 1ac19e2 | 2011-07-27 23:06:16 -0400 | [diff] [blame] | 484 | int calculate_l2_physical_address_range(void) |
| 485 | { |
| 486 | int r0, r3; |
| 487 | msr_t msr; |
| 488 | |
| 489 | r3 = read_l2(3); |
| 490 | if (r3 < 0) |
| 491 | return -1; |
| 492 | |
| 493 | r0 = read_l2(0); |
| 494 | if (r0 < 0) |
| 495 | return -1; |
| 496 | |
| 497 | if (r0 & 0x20) |
| 498 | r3 = 0x7; |
| 499 | else |
| 500 | r3 &= 0x7; |
| 501 | |
Lee Leahy | cdc5048 | 2017-03-15 18:26:18 -0700 | [diff] [blame] | 502 | printk(BIOS_DEBUG, "L2 Physical Address Range is %dM\n", |
| 503 | (1 << r3) * 512); |
Keith Hui | 1ac19e2 | 2011-07-27 23:06:16 -0400 | [diff] [blame] | 504 | |
| 505 | /* Shift into [22:20] to be saved into BBL_CR_CTL3. */ |
| 506 | r3 = r3 << 20; |
| 507 | |
| 508 | msr = rdmsr(BBL_CR_CTL3); |
| 509 | msr.lo &= ~BBLCR3_L2_PHYSICAL_RANGE; |
| 510 | msr.lo |= r3; |
| 511 | wrmsr(BBL_CR_CTL3, msr); |
| 512 | |
| 513 | return 0; |
| 514 | } |
| 515 | |
| 516 | int set_l2_ecc(void) |
| 517 | { |
| 518 | u32 eax; |
| 519 | const u32 data1 = 0xaa55aa55; |
| 520 | const u32 data2 = 0xaaaaaaaa; |
| 521 | msr_t msr; |
| 522 | |
| 523 | /* Set User Supplied ECC in BBL_CR_CTL */ |
| 524 | msr = rdmsr(BBL_CR_CTL); |
| 525 | msr.lo |= BBLCR3_L2_SUPPLIED_ECC; |
| 526 | wrmsr(BBL_CR_CTL, msr); |
| 527 | |
| 528 | /* Write a value into the L2 Data ECC register BBL_CR_DECC */ |
| 529 | msr.lo = data1; |
| 530 | msr.hi = 0; |
| 531 | wrmsr(BBL_CR_DECC, msr); |
| 532 | |
| 533 | if (test_l2_address_alias(0, 0, data2, data2) < 0) |
| 534 | return -1; |
| 535 | |
| 536 | /* Read back ECC from BBL_CR_DECC */ |
| 537 | msr = rdmsr(BBL_CR_DECC); |
| 538 | eax = msr.lo; |
| 539 | |
| 540 | if (eax == data1) { |
Lee Leahy | 9d62e7e | 2017-03-15 17:40:50 -0700 | [diff] [blame] | 541 | printk(BIOS_DEBUG, "L2 ECC Checking is enabled\n"); |
Keith Hui | 1ac19e2 | 2011-07-27 23:06:16 -0400 | [diff] [blame] | 542 | |
| 543 | /* Set ECC Check Enable in BBL_CR_CTL3 */ |
| 544 | msr = rdmsr(BBL_CR_CTL3); |
| 545 | msr.lo |= BBLCR3_L2_ECC_CHECK_ENABLE; |
| 546 | wrmsr(BBL_CR_CTL3, msr); |
| 547 | } |
| 548 | |
| 549 | /* Clear User Supplied ECC in BBL_CR_CTL */ |
| 550 | msr = rdmsr(BBL_CR_CTL); |
| 551 | msr.lo &= ~BBLCR3_L2_SUPPLIED_ECC; |
| 552 | wrmsr(BBL_CR_CTL, msr); |
| 553 | |
| 554 | return 0; |
| 555 | } |
| 556 | |
| 557 | /* |
| 558 | * This is the function called from CPU initialization |
| 559 | * driver to set up P6 family L2 cache. |
| 560 | */ |
| 561 | |
| 562 | int p6_configure_l2_cache(void) |
| 563 | { |
| 564 | msr_t msr, bblctl3; |
| 565 | unsigned int eax; |
| 566 | u16 signature; |
| 567 | int cache_size, bank; |
| 568 | int result, calc_eax; |
| 569 | int v, a; |
| 570 | |
| 571 | int badclk1, badclk2, clkratio; |
| 572 | int crctl3_or; |
| 573 | |
| 574 | printk(BIOS_INFO, "Configuring L2 cache... "); |
| 575 | |
| 576 | /* Read BBL_CR_CTL3 */ |
| 577 | bblctl3 = rdmsr(BBL_CR_CTL3); |
| 578 | /* If bit 23 (L2 Hardware disable) is set then done */ |
| 579 | /* These would be Covington core Celerons with no L2 cache */ |
| 580 | if (bblctl3.lo & BBLCR3_L2_NOT_PRESENT) { |
Lee Leahy | 9d62e7e | 2017-03-15 17:40:50 -0700 | [diff] [blame] | 581 | printk(BIOS_INFO, "hardware disabled\n"); |
Keith Hui | 1ac19e2 | 2011-07-27 23:06:16 -0400 | [diff] [blame] | 582 | return 0; |
| 583 | } |
| 584 | |
| 585 | signature = cpuid_eax(1) & 0xfff0; |
| 586 | |
| 587 | /* Klamath-specific bit settings for certain |
| 588 | preliminary checks. |
| 589 | */ |
| 590 | if (signature == 0x630) { |
| 591 | clkratio = 0x1c00000; |
| 592 | badclk2 = 0x1000000; |
| 593 | crctl3_or = 0x44000; |
| 594 | } else { |
| 595 | clkratio = 0x3c00000; |
| 596 | badclk2 = 0x3000000; |
| 597 | crctl3_or = 0x40000; |
| 598 | } |
| 599 | badclk1 = 0xc00000; |
| 600 | |
| 601 | /* Read EBL_CR_POWERON */ |
| 602 | msr = rdmsr(EBL_CR_POWERON); |
| 603 | eax = msr.lo; |
| 604 | /* Mask out [22-25] Clock frequency ratio */ |
| 605 | eax &= clkratio; |
| 606 | if (eax == badclk1 || eax == badclk2) { |
| 607 | printk(BIOS_ERR, "Incorrect clock frequency ratio %x\n", eax); |
| 608 | return -1; |
| 609 | } |
| 610 | |
| 611 | disable_cache(); |
| 612 | |
| 613 | /* Mask out from BBL_CR_CTL3: |
| 614 | * [0] L2 Configured |
| 615 | * [5] ECC Check Enable |
| 616 | * [6] Address Parity Check Enable |
| 617 | * [7] CRTN Parity Check Enable |
| 618 | * [8] L2 Enabled |
| 619 | * [12:11] Number of L2 banks |
| 620 | * [17:13] Cache size per bank |
| 621 | * [18] (Set below) |
| 622 | * [22:20] L2 Physical Address Range Support |
| 623 | */ |
| 624 | bblctl3.lo &= 0xff88061e; |
| 625 | /* Set: |
| 626 | * [17:13] = 00010 = 512Kbyte Cache size per bank (63x) |
| 627 | * [17:13] = 00000 = 128Kbyte Cache size per bank (all others) |
| 628 | * [18] Cache state error checking enable |
| 629 | */ |
| 630 | bblctl3.lo |= crctl3_or; |
| 631 | |
| 632 | /* Write BBL_CR_CTL3 */ |
| 633 | wrmsr(BBL_CR_CTL3, bblctl3); |
| 634 | |
| 635 | if (signature != 0x630) { |
| 636 | eax = bblctl3.lo; |
| 637 | |
| 638 | /* Set the l2 latency in BBL_CR_CTL3 */ |
| 639 | if (calculate_l2_latency() != 0) |
| 640 | goto bad; |
| 641 | |
| 642 | /* Read the new latency values back */ |
| 643 | bblctl3 = rdmsr(BBL_CR_CTL3); |
| 644 | calc_eax = bblctl3.lo; |
| 645 | |
| 646 | /* Write back the original default value */ |
| 647 | bblctl3.lo = eax; |
| 648 | wrmsr(BBL_CR_CTL3, bblctl3); |
| 649 | |
Lee Leahy | cdc5048 | 2017-03-15 18:26:18 -0700 | [diff] [blame] | 650 | /* Write BBL_CR_CTL3[27:26] (reserved??) to bits [1:0] of L2 |
| 651 | * register 4. Apparently all other bits must be preserved, |
| 652 | * hence these code. |
Keith Hui | 1ac19e2 | 2011-07-27 23:06:16 -0400 | [diff] [blame] | 653 | */ |
| 654 | |
| 655 | v = (calc_eax >> 26) & 0x3; |
| 656 | |
Lee Leahy | 9d62e7e | 2017-03-15 17:40:50 -0700 | [diff] [blame] | 657 | printk(BIOS_DEBUG, "write_l2(4, %x)\n", v); |
Keith Hui | 1ac19e2 | 2011-07-27 23:06:16 -0400 | [diff] [blame] | 658 | |
| 659 | a = read_l2(4); |
Lee Leahy | 26eeb0f | 2017-03-15 18:08:50 -0700 | [diff] [blame] | 660 | if (a >= 0) { |
Keith Hui | 1ac19e2 | 2011-07-27 23:06:16 -0400 | [diff] [blame] | 661 | a &= 0xfffc; |
| 662 | a |= v; |
| 663 | a = write_l2(4, a); |
| 664 | /* a now contains result code from write_l2() */ |
| 665 | } |
| 666 | if (a != 0) |
| 667 | goto bad; |
| 668 | |
| 669 | /* Restore the correct latency value into BBL_CR_CTL3 */ |
| 670 | bblctl3.lo = calc_eax; |
| 671 | wrmsr(BBL_CR_CTL3, bblctl3); |
| 672 | } /* ! 63x CPU */ |
| 673 | |
| 674 | /* Read L2 register 0 */ |
| 675 | v = read_l2(0); |
| 676 | |
Lee Leahy | cdc5048 | 2017-03-15 18:26:18 -0700 | [diff] [blame] | 677 | /* If L2(0)[5] set (and can be read properly), enable CRTN and address |
| 678 | * parity |
Keith Hui | 1ac19e2 | 2011-07-27 23:06:16 -0400 | [diff] [blame] | 679 | */ |
| 680 | if (v >= 0 && (v & 0x20)) { |
| 681 | bblctl3 = rdmsr(BBL_CR_CTL3); |
| 682 | bblctl3.lo |= (BBLCR3_L2_ADDR_PARITY_ENABLE | |
Lee Leahy | 7b5f12b9 | 2017-03-15 17:16:59 -0700 | [diff] [blame] | 683 | BBLCR3_L2_CRTN_PARITY_ENABLE); |
Keith Hui | 1ac19e2 | 2011-07-27 23:06:16 -0400 | [diff] [blame] | 684 | wrmsr(BBL_CR_CTL3, bblctl3); |
| 685 | } |
| 686 | |
| 687 | /* If something goes wrong at L2 ECC setup, cache ECC |
| 688 | * will just remain disabled. |
| 689 | */ |
| 690 | set_l2_ecc(); |
| 691 | |
| 692 | if (calculate_l2_physical_address_range() != 0) { |
Lee Leahy | cdc5048 | 2017-03-15 18:26:18 -0700 | [diff] [blame] | 693 | printk(BIOS_ERR, |
| 694 | "Failed to calculate L2 physical address range"); |
Keith Hui | 1ac19e2 | 2011-07-27 23:06:16 -0400 | [diff] [blame] | 695 | goto bad; |
| 696 | } |
| 697 | |
| 698 | if (calculate_l2_cache_size() != 0) { |
| 699 | printk(BIOS_ERR, "Failed to calculate L2 cache size"); |
| 700 | goto bad; |
| 701 | } |
| 702 | |
| 703 | /* Turn on cache. Only L1 is active at this time. */ |
| 704 | enable_cache(); |
| 705 | |
| 706 | /* Get the calculated cache size from BBL_CR_CTL3[17:13] */ |
| 707 | bblctl3 = rdmsr(BBL_CR_CTL3); |
| 708 | cache_size = (bblctl3.lo & BBLCR3_L2_SIZE); |
| 709 | if (cache_size == 0) |
| 710 | cache_size = 0x1000; |
| 711 | cache_size = cache_size << 3; |
| 712 | |
| 713 | /* TODO: Cache size above is per bank. We're supposed to get |
| 714 | * the number of banks from BBL_CR_CTL3[12:11]. |
| 715 | * Confirm that this still provides the correct answer. |
| 716 | */ |
| 717 | bank = (bblctl3.lo >> 11) & 0x3; |
| 718 | if (bank == 0) |
| 719 | bank = 1; |
| 720 | |
| 721 | printk(BIOS_INFO, "size %dK... ", cache_size * bank * 4 / 1024); |
| 722 | |
| 723 | /* Write to all cache lines to initialize */ |
| 724 | |
| 725 | while (cache_size > 0) { |
Keith Hui | 1ac19e2 | 2011-07-27 23:06:16 -0400 | [diff] [blame] | 726 | /* Each cache line is 32 bytes. */ |
| 727 | cache_size -= 32; |
| 728 | |
| 729 | /* Update each way */ |
| 730 | |
Lee Leahy | cdc5048 | 2017-03-15 18:26:18 -0700 | [diff] [blame] | 731 | /* We're supposed to get L2 associativity from |
| 732 | * BBL_CR_CTL3[10:9]. But this code only applies to certain |
| 733 | * members of the P6 processor family and since all P6 |
| 734 | * processors have 4-way L2 cache, we can safely assume |
Keith Hui | 1ac19e2 | 2011-07-27 23:06:16 -0400 | [diff] [blame] | 735 | * 4 way for all cache operations. |
| 736 | */ |
| 737 | |
| 738 | for (v = 0; v < 4; v++) { |
| 739 | /* Send Tag Write w/Data Write (TWW) to L2 controller |
| 740 | * MESI = Invalid |
| 741 | */ |
Lee Leahy | cdc5048 | 2017-03-15 18:26:18 -0700 | [diff] [blame] | 742 | if (signal_l2(cache_size, 0, 0, v, L2CMD_TWW |
| 743 | | L2CMD_MESI_I) != 0) { |
| 744 | printk(BIOS_ERR, |
| 745 | "Failed on signal_l2(%x, %x)\n", |
Keith Hui | 1ac19e2 | 2011-07-27 23:06:16 -0400 | [diff] [blame] | 746 | cache_size, v); |
| 747 | goto bad; |
| 748 | } |
| 749 | } |
| 750 | } |
| 751 | printk(BIOS_DEBUG, "L2 Cache lines initialized\n"); |
| 752 | |
| 753 | /* Disable cache */ |
| 754 | disable_cache(); |
| 755 | |
| 756 | /* Set L2 cache configured in BBL_CR_CTL3 */ |
| 757 | bblctl3 = rdmsr(BBL_CR_CTL3); |
| 758 | bblctl3.lo |= BBLCR3_L2_CONFIGURED; |
| 759 | wrmsr(BBL_CR_CTL3, bblctl3); |
| 760 | |
| 761 | /* Invalidate cache and discard unsaved writes */ |
| 762 | asm volatile ("invd"); |
| 763 | |
| 764 | /* Write 0 to L2 control register 5 */ |
| 765 | if (write_l2(5, 0) != 0) { |
Lee Leahy | 9d62e7e | 2017-03-15 17:40:50 -0700 | [diff] [blame] | 766 | printk(BIOS_ERR, "write_l2(5, 0) failed\n"); |
Keith Hui | 1ac19e2 | 2011-07-27 23:06:16 -0400 | [diff] [blame] | 767 | goto done; |
| 768 | } |
| 769 | |
| 770 | bblctl3 = rdmsr(BBL_CR_CTL3); |
| 771 | if (signature == 0x650) { |
| 772 | /* Change the L2 latency to 0101 then back to |
| 773 | * original value. I don't know why this is needed - dpd |
| 774 | */ |
| 775 | eax = bblctl3.lo; |
| 776 | bblctl3.lo &= ~BBLCR3_L2_LATENCY; |
| 777 | bblctl3.lo |= 0x0a; |
| 778 | wrmsr(BBL_CR_CTL3, bblctl3); |
| 779 | bblctl3.lo = eax; |
| 780 | wrmsr(BBL_CR_CTL3, bblctl3); |
| 781 | } |
| 782 | |
| 783 | /* Enable L2 in BBL_CR_CTL3 */ |
| 784 | bblctl3.lo |= BBLCR3_L2_ENABLED; |
| 785 | wrmsr(BBL_CR_CTL3, bblctl3); |
| 786 | |
| 787 | /* Turn on cache. Both L1 and L2 are now active. Wahoo! */ |
| 788 | done: |
| 789 | result = 0; |
| 790 | goto out; |
| 791 | bad: |
| 792 | result = -1; |
| 793 | out: |
| 794 | printk(BIOS_INFO, "done.\n"); |
| 795 | return result; |
| 796 | } |