blob: b4193981f3ccece83ffd5132a146ab21f10e6ee3 [file] [log] [blame]
Martin Roth9b1b3352016-02-24 12:27:06 -08001/*
2 * MemTest86+ V5 Specific code (GPL V2.0)
3 * By Samuel DEMEULEMEESTER, sdemeule@memtest.org
4 * http://www.canardpc.com - http://www.memtest.org
5 * ------------------------------------------------
6 * init.c - MemTest-86 Version 3.6
7 *
8 * Released under version 2 of the Gnu Public License.
9 * By Chris Brady
Martin Roth869474b2016-02-24 13:47:46 -080010 *
11 * Edited by David McInnis October 4, 2014
Martin Roth9b1b3352016-02-24 12:27:06 -080012 */
Martin Roth4dcd13d2016-02-24 13:53:07 -080013
Martin Roth9b1b3352016-02-24 12:27:06 -080014
15#include "stdin.h"
16#include "stddef.h"
17#include "test.h"
18#include "defs.h"
19#include "config.h"
20#include "cpuid.h"
21#include "smp.h"
22#include "io.h"
23#include "spd.h"
Martin Roth8cc1aeb2016-02-24 13:03:52 -080024#include "multiboot.h"
Martin Roth1286f192016-02-25 10:26:59 -080025#include "controller.h"
26#include "pci.h"
Martin Roth9b1b3352016-02-24 12:27:06 -080027
28extern struct tseq tseq[];
29extern short memsz_mode;
30extern int num_cpus;
31extern int act_cpus;
32extern int found_cpus;
33unsigned long imc_type = 0;
34extern int maxcpus;
35extern char cpu_mask[];
36extern void initialise_cpus();
37
38/* Here we store all of the cpuid data */
39extern struct cpu_ident cpu_id;
40
41int l1_cache=0, l2_cache=0, l3_cache=0;
42int tsc_invariable = 0;
43ulong extclock;
44
45ulong memspeed(ulong src, ulong len, int iter);
46static void cpu_type(void);
47static int cpuspeed(void);
48static void get_cache_size();
49static void cpu_cache_speed();
50void get_cpuid();
51int beepmode;
52extern short dmi_initialized;
53extern int dmi_err_cnts[MAX_DMI_MEMDEVS];
54
55/* Failsafe function */
56/* msec: number of ms to wait - scs: scancode expected to stop */
57/* bits: 0 = extended detection - 1: SMP - 2: Temp Check */
58/* 3: MP SMP - 4-7: RSVD */
59void failsafe(int msec, int scs)
60{
61 int i;
62 ulong sh, sl, l, h, t;
63 unsigned char c;
64 volatile char *pp;
Martin Roth4dcd13d2016-02-24 13:53:07 -080065
Martin Roth9b1b3352016-02-24 12:27:06 -080066 for(i=0, pp=(char *)(SCREEN_ADR+(18*160)+(18*2)+1); i<40; i++, pp+=2) {
67 *pp = 0x1E;
Martin Roth4dcd13d2016-02-24 13:53:07 -080068 }
Martin Roth9b1b3352016-02-24 12:27:06 -080069 for(i=0, pp=(char *)(SCREEN_ADR+(18*160)+(18*2)+1); i<3; i++, pp+=2) {
70 *pp = 0x9E;
Martin Roth4dcd13d2016-02-24 13:53:07 -080071 }
Martin Roth9b1b3352016-02-24 12:27:06 -080072 for(i=0, pp=(char *)(SCREEN_ADR+(18*160)+(55*2)+1); i<3; i++, pp+=2) {
73 *pp = 0x9E;
Martin Roth4dcd13d2016-02-24 13:53:07 -080074 }
75
Martin Roth9b1b3352016-02-24 12:27:06 -080076 cprint(18, 18, "==> Press F1 to enter Fail-Safe Mode <==");
Martin Roth4dcd13d2016-02-24 13:53:07 -080077
Martin Roth9b1b3352016-02-24 12:27:06 -080078 if(v->fail_safe & 2)
79 {
Martin Roth4dcd13d2016-02-24 13:53:07 -080080 cprint(19, 15, "==> Press F2 to force Multi-Threading (SMP) <==");
Martin Roth9b1b3352016-02-24 12:27:06 -080081 }
82
83 /* save the starting time */
84 asm __volatile__(
85 "rdtsc":"=a" (sl),"=d" (sh));
86
87 /* loop for n seconds */
88 while (1) {
89 asm __volatile__(
90 "rdtsc":"=a" (l),"=d" (h));
91 asm __volatile__ (
92 "subl %2,%0\n\t"
93 "sbbl %3,%1"
94 :"=a" (l), "=d" (h)
95 :"g" (sl), "g" (sh),
96 "0" (l), "1" (h));
97
98 t = h * ((unsigned)0xffffffff / v->clks_msec);
99 t += (l / v->clks_msec);
100
101 /* Is the time up? */
102 if (t >= msec) { break; }
Martin Roth4dcd13d2016-02-24 13:53:07 -0800103
Martin Roth9b1b3352016-02-24 12:27:06 -0800104 /* Is expected Scan code pressed? */
105 c = get_key();
106 c &= 0x7f;
Martin Roth4dcd13d2016-02-24 13:53:07 -0800107
Martin Roth9b1b3352016-02-24 12:27:06 -0800108 /* F1 */
109 if(c == scs) { v->fail_safe |= 1; break; }
Martin Roth4dcd13d2016-02-24 13:53:07 -0800110
Martin Roth9b1b3352016-02-24 12:27:06 -0800111 /* F2 */
Martin Roth4dcd13d2016-02-24 13:53:07 -0800112 if(c == scs+1)
113 {
Martin Roth9b1b3352016-02-24 12:27:06 -0800114 v->fail_safe ^= 2;
115 break;
116
117 }
Martin Roth4dcd13d2016-02-24 13:53:07 -0800118
Martin Roth9b1b3352016-02-24 12:27:06 -0800119 /* F3 */
Martin Roth4dcd13d2016-02-24 13:53:07 -0800120 if(c == scs+2)
121 {
Martin Roth9b1b3352016-02-24 12:27:06 -0800122 if(v->fail_safe & 2) { v->fail_safe ^= 2; }
123 v->fail_safe |= 8;
124 break;
Martin Roth4dcd13d2016-02-24 13:53:07 -0800125 }
126
Martin Roth9b1b3352016-02-24 12:27:06 -0800127 }
Martin Roth4dcd13d2016-02-24 13:53:07 -0800128
Martin Roth9b1b3352016-02-24 12:27:06 -0800129 cprint(18, 18, " ");
130 cprint(19, 15, " ");
Martin Roth4dcd13d2016-02-24 13:53:07 -0800131
Martin Roth9b1b3352016-02-24 12:27:06 -0800132 for(i=0, pp=(char *)(SCREEN_ADR+(18*160)+(18*2)+1); i<40; i++, pp+=2) {
133 *pp = 0x17;
Martin Roth4dcd13d2016-02-24 13:53:07 -0800134 }
135
Martin Roth9b1b3352016-02-24 12:27:06 -0800136}
137
138
139
140static void display_init(void)
141{
142 int i;
143 volatile char *pp;
Martin Roth4dcd13d2016-02-24 13:53:07 -0800144
Martin Roth9b1b3352016-02-24 12:27:06 -0800145 /* Set HW cursor out of screen boundaries */
146 __outb(0x0F, 0x03D4);
147 __outb(0xFF, 0x03D5);
148
149 __outb(0x0E, 0x03D4);
150 __outb(0xFF, 0x03D5);
151
152
153 serial_echo_init();
Ben Gardnerb72a23a2016-03-04 17:40:38 -0600154 serial_echo_print("\x1b[LINE_SCROLL;24r"); /* Set scroll area row 7-23 */
155 serial_echo_print("\x1b[H\x1b[2J"); /* Clear Screen */
156 serial_echo_print("\x1b[37m\x1b[44m");
157 serial_echo_print("\x1b[0m");
158 serial_echo_print("\x1b[37m\x1b[44m");
Martin Roth9b1b3352016-02-24 12:27:06 -0800159
160 /* Clear screen & set background to blue */
161 for(i=0, pp=(char *)(SCREEN_ADR); i<80*24; i++) {
162 *pp++ = ' ';
163 *pp++ = 0x17;
164 }
165
166 /* Make the name background green */
167 for(i=0, pp=(char *)(SCREEN_ADR+1); i<TITLE_WIDTH; i++, pp+=2) {
168 *pp = 0x20;
169 }
Martin Roth48c7f182016-02-25 16:24:37 -0800170 cprint(0, 0, MEMTEST_VERSION_STRING);
Martin Roth9b1b3352016-02-24 12:27:06 -0800171
172 /* Set Blinking "+" */
Martin Roth48c7f182016-02-25 16:24:37 -0800173 pp=(char *)(SCREEN_ADR+1 + (MEMTEST_PLUS_LOCATION * 2));
174 *pp = 0xA4;
Martin Roth9b1b3352016-02-24 12:27:06 -0800175
176 /* Do reverse video for the bottom display line */
177 for(i=0, pp=(char *)(SCREEN_ADR+1+(24 * 160)); i<80; i++, pp+=2) {
178 *pp = 0x71;
179 }
180
Ben Gardnerb72a23a2016-03-04 17:40:38 -0600181 serial_echo_print("\x1b[0m");
Martin Roth9b1b3352016-02-24 12:27:06 -0800182}
183
184/*
185 * Initialize test, setup screen and find out how much memory there is.
186 */
187void init(void)
188{
189 int i;
Martin Roth4dcd13d2016-02-24 13:53:07 -0800190
Martin Roth9b1b3352016-02-24 12:27:06 -0800191 outb(0x8, 0x3f2); /* Kill Floppy Motor */
192
193 /* Turn on cache */
194 set_cache(1);
195
196 /* Setup the display */
197 display_init();
Martin Roth4dcd13d2016-02-24 13:53:07 -0800198
Martin Roth9b1b3352016-02-24 12:27:06 -0800199 cprint(5, 60, "| Time: 0:00:00");
200 cprint(1, COL_MID,"Pass %");
201 cprint(2, COL_MID,"Test %");
202 cprint(3, COL_MID,"Test #");
203 cprint(4, COL_MID,"Testing: ");
204 cprint(5, COL_MID,"Pattern: ");
205 cprint(1, 0, "CLK: (32b Mode)");
206 cprint(2, 0, "L1 Cache: Unknown ");
207 cprint(3, 0, "L2 Cache: Unknown ");
208 cprint(4, 0, "L3 Cache: None ");
209 cprint(5, 0, "Memory : ");
210 cprint(6, 0, "------------------------------------------------------------------------------");
211 cprint(7, 0, "Core#:");
212 cprint(8, 0, "State:");
213 cprint(9, 0, "Cores: Active / Total (Run: All) | Pass: 0 Errors: 0 ");
214 cprint(10, 0, "------------------------------------------------------------------------------");
215
Martin Roth4dcd13d2016-02-24 13:53:07 -0800216 /*
Martin Roth9b1b3352016-02-24 12:27:06 -0800217 for(i=0, pp=(char *)(SCREEN_ADR+(5*160)+(53*2)+1); i<20; i++, pp+=2) {
218 *pp = 0x92;
219 }
220
221 for(i=0, pp=(char *)(SCREEN_ADR+0*160+1); i<80; i++, pp+=2) {
222 *pp = 0x47;
223 }
224 */
Martin Roth4dcd13d2016-02-24 13:53:07 -0800225
Martin Roth9b1b3352016-02-24 12:27:06 -0800226 cprint(7, 39, "| Chipset : Unknown");
227 cprint(8, 39, "| Memory Type : Unknown");
Martin Roth4dcd13d2016-02-24 13:53:07 -0800228
Martin Roth9b1b3352016-02-24 12:27:06 -0800229
230 for(i=0; i < 6; i++) {
231 cprint(i, COL_MID-2, "| ");
232 }
Martin Roth4dcd13d2016-02-24 13:53:07 -0800233
Martin Roth9b1b3352016-02-24 12:27:06 -0800234 footer();
235
236 aprint(5, 10, v->test_pages);
237
238 v->pass = 0;
239 v->msg_line = 0;
240 v->ecount = 0;
241 v->ecc_ecount = 0;
242 v->testsel = -1;
243 v->msg_line = LINE_SCROLL-1;
244 v->scroll_start = v->msg_line * 160;
245 v->erri.low_addr.page = 0x7fffffff;
246 v->erri.low_addr.offset = 0xfff;
247 v->erri.high_addr.page = 0;
248 v->erri.high_addr.offset = 0;
249 v->erri.min_bits = 32;
250 v->erri.max_bits = 0;
251 v->erri.min_bits = 32;
252 v->erri.max_bits = 0;
253 v->erri.maxl = 0;
254 v->erri.cor_err = 0;
255 v->erri.ebits = 0;
256 v->erri.hdr_flag = 0;
257 v->erri.tbits = 0;
258 for (i=0; tseq[i].msg != NULL; i++) {
259 tseq[i].errors = 0;
260 }
261 if (dmi_initialized) {
262 for (i=0; i < MAX_DMI_MEMDEVS; i++){
263 if (dmi_err_cnts[i] > 0) {
264 dmi_err_cnts[i] = 0;
265 }
266 }
267 }
Martin Roth4dcd13d2016-02-24 13:53:07 -0800268
Martin Roth9b1b3352016-02-24 12:27:06 -0800269 /* setup beep mode */
270 beepmode = BEEP_MODE;
Martin Roth4dcd13d2016-02-24 13:53:07 -0800271
Martin Roth9b1b3352016-02-24 12:27:06 -0800272 /* Get the cpu and cache information */
273 get_cpuid();
274
275 /* setup pci */
Martin Roth4dcd13d2016-02-24 13:53:07 -0800276 pci_init();
Martin Roth9b1b3352016-02-24 12:27:06 -0800277
Martin Roth4dcd13d2016-02-24 13:53:07 -0800278 get_cache_size();
Martin Roth9b1b3352016-02-24 12:27:06 -0800279
280 cpu_type();
281
282 cpu_cache_speed();
283
Martin Roth4dcd13d2016-02-24 13:53:07 -0800284 /* Check fail safe */
Martin Roth9b1b3352016-02-24 12:27:06 -0800285 failsafe(5000, 0x3B);
286
287 /* Initalize SMP */
288 initialise_cpus();
Martin Roth4dcd13d2016-02-24 13:53:07 -0800289
Martin Roth9b1b3352016-02-24 12:27:06 -0800290 for (i = 0; i <num_cpus; i++) {
291 dprint(7, i+7, i%10, 1, 0);
292 cprint(8, i+7, "S");
293 }
294
295 dprint(9, 19, num_cpus, 2, 0);
Martin Roth4dcd13d2016-02-24 13:53:07 -0800296
Martin Roth9b1b3352016-02-24 12:27:06 -0800297 if((v->fail_safe & 3) == 2)
298 {
299 cprint(LINE_CPU,9, "(SMP: Disabled)");
300 cprint(LINE_RAM,9, "Running...");
301 }
Martin Roth4dcd13d2016-02-24 13:53:07 -0800302 // dprint(10, 5, found_cpus, 2, 0);
Martin Roth9b1b3352016-02-24 12:27:06 -0800303
304 /* Find Memory Specs */
Martin Roth4dcd13d2016-02-24 13:53:07 -0800305 if(v->fail_safe & 1)
306 {
Martin Roth9b1b3352016-02-24 12:27:06 -0800307 cprint(LINE_CPU, COL_SPEC, " **** FAIL SAFE **** FAIL SAFE **** ");
308 cprint(LINE_RAM, COL_SPEC, " No detection, same reliability ");
309 } else {
310 find_controller();
311 get_spd_spec();
312 if(num_cpus <= 16 && !(v->fail_safe & 4)) { coretemp(); }
313 }
Martin Roth4dcd13d2016-02-24 13:53:07 -0800314
Martin Roth9b1b3352016-02-24 12:27:06 -0800315 if(v->check_temp > 0 && !(v->fail_safe & 4))
316 {
317 cprint(LINE_CPU, 26, "| CPU Temp");
Martin Rothfb4ff612016-02-25 16:27:04 -0800318 cprint(LINE_CPU+1, 26, "| C");
Martin Roth9b1b3352016-02-24 12:27:06 -0800319 }
Martin Roth4dcd13d2016-02-24 13:53:07 -0800320
Martin Roth9b1b3352016-02-24 12:27:06 -0800321 beep(600);
322 beep(1000);
Martin Roth4dcd13d2016-02-24 13:53:07 -0800323
Martin Roth9b1b3352016-02-24 12:27:06 -0800324 /* Record the start time */
325 asm __volatile__ ("rdtsc":"=a" (v->startl),"=d" (v->starth));
326 v->snapl = v->startl;
327 v->snaph = v->starth;
328 if (l1_cache == 0) { l1_cache = 64; }
329 if (l2_cache == 0) { l1_cache = 512; }
330 v->printmode=PRINTMODE_ADDRESSES;
331 v->numpatn=0;
332}
333
334/* Get cache sizes for most AMD and Intel CPUs, exceptions for old CPUs are
335 * handled in CPU detection */
336void get_cache_size()
337{
338 int i, j, n, size;
339 unsigned int v[4];
340 unsigned char *dp = (unsigned char *)v;
341 struct cpuid4_eax *eax = (struct cpuid4_eax *)&v[0];
342 struct cpuid4_ebx *ebx = (struct cpuid4_ebx *)&v[1];
343 struct cpuid4_ecx *ecx = (struct cpuid4_ecx *)&v[2];
344
345 switch(cpu_id.vend_id.char_array[0]) {
346 /* AMD Processors */
347 case 'A':
348 //l1_cache = cpu_id.cache_info.amd.l1_i_sz;
349 l1_cache = cpu_id.cache_info.amd.l1_d_sz;
350 l2_cache = cpu_id.cache_info.amd.l2_sz;
351 l3_cache = cpu_id.cache_info.amd.l3_sz;
352 l3_cache *= 512;
353 break;
354 case 'G':
355 /* Intel Processors */
356 l1_cache = 0;
357 l2_cache = 0;
358 l3_cache = 0;
359
360 /* Use CPUID(4) if it is available */
361 if (cpu_id.max_cpuid > 3) {
362
363 /* figure out how many cache leaves */
364 n = -1;
Martin Roth4dcd13d2016-02-24 13:53:07 -0800365 do
Martin Roth9b1b3352016-02-24 12:27:06 -0800366 {
367 ++n;
368 /* Do cpuid(4) loop to find out num_cache_leaves */
369 cpuid_count(4, n, &v[0], &v[1], &v[2], &v[3]);
370 } while ((eax->ctype) != 0);
371
372 /* loop through all of the leaves */
Martin Roth4dcd13d2016-02-24 13:53:07 -0800373 for (i=0; i<n; i++)
Martin Roth9b1b3352016-02-24 12:27:06 -0800374 {
375 cpuid_count(4, i, &v[0], &v[1], &v[2], &v[3]);
376
377 /* Check for a valid cache type */
Martin Roth4dcd13d2016-02-24 13:53:07 -0800378 if (eax->ctype == 1 || eax->ctype == 3)
Martin Roth9b1b3352016-02-24 12:27:06 -0800379 {
380
381 /* Compute the cache size */
382 size = (ecx->number_of_sets + 1) *
383 (ebx->coherency_line_size + 1) *
384 (ebx->physical_line_partition + 1) *
385 (ebx->ways_of_associativity + 1);
386 size /= 1024;
387
Martin Roth4dcd13d2016-02-24 13:53:07 -0800388 switch (eax->level)
Martin Roth9b1b3352016-02-24 12:27:06 -0800389 {
390 case 1:
391 l1_cache += size;
392 break;
393 case 2:
394 l2_cache += size;
395 break;
396 case 3:
397 l3_cache += size;
398 break;
399 }
400 }
401 }
402 return;
403 }
404
405 /* No CPUID(4) so we use the older CPUID(2) method */
406 /* Get number of times to iterate */
407 cpuid(2, &v[0], &v[1], &v[2], &v[3]);
408 n = v[0] & 0xff;
409 for (i=0 ; i<n ; i++) {
410 cpuid(2, &v[0], &v[1], &v[2], &v[3]);
411
412 /* If bit 31 is set, this is an unknown format */
413 for (j=0 ; j<3 ; j++) {
414 if (v[j] & (1 << 31)) {
415 v[j] = 0;
416 }
417 }
418
419 /* Byte 0 is level count, not a descriptor */
420 for (j = 1 ; j < 16 ; j++) {
421 switch(dp[j]) {
422 case 0x6:
423 case 0xa:
424 case 0x66:
425 l1_cache += 8;
426 break;
427 case 0x8:
428 case 0xc:
429 case 0xd:
430 case 0x60:
431 case 0x67:
432 l1_cache += 16;
433 break;
434 case 0xe:
435 l1_cache += 24;
436 break;
437 case 0x9:
438 case 0x2c:
439 case 0x30:
440 case 0x68:
441 l1_cache += 32;
442 break;
443 case 0x39:
444 case 0x3b:
445 case 0x41:
446 case 0x79:
447 l2_cache += 128;
448 break;
449 case 0x3a:
450 l2_cache += 192;
451 break;
452 case 0x21:
453 case 0x3c:
454 case 0x3f:
455 case 0x42:
456 case 0x7a:
457 case 0x82:
458 l2_cache += 256;
459 break;
460 case 0x3d:
461 l2_cache += 384;
462 break;
463 case 0x3e:
464 case 0x43:
465 case 0x7b:
466 case 0x7f:
467 case 0x80:
468 case 0x83:
469 case 0x86:
470 l2_cache += 512;
471 break;
472 case 0x44:
473 case 0x78:
474 case 0x7c:
475 case 0x84:
476 case 0x87:
477 l2_cache += 1024;
478 break;
479 case 0x45:
480 case 0x7d:
481 case 0x85:
482 l2_cache += 2048;
483 break;
484 case 0x48:
485 l2_cache += 3072;
486 break;
487 case 0x4e:
488 l2_cache += 6144;
489 break;
490 case 0x23:
491 case 0xd0:
492 l3_cache += 512;
493 break;
494 case 0xd1:
495 case 0xd6:
496 l3_cache += 1024;
497 break;
498 case 0x25:
499 case 0xd2:
500 case 0xd7:
501 case 0xdc:
502 case 0xe2:
503 l3_cache += 2048;
504 break;
505 case 0x29:
506 case 0x46:
507 case 0x49:
508 case 0xd8:
509 case 0xdd:
510 case 0xe3:
511 l3_cache += 4096;
512 break;
513 case 0x4a:
514 l3_cache += 6144;
515 break;
516 case 0x47:
517 case 0x4b:
518 case 0xde:
519 case 0xe4:
520 l3_cache += 8192;
Martin Roth4dcd13d2016-02-24 13:53:07 -0800521 break;
Martin Roth9b1b3352016-02-24 12:27:06 -0800522 case 0x4c:
523 case 0xea:
524 l3_cache += 12288;
Martin Roth4dcd13d2016-02-24 13:53:07 -0800525 break;
Martin Roth9b1b3352016-02-24 12:27:06 -0800526 case 0x4d:
527 l3_cache += 16384;
Martin Roth4dcd13d2016-02-24 13:53:07 -0800528 break;
Martin Roth9b1b3352016-02-24 12:27:06 -0800529 case 0xeb:
530 l3_cache += 18432;
Martin Roth4dcd13d2016-02-24 13:53:07 -0800531 break;
Martin Roth9b1b3352016-02-24 12:27:06 -0800532 case 0xec:
533 l3_cache += 24576;
Martin Roth4dcd13d2016-02-24 13:53:07 -0800534 break;
Martin Roth9b1b3352016-02-24 12:27:06 -0800535 } /* end switch */
536 } /* end for 1-16 */
537 } /* end for 0 - n */
538 }
539}
540
541/*
542 * Find IMC type and set global variables accordingly
543 */
544void detect_imc(void)
545{
546 // Check AMD IMC
Martin Roth4dcd13d2016-02-24 13:53:07 -0800547 if(cpu_id.vend_id.char_array[0] == 'A' && cpu_id.vers.bits.family == 0xF)
Martin Roth9b1b3352016-02-24 12:27:06 -0800548 {
549 switch(cpu_id.vers.bits.extendedFamily)
550 {
551 case 0x0:
552 imc_type = 0x0100; // Old K8
553 break;
554 case 0x1:
555 case 0x2:
556 imc_type = 0x0101; // K10 (Family 10h & 11h)
557 break;
558 case 0x3:
559 imc_type = 0x0102; // A-Series APU (Family 12h)
560 break;
561 case 0x5:
562 imc_type = 0x0103; // C- / E- / Z- Series APU (Family 14h)
Martin Roth4dcd13d2016-02-24 13:53:07 -0800563 break;
Martin Roth9b1b3352016-02-24 12:27:06 -0800564 case 0x6:
565 imc_type = 0x0104; // FX Series (Family 15h)
Martin Roth4dcd13d2016-02-24 13:53:07 -0800566 break;
Martin Roth9b1b3352016-02-24 12:27:06 -0800567 case 0x7:
568 imc_type = 0x0105; // Kabini & related (Family 16h)
Martin Roth4dcd13d2016-02-24 13:53:07 -0800569 break;
570 }
Martin Roth9b1b3352016-02-24 12:27:06 -0800571 return;
572 }
Martin Roth4dcd13d2016-02-24 13:53:07 -0800573
574 // Check Intel IMC
575 if(cpu_id.vend_id.char_array[0] == 'G' && cpu_id.vers.bits.family == 6 && cpu_id.vers.bits.extendedModel)
576 {
Martin Roth9b1b3352016-02-24 12:27:06 -0800577 switch(cpu_id.vers.bits.model)
578 {
579 case 0x5:
580 if(cpu_id.vers.bits.extendedModel == 2) { imc_type = 0x0003; } // Core i3/i5 1st Gen 45 nm (NHM)
581 if(cpu_id.vers.bits.extendedModel == 3) { v->fail_safe |= 4; } // Atom Clover Trail
582 if(cpu_id.vers.bits.extendedModel == 4) { imc_type = 0x0007; } // HSW-ULT
583 break;
584 case 0x6:
Martin Roth4dcd13d2016-02-24 13:53:07 -0800585 if(cpu_id.vers.bits.extendedModel == 3) {
Martin Roth9b1b3352016-02-24 12:27:06 -0800586 imc_type = 0x0009; // Atom Cedar Trail
587 v->fail_safe |= 4; // Disable Core temp
588 }
589 break;
Martin Roth3ebe90b2016-03-03 20:36:04 -0700590 case 0x7:
591 if(cpu_id.vers.bits.extendedModel == 3) {
592 imc_type = 0x000A; // Atom Bay Trail
593 }
594 break;
Martin Roth9b1b3352016-02-24 12:27:06 -0800595 case 0xA:
596 switch(cpu_id.vers.bits.extendedModel)
597 {
598 case 0x1:
599 imc_type = 0x0001; // Core i7 1st Gen 45 nm (NHME)
600 break;
601 case 0x2:
602 imc_type = 0x0004; // Core 2nd Gen (SNB)
Martin Roth4dcd13d2016-02-24 13:53:07 -0800603 break;
Martin Roth9b1b3352016-02-24 12:27:06 -0800604 case 0x3:
Martin Roth4dcd13d2016-02-24 13:53:07 -0800605 imc_type = 0x0006; // Core 3nd Gen (IVB)
Martin Roth9b1b3352016-02-24 12:27:06 -0800606 break;
607 }
608 break;
609 case 0xC:
610 switch(cpu_id.vers.bits.extendedModel)
611 {
612 case 0x1:
Martin Roth4dcd13d2016-02-24 13:53:07 -0800613 if(cpu_id.vers.bits.stepping > 9) { imc_type = 0x0008; } // Atom PineView
Martin Roth9b1b3352016-02-24 12:27:06 -0800614 v->fail_safe |= 4; // Disable Core temp
Martin Roth4dcd13d2016-02-24 13:53:07 -0800615 break;
Martin Roth9b1b3352016-02-24 12:27:06 -0800616 case 0x2:
Martin Roth4dcd13d2016-02-24 13:53:07 -0800617 imc_type = 0x0002; // Core i7 1st Gen 32 nm (WMR)
618 break;
Martin Roth9b1b3352016-02-24 12:27:06 -0800619 case 0x3:
Martin Roth4dcd13d2016-02-24 13:53:07 -0800620 imc_type = 0x0007; // Core 4nd Gen (HSW)
Martin Roth9b1b3352016-02-24 12:27:06 -0800621 break;
622 }
Martin Roth4dcd13d2016-02-24 13:53:07 -0800623 break;
Martin Roth9b1b3352016-02-24 12:27:06 -0800624 case 0xD:
625 imc_type = 0x0005; // SNB-E
Martin Roth4dcd13d2016-02-24 13:53:07 -0800626 break;
Martin Roth9b1b3352016-02-24 12:27:06 -0800627 case 0xE:
628 imc_type = 0x0001; // Core i7 1st Gen 45 nm (NHM)
Martin Roth4dcd13d2016-02-24 13:53:07 -0800629 break;
Martin Roth9b1b3352016-02-24 12:27:06 -0800630 }
Martin Roth4dcd13d2016-02-24 13:53:07 -0800631
Martin Roth9b1b3352016-02-24 12:27:06 -0800632 if(imc_type) { tsc_invariable = 1; }
633 return;
634 }
635}
636
637void smp_default_mode(void)
638{
639 int i, result;
640 char *cpupsn = cpu_id.brand_id.char_array;
641 char *disabledcpu[] = { "Opteron", "Xeon", "Genuine Intel" };
Martin Roth4dcd13d2016-02-24 13:53:07 -0800642
643 for(i = 0; i < 3; i++)
Martin Roth9b1b3352016-02-24 12:27:06 -0800644 {
645 result = strstr(cpupsn , disabledcpu[i]);
646 if(result != -1) { v->fail_safe |= 0b10; }
647 }
Martin Roth4dcd13d2016-02-24 13:53:07 -0800648
Martin Roth9b1b3352016-02-24 12:27:06 -0800649 // For 5.01 release, SMP disabled by defualt by config.h toggle
650 if(CONSERVATIVE_SMP) { v->fail_safe |= 0b10; }
Martin Roth4dcd13d2016-02-24 13:53:07 -0800651
Martin Roth9b1b3352016-02-24 12:27:06 -0800652}
653
654/*
655 * Find CPU type
656 */
657void cpu_type(void)
658{
659 /* If we can get a brand string use it, and we are done */
660 if (cpu_id.max_xcpuid >= 0x80000004) {
661 cprint(0, COL_MID, cpu_id.brand_id.char_array);
662 //If we have a brand string, maybe we have an IMC. Check that.
663 detect_imc();
Martin Roth4dcd13d2016-02-24 13:53:07 -0800664 smp_default_mode();
Martin Roth9b1b3352016-02-24 12:27:06 -0800665 return;
666 }
667
Martin Roth4dcd13d2016-02-24 13:53:07 -0800668 /* The brand string is not available so we need to figure out
Martin Roth9b1b3352016-02-24 12:27:06 -0800669 * CPU what we have */
670 switch(cpu_id.vend_id.char_array[0]) {
671 /* AMD Processors */
672 case 'A':
673 switch(cpu_id.vers.bits.family) {
674 case 4:
675 switch(cpu_id.vers.bits.model) {
676 case 3:
677 cprint(0, COL_MID, "AMD 486DX2");
678 break;
679 case 7:
680 cprint(0, COL_MID, "AMD 486DX2-WB");
681 break;
682 case 8:
683 cprint(0, COL_MID, "AMD 486DX4");
684 break;
685 case 9:
686 cprint(0, COL_MID, "AMD 486DX4-WB");
687 break;
688 case 14:
689 cprint(0, COL_MID, "AMD 5x86-WT");
690 break;
691 case 15:
692 cprint(0, COL_MID, "AMD 5x86-WB");
693 break;
694 }
695 /* Since we can't get CPU speed or cache info return */
696 return;
697 case 5:
698 switch(cpu_id.vers.bits.model) {
699 case 0:
700 case 1:
701 case 2:
702 case 3:
703 cprint(0, COL_MID, "AMD K5");
704 l1_cache = 8;
705 break;
706 case 6:
707 case 7:
708 cprint(0, COL_MID, "AMD K6");
709 break;
710 case 8:
711 cprint(0, COL_MID, "AMD K6-2");
712 break;
713 case 9:
714 cprint(0, COL_MID, "AMD K6-III");
715 break;
Martin Roth4dcd13d2016-02-24 13:53:07 -0800716 case 13:
717 cprint(0, COL_MID, "AMD K6-III+");
Martin Roth9b1b3352016-02-24 12:27:06 -0800718 break;
719 }
720 break;
721 case 6:
722
723 switch(cpu_id.vers.bits.model) {
724 case 1:
725 cprint(0, COL_MID, "AMD Athlon (0.25)");
726 break;
727 case 2:
728 case 4:
729 cprint(0, COL_MID, "AMD Athlon (0.18)");
730 break;
731 case 6:
732 if (l2_cache == 64) {
733 cprint(0, COL_MID, "AMD Duron (0.18)");
734 } else {
735 cprint(0, COL_MID, "Athlon XP (0.18)");
736 }
737 break;
738 case 8:
739 case 10:
740 if (l2_cache == 64) {
741 cprint(0, COL_MID, "AMD Duron (0.13)");
742 } else {
743 cprint(0, COL_MID, "Athlon XP (0.13)");
744 }
745 break;
746 case 3:
747 case 7:
748 cprint(0, COL_MID, "AMD Duron");
749 /* Duron stepping 0 CPUID for L2 is broken */
750 /* (AMD errata T13)*/
751 if (cpu_id.vers.bits.stepping == 0) { /* stepping 0 */
752 /* Hard code the right L2 size */
753 l2_cache = 64;
754 } else {
755 }
756 break;
757 }
758 break;
759
760 /* All AMD family values >= 10 have the Brand ID
761 * feature so we don't need to find the CPU type */
762 }
763 break;
764
765 /* Intel or Transmeta Processors */
766 case 'G':
767 if ( cpu_id.vend_id.char_array[7] == 'T' ) { /* GenuineTMx86 */
768 if (cpu_id.vers.bits.family == 5) {
769 cprint(0, COL_MID, "TM 5x00");
770 } else if (cpu_id.vers.bits.family == 15) {
771 cprint(0, COL_MID, "TM 8x00");
772 }
773 l1_cache = cpu_id.cache_info.ch[3] + cpu_id.cache_info.ch[7];
774 l2_cache = (cpu_id.cache_info.ch[11]*256) + cpu_id.cache_info.ch[10];
775 } else { /* GenuineIntel */
776 if (cpu_id.vers.bits.family == 4) {
777 switch(cpu_id.vers.bits.model) {
778 case 0:
779 case 1:
780 cprint(0, COL_MID, "Intel 486DX");
781 break;
782 case 2:
783 cprint(0, COL_MID, "Intel 486SX");
784 break;
785 case 3:
786 cprint(0, COL_MID, "Intel 486DX2");
787 break;
788 case 4:
789 cprint(0, COL_MID, "Intel 486SL");
790 break;
791 case 5:
792 cprint(0, COL_MID, "Intel 486SX2");
793 break;
794 case 7:
795 cprint(0, COL_MID, "Intel 486DX2-WB");
796 break;
797 case 8:
798 cprint(0, COL_MID, "Intel 486DX4");
799 break;
800 case 9:
801 cprint(0, COL_MID, "Intel 486DX4-WB");
802 break;
803 }
804 /* Since we can't get CPU speed or cache info return */
805 return;
806 }
807
808
809 switch(cpu_id.vers.bits.family) {
810 case 5:
811 switch(cpu_id.vers.bits.model) {
812 case 0:
813 case 1:
814 case 2:
815 case 3:
816 case 7:
817 cprint(0, COL_MID, "Pentium");
818 if (l1_cache == 0) {
819 l1_cache = 8;
820 }
821 break;
822 case 4:
823 case 8:
824 cprint(0, COL_MID, "Pentium-MMX");
825 if (l1_cache == 0) {
826 l1_cache = 16;
827 }
828 break;
829 }
830 break;
831 case 6:
832 switch(cpu_id.vers.bits.model) {
833 case 0:
834 case 1:
835 cprint(0, COL_MID, "Pentium Pro");
836 break;
837 case 3:
838 case 4:
839 cprint(0, COL_MID, "Pentium II");
840 break;
841 case 5:
842 if (l2_cache == 0) {
843 cprint(0, COL_MID, "Celeron");
844 } else {
845 cprint(0, COL_MID, "Pentium II");
846 }
847 break;
848 case 6:
849 if (l2_cache == 128) {
850 cprint(0, COL_MID, "Celeron");
851 } else {
852 cprint(0, COL_MID, "Pentium II");
853 }
854 }
855 break;
856 case 7:
857 case 8:
858 case 11:
859 if (l2_cache == 128) {
860 cprint(0, COL_MID, "Celeron");
861 } else {
862 cprint(0, COL_MID, "Pentium III");
863 }
864 break;
865 case 9:
866 if (l2_cache == 512) {
867 cprint(0, COL_MID, "Celeron M (0.13)");
868 } else {
869 cprint(0, COL_MID, "Pentium M (0.13)");
870 }
871 break;
872 case 10:
873 cprint(0, COL_MID, "Pentium III Xeon");
874 break;
875 case 12:
876 l1_cache = 24;
877 cprint(0, COL_MID, "Atom (0.045)");
Martin Roth4dcd13d2016-02-24 13:53:07 -0800878 break;
Martin Roth9b1b3352016-02-24 12:27:06 -0800879 case 13:
880 if (l2_cache == 1024) {
881 cprint(0, COL_MID, "Celeron M (0.09)");
882 } else {
883 cprint(0, COL_MID, "Pentium M (0.09)");
884 }
885 break;
886 case 14:
887 cprint(0, COL_MID, "Intel Core");
Martin Roth4dcd13d2016-02-24 13:53:07 -0800888 break;
Martin Roth9b1b3352016-02-24 12:27:06 -0800889 case 15:
890 if (l2_cache == 1024) {
891 cprint(0, COL_MID, "Pentium E");
892 } else {
893 cprint(0, COL_MID, "Intel Core 2");
894 }
895 break;
896 }
897 break;
898 case 15:
899 switch(cpu_id.vers.bits.model) {
900 case 0:
Martin Roth4dcd13d2016-02-24 13:53:07 -0800901 case 1:
Martin Roth9b1b3352016-02-24 12:27:06 -0800902 case 2:
903 if (l2_cache == 128) {
904 cprint(0, COL_MID, "Celeron");
905 } else {
906 cprint(0, COL_MID, "Pentium 4");
907 }
908 break;
909 case 3:
910 case 4:
911 if (l2_cache == 256) {
912 cprint(0, COL_MID, "Celeron (0.09)");
913 } else {
914 cprint(0, COL_MID, "Pentium 4 (0.09)");
915 }
916 break;
917 case 6:
918 cprint(0, COL_MID, "Pentium D (65nm)");
919 break;
920 default:
921 cprint(0, COL_MID, "Unknown Intel");
922 break;
Martin Roth9b1b3352016-02-24 12:27:06 -0800923 }
924
925 }
926 break;
927
928 /* VIA/Cyrix/Centaur Processors with CPUID */
929 case 'C':
930 if ( cpu_id.vend_id.char_array[1] == 'e' ) { /* CentaurHauls */
931 l1_cache = cpu_id.cache_info.ch[3] + cpu_id.cache_info.ch[7];
932 l2_cache = cpu_id.cache_info.ch[11];
933 switch(cpu_id.vers.bits.family){
934 case 5:
935 cprint(0, COL_MID, "Centaur 5x86");
936 break;
937 case 6: // VIA C3
938 switch(cpu_id.vers.bits.model){
939 default:
940 if (cpu_id.vers.bits.stepping < 8) {
941 cprint(0, COL_MID, "VIA C3 Samuel2");
942 } else {
943 cprint(0, COL_MID, "VIA C3 Eden");
944 }
945 break;
946 case 10:
947 cprint(0, COL_MID, "VIA C7 (C5J)");
948 l1_cache = 64;
949 l2_cache = 128;
950 break;
951 case 13:
952 cprint(0, COL_MID, "VIA C7 (C5R)");
953 l1_cache = 64;
954 l2_cache = 128;
955 break;
956 case 15:
957 cprint(0, COL_MID, "VIA Isaiah (CN)");
958 l1_cache = 64;
959 l2_cache = 128;
960 break;
961 }
962 }
963 } else { /* CyrixInstead */
964 switch(cpu_id.vers.bits.family) {
965 case 5:
966 switch(cpu_id.vers.bits.model) {
967 case 0:
968 cprint(0, COL_MID, "Cyrix 6x86MX/MII");
969 break;
970 case 4:
971 cprint(0, COL_MID, "Cyrix GXm");
972 break;
973 }
974 return;
975
976 case 6: // VIA C3
977 switch(cpu_id.vers.bits.model) {
978 case 6:
979 cprint(0, COL_MID, "Cyrix III");
980 break;
981 case 7:
982 if (cpu_id.vers.bits.stepping < 8) {
983 cprint(0, COL_MID, "VIA C3 Samuel2");
984 } else {
985 cprint(0, COL_MID, "VIA C3 Ezra-T");
986 }
987 break;
988 case 8:
989 cprint(0, COL_MID, "VIA C3 Ezra-T");
990 break;
991 case 9:
992 cprint(0, COL_MID, "VIA C3 Nehemiah");
993 break;
994 }
995 // L1 = L2 = 64 KB from Cyrix III to Nehemiah
996 l1_cache = 64;
997 l2_cache = 64;
998 break;
999 }
1000 }
1001 break;
1002 /* Unknown processor */
1003 default:
1004 /* Make a guess at the family */
1005 switch(cpu_id.vers.bits.family) {
1006 case 5:
1007 cprint(0, COL_MID, "586");
1008 case 6:
1009 cprint(0, COL_MID, "686");
1010 default:
1011 cprint(0, COL_MID, "Unidentified Processor");
1012 }
1013 }
1014}
1015
1016#define STEST_ADDR 0x100000 /* Measure memory speed starting at 1MB */
1017
1018/* Measure and display CPU and cache sizes and speeds */
1019void cpu_cache_speed()
1020{
1021 int i, off = 4;
1022 ulong speed;
1023
1024
1025 /* Print CPU speed */
1026 if ((speed = cpuspeed()) > 0) {
1027 if (speed < 999499) {
1028 speed += 50; /* for rounding */
1029 cprint(1, off, " . MHz");
1030 dprint(1, off+1, speed/1000, 3, 1);
1031 dprint(1, off+5, (speed/100)%10, 1, 0);
1032 } else {
1033 speed += 500; /* for rounding */
1034 cprint(1, off, " MHz");
1035 dprint(1, off, speed/1000, 5, 0);
1036 }
1037 extclock = speed;
1038 }
1039
1040 /* Print out L1 cache info */
1041 /* To measure L1 cache speed we use a block size that is 1/4th */
1042 /* of the total L1 cache size since half of it is for instructions */
1043 if (l1_cache) {
1044 cprint(2, 0, "L1 Cache: K ");
1045 dprint(2, 11, l1_cache, 3, 0);
1046 if ((speed=memspeed(STEST_ADDR, (l1_cache/2)*1024, 200))) {
1047 cprint(2, 16, " MB/s");
1048 dprint(2, 16, speed, 6, 0);
1049 }
1050 }
1051
1052 /* Print out L2 cache info */
1053 /* We measure the L2 cache speed by using a block size that is */
1054 /* the size of the L1 cache. We have to fudge if the L1 */
1055 /* cache is bigger than the L2 */
1056 if (l2_cache) {
1057 cprint(3, 0, "L2 Cache: K ");
1058 dprint(3, 10, l2_cache, 4, 0);
1059
1060 if (l2_cache < l1_cache) {
1061 i = l1_cache / 4 + l2_cache / 4;
1062 } else {
1063 i = l1_cache;
1064 }
1065 if ((speed=memspeed(STEST_ADDR, i*1024, 200))) {
1066 cprint(3, 16, " MB/s");
1067 dprint(3, 16, speed, 6, 0);
1068 }
1069 }
1070 /* Print out L3 cache info */
1071 /* We measure the L3 cache speed by using a block size that is */
1072 /* 2X the size of the L2 cache. */
1073
Martin Roth4dcd13d2016-02-24 13:53:07 -08001074 if (l3_cache)
Martin Roth9b1b3352016-02-24 12:27:06 -08001075 {
1076 cprint(4, 0, "L3 Cache: K ");
1077 aprint(4, 10, l3_cache/4);
1078 //dprint(4, 10, l3_cache, 4, 0);
Martin Roth4dcd13d2016-02-24 13:53:07 -08001079
Martin Roth9b1b3352016-02-24 12:27:06 -08001080 i = l2_cache*2;
Martin Roth4dcd13d2016-02-24 13:53:07 -08001081
Martin Roth9b1b3352016-02-24 12:27:06 -08001082 if ((speed=memspeed(STEST_ADDR, i*1024, 150))) {
1083 cprint(4, 16, " MB/s");
1084 dprint(4, 16, speed, 6, 0);
1085 }
1086 }
1087}
1088
1089/* Measure and display memory speed, multitasked using all CPUs */
1090ulong spd[MAX_CPUS];
1091void get_mem_speed(int me, int ncpus)
1092{
1093 int i;
1094 ulong speed=0;
1095
Martin Roth4dcd13d2016-02-24 13:53:07 -08001096 /* Determine memory speed. To find the memory speed we use
Martin Roth9b1b3352016-02-24 12:27:06 -08001097 * A block size that is the sum of all the L1, L2 & L3 caches
1098 * in all cpus * 6 */
1099 i = (l3_cache + l2_cache + l1_cache) * 4;
1100
1101 /* Make sure that we have enough memory to do the test */
1102 /* If not use all we have */
1103 if ((1 + (i * 2)) > (v->plim_upper << 2)) {
1104 i = ((v->plim_upper <<2) - 1) / 2;
1105 }
Martin Roth4dcd13d2016-02-24 13:53:07 -08001106
Martin Roth9b1b3352016-02-24 12:27:06 -08001107 speed = memspeed(STEST_ADDR, i * 1024, 100);
1108 cprint(5, 16, " MB/s");
1109 dprint(5, 16, speed, 6, 0);
Martin Roth4dcd13d2016-02-24 13:53:07 -08001110
Martin Roth9b1b3352016-02-24 12:27:06 -08001111}
1112
1113/* #define TICKS 5 * 11832 (count = 6376)*/
1114/* #define TICKS (65536 - 12752) */
1115#define TICKS 59659 /* 50 ms */
1116
1117/* Returns CPU clock in khz */
1118ulong stlow, sthigh;
1119static int cpuspeed(void)
1120{
1121 int loops;
1122 ulong end_low, end_high;
1123
1124 if (cpu_id.fid.bits.rdtsc == 0 ) {
1125 return(-1);
1126 }
1127
1128 /* Setup timer */
1129 outb((inb(0x61) & ~0x02) | 0x01, 0x61);
Martin Roth4dcd13d2016-02-24 13:53:07 -08001130 outb(0xb0, 0x43);
Martin Roth9b1b3352016-02-24 12:27:06 -08001131 outb(TICKS & 0xff, 0x42);
1132 outb(TICKS >> 8, 0x42);
1133
1134 asm __volatile__ ("rdtsc":"=a" (stlow),"=d" (sthigh));
1135
1136 loops = 0;
1137 do {
1138 loops++;
1139 } while ((inb(0x61) & 0x20) == 0);
1140
1141 asm __volatile__ (
1142 "rdtsc\n\t" \
1143 "subl stlow,%%eax\n\t" \
1144 "sbbl sthigh,%%edx\n\t" \
1145 :"=a" (end_low), "=d" (end_high)
1146 );
1147
1148 /* Make sure we have a credible result */
1149 if (loops < 4 || end_low < 50000) {
1150 return(-1);
1151 }
1152 v->clks_msec = end_low/50;
1153
1154 if (tsc_invariable) end_low = correct_tsc(end_low);
1155
1156 return(v->clks_msec);
1157}
1158
1159/* Measure cache speed by copying a block of memory. */
1160/* Returned value is kbytes/second */
1161ulong memspeed(ulong src, ulong len, int iter)
1162{
1163 int i;
1164 ulong dst, wlen;
1165 ulong st_low, st_high;
1166 ulong end_low, end_high;
1167 ulong cal_low, cal_high;
1168
1169 if (cpu_id.fid.bits.rdtsc == 0 ) {
1170 return(-1);
1171 }
1172 if (len == 0) return(-2);
1173
1174 dst = src + len;
1175 wlen = len / 4; /* Length is bytes */
1176
1177 /* Calibrate the overhead with a zero word copy */
1178 asm __volatile__ ("rdtsc":"=a" (st_low),"=d" (st_high));
1179 for (i=0; i<iter; i++) {
1180 asm __volatile__ (
1181 "movl %0,%%esi\n\t" \
1182 "movl %1,%%edi\n\t" \
1183 "movl %2,%%ecx\n\t" \
1184 "cld\n\t" \
1185 "rep\n\t" \
1186 "movsl\n\t" \
1187 :: "g" (src), "g" (dst), "g" (0)
1188 : "esi", "edi", "ecx"
1189 );
1190 }
1191 asm __volatile__ ("rdtsc":"=a" (cal_low),"=d" (cal_high));
1192
1193 /* Compute the overhead time */
1194 asm __volatile__ (
1195 "subl %2,%0\n\t"
1196 "sbbl %3,%1"
1197 :"=a" (cal_low), "=d" (cal_high)
1198 :"g" (st_low), "g" (st_high),
1199 "0" (cal_low), "1" (cal_high)
1200 );
1201
1202
1203 /* Now measure the speed */
1204 /* Do the first copy to prime the cache */
1205 asm __volatile__ (
1206 "movl %0,%%esi\n\t" \
1207 "movl %1,%%edi\n\t" \
1208 "movl %2,%%ecx\n\t" \
1209 "cld\n\t" \
1210 "rep\n\t" \
1211 "movsl\n\t" \
1212 :: "g" (src), "g" (dst), "g" (wlen)
1213 : "esi", "edi", "ecx"
1214 );
1215 asm __volatile__ ("rdtsc":"=a" (st_low),"=d" (st_high));
1216 for (i=0; i<iter; i++) {
1217 asm __volatile__ (
1218 "movl %0,%%esi\n\t" \
1219 "movl %1,%%edi\n\t" \
1220 "movl %2,%%ecx\n\t" \
1221 "cld\n\t" \
1222 "rep\n\t" \
1223 "movsl\n\t" \
1224 :: "g" (src), "g" (dst), "g" (wlen)
1225 : "esi", "edi", "ecx"
1226 );
1227 }
1228 asm __volatile__ ("rdtsc":"=a" (end_low),"=d" (end_high));
1229
1230 /* Compute the elapsed time */
1231 asm __volatile__ (
1232 "subl %2,%0\n\t"
1233 "sbbl %3,%1"
1234 :"=a" (end_low), "=d" (end_high)
1235 :"g" (st_low), "g" (st_high),
1236 "0" (end_low), "1" (end_high)
1237 );
1238 /* Subtract the overhead time */
1239 asm __volatile__ (
1240 "subl %2,%0\n\t"
1241 "sbbl %3,%1"
1242 :"=a" (end_low), "=d" (end_high)
1243 :"g" (cal_low), "g" (cal_high),
1244 "0" (end_low), "1" (end_high)
1245 );
1246
1247 /* Make sure that the result fits in 32 bits */
1248 //hprint(11,40,end_high);
1249 if (end_high) {
1250 return(-3);
1251 }
1252 end_low /= 2;
1253
1254 /* Convert to clocks/KB */
1255 end_low /= len;
1256 end_low *= 1024;
1257 end_low /= iter;
1258 if (end_low == 0) {
1259 return(-4);
1260 }
1261
1262 /* Convert to kbytes/sec */
1263
1264 if (tsc_invariable) end_low = correct_tsc(end_low);
1265
1266 return((v->clks_msec)/end_low);
1267}
1268
1269#define rdmsr(msr,val1,val2) \
1270 __asm__ __volatile__("rdmsr" \
1271 : "=a" (val1), "=d" (val2) \
1272 : "c" (msr))
1273
1274
1275ulong correct_tsc(ulong el_org)
1276{
1277 float coef_now, coef_max;
1278 int msr_lo, msr_hi, is_xe;
Martin Roth4dcd13d2016-02-24 13:53:07 -08001279
Martin Roth9b1b3352016-02-24 12:27:06 -08001280 rdmsr(0x198, msr_lo, msr_hi);
Martin Roth4dcd13d2016-02-24 13:53:07 -08001281 is_xe = (msr_lo >> 31) & 0x1;
1282
Martin Roth9b1b3352016-02-24 12:27:06 -08001283 if(is_xe){
1284 rdmsr(0x198, msr_lo, msr_hi);
Martin Roth4dcd13d2016-02-24 13:53:07 -08001285 coef_max = ((msr_hi >> 8) & 0x1F);
Martin Roth9b1b3352016-02-24 12:27:06 -08001286 if ((msr_hi >> 14) & 0x1) { coef_max = coef_max + 0.5f; }
1287 } else {
1288 rdmsr(0x17, msr_lo, msr_hi);
1289 coef_max = ((msr_lo >> 8) & 0x1F);
1290 if ((msr_lo >> 14) & 0x1) { coef_max = coef_max + 0.5f; }
1291 }
Martin Roth4dcd13d2016-02-24 13:53:07 -08001292
Martin Roth9b1b3352016-02-24 12:27:06 -08001293 if(cpu_id.fid.bits.eist) {
1294 rdmsr(0x198, msr_lo, msr_hi);
1295 coef_now = ((msr_lo >> 8) & 0x1F);
1296 if ((msr_lo >> 14) & 0x1) { coef_now = coef_now + 0.5f; }
1297 } else {
1298 rdmsr(0x2A, msr_lo, msr_hi);
1299 coef_now = (msr_lo >> 22) & 0x1F;
1300 }
1301 if(coef_max && coef_now) {
1302 el_org = (ulong)(el_org * coef_now / coef_max);
1303 }
1304 return el_org;
1305}
1306