blob: b1356c21b27412b6d6ea30d97f603eb94bd3b7c7 [file] [log] [blame]
Vladimir Serbinenkoc6f6be02013-11-12 22:32:08 +01001/*
2 * This file is part of the coreboot project.
3 *
4 * Copyright (C) 2013 Vladimir Serbinenko.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
/* Please don't remove this. It's needed to do debugging
   and reverse engineering to support more Nehalem variants in the future. */
23#ifndef REAL
24#define REAL 1
25#endif
26
27#if REAL
Kyösti Mälkki931c1dc2014-06-30 09:40:19 +030028#include <stdlib.h>
Vladimir Serbinenkoc6f6be02013-11-12 22:32:08 +010029#include <console/console.h>
30#include <string.h>
31#include <arch/hlt.h>
32#include <arch/io.h>
33#include <cpu/x86/msr.h>
34#include <cbmem.h>
35#include <arch/cbfs.h>
36#include <cbfs.h>
37#include <ip_checksum.h>
38#include <pc80/mc146818rtc.h>
39#include <device/pci_def.h>
40#include <arch/cpu.h>
41#include <spd.h>
42#include "raminit.h"
43#include <timestamp.h>
44#include <cpu/x86/mtrr.h>
45#include <cpu/intel/speedstep.h>
46#include <cpu/intel/turbo.h>
47#endif
48
49#if !REAL
50typedef unsigned char u8;
51typedef unsigned short u16;
52typedef unsigned int u32;
53typedef u32 device_t;
54#endif
55
56#include "nehalem.h"
57
58#include "southbridge/intel/ibexpeak/me.h"
59
60#if REAL
61#include <delay.h>
62#endif
63
64#define NORTHBRIDGE PCI_DEV(0, 0, 0)
65#define SOUTHBRIDGE PCI_DEV(0, 0x1f, 0)
66#define GMA PCI_DEV (0, 0x2, 0x0)
67#define HECIDEV PCI_DEV(0, 0x16, 0)
68#define HECIBAR 0x10
69
70#define FOR_ALL_RANKS \
71 for (channel = 0; channel < NUM_CHANNELS; channel++) \
72 for (slot = 0; slot < NUM_SLOTS; slot++) \
73 for (rank = 0; rank < NUM_RANKS; rank++)
74
75#define FOR_POPULATED_RANKS \
76 for (channel = 0; channel < NUM_CHANNELS; channel++) \
77 for (slot = 0; slot < NUM_SLOTS; slot++) \
78 for (rank = 0; rank < NUM_RANKS; rank++) \
79 if (info->populated_ranks[channel][slot][rank])
80
81#define FOR_POPULATED_RANKS_BACKWARDS \
82 for (channel = NUM_CHANNELS - 1; channel >= 0; channel--) \
83 for (slot = 0; slot < NUM_SLOTS; slot++) \
84 for (rank = 0; rank < NUM_RANKS; rank++) \
85 if (info->populated_ranks[channel][slot][rank])
86
/* [REG_178][CHANNEL][2 * SLOT + RANK][LANE] */
typedef struct {
	u8 smallest;	/* lower edge of the passing timing window */
	u8 largest;	/* upper edge of the passing timing window */
} timing_bounds_t[2][2][2][9];

/* Results of DRAM training; may be reused via raminfo::cached_training
   (e.g. on S3 resume) instead of retraining. */
struct ram_training {
	/* [TM][CHANNEL][SLOT][RANK][LANE] */
	u16 lane_timings[4][2][2][2][9];
	u16 reg_178;
	u16 reg_10b;

	u8 reg178_center;	/* center of the reg178 passing window */
	u8 reg178_smallest;	/* smallest passing reg178 value */
	u8 reg178_largest;	/* largest passing reg178 value */
	timing_bounds_t timing_bounds[2];
	u16 timing_offset[2][2][2][9];
	u16 timing2_offset[2][2][2][9];
	u16 timing2_bounds[2][2][2][9][2];
	u8 reg274265[2][3];	/* [CHANNEL][REGISTER] */
	u8 reg2ca9_bit0;
	u32 reg_6dc;
	u32 reg_6e8;
};
111
112#if !REAL
113#include "raminit_fake.c"
114#else
115
116#include <lib.h> /* Prototypes */
117
/* Thin typed wrappers around MCHBAR (memory controller hub BAR) MMIO
   accesses.  They exist as named functions so the !REAL build
   (raminit_fake.c) can provide its own tracing/replay versions. */
static inline void write_mchbar32(u32 addr, u32 val)
{
	MCHBAR32(addr) = val;
}

static inline void write_mchbar16(u32 addr, u16 val)
{
	MCHBAR16(addr) = val;
}

static inline void write_mchbar8(u32 addr, u8 val)
{
	MCHBAR8(addr) = val;
}


static inline u32 read_mchbar32(u32 addr)
{
	return MCHBAR32(addr);
}

static inline u16 read_mchbar16(u32 addr)
{
	return MCHBAR16(addr);
}

static inline u8 read_mchbar8(u32 addr)
{
	return MCHBAR8(addr);
}

/* Identical to read_mchbar8 in the REAL build; the separate name
   presumably lets the !REAL harness treat these reads specially —
   TODO confirm against raminit_fake.c. */
static inline u8 read_mchbar8_bypass(u32 addr)
{
	return MCHBAR8(addr);
}
153
154static void clflush(u32 addr)
155{
156 asm volatile ("clflush (%0)"::"r" (addr));
157}
158
typedef struct _u128 {
	u64 lo;
	u64 hi;
} u128;

/* Read 16 bytes from addr with a single 128-bit access (movdqa, so
   addr must be 16-byte aligned), saving and restoring the caller's
   %xmm0.  out[0] receives the low qword, out[1] the high qword. */
static void read128(u32 addr, u64 * out)
{
	u128 ret;
	u128 stor;	/* scratch slot used to preserve %xmm0 */
	asm volatile ("movdqu %%xmm0, %0\n"
		      "movdqa (%2), %%xmm0\n"
		      "movdqu %%xmm0, %1\n"
		      "movdqu %0, %%xmm0":"+m" (stor), "=m"(ret):"r"(addr));
	out[0] = ret.lo;
	out[1] = ret.hi;
}
175
176#endif
177
/* OK */
/* Write a value to an indirect register in the MCHBAR 0x1d0 bank:
   wait until the interface is idle (bit 23 of 0x1d0 clear), latch the
   data word into 0x1d4 (low 'bits' bits of val plus command/flag bits
   immediately above), then issue the write command for addr. */
static void write_1d0(u32 val, u16 addr, int bits, int flag)
{
	write_mchbar32(0x1d0, 0);
	while (read_mchbar32(0x1d0) & 0x800000) ;	/* busy-wait for idle */
	write_mchbar32(0x1d4,
		       (val & ((1 << bits) - 1)) | (2 << bits) | (flag <<
								  bits));
	write_mchbar32(0x1d0, 0x40000000 | addr);
	while (read_mchbar32(0x1d0) & 0x800000) ;
}

/* OK */
/* Read an indirect register from the 0x1d0 bank and return its low
   'split' bits.  The issued address is biased by bits 3:2 of MCHBAR
   0x246 relative to base 0x361. */
static u16 read_1d0(u16 addr, int split)
{
	u32 val;
	write_mchbar32(0x1d0, 0);
	while (read_mchbar32(0x1d0) & 0x800000) ;
	write_mchbar32(0x1d0,
		       0x80000000 | (((read_mchbar8(0x246) >> 2) & 3) +
				     0x361 - addr));
	while (read_mchbar32(0x1d0) & 0x800000) ;
	val = read_mchbar32(0x1d8);
	/* Two dummy writes to 0x33d; presumably resets the interface
	   state after a read — TODO confirm. */
	write_1d0(0, 0x33d, 0, 0);
	write_1d0(0, 0x33d, 0, 0);
	val &= ((1 << split) - 1);
	// printk (BIOS_ERR, "R1D0C [%x] => %x\n", addr, val);
	return val;
}
207
/* Store fence: make all prior stores globally visible before
   continuing.  Compiles to nothing in the !REAL simulation build. */
static void sfence(void)
{
#if REAL
	asm volatile ("sfence");
#endif
}
214
215static inline u16 get_lane_offset(int slot, int rank, int lane)
216{
217 return 0x124 * lane + ((lane & 4) ? 0x23e : 0) + 11 * rank + 22 * slot -
218 0x452 * (lane == 8);
219}
220
221static inline u16 get_timing_register_addr(int lane, int tm, int slot, int rank)
222{
223 const u16 offs[] = { 0x1d, 0xa8, 0xe6, 0x5c };
224 return get_lane_offset(slot, rank, lane) + offs[(tm + 3) % 4];
225}
226
#if REAL
/* Debug pass-through: returns 'in' unchanged.  The commented printk
   can be re-enabled to log every gav()-wrapped read together with the
   source line, for reverse engineering. */
static u32 gav_real(int line, u32 in)
{
	// printk (BIOS_DEBUG, "%d: GAV: %x\n", line, in);
	return in;
}

#define gav(x) gav_real (__LINE__, (x))
#endif
/* Everything the raminit flow knows about the installed memory and
   the controller configuration derived from it. */
struct raminfo {
	u16 clock_speed_index;	/* clock_speed (REAL, not DDR) / 133.(3) - 3 */
	u16 fsb_frequency;	/* in 1.(1)/2 MHz. */
	u8 is_x16_module[2][2];	/* [CHANNEL][SLOT] */
	u8 density[2][2];	/* [CHANNEL][SLOT] */
	u8 populated_ranks[2][2][2];	/* [CHANNEL][SLOT][RANK] */
	int rank_start[2][2][2];
	u8 cas_latency;
	/* Mainboard-specific per-lane delays. */
	u8 board_lane_delay[9];
	u8 use_ecc;
	u8 revision;
	u8 max_supported_clock_speed_index;
	u8 uma_enabled;
	u8 spd[2][2][151];	/* [CHANNEL][SLOT][BYTE] */
	u8 silicon_revision;
	u8 populated_ranks_mask[2];	/* per-channel bitmask of ranks */
	u8 max_slots_used_in_channel;
	u8 mode4030[2];
	u16 avg4044[2];
	u16 max4048[2];
	unsigned total_memory_mb;
	unsigned interleaved_part_mb;
	unsigned non_interleaved_part_mb;

	u32 heci_bar;
	u64 heci_uma_addr;
	unsigned memory_reserved_for_heci_mb;

	struct ram_training training;
	/* Last command type per channel for the 0x500 interface;
	   0x80000000 = read, 0x40000000 = write (see write_500). */
	u32 last_500_command[2];

	u32 delay46_ps[2];
	u32 delay54_ps[2];
	u8 revision_flag_1;
	u8 some_delay_1_cycle_floor;
	u8 some_delay_2_halfcycles_ceil;
	u8 some_delay_3_ps_rounded;

	/* Previously saved training results (e.g. for S3 resume),
	   or NULL if none. */
	const struct ram_training *cached_training;
};
276
static void
write_500(struct raminfo *info, int channel, u32 val, u16 addr, int bits,
	  int flag);

/* OK */
/* Read the low 'split' bits of an indirect per-channel register in
   the 0x500 bank.  Mirrors read_1d0, but with a per-channel register
   window (channel << 10) and base 0xb88. */
static u16
read_500(struct raminfo *info, int channel, u16 addr, int split)
{
	u32 val;
	info->last_500_command[channel] = 0x80000000;	/* mark: last op was a read */
	write_mchbar32(0x500 + (channel << 10), 0);
	while (read_mchbar32(0x500 + (channel << 10)) & 0x800000) ;
	write_mchbar32(0x500 + (channel << 10),
		       0x80000000 |
		       (((read_mchbar8(0x246 + (channel << 10)) >> 2) &
			 3) + 0xb88 - addr));
	while (read_mchbar32(0x500 + (channel << 10)) & 0x800000) ;
	val = read_mchbar32(0x508 + (channel << 10));
	return val & ((1 << split) - 1);
}

/* OK */
/* Write a value to an indirect per-channel register in the 0x500
   bank.  If the previous 0x500 operation was a read, a dummy write to
   0xb61 is issued first (presumably to flip the interface back to
   write mode — TODO confirm). */
static void
write_500(struct raminfo *info, int channel, u32 val, u16 addr, int bits,
	  int flag)
{
	if (info->last_500_command[channel] == 0x80000000) {
		info->last_500_command[channel] = 0x40000000;
		write_500(info, channel, 0, 0xb61, 0, 0);
	}
	write_mchbar32(0x500 + (channel << 10), 0);
	while (read_mchbar32(0x500 + (channel << 10)) & 0x800000) ;
	write_mchbar32(0x504 + (channel << 10),
		       (val & ((1 << bits) - 1)) | (2 << bits) | (flag <<
								  bits));
	write_mchbar32(0x500 + (channel << 10), 0x40000000 | addr);
	while (read_mchbar32(0x500 + (channel << 10)) & 0x800000) ;
}
315
/* Read/write test of the memory mapped at (rank << 28).  Writes a
   byte pattern derived from 'mask' and returns a bitmask of the 8
   byte positions (two consecutive dwords) that read back correctly;
   0xff means all good. */
static int rw_test(int rank)
{
	const u32 mask = 0xf00fc33c;
	int ok = 0xff;
	int i;
	/* Prime: zero 256 bytes, then read them back. */
	for (i = 0; i < 64; i++)
		write32((rank << 28) | (i << 2), 0);
	sfence();
	for (i = 0; i < 64; i++)
		gav(read32((rank << 28) | (i << 2)));
	sfence();
	/* Write the pattern: 8 bytes (two dwords) per mask bit, all-ones
	   where the mask bit is set, zero otherwise. */
	for (i = 0; i < 32; i++) {
		u32 pat = (((mask >> i) & 1) ? 0xffffffff : 0);
		write32((rank << 28) | (i << 3), pat);
		write32((rank << 28) | (i << 3) | 4, pat);
	}
	sfence();
	/* Verify byte-wise: bits 0-3 of 'ok' cover the first dword's
	   bytes, bits 4-7 the second dword's. */
	for (i = 0; i < 32; i++) {
		u8 pat = (((mask >> i) & 1) ? 0xff : 0);
		int j;
		u32 val;
		gav(val = read32((rank << 28) | (i << 3)));
		for (j = 0; j < 4; j++)
			if (((val >> (j * 8)) & 0xff) != pat)
				ok &= ~(1 << j);
		gav(val = read32((rank << 28) | (i << 3) | 4));
		for (j = 0; j < 4; j++)
			if (((val >> (j * 8)) & 0xff) != pat)
				ok &= ~(16 << j);
	}
	sfence();
	/* Clean up: zero the test area again. */
	for (i = 0; i < 64; i++)
		write32((rank << 28) | (i << 2), 0);
	sfence();

	return ok;
}
355
/* Program timing registers 2 and 3 for all 8 data lanes of one rank,
   adding 'base' to the values captured during training. */
static void
program_timings(struct raminfo *info, u16 base, int channel, int slot, int rank)
{
	int lane;
	for (lane = 0; lane < 8; lane++) {
		write_500(info, channel,
			  base +
			  info->training.
			  lane_timings[2][channel][slot][rank][lane],
			  get_timing_register_addr(lane, 2, slot, rank), 9, 0);
		write_500(info, channel,
			  base +
			  info->training.
			  lane_timings[3][channel][slot][rank][lane],
			  get_timing_register_addr(lane, 3, slot, rank), 9, 0);
	}
}

/* Program the 0x26c/0x268/0x2b9 register group of one channel with
   interval 'si' (constants reverse engineered). */
static void write_26c(int channel, u16 si)
{
	write_mchbar32(0x26c + (channel << 10), 0x03243f35);
	write_mchbar32(0x268 + (channel << 10), 0xcfc00000 | (si << 9));
	write_mchbar16(0x2b9 + (channel << 10), si);
}
380
/* Issue a command through the per-channel 0x580 register: load the
   command word, pulse bit 0 as a "go" strobe, busy-wait for the
   completion flag (bit 16), and return the raw register value. */
static u32 get_580(int channel, u8 addr)
{
	u32 ret;
	gav(read_1d0(0x142, 3));
	write_mchbar8(0x5ff, 0x0);	/* OK */
	write_mchbar8(0x5ff, 0x80);	/* OK */
	write_mchbar32(0x580 + (channel << 10), 0x8493c012 | addr);
	write_mchbar8(0x580 + (channel << 10),
		      read_mchbar8(0x580 + (channel << 10)) | 1);
	while (!((ret = read_mchbar32(0x580 + (channel << 10))) & 0x10000)) ;
	write_mchbar8(0x580 + (channel << 10),
		      read_mchbar8(0x580 + (channel << 10)) & ~1);
	return ret;
}
395
396const int cached_config = 0;
397
398#define NUM_CHANNELS 2
399#define NUM_SLOTS 2
400#define NUM_RANKS 2
401#define RANK_SHIFT 28
402#define CHANNEL_SHIFT 10
403
404#include "raminit_tables.c"
405
/* Training step for one rank: replay lane timings for timing
   registers 1 and 2, trigger 0x580 strobes, then read back timing
   register 2 and derive register 3 as reg2 + 0x20. */
static void seq9(struct raminfo *info, int channel, int slot, int rank)
{
	int i, lane;

	for (i = 0; i < 2; i++)
		for (lane = 0; lane < 8; lane++)
			write_500(info, channel,
				  info->training.lane_timings[i +
							      1][channel][slot]
				  [rank][lane], get_timing_register_addr(lane,
									 i + 1,
									 slot,
									 rank),
				  9, 0);

	write_1d0(1, 0x103, 6, 1);
	for (lane = 0; lane < 8; lane++)
		write_500(info, channel,
			  info->training.
			  lane_timings[0][channel][slot][rank][lane],
			  get_timing_register_addr(lane, 0, slot, rank), 9, 0);

	for (i = 0; i < 2; i++) {
		for (lane = 0; lane < 8; lane++)
			write_500(info, channel,
				  info->training.lane_timings[i +
							      1][channel][slot]
				  [rank][lane], get_timing_register_addr(lane,
									 i + 1,
									 slot,
									 rank),
				  9, 0);
		/* Strobe per timing register and rank. */
		gav(get_580(channel, ((i + 1) << 2) | (rank << 5)));
	}

	gav(read_1d0(0x142, 3));	// = 0x10408118
	write_mchbar8(0x5ff, 0x0);	/* OK */
	write_mchbar8(0x5ff, 0x80);	/* OK */
	write_1d0(0x2, 0x142, 3, 1);
	for (lane = 0; lane < 8; lane++) {
		// printk (BIOS_ERR, "before: %x\n", info->training.lane_timings[2][channel][slot][rank][lane]);
		info->training.lane_timings[2][channel][slot][rank][lane] =
		    read_500(info, channel,
			     get_timing_register_addr(lane, 2, slot, rank), 9);
		//printk (BIOS_ERR, "after: %x\n", info->training.lane_timings[2][channel][slot][rank][lane]);
		info->training.lane_timings[3][channel][slot][rank][lane] =
		    info->training.lane_timings[2][channel][slot][rank][lane] +
		    0x20;
	}
}
456
457static int count_ranks_in_channel(struct raminfo *info, int channel)
458{
459 int slot, rank;
460 int res = 0;
461 for (slot = 0; slot < NUM_SLOTS; slot++)
462 for (rank = 0; rank < NUM_SLOTS; rank++)
463 res += info->populated_ranks[channel][slot][rank];
464 return res;
465}
466
/* Train and configure one populated rank: run seq9, program its
   timings with and without the 0x80 offset, and sanity-check with
   read/write tests.  The tests are skipped on S3 resume, where memory
   contents must be preserved. */
static void
config_rank(struct raminfo *info, int s3resume, int channel, int slot, int rank)
{
	int add;

	write_1d0(0, 0x178, 7, 1);
	seq9(info, channel, slot, rank);
	program_timings(info, 0x80, channel, slot, rank);

	/* Channel 0's ranks appear to be mapped above channel 1's, so
	   offset the test address by channel 1's rank count — presumably
	   matches the interleaving setup; confirm against the memory map
	   code. */
	if (channel == 0)
		add = count_ranks_in_channel(info, 1);
	else
		add = 0;
	if (!s3resume)
		gav(rw_test(rank + add));
	program_timings(info, 0x00, channel, slot, rank);
	if (!s3resume)
		gav(rw_test(rank + add));
	if (!s3resume)
		gav(rw_test(rank + add));
	write_1d0(0, 0x142, 3, 1);
	write_1d0(0, 0x103, 6, 1);

	gav(get_580(channel, 0xc | (rank << 5)));
	gav(read_1d0(0x142, 3));

	write_mchbar8(0x5ff, 0x0);	/* OK */
	write_mchbar8(0x5ff, 0x80);	/* OK */
}
496
/* Write 'val' to the three related per-channel registers 0x4cf, 0x659
   and 0x697.  Each write is preceded by a gav()-logged read of the
   old value (debug aid only). */
static void set_4cf(struct raminfo *info, int channel, u8 val)
{
	gav(read_500(info, channel, 0x4cf, 4));	// = 0xc2300cf9
	write_500(info, channel, val, 0x4cf, 4, 1);
	gav(read_500(info, channel, 0x659, 4));	// = 0x80300839
	write_500(info, channel, val, 0x659, 4, 1);
	gav(read_500(info, channel, 0x697, 4));	// = 0x80300839
	write_500(info, channel, val, 0x697, 4, 1);
}
506
/* Program the 0x334/0x32c/0x34a/0x33c/0x344 register groups of both
   channels, either with the fixed patterns below or (zero != 0) with
   zeroes, then kick the engine via 0x130 bit 0 and wait for it to
   clear.  The 0x138/0x13c reads go into vd8 purely for gav() logging;
   vd8 is not otherwise used. */
static void set_334(int zero)
{
	int j, k, channel;
	const u32 val3[] = { 0x2a2b2a2b, 0x26272627, 0x2e2f2e2f, 0x2a2b };
	u32 vd8[2][16];

	for (channel = 0; channel < NUM_CHANNELS; channel++) {
		for (j = 0; j < 4; j++) {
			u32 a = (j == 1) ? 0x29292929 : 0x31313131;
			/* The last group (j == 3) is only 16 bits wide. */
			u32 lmask = (j == 3) ? 0xffff : 0xffffffff;
			u16 c;
			if ((j == 0 || j == 3) && zero)
				c = 0;
			else if (j == 3)
				c = 0x5f;
			else
				c = 0x5f5f;

			for (k = 0; k < 2; k++) {
				write_mchbar32(0x138 + 8 * k,
					       (channel << 26) | (j << 24));
				gav(vd8[1][(channel << 3) | (j << 1) | k] =
				    read_mchbar32(0x138 + 8 * k));
				gav(vd8[0][(channel << 3) | (j << 1) | k] =
				    read_mchbar32(0x13c + 8 * k));
			}

			write_mchbar32(0x334 + (channel << 10) + (j * 0x44),
				       zero ? 0 : val3[j]);
			write_mchbar32(0x32c + (channel << 10) + (j * 0x44),
				       zero ? 0 : (0x18191819 & lmask));
			write_mchbar16(0x34a + (channel << 10) + (j * 0x44), c);
			write_mchbar32(0x33c + (channel << 10) + (j * 0x44),
				       zero ? 0 : (a & lmask));
			write_mchbar32(0x344 + (channel << 10) + (j * 0x44),
				       zero ? 0 : (a & lmask));
		}
	}

	write_mchbar32(0x130, read_mchbar32(0x130) | 1);	/* OK */
	while (read_mchbar8(0x130) & 1) ;	/* OK */
}
549
550static void rmw_1d0(u16 addr, u32 and, u32 or, int split, int flag)
551{
552 u32 v;
553 v = read_1d0(addr, split);
554 write_1d0((v & and) | or, addr, split, flag);
555}
556
557static int find_highest_bit_set(u16 val)
558{
559 int i;
560 for (i = 15; i >= 0; i--)
561 if (val & (1 << i))
562 return i;
563 return -1;
564}
565
566static int find_lowest_bit_set32(u32 val)
567{
568 int i;
569 for (i = 0; i < 32; i++)
570 if (val & (1 << i))
571 return i;
572 return -1;
573}
574
/* Byte offsets into the DDR3 SPD of the fields this code reads
   (per the JEDEC DDR3 SPD layout). */
enum {
	DEVICE_TYPE = 2,
	MODULE_TYPE = 3,
	DENSITY = 4,
	RANKS_AND_DQ = 7,
	MEMORY_BUS_WIDTH = 8,
	TIMEBASE_DIVIDEND = 10,	/* medium timebase dividend */
	TIMEBASE_DIVISOR = 11,	/* medium timebase divisor */
	CYCLETIME = 12,		/* tCKmin, in timebase units */

	CAS_LATENCIES_LSB = 14,
	CAS_LATENCIES_MSB = 15,
	CAS_LATENCY_TIME = 16,	/* tAAmin, in timebase units */
	THERMAL_AND_REFRESH = 31,
	REFERENCE_RAW_CARD_USED = 62,
	RANK1_ADDRESS_MAPPING = 63
};
592
/* Determine the common clock speed and CAS latency supported by all
   populated DIMMs from their SPD data, and store the result in
   info->clock_speed_index / info->cas_latency.  Dies if no valid
   configuration exists. */
static void calculate_timings(struct raminfo *info)
{
	unsigned cycletime;
	unsigned cas_latency_time;
	unsigned supported_cas_latencies;
	unsigned channel, slot;
	unsigned clock_speed_index;
	unsigned min_cas_latency;
	unsigned cas_latency;
	unsigned max_clock_index;

	/* Find common CAS latency.  The SPD bitmap's bit 0 means CL4,
	   hence the "* 2" shift and the "+ 3" when converting a bit
	   index back to a latency below. */
	supported_cas_latencies = 0x3fe;
	for (channel = 0; channel < NUM_CHANNELS; channel++)
		for (slot = 0; slot < NUM_SLOTS; slot++)
			if (info->populated_ranks[channel][slot][0])
				supported_cas_latencies &=
				    2 *
				    (info->
				     spd[channel][slot][CAS_LATENCIES_LSB] |
				     (info->
				      spd[channel][slot][CAS_LATENCIES_MSB] <<
				      8));

	max_clock_index = min(3, info->max_supported_clock_speed_index);

	cycletime = min_cycletime[max_clock_index];
	cas_latency_time = min_cas_latency_time[max_clock_index];

	/* Raise cycletime / tAA to the slowest DIMM's SPD values. */
	for (channel = 0; channel < NUM_CHANNELS; channel++)
		for (slot = 0; slot < NUM_SLOTS; slot++)
			if (info->populated_ranks[channel][slot][0]) {
				unsigned timebase;
				timebase =
				    1000 *
				    info->
				    spd[channel][slot][TIMEBASE_DIVIDEND] /
				    info->spd[channel][slot][TIMEBASE_DIVISOR];
				cycletime =
				    max(cycletime,
					timebase *
					info->spd[channel][slot][CYCLETIME]);
				cas_latency_time =
				    max(cas_latency_time,
					timebase *
					info->
					spd[channel][slot][CAS_LATENCY_TIME]);
			}
	/* Snap cycletime to a table entry (min_cycletime decreases with
	   index).  NOTE(review): if cycletime exceeds min_cycletime[0]
	   this underflows clock_speed_index to (unsigned)-1 — presumably
	   unreachable for valid SPDs; confirm. */
	for (clock_speed_index = 0; clock_speed_index < 3; clock_speed_index++) {
		if (cycletime == min_cycletime[clock_speed_index])
			break;
		if (cycletime > min_cycletime[clock_speed_index]) {
			clock_speed_index--;
			cycletime = min_cycletime[clock_speed_index];
			break;
		}
	}
	min_cas_latency = CEIL_DIV(cas_latency_time, cycletime);
	cas_latency = 0;
	/* Pick the highest supported CAS latency not above the minimum
	   required one; strip unusable entries from the top. */
	while (supported_cas_latencies) {
		cas_latency = find_highest_bit_set(supported_cas_latencies) + 3;
		if (cas_latency <= min_cas_latency)
			break;
		supported_cas_latencies &=
		    ~(1 << find_highest_bit_set(supported_cas_latencies));
	}

	if (cas_latency != min_cas_latency && clock_speed_index)
		clock_speed_index--;

	if (cas_latency * min_cycletime[clock_speed_index] > 20000)
		die("Couldn't configure DRAM");
	info->clock_speed_index = clock_speed_index;
	info->cas_latency = cas_latency;
}
668
/* Program the initial (pre-training) lane timing registers for every
   populated rank from the constant tables in raminit_tables.c.  Table
   selection depends on silicon revision, the DIMM's reference raw
   card, mode4030 and the clock speed index.  Registered/buffered
   modules (MODULE_TYPE nibble == 3) on silicon revision 0 are treated
   as "extended silicon revision" 4. */
static void program_base_timings(struct raminfo *info)
{
	unsigned channel;
	unsigned slot, rank, lane;
	unsigned extended_silicon_revision;
	int i;

	extended_silicon_revision = info->silicon_revision;
	if (info->silicon_revision == 0)
		for (channel = 0; channel < NUM_CHANNELS; channel++)
			for (slot = 0; slot < NUM_SLOTS; slot++)
				if ((info->
				     spd[channel][slot][MODULE_TYPE] & 0xF) ==
				    3)
					extended_silicon_revision = 4;

	for (channel = 0; channel < NUM_CHANNELS; channel++) {
		/* NOTE(review): rank bound uses NUM_SLOTS; NUM_RANKS is
		   meant (same value today). */
		for (slot = 0; slot < NUM_SLOTS; slot++)
			for (rank = 0; rank < NUM_SLOTS; rank++) {
				int card_timing_2;
				if (!info->populated_ranks[channel][slot][rank])
					continue;

				for (lane = 0; lane < 9; lane++) {
					int tm_reg;
					int card_timing;

					/* Per-lane correction for raw cards
					   3 and 5 on registered modules. */
					card_timing = 0;
					if ((info->
					     spd[channel][slot][MODULE_TYPE] &
					     0xF) == 3) {
						int reference_card;
						reference_card =
						    info->
						    spd[channel][slot]
						    [REFERENCE_RAW_CARD_USED] &
						    0x1f;
						if (reference_card == 3)
							card_timing =
							    u16_ffd1188[0][lane]
							    [info->
							     clock_speed_index];
						if (reference_card == 5)
							card_timing =
							    u16_ffd1188[1][lane]
							    [info->
							     clock_speed_index];
					}

					/* Timing register 0 comes straight
					   from a table; register 1 starts at
					   a fixed 256. */
					info->training.
					    lane_timings[0][channel][slot][rank]
					    [lane] =
					    u8_FFFD1218[info->
							clock_speed_index];
					info->training.
					    lane_timings[1][channel][slot][rank]
					    [lane] = 256;

					/* Registers 2 and 3: table base plus
					   the channel's max4048 plus the
					   per-mode correction plus the raw
					   card correction. */
					for (tm_reg = 2; tm_reg < 4; tm_reg++)
						info->training.
						    lane_timings[tm_reg]
						    [channel][slot][rank][lane]
						    =
						    u8_FFFD1240[channel]
						    [extended_silicon_revision]
						    [lane][2 * slot +
							   rank][info->
								 clock_speed_index]
						    + info->max4048[channel]
						    +
						    u8_FFFD0C78[channel]
						    [extended_silicon_revision]
						    [info->
						     mode4030[channel]][slot]
						    [rank][info->
							   clock_speed_index]
						    + card_timing;
					for (tm_reg = 0; tm_reg < 4; tm_reg++)
						write_500(info, channel,
							  info->training.
							  lane_timings[tm_reg]
							  [channel][slot][rank]
							  [lane],
							  get_timing_register_addr
							  (lane, tm_reg, slot,
							   rank), 9, 0);
				}

				/* Extra correction for extended revision 4
				   unless both slot-0 ranks are populated. */
				card_timing_2 = 0;
				if (!(extended_silicon_revision != 4
				      || (info->
					  populated_ranks_mask[channel] & 5) ==
				      5)) {
					if ((info->
					     spd[channel][slot]
					     [REFERENCE_RAW_CARD_USED] & 0x1F)
					    == 3)
						card_timing_2 =
						    u16_FFFE0EB8[0][info->
								    clock_speed_index];
					if ((info->
					     spd[channel][slot]
					     [REFERENCE_RAW_CARD_USED] & 0x1F)
					    == 5)
						card_timing_2 =
						    u16_FFFE0EB8[1][info->
								    clock_speed_index];
				}

				for (i = 0; i < 3; i++)
					write_500(info, channel,
						  (card_timing_2 +
						   info->max4048[channel]
						   +
						   u8_FFFD0EF8[channel]
						   [extended_silicon_revision]
						   [info->
						    mode4030[channel]][info->
								       clock_speed_index]),
						  u16_fffd0c50[i][slot][rank],
						  8, 1);
				write_500(info, channel,
					  (info->max4048[channel] +
					   u8_FFFD0C78[channel]
					   [extended_silicon_revision][info->
								       mode4030
								       [channel]]
					   [slot][rank][info->
							clock_speed_index]),
					  u16_fffd0c70[slot][rank], 7, 1);
			}
		if (!info->populated_ranks_mask[channel])
			continue;
		/* Per-channel registers, written once per channel. */
		for (i = 0; i < 3; i++)
			write_500(info, channel,
				  (info->max4048[channel] +
				   info->avg4044[channel]
				   +
				   u8_FFFD17E0[channel]
				   [extended_silicon_revision][info->
							       mode4030
							       [channel]][info->
									  clock_speed_index]),
				  u16_fffd0c68[i], 8, 1);
	}
}
815
/* Duration of one FSB cycle in picoseconds. */
static unsigned int fsbcycle_ps(struct raminfo *info)
{
	return 900000 / info->fsb_frequency;
}

/* The time of DDR transfer in ps. */
static unsigned int halfcycle_ps(struct raminfo *info)
{
	return 3750 / (info->clock_speed_index + 3);
}

/* The time of clock cycle in ps. */
static unsigned int cycle_ps(struct raminfo *info)
{
	return 2 * halfcycle_ps(info);
}

/* Frequency in 1.(1)=10/9 MHz units. */
static unsigned frequency_11(struct raminfo *info)
{
	return (info->clock_speed_index + 3) * 120;
}

/* Frequency in 0.1 MHz units. */
static unsigned frequency_01(struct raminfo *info)
{
	return 100 * frequency_11(info) / 9;
}

/* Convert picoseconds to DDR half-cycles (truncating). */
static unsigned ps_to_halfcycles(struct raminfo *info, unsigned int ps)
{
	return (frequency_11(info) * 2) * ps / 900000;
}

/* Convert nanoseconds to full clock cycles (truncating). */
static unsigned ns_to_cycles(struct raminfo *info, unsigned int ns)
{
	return (frequency_11(info)) * ns / 900;
}
854
/* Derive controller delay parameters — the "some_delay" family,
   mode4030, avg4044 and max4048 — from the stepping (revision /
   silicon_revision) and the populated-rank configuration, and program
   the per-channel 0x244 registers.  The register semantics are
   reverse engineered, hence the opaque names. */
static void compute_derived_timings(struct raminfo *info)
{
	unsigned channel, slot, rank;
	int extended_silicon_revision;
	int some_delay_1_ps;
	int some_delay_2_ps;
	int some_delay_2_halfcycles_ceil;
	int some_delay_2_halfcycles_floor;
	int some_delay_3_ps;
	int some_delay_3_halfcycles;
	int some_delay_3_ps_rounded;
	int some_delay_1_cycle_ceil;
	int some_delay_1_cycle_floor;

	some_delay_3_halfcycles = 0;
	some_delay_3_ps_rounded = 0;
	extended_silicon_revision = info->silicon_revision;
	/* Same promotion as in program_base_timings: registered modules
	   on silicon revision 0 count as extended revision 4. */
	if (!info->silicon_revision)
		for (channel = 0; channel < NUM_CHANNELS; channel++)
			for (slot = 0; slot < NUM_SLOTS; slot++)
				if ((info->
				     spd[channel][slot][MODULE_TYPE] & 0xF) ==
				    3)
					extended_silicon_revision = 4;
	if (info->board_lane_delay[7] < 5)
		info->board_lane_delay[7] = 5;
	info->revision_flag_1 = 2;
	if (info->silicon_revision == 2 || info->silicon_revision == 3)
		info->revision_flag_1 = 0;
	if (info->revision < 16)
		info->revision_flag_1 = 0;

	if (info->revision < 8)
		info->revision_flag_1 = 0;
	if (info->revision >= 8 && (info->silicon_revision == 0
				    || info->silicon_revision == 1))
		some_delay_2_ps = 735;
	else
		some_delay_2_ps = 750;

	if (info->revision >= 0x10 && (info->silicon_revision == 0
				       || info->silicon_revision == 1))
		some_delay_1_ps = 3929;
	else
		some_delay_1_ps = 3490;

	/* Floor and ceiling of delay 1 in clock cycles; on an exact
	   multiple the floor is decremented instead. */
	some_delay_1_cycle_floor = some_delay_1_ps / cycle_ps(info);
	some_delay_1_cycle_ceil = some_delay_1_ps / cycle_ps(info);
	if (some_delay_1_ps % cycle_ps(info))
		some_delay_1_cycle_ceil++;
	else
		some_delay_1_cycle_floor--;
	info->some_delay_1_cycle_floor = some_delay_1_cycle_floor;
	if (info->revision_flag_1)
		some_delay_2_ps = halfcycle_ps(info) >> 6;
	some_delay_2_ps +=
	    max(some_delay_1_ps - 30,
		2 * halfcycle_ps(info) * (some_delay_1_cycle_ceil - 1) + 1000) +
	    375;
	some_delay_3_ps =
	    halfcycle_ps(info) - some_delay_2_ps % halfcycle_ps(info);
	if (info->revision_flag_1) {
		if (some_delay_3_ps < 150)
			some_delay_3_halfcycles = 0;
		else
			/* Fixed-point: 1/64ths of a half-cycle. */
			some_delay_3_halfcycles =
			    (some_delay_3_ps << 6) / halfcycle_ps(info);
		some_delay_3_ps_rounded =
		    halfcycle_ps(info) * some_delay_3_halfcycles >> 6;
	}
	some_delay_2_halfcycles_ceil =
	    (some_delay_2_ps + halfcycle_ps(info) - 1) / halfcycle_ps(info) -
	    2 * (some_delay_1_cycle_ceil - 1);
	if (info->revision_flag_1 && some_delay_3_ps < 150)
		some_delay_2_halfcycles_ceil++;
	some_delay_2_halfcycles_floor = some_delay_2_halfcycles_ceil;
	if (info->revision < 0x10)
		some_delay_2_halfcycles_floor =
		    some_delay_2_halfcycles_ceil - 1;
	if (!info->revision_flag_1)
		some_delay_2_halfcycles_floor++;
	info->some_delay_2_halfcycles_ceil = some_delay_2_halfcycles_ceil;
	info->some_delay_3_ps_rounded = some_delay_3_ps_rounded;
	/* Two slots used in some channel? */
	if ((info->populated_ranks[0][0][0] && info->populated_ranks[0][1][0])
	    || (info->populated_ranks[1][0][0]
		&& info->populated_ranks[1][1][0]))
		info->max_slots_used_in_channel = 2;
	else
		info->max_slots_used_in_channel = 1;
	for (channel = 0; channel < 2; channel++)
		write_mchbar32(0x244 + (channel << 10),
			       ((info->revision < 8) ? 1 : 0x200)
			       | ((2 - info->max_slots_used_in_channel) << 17) |
			       (channel << 21) | (info->
						  some_delay_1_cycle_floor <<
						  18) | 0x9510);
	if (info->max_slots_used_in_channel == 1) {
		info->mode4030[0] = (count_ranks_in_channel(info, 0) == 2);
		info->mode4030[1] = (count_ranks_in_channel(info, 1) == 2);
	} else {
		info->mode4030[0] = ((count_ranks_in_channel(info, 0) == 1) || (count_ranks_in_channel(info, 0) == 2)) ? 2 : 3;	/* 2 if 1 or 2 ranks */
		info->mode4030[1] = ((count_ranks_in_channel(info, 1) == 1)
				     || (count_ranks_in_channel(info, 1) ==
					 2)) ? 2 : 3;
	}
	/* Compute avg4044 (average of per-rank 'a' corrections) and
	   max4048 (worst-case headroom of unk1 over the corrections)
	   per channel. */
	for (channel = 0; channel < NUM_CHANNELS; channel++) {
		int max_of_unk;
		int min_of_unk_2;

		int i, count;
		int sum;

		if (!info->populated_ranks_mask[channel])
			continue;

		max_of_unk = 0;
		min_of_unk_2 = 32767;

		sum = 0;
		count = 0;
		for (i = 0; i < 3; i++) {
			int unk1;
			if (info->revision < 8)
				unk1 =
				    u8_FFFD1891[0][channel][info->
							    clock_speed_index]
				    [i];
			else if (!
				 (info->revision >= 0x10
				  || info->revision_flag_1))
				unk1 =
				    u8_FFFD1891[1][channel][info->
							    clock_speed_index]
				    [i];
			else
				unk1 = 0;
			for (slot = 0; slot < NUM_SLOTS; slot++)
				for (rank = 0; rank < NUM_RANKS; rank++) {
					int a = 0;
					int b = 0;

					if (!info->
					    populated_ranks[channel][slot]
					    [rank])
						continue;
					/* Raw-card corrections apply only for
					   extended revision 4 without both
					   slot-0 ranks populated. */
					if (extended_silicon_revision == 4
					    && (info->
						populated_ranks_mask[channel] &
						5) != 5) {
						if ((info->
						     spd[channel][slot]
						     [REFERENCE_RAW_CARD_USED] &
						     0x1F) == 3) {
							a = u16_ffd1178[0]
							    [info->
							     clock_speed_index];
							b = u16_fe0eb8[0][info->
									  clock_speed_index];
						} else
						    if ((info->
							 spd[channel][slot]
							 [REFERENCE_RAW_CARD_USED]
							 & 0x1F) == 5) {
							a = u16_ffd1178[1]
							    [info->
							     clock_speed_index];
							b = u16_fe0eb8[1][info->
									  clock_speed_index];
						}
					}
					min_of_unk_2 = min(min_of_unk_2, a);
					min_of_unk_2 = min(min_of_unk_2, b);
					/* NOTE(review): 'count' only grows for
					   rank 0; a channel populated only in
					   rank 1 would divide by zero below —
					   presumably impossible; confirm. */
					if (rank == 0) {
						sum += a;
						count++;
					}
					{
						int t;
						t = b +
						    u8_FFFD0EF8[channel]
						    [extended_silicon_revision]
						    [info->
						     mode4030[channel]][info->
									clock_speed_index];
						if (unk1 >= t)
							max_of_unk =
							    max(max_of_unk,
								unk1 - t);
					}
				}
			{
				int t =
				    u8_FFFD17E0[channel]
				    [extended_silicon_revision][info->
								mode4030
								[channel]]
				    [info->clock_speed_index] + min_of_unk_2;
				if (unk1 >= t)
					max_of_unk = max(max_of_unk, unk1 - t);
			}
		}

		info->avg4044[channel] = sum / count;
		info->max4048[channel] = max_of_unk;
	}
}
1061
/* Issue a JEDEC mode-register command to one rank by performing dummy
   reads: the command opcode goes into MCHBAR 0x271/0x671 (addr3) and
   the mode-register value is encoded in the read address within the
   rank's window at (total_rank << 28).  Odd ranks with SPD mirrored
   address mapping get their address/bank bits swapped per the DDR3
   mirroring scheme. */
static void jedec_read(struct raminfo *info,
		       int channel, int slot, int rank,
		       int total_rank, u8 addr3, unsigned int value)
{
	/* Handle mirrored mapping. */
	if ((rank & 1) && (info->spd[channel][slot][RANK1_ADDRESS_MAPPING] & 1))
		addr3 =
		    (addr3 & 0xCF) | ((addr3 & 0x10) << 1) | ((addr3 >> 1) &
							      0x10);
	write_mchbar8(0x271, addr3 | (read_mchbar8(0x271) & 0xC1));
	write_mchbar8(0x671, addr3 | (read_mchbar8(0x671) & 0xC1));

	/* Handle mirrored mapping. */
	if ((rank & 1) && (info->spd[channel][slot][RANK1_ADDRESS_MAPPING] & 1))
		value =
		    (value & ~0x1f8) | ((value >> 1) & 0xa8) | ((value & 0xa8)
								<< 1);

	/* The dummy read carries the MR value in its address bits. */
	read32((value << 3) | (total_rank << 28));

	write_mchbar8(0x271, (read_mchbar8(0x271) & 0xC3) | 2);
	write_mchbar8(0x671, (read_mchbar8(0x671) & 0xC3) | 2);

	read32(total_rank << 28);
}
1087
/* DDR3 MR1 field values: Rtt_nom termination (as RZQ fractions) and
   output drive strength. */
enum {
	MR1_RZQ12 = 512,
	MR1_RZQ2 = 64,
	MR1_RZQ4 = 4,
	MR1_ODS34OHM = 2
};

/* DDR3 MR0 field values: burst type and DLL reset. */
enum {
	MR0_BT_INTERLEAVED = 8,
	MR0_DLL_RESET_ON = 256
};

/* DDR3 MR2 field values: dynamic ODT (Rtt_WR). */
enum {
	MR2_RTT_WR_DISABLED = 0,
	MR2_RZQ2 = 1 << 10
};
1104
/* JEDEC initialization: program mode registers MR2/MR3/MR1/MR0 (in
   that order, via jedec_read commands 0x28/0x38/0x18/6) for every
   populated rank, with write recovery, CAS latency, termination
   (Rtt_nom / Rtt_WR) and DLL settings chosen from the silicon
   revision and rank population. */
static void jedec_init(struct raminfo *info)
{
	int write_recovery;
	int channel, slot, rank;
	int total_rank;
	int dll_on;
	int self_refresh_temperature;
	int auto_self_refresh;

	/* ASR/SRT are advertised only if every populated DIMM's SPD
	   thermal byte allows them. */
	auto_self_refresh = 1;
	self_refresh_temperature = 1;
	/* Write recovery derived from the board lane delay, clamped to
	   the 5..6 range outside delay 8..10. */
	if (info->board_lane_delay[3] <= 10) {
		if (info->board_lane_delay[3] <= 8)
			write_recovery = info->board_lane_delay[3] - 4;
		else
			write_recovery = 5;
	} else {
		write_recovery = 6;
	}
	FOR_POPULATED_RANKS {
		auto_self_refresh &=
		    (info->spd[channel][slot][THERMAL_AND_REFRESH] >> 2) & 1;
		self_refresh_temperature &=
		    info->spd[channel][slot][THERMAL_AND_REFRESH] & 1;
	}
	if (auto_self_refresh == 1)
		self_refresh_temperature = 0;

	dll_on = ((info->silicon_revision != 2 && info->silicon_revision != 3)
		  || (info->populated_ranks[0][0][0]
		      && info->populated_ranks[0][1][0])
		  || (info->populated_ranks[1][0][0]
		      && info->populated_ranks[1][1][0]));

	total_rank = 0;

	for (channel = NUM_CHANNELS - 1; channel >= 0; channel--) {
		int rtt, rtt_wr = MR2_RTT_WR_DISABLED;
		int rzq_reg58e;

		/* Termination strength: steppings 2/3 use RZQ/2 (RZQ/4 at
		   higher clocks with both ranks); others use RZQ/12 plus
		   dynamic ODT when slots 0 and 1 rank 0 are populated. */
		if (info->silicon_revision == 2 || info->silicon_revision == 3) {
			rzq_reg58e = 64;
			rtt = MR1_RZQ2;
			if (info->clock_speed_index != 0) {
				rzq_reg58e = 4;
				if (info->populated_ranks_mask[channel] == 3)
					rtt = MR1_RZQ4;
			}
		} else {
			if ((info->populated_ranks_mask[channel] & 5) == 5) {
				rtt = MR1_RZQ12;
				rzq_reg58e = 64;
				rtt_wr = MR2_RZQ2;
			} else {
				rzq_reg58e = 4;
				rtt = MR1_RZQ4;
			}
		}

		/* Stage the per-channel MR shadow registers. */
		write_mchbar16(0x588 + (channel << 10), 0x0);
		write_mchbar16(0x58a + (channel << 10), 0x4);
		write_mchbar16(0x58c + (channel << 10), rtt | MR1_ODS34OHM);
		write_mchbar16(0x58e + (channel << 10), rzq_reg58e | 0x82);
		write_mchbar16(0x590 + (channel << 10), 0x1282);

		for (slot = 0; slot < NUM_SLOTS; slot++)
			for (rank = 0; rank < NUM_RANKS; rank++)
				if (info->populated_ranks[channel][slot][rank]) {
					/* MR2: Rtt_WR, CWL, ASR, SRT. */
					jedec_read(info, channel, slot, rank,
						   total_rank, 0x28,
						   rtt_wr | (info->
							     clock_speed_index
							     << 3)
						   | (auto_self_refresh << 6) |
						   (self_refresh_temperature <<
						    7));
					/* MR3: all zero. */
					jedec_read(info, channel, slot, rank,
						   total_rank, 0x38, 0);
					/* MR1: Rtt_nom and drive strength. */
					jedec_read(info, channel, slot, rank,
						   total_rank, 0x18,
						   rtt | MR1_ODS34OHM);
					/* MR0: CL, write recovery, burst
					   type, DLL reset. */
					jedec_read(info, channel, slot, rank,
						   total_rank, 6,
						   (dll_on << 12) |
						   (write_recovery << 9)
						   | ((info->cas_latency - 4) <<
						      4) | MR0_BT_INTERLEAVED |
						   MR0_DLL_RESET_ON);
					total_rank++;
				}
	}
}
1197
/* Program the per-rank address-map registers (MCHBAR 0x200/0x208 blocks)
   and fill in the raminfo size totals.  With pre_jedec set, every populated
   rank is mapped as a fixed 256 MB so JEDEC init can address each rank;
   afterwards the real size derived from SPD density and module width is
   used and channel interleaving is enabled (register 0x104). */
static void program_modules_memory_map(struct raminfo *info, int pre_jedec)
{
	unsigned channel, slot, rank;
	unsigned int total_mb[2] = { 0, 0 };	/* total memory per channel in MB */
	unsigned int channel_0_non_interleaved;

	FOR_ALL_RANKS {
		if (info->populated_ranks[channel][slot][rank]) {
			/* 256 MB << density, halved for x16 modules. */
			total_mb[channel] +=
			    pre_jedec ? 256 : (256 << info->
					       density[channel][slot] >> info->
					       is_x16_module[channel][slot]);
			write_mchbar8(0x208 + rank + 2 * slot + (channel << 10),
				      (pre_jedec ? (1 | ((1 + 1) << 1))
				       : (info->
					  is_x16_module[channel][slot] |
					  ((info->density[channel][slot] +
					    1) << 1))) | 0x80);
		}
		/* Running channel limit after this rank, in 64 MB units. */
		write_mchbar16(0x200 + (channel << 10) + 4 * slot + 2 * rank,
			       total_mb[channel] >> 6);
	}

	info->total_memory_mb = total_mb[0] + total_mb[1];

	/* Interleave over the size both channels have in common; the
	   remainder stays non-interleaved. */
	info->interleaved_part_mb =
	    pre_jedec ? 0 : 2 * min(total_mb[0], total_mb[1]);
	info->non_interleaved_part_mb =
	    total_mb[0] + total_mb[1] - info->interleaved_part_mb;
	channel_0_non_interleaved = total_mb[0] - info->interleaved_part_mb / 2;
	write_mchbar32(0x100,
		       channel_0_non_interleaved | (info->
						    non_interleaved_part_mb <<
						    16));
	if (!pre_jedec)
		write_mchbar16(0x104, info->interleaved_part_mb);
}
1235
/* Program board- and speed-dependent delay/timing registers for both
   channels.  Most register offsets and constants here come from reverse
   engineering (see the file header note); comments below describe what the
   code visibly computes, with assumptions marked. */
static void program_board_delay(struct raminfo *info)
{
	int cas_latency_shift;
	int some_delay_ns;
	int some_delay_3_half_cycles;

	unsigned channel, i;
	int high_multiplier;
	int lane_3_delay;
	int cas_latency_derived;

	high_multiplier = 0;
	some_delay_ns = 200;
	some_delay_3_half_cycles = 4;
	/* Steppings 0/1 get an extra CAS-latency correction; pre-rev-8
	   northbridges use a longer delay and no correction. */
	cas_latency_shift = info->silicon_revision == 0
	    || info->silicon_revision == 1 ? 1 : 0;
	if (info->revision < 8) {
		some_delay_ns = 600;
		cas_latency_shift = 0;
	}
	{
		int speed_bit;
		speed_bit =
		    ((info->clock_speed_index > 1
		      || (info->silicon_revision != 2
			  && info->silicon_revision != 3))) ^ (info->revision >=
							       0x10);
		write_500(info, 0, speed_bit | ((!info->use_ecc) << 1), 0x60e,
			  3, 1);
		write_500(info, 1, speed_bit | ((!info->use_ecc) << 1), 0x60e,
			  3, 1);
		if (info->revision >= 0x10 && info->clock_speed_index <= 1
		    && (info->silicon_revision == 2
			|| info->silicon_revision == 3))
			rmw_1d0(0x116, 5, 2, 4, 1);
	}
	write_mchbar32(0x120,
		       (1 << (info->max_slots_used_in_channel + 28)) |
		       0x188e7f9f);

	write_mchbar8(0x124,
		      info->board_lane_delay[4] +
		      ((frequency_01(info) + 999) / 1000));
	write_mchbar16(0x125, 0x1360);
	write_mchbar8(0x127, 0x40);
	/* Slow FSB relative to memory clock: use the high multiplier and
	   compute an extra delay in half-cycles (clamped to 7 below). */
	if (info->fsb_frequency < frequency_11(info) / 2) {
		unsigned some_delay_2_half_cycles;
		high_multiplier = 1;
		some_delay_2_half_cycles = ps_to_halfcycles(info,
							    ((3 *
							      fsbcycle_ps(info))
							     >> 1) +
							    (halfcycle_ps(info)
							     *
							     reg178_min[info->
									clock_speed_index]
							     >> 6)
							    +
							    4 *
							    halfcycle_ps(info)
							    + 2230);
		some_delay_3_half_cycles =
		    min((some_delay_2_half_cycles +
			 (frequency_11(info) * 2) * (28 -
						     some_delay_2_half_cycles) /
			 (frequency_11(info) * 2 -
			  4 * (info->fsb_frequency))) >> 3, 7);
	}
	if (read_mchbar8(0x2ca9) & 1)
		some_delay_3_half_cycles = 3;
	for (channel = 0; channel < NUM_CHANNELS; channel++) {
		write_mchbar32(0x220 + (channel << 10),
			       read_mchbar32(0x220 +
					     (channel << 10)) | 0x18001117);
		/* Several CAS-latency-derived fields packed into 0x224. */
		write_mchbar32(0x224 + (channel << 10),
			       (info->max_slots_used_in_channel - 1)
			       |
			       ((info->cas_latency - 5 -
				 info->clock_speed_index) << 21)
			       |
			       ((info->max_slots_used_in_channel +
				 info->cas_latency - cas_latency_shift -
				 4) << 16)
			       | ((info->cas_latency - cas_latency_shift - 4) <<
				  26)
			       |
			       ((info->cas_latency - info->clock_speed_index +
				 info->max_slots_used_in_channel - 6) << 8));
		write_mchbar32(0x228 + (channel << 10),
			       info->max_slots_used_in_channel);
		write_mchbar8(0x239 + (channel << 10), 32);
		write_mchbar32(0x248 + (channel << 10),
			       (high_multiplier << 24) |
			       (some_delay_3_half_cycles << 25) | 0x840000);
		write_mchbar32(0x278 + (channel << 10), 0xc362042);
		write_mchbar32(0x27c + (channel << 10), 0x8b000062);
		write_mchbar32(0x24c + (channel << 10),
			       ((! !info->
				 clock_speed_index) << 17) | (((2 +
							       info->
							       clock_speed_index
							       -
							       (! !info->
								clock_speed_index)))
							      << 12) | 0x10200);

		write_mchbar8(0x267 + (channel << 10), 0x4);
		write_mchbar16(0x272 + (channel << 10), 0x155);
		write_mchbar32(0x2bc + (channel << 10),
			       (read_mchbar32(0x2bc + (channel << 10)) &
				0xFF000000)
			       | 0x707070);

		/* Bitmask of unpopulated ranks for this channel. */
		write_500(info, channel,
			  ((!info->populated_ranks[channel][1][1])
			   | (!info->populated_ranks[channel][1][0] << 1)
			   | (!info->populated_ranks[channel][0][1] << 2)
			   | (!info->populated_ranks[channel][0][0] << 3)),
			  0x4c9, 4, 1);
	}

	write_mchbar8(0x2c4, ((1 + (info->clock_speed_index != 0)) << 6) | 0xC);
	{
		/* Frequency divisor selected by the FSB : memory clock ratio. */
		u8 freq_divisor = 2;
		if (info->fsb_frequency == frequency_11(info))
			freq_divisor = 3;
		else if (2 * info->fsb_frequency < 3 * (frequency_11(info) / 2))
			freq_divisor = 1;
		else
			freq_divisor = 2;
		write_mchbar32(0x2c0, (freq_divisor << 11) | 0x6009c400);
	}

	/* Clamp the board's lane-3 delay into [.., 12]. */
	if (info->board_lane_delay[3] <= 10) {
		if (info->board_lane_delay[3] <= 8)
			lane_3_delay = info->board_lane_delay[3];
		else
			lane_3_delay = 10;
	} else {
		lane_3_delay = 12;
	}
	cas_latency_derived = info->cas_latency - info->clock_speed_index + 2;
	if (info->clock_speed_index > 1)
		cas_latency_derived++;
	for (channel = 0; channel < NUM_CHANNELS; channel++) {
		write_mchbar32(0x240 + (channel << 10),
			       ((info->clock_speed_index ==
				 0) * 0x11000) | 0x1002100 | ((2 +
							       info->
							       clock_speed_index)
							      << 4) | (info->
								       cas_latency
								       - 3));
		write_500(info, channel, (info->clock_speed_index << 1) | 1,
			  0x609, 6, 1);
		write_500(info, channel,
			  info->clock_speed_index + 2 * info->cas_latency - 7,
			  0x601, 6, 1);

		/* 0x250..0x25c pack various board lane delays into the
		   channel timing registers. */
		write_mchbar32(0x250 + (channel << 10),
			       ((lane_3_delay + info->clock_speed_index +
				 9) << 6)
			       | (info->board_lane_delay[7] << 2) | (info->
								     board_lane_delay
								     [4] << 16)
			       | (info->board_lane_delay[1] << 25) | (info->
								      board_lane_delay
								      [1] << 29)
			       | 1);
		write_mchbar32(0x254 + (channel << 10),
			       (info->
				board_lane_delay[1] >> 3) | ((info->
							      board_lane_delay
							      [8] +
							      4 *
							      info->
							      use_ecc) << 6) |
			       0x80 | (info->board_lane_delay[6] << 1) | (info->
									  board_lane_delay
									  [2] <<
									  28) |
			       (cas_latency_derived << 16) | 0x4700000);
		write_mchbar32(0x258 + (channel << 10),
			       ((info->board_lane_delay[5] +
				 info->clock_speed_index +
				 9) << 12) | ((info->clock_speed_index -
					       info->cas_latency + 12) << 8)
			       | (info->board_lane_delay[2] << 17) | (info->
								      board_lane_delay
								      [4] << 24)
			       | 0x47);
		write_mchbar32(0x25c + (channel << 10),
			       (info->board_lane_delay[1] << 1) | (info->
								   board_lane_delay
								   [0] << 8) |
			       0x1da50000);
		write_mchbar8(0x264 + (channel << 10), 0xff);
		write_mchbar8(0x5f8 + (channel << 10),
			      (cas_latency_shift << 3) | info->use_ecc);
	}

	/* Temporary 256MB-per-rank map so JEDEC init can address ranks. */
	program_modules_memory_map(info, 1);

	write_mchbar16(0x610,
		       (min(ns_to_cycles(info, some_delay_ns) / 2, 127) << 9)
		       | (read_mchbar16(0x610) & 0x1C3) | 0x3C);
	write_mchbar16(0x612, read_mchbar16(0x612) | 0x100);
	write_mchbar16(0x214, read_mchbar16(0x214) | 0x3E00);
	/* Preliminary QuickPath memory map: all 8 windows at top of memory.
	   program_total_memory_map() later writes the real values. */
	for (i = 0; i < 8; i++) {
		pcie_write_config32(PCI_DEV (QUICKPATH_BUS, 0, 1), 0x80 + 4 * i,
			       (info->total_memory_mb - 64) | !i | 2);
		pcie_write_config32(PCI_DEV (QUICKPATH_BUS, 0, 1), 0xc0 + 4 * i, 0);
	}
}
1450
1451#define BETTER_MEMORY_MAP 0
1452
/* Compute and program the system memory map: TOM (top of memory), TOLUD
   (top of low usable DRAM), TOUUD (top of upper usable DRAM), the >4G
   remap window, the IGD/GTT UMA stolen regions, TSEG, and the QuickPath
   address windows at 0x80/0xc0.  Sizes are in MB throughout. */
static void program_total_memory_map(struct raminfo *info)
{
	unsigned int TOM, TOLUD, TOUUD;
	unsigned int quickpath_reserved;
	unsigned int REMAPbase;
	unsigned int uma_base_igd;
	unsigned int uma_base_gtt;
	int memory_remap;
	unsigned int memory_map[8];
	int i;
	unsigned int current_limit;
	unsigned int tseg_base;
	int uma_size_igd = 0, uma_size_gtt = 0;

	memset(memory_map, 0, sizeof(memory_map));

#if REAL
	/* Decode the IGD / GTT stolen-memory sizes from the GGC register. */
	if (info->uma_enabled) {
		u16 t = pcie_read_config16(NORTHBRIDGE, D0F0_GGC);
		gav(t);
		const int uma_sizes_gtt[16] =
		    { 0, 1, 0, 2, 0, 0, 0, 0, 0, 2, 3, 4, 42, 42, 42, 42 };
		/* Igd memory */
		const int uma_sizes_igd[16] = {
			0, 0, 0, 0, 0, 32, 48, 64, 128, 256, 96, 160, 224, 352,
			256, 512
		};

		uma_size_igd = uma_sizes_igd[(t >> 4) & 0xF];
		uma_size_gtt = uma_sizes_gtt[(t >> 8) & 0xF];
	}
#endif

	TOM = info->total_memory_mb;
	if (TOM == 4096)
		TOM = 4032;
	/* Carve the ME UMA out below TOM; keep everything 64 MB aligned. */
	TOUUD = ALIGN_DOWN(TOM - info->memory_reserved_for_heci_mb, 64);
	TOLUD = ALIGN_DOWN(min(3072 + ALIGN_UP(uma_size_igd + uma_size_gtt, 64)
			       , TOUUD), 64);
	memory_remap = 0;
	/* Memory hidden under the PCI hole is remapped above 4 GB. */
	if (TOUUD - TOLUD > 64) {
		memory_remap = 1;
		REMAPbase = max(4096, TOUUD);
		TOUUD = TOUUD - TOLUD + 4096;
	}
	if (TOUUD > 4096)
		memory_map[2] = TOUUD | 1;
	quickpath_reserved = 0;

	{
		u32 t;

		/* QuickPath reserves a power-of-two region when bit 11 is
		   set; the size comes from the lowest set bit of [31:20]. */
		gav(t = pcie_read_config32(PCI_DEV(QUICKPATH_BUS, 0, 1), 0x68));
		if (t & 0x800)
			quickpath_reserved =
			    (1 << find_lowest_bit_set32(t >> 20));
	}
	if (memory_remap)
		TOUUD -= quickpath_reserved;

#if !REAL
	if (info->uma_enabled) {
		u16 t = pcie_read_config16(NORTHBRIDGE, D0F0_GGC);
		gav(t);
		const int uma_sizes_gtt[16] =
		    { 0, 1, 0, 2, 0, 0, 0, 0, 0, 2, 3, 4, 42, 42, 42, 42 };
		/* Igd memory */
		const int uma_sizes_igd[16] = {
			0, 0, 0, 0, 0, 32, 48, 64, 128, 256, 96, 160, 224, 352,
			256, 512
		};

		uma_size_igd = uma_sizes_igd[(t >> 4) & 0xF];
		uma_size_gtt = uma_sizes_gtt[(t >> 8) & 0xF];
	}
#endif

	/* Stack the stolen regions downward from TOLUD:
	   IGD, then GTT, then TSEG. */
	uma_base_igd = TOLUD - uma_size_igd;
	uma_base_gtt = uma_base_igd - uma_size_gtt;
	tseg_base = ALIGN_DOWN(uma_base_gtt, 64) - (CONFIG_SMM_TSEG_SIZE >> 20);
	if (!memory_remap)
		tseg_base -= quickpath_reserved;
	tseg_base = ALIGN_DOWN(tseg_base, 8);

	pcie_write_config16(NORTHBRIDGE, D0F0_TOLUD, TOLUD << 4);
	pcie_write_config16(NORTHBRIDGE, D0F0_TOM, TOM >> 6);
	if (memory_remap) {
		pcie_write_config16(NORTHBRIDGE, D0F0_REMAPBASE, REMAPbase >> 6);
		pcie_write_config16(NORTHBRIDGE, D0F0_REMAPLIMIT, (TOUUD - 64) >> 6);
	}
	pcie_write_config16(NORTHBRIDGE, D0F0_TOUUD, TOUUD);

	if (info->uma_enabled) {
		pcie_write_config32(NORTHBRIDGE, D0F0_IGD_BASE, uma_base_igd << 20);
		pcie_write_config32(NORTHBRIDGE, D0F0_GTT_BASE, uma_base_gtt << 20);
	}
	pcie_write_config32(NORTHBRIDGE, TSEG, tseg_base << 20);

	/* QuickPath windows: each entry programs a limit (64 MB granular)
	   with bit 0 = valid, taken from the running maximum. */
	current_limit = 0;
	memory_map[0] = ALIGN_DOWN(uma_base_gtt, 64) | 1;
	memory_map[1] = 4096;
	for (i = 0; i < ARRAY_SIZE(memory_map); i++) {
		current_limit = max(current_limit, memory_map[i] & ~1);
		pcie_write_config32(PCI_DEV(QUICKPATH_BUS, 0, 1), 4 * i + 0x80,
			       (memory_map[i] & 1) | ALIGN_DOWN(current_limit -
								1, 64) | 2);
		pcie_write_config32(PCI_DEV(QUICKPATH_BUS, 0, 1), 4 * i + 0xc0, 0);
	}
}
1562
/* Gather northbridge/ME capabilities into raminfo: HECI BAR, ME UMA size,
   silicon revision and the maximum supported memory clock, decoded from
   the CAPID0 capability registers and the PCI device/revision IDs. */
static void collect_system_info(struct raminfo *info)
{
	u32 capid0[3];
	int i;
	unsigned channel;

	/* Wait for some bit, maybe TXT clear. */
	while (!(read8(0xfed40000) & (1 << 7))) ;

	if (!info->heci_bar)
		gav(info->heci_bar =
		    pcie_read_config32(HECIDEV, HECIBAR) & 0xFFFFFFF8);
	if (!info->memory_reserved_for_heci_mb) {
		/* Wait for ME to be ready */
		intel_early_me_init();
		info->memory_reserved_for_heci_mb = intel_early_me_uma_size();
	}

	for (i = 0; i < 3; i++)
		gav(capid0[i] =
		    pcie_read_config32(NORTHBRIDGE, D0F0_CAPID0 | (i << 2)));
	gav(info->revision = pcie_read_config8(NORTHBRIDGE, PCI_REVISION_ID));
	info->max_supported_clock_speed_index = (~capid0[1] & 7);

	/* CAPID0 bit 11 set means the IGD is fused off; otherwise check
	   DEVEN to see whether it is actually enabled. */
	if ((capid0[1] >> 11) & 1)
		info->uma_enabled = 0;
	else
		gav(info->uma_enabled =
		    pcie_read_config8(NORTHBRIDGE, D0F0_DEVEN) & 8);
	/* Unrecognised: [0000:fffd3d2d] 37f81.37f82 ! CPUID: eax: 00000001; ecx: 00000e00 => 00020655.00010800.029ae3ff.bfebfbff */
	info->silicon_revision = 0;

	/* Decode the silicon revision from CAPID0 fuse bits; steppings 2/3
	   are further limited by the installed module type/speed. */
	if (capid0[2] & 2) {
		info->silicon_revision = 0;
		info->max_supported_clock_speed_index = 2;
		for (channel = 0; channel < NUM_CHANNELS; channel++)
			if (info->populated_ranks[channel][0][0]
			    && (info->spd[channel][0][MODULE_TYPE] & 0xf) ==
			    3) {
				info->silicon_revision = 2;
				info->max_supported_clock_speed_index = 1;
			}
	} else {
		switch (((capid0[2] >> 18) & 1) + 2 * ((capid0[1] >> 3) & 1)) {
		case 1:
		case 2:
			info->silicon_revision = 3;
			break;
		case 3:
			info->silicon_revision = 0;
			break;
		case 0:
			info->silicon_revision = 2;
			break;
		}
		/* Device ID overrides the fuse-based decoding. */
		switch (pcie_read_config16(NORTHBRIDGE, PCI_DEVICE_ID)) {
		case 0x40:
			info->silicon_revision = 0;
			break;
		case 0x48:
			info->silicon_revision = 1;
			break;
		}
	}
}
1628
/* Restore previously cached training results (per-lane timings plus the
   0x178/0x10b tuning registers) back into the hardware.  Only supported
   on revision >= 8 northbridges. */
static void write_training_data(struct raminfo *info)
{
	int tm, channel, slot, rank, lane;
	if (info->revision < 8)
		return;

	/* 4 timing register sets x channels x slots x ranks x 9 lanes. */
	for (tm = 0; tm < 4; tm++)
		for (channel = 0; channel < NUM_CHANNELS; channel++)
			for (slot = 0; slot < NUM_SLOTS; slot++)
				for (rank = 0; rank < NUM_RANKS; rank++)
					for (lane = 0; lane < 9; lane++)
						write_500(info, channel,
							  info->
							  cached_training->
							  lane_timings[tm]
							  [channel][slot][rank]
							  [lane],
							  get_timing_register_addr
							  (lane, tm, slot,
							   rank), 9, 0);
	write_1d0(info->cached_training->reg_178, 0x178, 7, 1);
	write_1d0(info->cached_training->reg_10b, 0x10b, 6, 1);
}
1652
/* Debug dump: print, for every populated rank and lane, the current
   hardware timing values alongside the values recorded in info->training
   (hardware value first, recorded value in parentheses). */
static void dump_timings(struct raminfo *info)
{
#if REAL
	int channel, slot, rank, lane, i;
	printk(BIOS_DEBUG, "Timings:\n");
	FOR_POPULATED_RANKS {
		printk(BIOS_DEBUG, "channel %d, slot %d, rank %d\n", channel,
		       slot, rank);
		for (lane = 0; lane < 9; lane++) {
			printk(BIOS_DEBUG, "lane %d: ", lane);
			for (i = 0; i < 4; i++) {
				printk(BIOS_DEBUG, "%x (%x) ",
				       read_500(info, channel,
						get_timing_register_addr
						(lane, i, slot, rank),
						9),
				       info->training.
				       lane_timings[i][channel][slot][rank]
				       [lane]);
			}
			printk(BIOS_DEBUG, "\n");
		}
	}
	printk(BIOS_DEBUG, "[178] = %x (%x)\n", read_1d0(0x178, 7),
	       info->training.reg_178);
	printk(BIOS_DEBUG, "[10b] = %x (%x)\n", read_1d0(0x10b, 6),
	       info->training.reg_10b);
#endif
}
1682
Vladimir Serbinenkof7a42de2014-01-09 11:10:04 +01001683/* Read timings and other registers that need to be restored verbatim and
1684 put them to CBMEM.
1685 */
Vladimir Serbinenkoc6f6be02013-11-12 22:32:08 +01001686static void save_timings(struct raminfo *info)
1687{
Vladimir Serbinenkoc6f6be02013-11-12 22:32:08 +01001688 struct ram_training train;
1689 struct mrc_data_container *mrcdata;
1690 int output_len = ALIGN(sizeof(train), 16);
1691 int channel, slot, rank, lane, i;
1692
1693 train = info->training;
1694 FOR_POPULATED_RANKS for (lane = 0; lane < 9; lane++)
1695 for (i = 0; i < 4; i++)
1696 train.lane_timings[i][channel][slot][rank][lane] =
1697 read_500(info, channel,
1698 get_timing_register_addr(lane, i, slot,
1699 rank), 9);
1700 train.reg_178 = read_1d0(0x178, 7);
1701 train.reg_10b = read_1d0(0x10b, 6);
1702
Vladimir Serbinenkof7a42de2014-01-09 11:10:04 +01001703 for (channel = 0; channel < NUM_CHANNELS; channel++) {
1704 u32 reg32;
1705 reg32 = read_mchbar32 ((channel << 10) + 0x274);
1706 train.reg274265[channel][0] = reg32 >> 16;
1707 train.reg274265[channel][1] = reg32 & 0xffff;
1708 train.reg274265[channel][2] = read_mchbar16 ((channel << 10) + 0x265) >> 8;
1709 }
1710 train.reg2ca9_bit0 = read_mchbar8(0x2ca9) & 1;
1711 train.reg_6dc = read_mchbar32 (0x6dc);
1712 train.reg_6e8 = read_mchbar32 (0x6e8);
1713
1714 printk (BIOS_SPEW, "[6dc] = %x\n", train.reg_6dc);
1715 printk (BIOS_SPEW, "[6e8] = %x\n", train.reg_6e8);
1716
Vladimir Serbinenkoc6f6be02013-11-12 22:32:08 +01001717 /* Save the MRC S3 restore data to cbmem */
Vladimir Serbinenkoc6f6be02013-11-12 22:32:08 +01001718 mrcdata = cbmem_add
1719 (CBMEM_ID_MRCDATA, output_len + sizeof(struct mrc_data_container));
1720
Kyösti Mälkki743a2182014-06-15 15:59:44 +03001721 if (mrcdata != NULL) {
1722 printk(BIOS_DEBUG, "Relocate MRC DATA from %p to %p (%u bytes)\n",
1723 &train, mrcdata, output_len);
Vladimir Serbinenkoc6f6be02013-11-12 22:32:08 +01001724
Kyösti Mälkki743a2182014-06-15 15:59:44 +03001725 mrcdata->mrc_signature = MRC_DATA_SIGNATURE;
1726 mrcdata->mrc_data_size = output_len;
1727 mrcdata->reserved = 0;
1728 memcpy(mrcdata->mrc_data, &train, sizeof(train));
Vladimir Serbinenkoc6f6be02013-11-12 22:32:08 +01001729
Kyösti Mälkki743a2182014-06-15 15:59:44 +03001730 /* Zero the unused space in aligned buffer. */
1731 if (output_len > sizeof(train))
1732 memset(mrcdata->mrc_data + sizeof(train), 0,
1733 output_len - sizeof(train));
Vladimir Serbinenkoc6f6be02013-11-12 22:32:08 +01001734
Kyösti Mälkki743a2182014-06-15 15:59:44 +03001735 mrcdata->mrc_checksum = compute_ip_checksum(mrcdata->mrc_data,
1736 mrcdata->mrc_data_size);
1737 }
Vladimir Serbinenkoc6f6be02013-11-12 22:32:08 +01001738}
1739
#if REAL
/* Return the training data saved by a previous boot in the MRC cache,
   or 0 when no valid cache entry exists. */
static const struct ram_training *get_cached_training(void)
{
	struct mrc_data_container *cont = find_current_mrc_cache();

	return cont ? (void *)cont->mrc_data : 0;
}
#endif
1750
/* FIXME: add timeout. */
/* Spin until bit 3 of the ME CSR mirror (HECIBAR+0xc) is set -- presumably
   the ME hardware-ready bit; confirm against the mei_csr layout -- then set
   host ready/interrupt-generate bits and clear the reset bit in the host
   CSR (HECIBAR+0x4). */
static void wait_heci_ready(void)
{
	while (!(read32(DEFAULT_HECIBAR | 0xc) & 8)) ;	// = 0x8000000c
	write32((DEFAULT_HECIBAR | 0x4),
		(read32(DEFAULT_HECIBAR | 0x4) & ~0x10) | 0xc);
}
1758
/* FIXME: add timeout. */
/* Block until the host circular buffer has room for 'len' more dwords.
   Free space = buffer_depth - (write_ptr - read_ptr); the pointer
   difference relies on the unsigned CSR bitfields wrapping naturally. */
static void wait_heci_cb_avail(int len)
{
	union {
		struct mei_csr csr;
		u32 raw;
	} csr;

	/* Wait for the ME-ready bit first (same bit as wait_heci_ready). */
	while (!(read32(DEFAULT_HECIBAR | 0xc) & 8)) ;

	do
		csr.raw = read32(DEFAULT_HECIBAR | 0x4);
	while (len >
	       csr.csr.buffer_depth - (csr.csr.buffer_write_ptr -
				       csr.csr.buffer_read_ptr));
}
1775
/* Write one HECI packet (header dword + payload dwords) into the host
   circular buffer at HECIBAR+0, then ring the interrupt-generate bit. */
static void send_heci_packet(struct mei_header *head, u32 * payload)
{
	int len = (head->length + 3) / 4;	/* payload size in dwords */
	int i;

	/* Reserve space for the payload plus the header dword. */
	wait_heci_cb_avail(len + 1);

	/* FIXME: handle leftovers correctly. */
	write32(DEFAULT_HECIBAR | 0, *(u32 *) head);
	for (i = 0; i < len - 1; i++)
		write32(DEFAULT_HECIBAR | 0, payload[i]);

	/* NOTE(review): this mask looks wrong -- (1 << (8 * len)) - 1 uses
	   the dword count rather than the number of leftover bytes in the
	   last dword, and the shift is undefined for len >= 4.  Presumably
	   it was meant to mask the tail bytes (head->length & 3); verify
	   against the ME interface spec before changing. */
	write32(DEFAULT_HECIBAR | 0, payload[i] & ((1 << (8 * len)) - 1));
	write32(DEFAULT_HECIBAR | 0x4, read32(DEFAULT_HECIBAR | 0x4) | 0x4);
}
1791
1792static void
1793send_heci_message(u8 * msg, int len, u8 hostaddress, u8 clientaddress)
1794{
1795 struct mei_header head;
1796 int maxlen;
1797
1798 wait_heci_ready();
1799 maxlen = (read32(DEFAULT_HECIBAR | 0x4) >> 24) * 4 - 4;
1800
1801 while (len) {
1802 int cur = len;
1803 if (cur > maxlen) {
1804 cur = maxlen;
1805 head.is_complete = 0;
1806 } else
1807 head.is_complete = 1;
1808 head.length = cur;
1809 head.reserved = 0;
1810 head.client_address = clientaddress;
1811 head.host_address = hostaddress;
1812 send_heci_packet(&head, (u32 *) msg);
1813 len -= cur;
1814 msg += cur;
1815 }
1816}
1817
1818/* FIXME: Add timeout. */
1819static int
1820recv_heci_packet(struct raminfo *info, struct mei_header *head, u32 * packet,
1821 u32 * packet_size)
1822{
1823 union {
1824 struct mei_csr csr;
1825 u32 raw;
1826 } csr;
1827 int i = 0;
1828
1829 write32(DEFAULT_HECIBAR | 0x4, read32(DEFAULT_HECIBAR | 0x4) | 2);
1830 do {
1831 csr.raw = read32(DEFAULT_HECIBAR | 0xc);
1832#if !REAL
1833 if (i++ > 346)
1834 return -1;
1835#endif
1836 }
1837 while (csr.csr.buffer_write_ptr == csr.csr.buffer_read_ptr);
1838 *(u32 *) head = read32(DEFAULT_HECIBAR | 0x8);
1839 if (!head->length) {
1840 write32(DEFAULT_HECIBAR | 0x4,
1841 read32(DEFAULT_HECIBAR | 0x4) | 2);
1842 *packet_size = 0;
1843 return 0;
1844 }
1845 if (head->length + 4 > 4 * csr.csr.buffer_depth
1846 || head->length > *packet_size) {
1847 *packet_size = 0;
1848 return -1;
1849 }
1850
1851 do
1852 csr.raw = read32(DEFAULT_HECIBAR | 0xc);
1853 while ((head->length + 3) >> 2 >
1854 csr.csr.buffer_write_ptr - csr.csr.buffer_read_ptr);
1855
1856 for (i = 0; i < (head->length + 3) >> 2; i++)
1857 packet[i++] = read32(DEFAULT_HECIBAR | 0x8);
1858 *packet_size = head->length;
1859 if (!csr.csr.ready)
1860 *packet_size = 0;
1861 write32(DEFAULT_HECIBAR | 0x4, read32(DEFAULT_HECIBAR | 0x4) | 4);
1862 return 0;
1863}
1864
/* FIXME: Add timeout. */
/* Receive a complete (possibly fragmented) HECI message by reading packets
   until one arrives with is_complete set.  On success returns 0 and sets
   *message_size to the total byte count; on failure (packet error, empty
   flow-control packet, or buffer overrun) returns -1 and sets it to 0. */
static int
recv_heci_message(struct raminfo *info, u32 * message, u32 * message_size)
{
	struct mei_header head;
	int current_position;

	current_position = 0;
	while (1) {
		u32 current_size;
		current_size = *message_size - current_position;
		if (recv_heci_packet
		    (info, &head, message + (current_position >> 2),
		     &current_size) == -1)
			break;
		if (!current_size)
			break;
		current_position += current_size;
		if (head.is_complete) {
			*message_size = current_position;
			return 0;
		}

		if (current_position >= *message_size)
			break;
	}
	*message_size = 0;
	return -1;
}
1894
/* Tell the ME where its UMA memory lives: send the MKHI SET_UMA command
   (host address 0, client address 7) with the UMA base and size, then wait
   for the acknowledgement.  Dies if the ME does not ack the command. */
static void send_heci_uma_message(struct raminfo *info)
{
	struct uma_reply {
		u8 group_id;
		u8 command;
		u8 reserved;
		u8 result;
		u8 field2;
		u8 unk3[0x48 - 4 - 1];
	} __attribute__ ((packed)) reply;
	struct uma_message {
		u8 group_id;
		u8 cmd;
		u8 reserved;
		u8 result;
		u32 c2;
		u64 heci_uma_addr;
		u32 memory_reserved_for_heci_mb;
		u16 c3;
	} __attribute__ ((packed)) msg = {
	0, MKHI_SET_UMA, 0, 0,
		    0x82,
		    info->heci_uma_addr, info->memory_reserved_for_heci_mb, 0};
	u32 reply_size;

	send_heci_message((u8 *) & msg, sizeof(msg), 0, 7);

	reply_size = sizeof(reply);
	if (recv_heci_message(info, (u32 *) & reply, &reply_size) == -1)
		return;

	/* The reply echoes the command code with bit 7 set on success. */
	if (reply.command != (MKHI_SET_UMA | (1 << 7)))
		die("HECI init failed\n");
}
1929
/* Reserve UMA memory for the ME: read the requested size from HECI config
   register 0x44, place the region just below TOM, walk the DMI/RCBA virtual
   channel registers to quiesce VC traffic, announce the region to the ME via
   send_heci_uma_message(), and finally disable the HECI PCI function. */
static void setup_heci_uma(struct raminfo *info)
{
	u32 reg44;

	reg44 = pcie_read_config32(HECIDEV, 0x44);	// = 0x80010020
	info->memory_reserved_for_heci_mb = 0;
	info->heci_uma_addr = 0;
	/* Bail out unless the ME requests UMA (bit 16) and is not disabled. */
	if (!((reg44 & 0x10000) && !(pcie_read_config32(HECIDEV, 0x40) & 0x20)))
		return;

	info->heci_bar = pcie_read_config32(HECIDEV, 0x10) & 0xFFFFFFF0;
	info->memory_reserved_for_heci_mb = reg44 & 0x3f;
	/* UMA sits at TOM minus the reserved size (TOM is in 64 MB units). */
	info->heci_uma_addr =
	    ((u64)
	     ((((u64) pcie_read_config16(NORTHBRIDGE, D0F0_TOM)) << 6) -
	      info->memory_reserved_for_heci_mb)) << 20;

	pcie_read_config32(NORTHBRIDGE, DMIBAR);
	if (info->memory_reserved_for_heci_mb) {
		/* Clear the VC enable bits on both DMI and RCBA sides,
		   then re-arm VC1 and wait for the negotiation to finish. */
		write32(DEFAULT_DMIBAR | 0x14,
			read32(DEFAULT_DMIBAR | 0x14) & ~0x80);
		write32(DEFAULT_RCBA | 0x14,
			read32(DEFAULT_RCBA | 0x14) & ~0x80);
		write32(DEFAULT_DMIBAR | 0x20,
			read32(DEFAULT_DMIBAR | 0x20) & ~0x80);
		write32(DEFAULT_RCBA | 0x20,
			read32(DEFAULT_RCBA | 0x20) & ~0x80);
		write32(DEFAULT_DMIBAR | 0x2c,
			read32(DEFAULT_DMIBAR | 0x2c) & ~0x80);
		write32(DEFAULT_RCBA | 0x30,
			read32(DEFAULT_RCBA | 0x30) & ~0x80);
		write32(DEFAULT_DMIBAR | 0x38,
			read32(DEFAULT_DMIBAR | 0x38) & ~0x80);
		write32(DEFAULT_RCBA | 0x40,
			read32(DEFAULT_RCBA | 0x40) & ~0x80);

		write32(DEFAULT_RCBA | 0x40, 0x87000080);	// OK
		write32(DEFAULT_DMIBAR | 0x38, 0x87000080);	// OK
		while (read16(DEFAULT_RCBA | 0x46) & 2
		       && read16(DEFAULT_DMIBAR | 0x3e) & 2) ;
	}

	write_mchbar32(0x24, 0x10000 + info->memory_reserved_for_heci_mb);

	send_heci_uma_message(info);

	/* The HECI function is no longer needed: clear its BAR and
	   command register. */
	pcie_write_config32(HECIDEV, 0x10, 0x0);
	pcie_write_config8(HECIDEV, 0x4, 0x0);

}
1980
1981static int have_match_ranks(struct raminfo *info, int channel, int ranks)
1982{
1983 int ranks_in_channel;
1984 ranks_in_channel = info->populated_ranks[channel][0][0]
1985 + info->populated_ranks[channel][0][1]
1986 + info->populated_ranks[channel][1][0]
1987 + info->populated_ranks[channel][1][1];
1988
1989 /* empty channel */
1990 if (ranks_in_channel == 0)
1991 return 1;
1992
1993 if (ranks_in_channel != ranks)
1994 return 0;
1995 /* single slot */
1996 if (info->populated_ranks[channel][0][0] !=
1997 info->populated_ranks[channel][1][0])
1998 return 1;
1999 if (info->populated_ranks[channel][0][1] !=
2000 info->populated_ranks[channel][1][1])
2001 return 1;
2002 if (info->is_x16_module[channel][0] != info->is_x16_module[channel][1])
2003 return 0;
2004 if (info->density[channel][0] != info->density[channel][1])
2005 return 0;
2006 return 1;
2007}
2008
Vladimir Serbinenkoc6f6be02013-11-12 22:32:08 +01002009static void read_4090(struct raminfo *info)
2010{
2011 int i, channel, slot, rank, lane;
2012 for (i = 0; i < 2; i++)
2013 for (slot = 0; slot < NUM_SLOTS; slot++)
2014 for (rank = 0; rank < NUM_RANKS; rank++)
2015 for (lane = 0; lane < 9; lane++)
2016 info->training.
2017 lane_timings[0][i][slot][rank][lane]
2018 = 32;
2019
2020 for (i = 1; i < 4; i++)
2021 for (channel = 0; channel < NUM_CHANNELS; channel++)
2022 for (slot = 0; slot < NUM_SLOTS; slot++)
2023 for (rank = 0; rank < NUM_RANKS; rank++)
2024 for (lane = 0; lane < 9; lane++) {
2025 info->training.
2026 lane_timings[i][channel]
2027 [slot][rank][lane] =
2028 read_500(info, channel,
2029 get_timing_register_addr
2030 (lane, i, slot,
2031 rank), 9)
2032 + (i == 1) * 11; // !!!!
2033 }
2034
2035}
2036
2037static u32 get_etalon2(int flip, u32 addr)
2038{
2039 const u16 invmask[] = {
2040 0xaaaa, 0x6db6, 0x4924, 0xeeee, 0xcccc, 0x8888, 0x7bde, 0x739c,
2041 0x6318, 0x4210, 0xefbe, 0xcf3c, 0x8e38, 0x0c30, 0x0820
2042 };
2043 u32 ret;
2044 u32 comp4 = addr / 480;
2045 addr %= 480;
2046 u32 comp1 = addr & 0xf;
2047 u32 comp2 = (addr >> 4) & 1;
2048 u32 comp3 = addr >> 5;
2049
2050 if (comp4)
2051 ret = 0x1010101 << (comp4 - 1);
2052 else
2053 ret = 0;
2054 if (flip ^ (((invmask[comp3] >> comp1) ^ comp2) & 1))
2055 ret = ~ret;
2056
2057 return ret;
2058}
2059
/* Tear down the temporary variable MTRR (slot 3) used by the memory test
   by clearing both its base and mask MSRs. */
static void disable_cache(void)
{
	msr_t msr = {.lo = 0, .hi = 0 };

	wrmsr(MTRRphysBase_MSR(3), msr);
	wrmsr(MTRRphysMask_MSR(3), msr);
}
2067
/* Point variable MTRR slot 3 at [base, base+size) with write-protect
   caching so the memory test can read through the cache. */
static void enable_cache(unsigned int base, unsigned int size)
{
	msr_t msr;
	msr.lo = base | MTRR_TYPE_WRPROT;
	msr.hi = 0;
	wrmsr(MTRRphysBase_MSR(3), msr);
	/* NOTE(review): ALIGN_DOWN(size + 4096, 4096) adds a whole page when
	   'size' is already page-aligned, and MTRR masks require power-of-two
	   sizes anyway -- presumably callers only pass suitable sizes; verify
	   before reusing this helper elsewhere. */
	msr.lo = ((~(ALIGN_DOWN(size + 4096, 4096) - 1) | MTRRdefTypeEn)
		  & 0xffffffff);
	msr.hi = 0x0000000f;
	wrmsr(MTRRphysMask_MSR(3), msr);
}
2079
2080static void flush_cache(u32 start, u32 size)
2081{
2082 u32 end;
2083 u32 addr;
2084
2085 end = start + (ALIGN_DOWN(size + 4096, 4096));
2086 for (addr = start; addr < end; addr += 64)
2087 clflush(addr);
2088}
2089
/* Clear the northbridge memory-test error status (config register 0xc0)
   by writing 1 to it. */
static void clear_errors(void)
{
	pcie_write_config8(NORTHBRIDGE, 0xc0, 0x01);
}
2094
/* Fill the test window of rank 'totalrank' (rank index in bits 31:28 of
   the physical address) with the etalon2 pattern, two identical dwords per
   pattern value.  Error status is cleared every 320 writes so stale errors
   from the writes themselves do not accumulate. */
static void write_testing(struct raminfo *info, int totalrank, int flip)
{
	int nwrites = 0;
	/* in 8-byte units. */
	u32 offset;
	u32 base;

	base = totalrank << 28;
	for (offset = 0; offset < 9 * 480; offset += 2) {
		write32(base + offset * 8, get_etalon2(flip, offset));
		write32(base + offset * 8 + 4, get_etalon2(flip, offset));
		write32(base + offset * 8 + 8, get_etalon2(flip, offset + 1));
		write32(base + offset * 8 + 12, get_etalon2(flip, offset + 1));
		nwrites += 4;
		if (nwrites >= 320) {
			clear_errors();
			nwrites = 0;
		}
	}
}
2115
/* Read back the pattern written by write_testing() and compare against the
   expected etalon2 values.  Returns a bitmask of the 8 failing byte lanes
   (bit i set = lane i saw at least one mismatching byte).  Reads go through
   a temporary WRPROT MTRR (128-bit reads), which is torn down and flushed
   before returning. */
static u8 check_testing(struct raminfo *info, u8 total_rank, int flip)
{
	u8 failmask = 0;
	int i;
	int comp1, comp2, comp3;
	u32 failxor[2] = { 0, 0 };

	enable_cache((total_rank << 28), 1728 * 5 * 4);

	for (comp3 = 0; comp3 < 9 && failmask != 0xff; comp3++) {
		for (comp1 = 0; comp1 < 4; comp1++)
			for (comp2 = 0; comp2 < 60; comp2++) {
				u32 re[4];
				u32 curroffset =
				    comp3 * 8 * 60 + 2 * comp1 + 8 * comp2;
				read128((total_rank << 28) | (curroffset << 3),
					(u64 *) re);
				/* Accumulate mismatch bits; failxor[0] covers
				   lanes 0-3, failxor[1] lanes 4-7. */
				failxor[0] |=
				    get_etalon2(flip, curroffset) ^ re[0];
				failxor[1] |=
				    get_etalon2(flip, curroffset) ^ re[1];
				failxor[0] |=
				    get_etalon2(flip, curroffset | 1) ^ re[2];
				failxor[1] |=
				    get_etalon2(flip, curroffset | 1) ^ re[3];
			}
		for (i = 0; i < 8; i++)
			if ((0xff << (8 * (i % 4))) & failxor[i / 4])
				failmask |= 1 << i;
	}
	disable_cache();
	flush_cache((total_rank << 28), 1728 * 5 * 4);
	return failmask;
}
2150
/* Seed table for the type-2 test pattern generator (see get_etalon());
   indexed by address bits 20:16. */
const u32 seed1[0x18] = {
	0x3a9d5ab5, 0x576cb65b, 0x555773b6, 0x2ab772ee,
	0x555556ee, 0x3a9d5ab5, 0x576cb65b, 0x555773b6,
	0x2ab772ee, 0x555556ee, 0x5155a555, 0x5155a555,
	0x5155a555, 0x5155a555, 0x3a9d5ab5, 0x576cb65b,
	0x555773b6, 0x2ab772ee, 0x555556ee, 0x55d6b4a5,
	0x366d6b3a, 0x2ae5ddbb, 0x3b9ddbb7, 0x55d6b4a5,
};
2159
2160static u32 get_seed2(int a, int b)
2161{
2162 const u32 seed2[5] = {
2163 0x55555555, 0x33333333, 0x2e555a55, 0x55555555,
2164 0x5b6db6db,
2165 };
2166 u32 r;
2167 r = seed2[(a + (a >= 10)) / 5];
2168 return b ? ~r : r;
2169}
2170
2171static int make_shift(int comp2, int comp5, int x)
2172{
2173 const u8 seed3[32] = {
2174 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
2175 0x00, 0x00, 0x38, 0x1c, 0x3c, 0x18, 0x38, 0x38,
2176 0x38, 0x38, 0x38, 0x38, 0x0f, 0x0f, 0x0f, 0x0f,
2177 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
2178 };
2179
2180 return (comp2 - ((seed3[comp5] >> (x & 7)) & 1)) & 0x1f;
2181}
2182
2183static u32 get_etalon(int flip, u32 addr)
2184{
2185 u32 mask_byte = 0;
2186 int comp1 = (addr >> 1) & 1;
2187 int comp2 = (addr >> 3) & 0x1f;
2188 int comp3 = (addr >> 8) & 0xf;
2189 int comp4 = (addr >> 12) & 0xf;
2190 int comp5 = (addr >> 16) & 0x1f;
2191 u32 mask_bit = ~(0x10001 << comp3);
2192 u32 part1;
2193 u32 part2;
2194 int byte;
2195
2196 part2 =
2197 ((seed1[comp5] >>
2198 make_shift(comp2, comp5,
2199 (comp3 >> 3) | (comp1 << 2) | 2)) & 1) ^ flip;
2200 part1 =
2201 ((seed1[comp5] >>
2202 make_shift(comp2, comp5,
2203 (comp3 >> 3) | (comp1 << 2) | 0)) & 1) ^ flip;
2204
2205 for (byte = 0; byte < 4; byte++)
2206 if ((get_seed2(comp5, comp4) >>
2207 make_shift(comp2, comp5, (byte | (comp1 << 2)))) & 1)
2208 mask_byte |= 0xff << (8 * byte);
2209
2210 return (mask_bit & mask_byte) | (part1 << comp3) | (part2 <<
2211 (comp3 + 16));
2212}
2213
2214static void
2215write_testing_type2(struct raminfo *info, u8 totalrank, u8 region, u8 block,
2216 char flip)
2217{
2218 int i;
2219 for (i = 0; i < 2048; i++)
2220 write32((totalrank << 28) | (region << 25) | (block << 16) |
2221 (i << 2), get_etalon(flip, (block << 16) | (i << 2)));
2222}
2223
2224static u8
2225check_testing_type2(struct raminfo *info, u8 totalrank, u8 region, u8 block,
2226 char flip)
2227{
2228 u8 failmask = 0;
2229 u32 failxor[2];
2230 int i;
2231 int comp1, comp2, comp3;
2232
2233 failxor[0] = 0;
2234 failxor[1] = 0;
2235
2236 enable_cache(totalrank << 28, 134217728);
2237 for (comp3 = 0; comp3 < 2 && failmask != 0xff; comp3++) {
2238 for (comp1 = 0; comp1 < 16; comp1++)
2239 for (comp2 = 0; comp2 < 64; comp2++) {
2240 u32 addr =
2241 (totalrank << 28) | (region << 25) | (block
2242 << 16)
2243 | (comp3 << 12) | (comp2 << 6) | (comp1 <<
2244 2);
2245 failxor[comp1 & 1] |=
2246 read32(addr) ^ get_etalon(flip, addr);
2247 }
2248 for (i = 0; i < 8; i++)
2249 if ((0xff << (8 * (i % 4))) & failxor[i / 4])
2250 failmask |= 1 << i;
2251 }
2252 disable_cache();
2253 flush_cache((totalrank << 28) | (region << 25) | (block << 16), 16384);
2254 return failmask;
2255}
2256
2257static int check_bounded(unsigned short *vals, u16 bound)
2258{
2259 int i;
2260
2261 for (i = 0; i < 8; i++)
2262 if (vals[i] < bound)
2263 return 0;
2264 return 1;
2265}
2266
/* Per-lane progress of the timing-margin search driven by do_fsm():
   BEFORE_USABLE = no passing setting seen yet,
   AT_USABLE     = counting consecutive passing settings,
   AT_MARGIN     = enough consecutive passes; inside the usable window,
   COMPLETE      = window end found (or upper limit reached). */
enum state {
	BEFORE_USABLE = 0, AT_USABLE = 1, AT_MARGIN = 2, COMPLETE = 3
};
2270
2271static int validate_state(enum state *in)
2272{
2273 int i;
2274 for (i = 0; i < 8; i++)
2275 if (in[i] != COMPLETE)
2276 return 0;
2277 return 1;
2278}
2279
2280static void
2281do_fsm(enum state *state, u16 * counter,
2282 u8 fail_mask, int margin, int uplimit,
2283 u8 * res_low, u8 * res_high, u8 val)
2284{
2285 int lane;
2286
2287 for (lane = 0; lane < 8; lane++) {
2288 int is_fail = (fail_mask >> lane) & 1;
2289 switch (state[lane]) {
2290 case BEFORE_USABLE:
2291 if (!is_fail) {
2292 counter[lane] = 1;
2293 state[lane] = AT_USABLE;
2294 break;
2295 }
2296 counter[lane] = 0;
2297 state[lane] = BEFORE_USABLE;
2298 break;
2299 case AT_USABLE:
2300 if (!is_fail) {
2301 ++counter[lane];
2302 if (counter[lane] >= margin) {
2303 state[lane] = AT_MARGIN;
2304 res_low[lane] = val - margin + 1;
2305 break;
2306 }
2307 state[lane] = 1;
2308 break;
2309 }
2310 counter[lane] = 0;
2311 state[lane] = BEFORE_USABLE;
2312 break;
2313 case AT_MARGIN:
2314 if (is_fail) {
2315 state[lane] = COMPLETE;
2316 res_high[lane] = val - 1;
2317 } else {
2318 counter[lane]++;
2319 state[lane] = AT_MARGIN;
2320 if (val == uplimit) {
2321 state[lane] = COMPLETE;
2322 res_high[lane] = uplimit;
2323 }
2324 }
2325 break;
2326 case COMPLETE:
2327 break;
2328 }
2329 }
2330}
2331
2332static void
2333train_ram_at_178(struct raminfo *info, u8 channel, int slot, int rank,
2334 u8 total_rank, u8 reg_178, int first_run, int niter,
2335 timing_bounds_t * timings)
2336{
2337 int lane;
2338 enum state state[8];
2339 u16 count[8];
2340 u8 lower_usable[8];
2341 u8 upper_usable[8];
2342 unsigned short num_sucessfully_checked[8];
2343 u8 secondary_total_rank;
2344 u8 reg1b3;
2345
2346 if (info->populated_ranks_mask[1]) {
2347 if (channel == 1)
2348 secondary_total_rank =
2349 info->populated_ranks[1][0][0] +
2350 info->populated_ranks[1][0][1]
2351 + info->populated_ranks[1][1][0] +
2352 info->populated_ranks[1][1][1];
2353 else
2354 secondary_total_rank = 0;
2355 } else
2356 secondary_total_rank = total_rank;
2357
2358 {
2359 int i;
2360 for (i = 0; i < 8; i++)
2361 state[i] = BEFORE_USABLE;
2362 }
2363
2364 if (!first_run) {
2365 int is_all_ok = 1;
2366 for (lane = 0; lane < 8; lane++)
2367 if (timings[reg_178][channel][slot][rank][lane].
2368 smallest ==
2369 timings[reg_178][channel][slot][rank][lane].
2370 largest) {
2371 timings[reg_178][channel][slot][rank][lane].
2372 smallest = 0;
2373 timings[reg_178][channel][slot][rank][lane].
2374 largest = 0;
2375 is_all_ok = 0;
2376 }
2377 if (is_all_ok) {
2378 int i;
2379 for (i = 0; i < 8; i++)
2380 state[i] = COMPLETE;
2381 }
2382 }
2383
2384 for (reg1b3 = 0; reg1b3 < 0x30 && !validate_state(state); reg1b3++) {
2385 u8 failmask = 0;
2386 write_1d0(reg1b3 ^ 32, 0x1b3, 6, 1);
2387 write_1d0(reg1b3 ^ 32, 0x1a3, 6, 1);
2388 failmask = check_testing(info, total_rank, 0);
2389 write_mchbar32(0xfb0, read_mchbar32(0xfb0) | 0x00030000);
2390 do_fsm(state, count, failmask, 5, 47, lower_usable,
2391 upper_usable, reg1b3);
2392 }
2393
2394 if (reg1b3) {
2395 write_1d0(0, 0x1b3, 6, 1);
2396 write_1d0(0, 0x1a3, 6, 1);
2397 for (lane = 0; lane < 8; lane++) {
2398 if (state[lane] == COMPLETE) {
2399 timings[reg_178][channel][slot][rank][lane].
2400 smallest =
2401 lower_usable[lane] +
2402 (info->training.
2403 lane_timings[0][channel][slot][rank][lane]
2404 & 0x3F) - 32;
2405 timings[reg_178][channel][slot][rank][lane].
2406 largest =
2407 upper_usable[lane] +
2408 (info->training.
2409 lane_timings[0][channel][slot][rank][lane]
2410 & 0x3F) - 32;
2411 }
2412 }
2413 }
2414
2415 if (!first_run) {
2416 for (lane = 0; lane < 8; lane++)
2417 if (state[lane] == COMPLETE) {
2418 write_500(info, channel,
2419 timings[reg_178][channel][slot][rank]
2420 [lane].smallest,
2421 get_timing_register_addr(lane, 0,
2422 slot, rank),
2423 9, 1);
2424 write_500(info, channel,
2425 timings[reg_178][channel][slot][rank]
2426 [lane].smallest +
2427 info->training.
2428 lane_timings[1][channel][slot][rank]
2429 [lane]
2430 -
2431 info->training.
2432 lane_timings[0][channel][slot][rank]
2433 [lane], get_timing_register_addr(lane,
2434 1,
2435 slot,
2436 rank),
2437 9, 1);
2438 num_sucessfully_checked[lane] = 0;
2439 } else
2440 num_sucessfully_checked[lane] = -1;
2441
2442 do {
2443 u8 failmask = 0;
2444 int i;