blob: c967e3979f9ba90128ea27d2dccb6889b469df41 [file] [log] [blame]
Vladimir Serbinenkoc6f6be02013-11-12 22:32:08 +01001/*
2 * This file is part of the coreboot project.
3 *
4 * Copyright (C) 2013 Vladimir Serbinenko.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
/* Please don't remove this.  It's needed to do debugging and reverse
   engineering to support more Nehalem variants in the future. */
23#ifndef REAL
24#define REAL 1
25#endif
26
27#if REAL
28#include <console/console.h>
29#include <string.h>
30#include <arch/hlt.h>
31#include <arch/io.h>
32#include <cpu/x86/msr.h>
33#include <cbmem.h>
34#include <arch/cbfs.h>
35#include <cbfs.h>
36#include <ip_checksum.h>
37#include <pc80/mc146818rtc.h>
38#include <device/pci_def.h>
39#include <arch/cpu.h>
40#include <spd.h>
41#include "raminit.h"
42#include <timestamp.h>
43#include <cpu/x86/mtrr.h>
44#include <cpu/intel/speedstep.h>
45#include <cpu/intel/turbo.h>
46#endif
47
48#if !REAL
49typedef unsigned char u8;
50typedef unsigned short u16;
51typedef unsigned int u32;
52typedef u32 device_t;
53#endif
54
55#include "nehalem.h"
56
57#include "southbridge/intel/ibexpeak/me.h"
58
59#if REAL
60#include <delay.h>
61#endif
62
63#define NORTHBRIDGE PCI_DEV(0, 0, 0)
64#define SOUTHBRIDGE PCI_DEV(0, 0x1f, 0)
65#define GMA PCI_DEV (0, 0x2, 0x0)
66#define HECIDEV PCI_DEV(0, 0x16, 0)
67#define HECIBAR 0x10
68
69#define FOR_ALL_RANKS \
70 for (channel = 0; channel < NUM_CHANNELS; channel++) \
71 for (slot = 0; slot < NUM_SLOTS; slot++) \
72 for (rank = 0; rank < NUM_RANKS; rank++)
73
74#define FOR_POPULATED_RANKS \
75 for (channel = 0; channel < NUM_CHANNELS; channel++) \
76 for (slot = 0; slot < NUM_SLOTS; slot++) \
77 for (rank = 0; rank < NUM_RANKS; rank++) \
78 if (info->populated_ranks[channel][slot][rank])
79
80#define FOR_POPULATED_RANKS_BACKWARDS \
81 for (channel = NUM_CHANNELS - 1; channel >= 0; channel--) \
82 for (slot = 0; slot < NUM_SLOTS; slot++) \
83 for (rank = 0; rank < NUM_RANKS; rank++) \
84 if (info->populated_ranks[channel][slot][rank])
85
/* Per-lane passing-window edges found during training.
   Indexed as [REG_178][CHANNEL][2 * SLOT + RANK][LANE]. */
typedef struct {
	u8 smallest;	/* lower edge of the passing window */
	u8 largest;	/* upper edge of the passing window */
} timing_bounds_t[2][2][2][9];
91
/* Results of DRAM training, cached across S3 suspend/resume. */
struct ram_training {
	/* [TM][CHANNEL][SLOT][RANK][LANE] */
	u16 lane_timings[4][2][2][2][9];
	u16 reg_178;
	u16 reg_10b;

	u8 reg178_center;	/* center of the reg178 passing window */
	u8 reg178_smallest;
	u8 reg178_largest;
	timing_bounds_t timing_bounds[2];
	u16 timing_offset[2][2][2][9];
	u16 timing2_offset[2][2][2][9];
	u16 timing2_bounds[2][2][2][9][2];
	u8 reg274265[2][3];	/* [CHANNEL][REGISTER] */
	u8 reg2ca9_bit0;
	u32 reg_6dc;
	u32 reg_6e8;
};
110
111#if !REAL
112#include "raminit_fake.c"
113#else
114
115#include <lib.h> /* Prototypes */
116
/* Thin typed accessors for MCHBAR (memory controller hub BAR) MMIO
   registers.  ADDR is a byte offset into the MCHBAR window. */
static inline void write_mchbar32(u32 addr, u32 val)
{
	MCHBAR32(addr) = val;
}

static inline void write_mchbar16(u32 addr, u16 val)
{
	MCHBAR16(addr) = val;
}

static inline void write_mchbar8(u32 addr, u8 val)
{
	MCHBAR8(addr) = val;
}


static inline u32 read_mchbar32(u32 addr)
{
	return MCHBAR32(addr);
}

static inline u16 read_mchbar16(u32 addr)
{
	return MCHBAR16(addr);
}

static inline u8 read_mchbar8(u32 addr)
{
	return MCHBAR8(addr);
}

/* Identical to read_mchbar8(); NOTE(review): the separate name presumably
   marks reads that must be kept distinct when replaying/faking register
   traces in the !REAL build — confirm against raminit_fake.c. */
static inline u8 read_mchbar8_bypass(u32 addr)
{
	return MCHBAR8(addr);
}
152
/* Flush the cache line containing ADDR from the cache hierarchy. */
static void clflush(u32 addr)
{
	asm volatile ("clflush (%0)"::"r" (addr));
}
157
/* 128-bit value as two 64-bit halves. */
typedef struct _u128 {
	u64 lo;
	u64 hi;
} u128;

/* Read 16 bytes at ADDR with a single 128-bit (movdqa) load, saving and
   restoring %xmm0 around the access.  OUT[0] gets the low qword, OUT[1]
   the high.  ADDR must be 16-byte aligned (movdqa requirement). */
static void read128(u32 addr, u64 * out)
{
	u128 ret;
	u128 stor;
	asm volatile ("movdqu %%xmm0, %0\n"
		      "movdqa (%2), %%xmm0\n"
		      "movdqu %%xmm0, %1\n"
		      "movdqu %0, %%xmm0":"+m" (stor), "=m"(ret):"r"(addr));
	out[0] = ret.lo;
	out[1] = ret.hi;
}
174
175#endif
176
/* OK */
/* Write VAL (BITS wide, tagged with FLAG) to indirect register ADDR through
   the MCHBAR 0x1d0/0x1d4 window.  Bit 23 of 0x1d0 is polled as a busy flag
   before and after issuing the command (0x40000000 = write command). */
static void write_1d0(u32 val, u16 addr, int bits, int flag)
{
	write_mchbar32(0x1d0, 0);
	while (read_mchbar32(0x1d0) & 0x800000) ;
	write_mchbar32(0x1d4,
		       (val & ((1 << bits) - 1)) | (2 << bits) | (flag << bits));
	write_mchbar32(0x1d0, 0x40000000 | addr);
	while (read_mchbar32(0x1d0) & 0x800000) ;
}
188
/* OK */
/* Read SPLIT low bits of indirect register ADDR through the MCHBAR
   0x1d0/0x1d8 window (0x80000000 = read command; the effective address is
   biased by a field from register 0x246).  The two dummy writes to 0x33d
   afterwards are part of the observed hardware sequence. */
static u16 read_1d0(u16 addr, int split)
{
	u32 val;
	write_mchbar32(0x1d0, 0);
	while (read_mchbar32(0x1d0) & 0x800000) ;
	write_mchbar32(0x1d0,
		       0x80000000 | (((read_mchbar8(0x246) >> 2) & 3) +
				     0x361 - addr));
	while (read_mchbar32(0x1d0) & 0x800000) ;
	val = read_mchbar32(0x1d8);
	write_1d0(0, 0x33d, 0, 0);
	write_1d0(0, 0x33d, 0, 0);
	val &= ((1 << split) - 1);
	// printk (BIOS_ERR, "R1D0C [%x] => %x\n", addr, val);
	return val;
}
206
/* Store fence; compiled out in the !REAL (simulation/RE) build. */
static void sfence(void)
{
#if REAL
	asm volatile ("sfence");
#endif
}
213
214static inline u16 get_lane_offset(int slot, int rank, int lane)
215{
216 return 0x124 * lane + ((lane & 4) ? 0x23e : 0) + 11 * rank + 22 * slot -
217 0x452 * (lane == 8);
218}
219
220static inline u16 get_timing_register_addr(int lane, int tm, int slot, int rank)
221{
222 const u16 offs[] = { 0x1d, 0xa8, 0xe6, 0x5c };
223 return get_lane_offset(slot, rank, lane) + offs[(tm + 3) % 4];
224}
225
226#if REAL
227static u32 gav_real(int line, u32 in)
228{
229 // printk (BIOS_DEBUG, "%d: GAV: %x\n", line, in);
230 return in;
231}
232
233#define gav(x) gav_real (__LINE__, (x))
234#endif
/* All state gathered and derived during raminit. */
struct raminfo {
	u16 clock_speed_index;	/* clock_speed (REAL, not DDR) / 133.(3) - 3 */
	u16 fsb_frequency;	/* in 1.(1)/2 MHz.  */
	u8 is_x16_module[2][2];	/* [CHANNEL][SLOT] */
	u8 density[2][2];	/* [CHANNEL][SLOT] */
	u8 populated_ranks[2][2][2];	/* [CHANNEL][SLOT][RANK] */
	int rank_start[2][2][2];
	u8 cas_latency;
	/* Board-specific lane delays (source not visible here). */
	u8 board_lane_delay[9];
	u8 use_ecc;
	u8 revision;		/* northbridge revision ID */
	u8 max_supported_clock_speed_index;
	u8 uma_enabled;
	u8 spd[2][2][151];	/* [CHANNEL][SLOT][BYTE] */
	u8 silicon_revision;
	u8 populated_ranks_mask[2];	/* bit (2*slot + rank) per channel */
	u8 max_slots_used_in_channel;
	u8 mode4030[2];
	u16 avg4044[2];
	u16 max4048[2];
	unsigned total_memory_mb;
	unsigned interleaved_part_mb;
	unsigned non_interleaved_part_mb;

	u32 heci_bar;
	u64 heci_uma_addr;
	unsigned memory_reserved_for_heci_mb;

	struct ram_training training;
	/* Last command issued per channel via the 0x500 window (used to
	   detect a read->write transition in write_500()). */
	u32 last_500_command[2];

	u32 delay46_ps[2];
	u32 delay54_ps[2];
	u8 revision_flag_1;
	u8 some_delay_1_cycle_floor;
	u8 some_delay_2_halfcycles_ceil;
	u8 some_delay_3_ps_rounded;

	/* Training results restored from CBMEM on S3 resume, else NULL. */
	const struct ram_training *cached_training;
};
275
276static void
277write_500(struct raminfo *info, int channel, u32 val, u16 addr, int bits,
278 int flag);
279
/* OK */
/* Per-channel analogue of read_1d0(): read SPLIT low bits of indirect
   register ADDR through the channel's 0x500/0x508 window (channel stride
   is 1 << 10).  Records the read command so the next write_500() can
   insert the required dummy write. */
static u16
read_500(struct raminfo *info, int channel, u16 addr, int split)
{
	u32 val;
	info->last_500_command[channel] = 0x80000000;
	write_mchbar32(0x500 + (channel << 10), 0);
	while (read_mchbar32(0x500 + (channel << 10)) & 0x800000) ;
	write_mchbar32(0x500 + (channel << 10),
		       0x80000000 |
		       (((read_mchbar8(0x246 + (channel << 10)) >> 2) &
			 3) + 0xb88 - addr));
	while (read_mchbar32(0x500 + (channel << 10)) & 0x800000) ;
	val = read_mchbar32(0x508 + (channel << 10));
	return val & ((1 << split) - 1);
}
296
/* OK */
/* Per-channel analogue of write_1d0(): write VAL (BITS wide, tagged with
   FLAG) to indirect register ADDR through the channel's 0x500/0x504 window.
   If the previous 0x500 command was a read, a dummy write to 0xb61 is
   issued first (observed hardware requirement). */
static void
write_500(struct raminfo *info, int channel, u32 val, u16 addr, int bits,
	  int flag)
{
	if (info->last_500_command[channel] == 0x80000000) {
		info->last_500_command[channel] = 0x40000000;
		write_500(info, channel, 0, 0xb61, 0, 0);
	}
	write_mchbar32(0x500 + (channel << 10), 0);
	while (read_mchbar32(0x500 + (channel << 10)) & 0x800000) ;
	write_mchbar32(0x504 + (channel << 10),
		       (val & ((1 << bits) - 1)) | (2 << bits) | (flag << bits));
	write_mchbar32(0x500 + (channel << 10), 0x40000000 | addr);
	while (read_mchbar32(0x500 + (channel << 10)) & 0x800000) ;
}
314
/* Simple read/write test of the first 256 bytes of RANK's address window
   (rank windows are 256 MiB apart).  Writes a fixed per-byte pattern
   derived from MASK, reads it back, and returns an 8-bit OK mask: a clear
   bit means the corresponding byte lane within the two checked dwords
   mismatched.  0xff = all lanes passed. */
static int rw_test(int rank)
{
	const u32 mask = 0xf00fc33c;
	int ok = 0xff;
	int i;
	/* Scrub then read back the region (warm-up / settle accesses). */
	for (i = 0; i < 64; i++)
		write32((rank << 28) | (i << 2), 0);
	sfence();
	for (i = 0; i < 64; i++)
		gav(read32((rank << 28) | (i << 2)));
	sfence();
	/* Write the test pattern: qword i is all-ones iff bit i of MASK. */
	for (i = 0; i < 32; i++) {
		u32 pat = (((mask >> i) & 1) ? 0xffffffff : 0);
		write32((rank << 28) | (i << 3), pat);
		write32((rank << 28) | (i << 3) | 4, pat);
	}
	sfence();
	/* Verify byte-by-byte; low dword faults clear bits 0-3, high dword
	   faults clear bits 4-7. */
	for (i = 0; i < 32; i++) {
		u8 pat = (((mask >> i) & 1) ? 0xff : 0);
		int j;
		u32 val;
		gav(val = read32((rank << 28) | (i << 3)));
		for (j = 0; j < 4; j++)
			if (((val >> (j * 8)) & 0xff) != pat)
				ok &= ~(1 << j);
		gav(val = read32((rank << 28) | (i << 3) | 4));
		for (j = 0; j < 4; j++)
			if (((val >> (j * 8)) & 0xff) != pat)
				ok &= ~(16 << j);
	}
	sfence();
	/* Scrub again so no test pattern is left behind. */
	for (i = 0; i < 64; i++)
		write32((rank << 28) | (i << 2), 0);
	sfence();
	for (i = 0; i < 64; i++)
		gav(read32((rank << 28) | (i << 2)));

	return ok;
}
354
/* Program timing registers 2 and 3 for all 8 data lanes of the given
   (CHANNEL, SLOT, RANK) from the trained values, offset by BASE. */
static void
program_timings(struct raminfo *info, u16 base, int channel, int slot, int rank)
{
	int lane;
	for (lane = 0; lane < 8; lane++) {
		write_500(info, channel,
			  base +
			  info->training.
			  lane_timings[2][channel][slot][rank][lane],
			  get_timing_register_addr(lane, 2, slot, rank), 9, 0);
		write_500(info, channel,
			  base +
			  info->training.
			  lane_timings[3][channel][slot][rank][lane],
			  get_timing_register_addr(lane, 3, slot, rank), 9, 0);
	}
}
372
/* Program the per-channel 0x26c/0x268/0x2b9 register triple with the
   timing value SI (purpose of the registers not documented publicly). */
static void write_26c(int channel, u16 si)
{
	write_mchbar32(0x26c + (channel << 10), 0x03243f35);
	write_mchbar32(0x268 + (channel << 10), 0xcfc00000 | (si << 9));
	write_mchbar16(0x2b9 + (channel << 10), si);
}
379
/* Issue a command via the per-channel 0x580 register: write the command
   word, pulse bit 0 to start, busy-wait for completion (bit 16), then
   clear the start bit.  Returns the completed register value. */
static u32 get_580(int channel, u8 addr)
{
	u32 ret;
	gav(read_1d0(0x142, 3));
	write_mchbar8(0x5ff, 0x0);	/* OK */
	write_mchbar8(0x5ff, 0x80);	/* OK */
	write_mchbar32(0x580 + (channel << 10), 0x8493c012 | addr);
	write_mchbar8(0x580 + (channel << 10),
		      read_mchbar8(0x580 + (channel << 10)) | 1);
	while (!((ret = read_mchbar32(0x580 + (channel << 10))) & 0x10000)) ;
	write_mchbar8(0x580 + (channel << 10),
		      read_mchbar8(0x580 + (channel << 10)) & ~1);
	return ret;
}
394
395const int cached_config = 0;
396
397#define NUM_CHANNELS 2
398#define NUM_SLOTS 2
399#define NUM_RANKS 2
400#define RANK_SHIFT 28
401#define CHANNEL_SHIFT 10
402
403#include "raminit_tables.c"
404
/* Training sequence "9" for one (CHANNEL, SLOT, RANK): reprogram lane
   timing registers 0-2 from the current training state, trigger 0x580
   commands, then read back timing register 2 and derive register 3 from
   it (+0x20). */
static void seq9(struct raminfo *info, int channel, int slot, int rank)
{
	int i, lane;

	/* Re-write timing registers 1 and 2 for all lanes. */
	for (i = 0; i < 2; i++)
		for (lane = 0; lane < 8; lane++)
			write_500(info, channel,
				  info->training.lane_timings[i +
							      1][channel][slot]
				  [rank][lane], get_timing_register_addr(lane,
									 i + 1,
									 slot,
									 rank),
				  9, 0);

	write_1d0(1, 0x103, 6, 1);
	/* Timing register 0. */
	for (lane = 0; lane < 8; lane++)
		write_500(info, channel,
			  info->training.
			  lane_timings[0][channel][slot][rank][lane],
			  get_timing_register_addr(lane, 0, slot, rank), 9, 0);

	/* Re-write registers 1 and 2 again, firing a 0x580 command after
	   each set. */
	for (i = 0; i < 2; i++) {
		for (lane = 0; lane < 8; lane++)
			write_500(info, channel,
				  info->training.lane_timings[i +
							      1][channel][slot]
				  [rank][lane], get_timing_register_addr(lane,
									 i + 1,
									 slot,
									 rank),
				  9, 0);
		gav(get_580(channel, ((i + 1) << 2) | (rank << 5)));
	}

	gav(read_1d0(0x142, 3));	// = 0x10408118
	write_mchbar8(0x5ff, 0x0);	/* OK */
	write_mchbar8(0x5ff, 0x80);	/* OK */
	write_1d0(0x2, 0x142, 3, 1);
	/* Read back register 2 as trained by hardware; register 3 is
	   defined as register 2 plus 0x20. */
	for (lane = 0; lane < 8; lane++) {
		// printk (BIOS_ERR, "before: %x\n", info->training.lane_timings[2][channel][slot][rank][lane]);
		info->training.lane_timings[2][channel][slot][rank][lane] =
		    read_500(info, channel,
			     get_timing_register_addr(lane, 2, slot, rank), 9);
		//printk (BIOS_ERR, "after: %x\n", info->training.lane_timings[2][channel][slot][rank][lane]);
		info->training.lane_timings[3][channel][slot][rank][lane] =
		    info->training.lane_timings[2][channel][slot][rank][lane] +
		    0x20;
	}
}
455
456static int count_ranks_in_channel(struct raminfo *info, int channel)
457{
458 int slot, rank;
459 int res = 0;
460 for (slot = 0; slot < NUM_SLOTS; slot++)
461 for (rank = 0; rank < NUM_SLOTS; rank++)
462 res += info->populated_ranks[channel][slot][rank];
463 return res;
464}
465
/* Full per-rank configuration: run training sequence 9, program timings
   at offset 0x80, run read/write tests (skipped on S3 resume, where RAM
   contents must be preserved), then program final timings.  ADD maps the
   rank to its linear address-window index (channel 0 ranks are placed
   after channel 1's). */
static void
config_rank(struct raminfo *info, int s3resume, int channel, int slot, int rank)
{
	int add;

	write_1d0(0, 0x178, 7, 1);
	seq9(info, channel, slot, rank);
	program_timings(info, 0x80, channel, slot, rank);

	if (channel == 0)
		add = count_ranks_in_channel(info, 1);
	else
		add = 0;
	if (!s3resume)
		gav(rw_test(rank + add));
	program_timings(info, 0x00, channel, slot, rank);
	if (!s3resume)
		gav(rw_test(rank + add));
	if (!s3resume)
		gav(rw_test(rank + add));
	write_1d0(0, 0x142, 3, 1);
	write_1d0(0, 0x103, 6, 1);

	gav(get_580(channel, 0xc | (rank << 5)));
	gav(read_1d0(0x142, 3));

	write_mchbar8(0x5ff, 0x0);	/* OK */
	write_mchbar8(0x5ff, 0x80);	/* OK */
}
495
/* Write VAL to the related indirect registers 0x4cf/0x659/0x697 of
   CHANNEL; each register is read first (the reads appear to be part of
   the required access sequence — values noted from a trace). */
static void set_4cf(struct raminfo *info, int channel, u8 val)
{
	gav(read_500(info, channel, 0x4cf, 4));	// = 0xc2300cf9
	write_500(info, channel, val, 0x4cf, 4, 1);
	gav(read_500(info, channel, 0x659, 4));	// = 0x80300839
	write_500(info, channel, val, 0x659, 4, 1);
	gav(read_500(info, channel, 0x697, 4));	// = 0x80300839
	write_500(info, channel, val, 0x697, 4, 1);
}
505
/* Program the per-channel 0x32c/0x334/0x33c/0x344/0x34a register groups
   (stride 0x44 per group J).  If ZERO is set, groups are cleared instead
   of programmed with the pattern constants.  Kicks the engine via 0x130
   and waits for it to go idle.  The vd8[] capture of 0x138/0x13c reads is
   kept only for tracing (values are not used). */
static void set_334(int zero)
{
	int j, k, channel;
	const u32 val3[] = { 0x2a2b2a2b, 0x26272627, 0x2e2f2e2f, 0x2a2b };
	u32 vd8[2][16];

	for (channel = 0; channel < NUM_CHANNELS; channel++) {
		for (j = 0; j < 4; j++) {
			u32 a = (j == 1) ? 0x29292929 : 0x31313131;
			/* Group 3 registers are only 16 bits wide. */
			u32 lmask = (j == 3) ? 0xffff : 0xffffffff;
			u16 c;
			if ((j == 0 || j == 3) && zero)
				c = 0;
			else if (j == 3)
				c = 0x5f;
			else
				c = 0x5f5f;

			for (k = 0; k < 2; k++) {
				write_mchbar32(0x138 + 8 * k,
					       (channel << 26) | (j << 24));
				gav(vd8[1][(channel << 3) | (j << 1) | k] =
				    read_mchbar32(0x138 + 8 * k));
				gav(vd8[0][(channel << 3) | (j << 1) | k] =
				    read_mchbar32(0x13c + 8 * k));
			}

			write_mchbar32(0x334 + (channel << 10) + (j * 0x44),
				       zero ? 0 : val3[j]);
			write_mchbar32(0x32c + (channel << 10) + (j * 0x44),
				       zero ? 0 : (0x18191819 & lmask));
			write_mchbar16(0x34a + (channel << 10) + (j * 0x44), c);
			write_mchbar32(0x33c + (channel << 10) + (j * 0x44),
				       zero ? 0 : (a & lmask));
			write_mchbar32(0x344 + (channel << 10) + (j * 0x44),
				       zero ? 0 : (a & lmask));
		}
	}

	/* Start and wait for completion. */
	write_mchbar32(0x130, read_mchbar32(0x130) | 1);	/* OK */
	while (read_mchbar8(0x130) & 1) ;	/* OK */
}
548
549static void rmw_1d0(u16 addr, u32 and, u32 or, int split, int flag)
550{
551 u32 v;
552 v = read_1d0(addr, split);
553 write_1d0((v & and) | or, addr, split, flag);
554}
555
556static int find_highest_bit_set(u16 val)
557{
558 int i;
559 for (i = 15; i >= 0; i--)
560 if (val & (1 << i))
561 return i;
562 return -1;
563}
564
565static int find_lowest_bit_set32(u32 val)
566{
567 int i;
568 for (i = 0; i < 32; i++)
569 if (val & (1 << i))
570 return i;
571 return -1;
572}
573
574#define max(a,b) (((a) > (b)) ? (a) : (b))
575#define min(a,b) (((a) < (b)) ? (a) : (b))
576
/* Byte offsets into the DDR3 SPD EEPROM image (JEDEC SPD layout). */
enum {
	DEVICE_TYPE = 2,
	MODULE_TYPE = 3,
	DENSITY = 4,
	RANKS_AND_DQ = 7,
	MEMORY_BUS_WIDTH = 8,
	TIMEBASE_DIVIDEND = 10,	/* medium timebase, dividend (ps) */
	TIMEBASE_DIVISOR = 11,	/* medium timebase, divisor */
	CYCLETIME = 12,		/* tCKmin in timebase units */

	CAS_LATENCIES_LSB = 14,
	CAS_LATENCIES_MSB = 15,
	CAS_LATENCY_TIME = 16,	/* tAAmin in timebase units */
	THERMAL_AND_REFRESH = 31,
	REFERENCE_RAW_CARD_USED = 62,
	RANK1_ADDRESS_MAPPING = 63	/* bit 0: rank 1 is address-mirrored */
};
594
/* Derive the operating clock index and CAS latency from the SPD data of
   all populated DIMMs: intersect the supported CAS latencies, take the
   worst-case tCK/tAA, then pick the fastest clock and smallest CL that
   every module supports.  Dies if no valid configuration exists. */
static void calculate_timings(struct raminfo *info)
{
	unsigned cycletime;
	unsigned cas_latency_time;
	unsigned supported_cas_latencies;
	unsigned channel, slot;
	unsigned clock_speed_index;
	unsigned min_cas_latency;
	unsigned cas_latency;
	unsigned max_clock_index;

	/* Find common CAS latency */
	supported_cas_latencies = 0x3fe;
	for (channel = 0; channel < NUM_CHANNELS; channel++)
		for (slot = 0; slot < NUM_SLOTS; slot++)
			if (info->populated_ranks[channel][slot][0])
				supported_cas_latencies &=
				    2 *
				    (info->
				     spd[channel][slot][CAS_LATENCIES_LSB] |
				     (info->
				      spd[channel][slot][CAS_LATENCIES_MSB] <<
				      8));

	max_clock_index = min(3, info->max_supported_clock_speed_index);

	cycletime = min_cycletime[max_clock_index];
	cas_latency_time = min_cas_latency_time[max_clock_index];

	/* Worst-case (largest) tCK and tAA over all modules, in ps. */
	for (channel = 0; channel < NUM_CHANNELS; channel++)
		for (slot = 0; slot < NUM_SLOTS; slot++)
			if (info->populated_ranks[channel][slot][0]) {
				unsigned timebase;
				timebase =
				    1000 *
				    info->
				    spd[channel][slot][TIMEBASE_DIVIDEND] /
				    info->spd[channel][slot][TIMEBASE_DIVISOR];
				cycletime =
				    max(cycletime,
					timebase *
					info->spd[channel][slot][CYCLETIME]);
				cas_latency_time =
				    max(cas_latency_time,
					timebase *
					info->
					spd[channel][slot][CAS_LATENCY_TIME]);
			}
	/* Map the cycle time to the fastest clock index that satisfies it.
	   NOTE(review): if cycletime > min_cycletime[0] on the first pass,
	   the unsigned clock_speed_index wraps on the decrement and indexes
	   out of bounds — presumably never hit with supported modules, but
	   worth confirming. */
	for (clock_speed_index = 0; clock_speed_index < 3; clock_speed_index++) {
		if (cycletime == min_cycletime[clock_speed_index])
			break;
		if (cycletime > min_cycletime[clock_speed_index]) {
			clock_speed_index--;
			cycletime = min_cycletime[clock_speed_index];
			break;
		}
	}
	/* Smallest CL (in cycles) meeting tAA, rounded up. */
	min_cas_latency = (cas_latency_time + cycletime - 1) / cycletime;
	cas_latency = 0;
	/* CAS bitmask is biased: bit n means CL (n + 3) is supported. */
	while (supported_cas_latencies) {
		cas_latency = find_highest_bit_set(supported_cas_latencies) + 3;
		if (cas_latency <= min_cas_latency)
			break;
		supported_cas_latencies &=
		    ~(1 << find_highest_bit_set(supported_cas_latencies));
	}

	if (cas_latency != min_cas_latency && clock_speed_index)
		clock_speed_index--;

	if (cas_latency * min_cycletime[clock_speed_index] > 20000)
		die("Couldn't configure DRAM");
	info->clock_speed_index = clock_speed_index;
	info->cas_latency = cas_latency;
}
670
/* Program the base (pre-training) lane timing registers for every
   populated rank from the u8_FFFD*/u16_ff* lookup tables, keyed on
   silicon revision, clock speed, DIMM raw-card type and rank layout.
   "Extended silicon revision" 4 marks rev-0 silicon with a type-3
   (SO-DIMM) module present. */
static void program_base_timings(struct raminfo *info)
{
	unsigned channel;
	unsigned slot, rank, lane;
	unsigned extended_silicon_revision;
	int i;

	extended_silicon_revision = info->silicon_revision;
	if (info->silicon_revision == 0)
		for (channel = 0; channel < NUM_CHANNELS; channel++)
			for (slot = 0; slot < NUM_SLOTS; slot++)
				if ((info->
				     spd[channel][slot][MODULE_TYPE] & 0xF) ==
				    3)
					extended_silicon_revision = 4;

	for (channel = 0; channel < NUM_CHANNELS; channel++) {
		/* NOTE(review): rank loop is bounded by NUM_SLOTS; NUM_RANKS
		   is meant (both are 2 today, so behavior is unaffected). */
		for (slot = 0; slot < NUM_SLOTS; slot++)
			for (rank = 0; rank < NUM_SLOTS; rank++) {
				int card_timing_2;
				if (!info->populated_ranks[channel][slot][rank])
					continue;

				for (lane = 0; lane < 9; lane++) {
					int tm_reg;
					int card_timing;

					/* Raw-card specific adjustment for
					   type-3 (SO-DIMM) modules. */
					card_timing = 0;
					if ((info->
					     spd[channel][slot][MODULE_TYPE] &
					     0xF) == 3) {
						int reference_card;
						reference_card =
						    info->
						    spd[channel][slot]
						    [REFERENCE_RAW_CARD_USED] &
						    0x1f;
						if (reference_card == 3)
							card_timing =
							    u16_ffd1188[0][lane]
							    [info->
							     clock_speed_index];
						if (reference_card == 5)
							card_timing =
							    u16_ffd1188[1][lane]
							    [info->
							     clock_speed_index];
					}

					info->training.
					    lane_timings[0][channel][slot][rank]
					    [lane] =
					    u8_FFFD1218[info->
							clock_speed_index];
					info->training.
					    lane_timings[1][channel][slot][rank]
					    [lane] = 256;

					/* Timing regs 2 and 3 start from the
					   table value plus measured offsets. */
					for (tm_reg = 2; tm_reg < 4; tm_reg++)
						info->training.
						    lane_timings[tm_reg]
						    [channel][slot][rank][lane]
						    =
						    u8_FFFD1240[channel]
						    [extended_silicon_revision]
						    [lane][2 * slot +
							   rank][info->
								 clock_speed_index]
						    + info->max4048[channel]
						    +
						    u8_FFFD0C78[channel]
						    [extended_silicon_revision]
						    [info->
						     mode4030[channel]][slot]
						    [rank][info->
							   clock_speed_index]
						    + card_timing;
					for (tm_reg = 0; tm_reg < 4; tm_reg++)
						write_500(info, channel,
							  info->training.
							  lane_timings[tm_reg]
							  [channel][slot][rank]
							  [lane],
							  get_timing_register_addr
							  (lane, tm_reg, slot,
							   rank), 9, 0);
				}

				/* Second raw-card adjustment: only for
				   ext rev 4 without both even ranks set. */
				card_timing_2 = 0;
				if (!(extended_silicon_revision != 4
				      || (info->
					  populated_ranks_mask[channel] & 5) ==
				      5)) {
					if ((info->
					     spd[channel][slot]
					     [REFERENCE_RAW_CARD_USED] & 0x1F)
					    == 3)
						card_timing_2 =
						    u16_FFFE0EB8[0][info->
								    clock_speed_index];
					if ((info->
					     spd[channel][slot]
					     [REFERENCE_RAW_CARD_USED] & 0x1F)
					    == 5)
						card_timing_2 =
						    u16_FFFE0EB8[1][info->
								    clock_speed_index];
				}

				for (i = 0; i < 3; i++)
					write_500(info, channel,
						  (card_timing_2 +
						   info->max4048[channel]
						   +
						   u8_FFFD0EF8[channel]
						   [extended_silicon_revision]
						   [info->
						    mode4030[channel]][info->
								       clock_speed_index]),
						  u16_fffd0c50[i][slot][rank],
						  8, 1);
				write_500(info, channel,
					  (info->max4048[channel] +
					   u8_FFFD0C78[channel]
					   [extended_silicon_revision][info->
								       mode4030
								       [channel]]
					   [slot][rank][info->
							clock_speed_index]),
					  u16_fffd0c70[slot][rank], 7, 1);
			}
		if (!info->populated_ranks_mask[channel])
			continue;
		/* Channel-wide registers. */
		for (i = 0; i < 3; i++)
			write_500(info, channel,
				  (info->max4048[channel] +
				   info->avg4044[channel]
				   +
				   u8_FFFD17E0[channel]
				   [extended_silicon_revision][info->
							       mode4030
							       [channel]][info->
									  clock_speed_index]),
				  u16_fffd0c68[i], 8, 1);
	}
}
817
/* FSB clock period in ps (fsb_frequency is in 10/9 / 2 MHz units — see
   struct raminfo). */
static unsigned int fsbcycle_ps(struct raminfo *info)
{
	return 900000 / info->fsb_frequency;
}

/* The time of DDR transfer in ps. */
static unsigned int halfcycle_ps(struct raminfo *info)
{
	return 3750 / (info->clock_speed_index + 3);
}

/* The time of clock cycle in ps. */
static unsigned int cycle_ps(struct raminfo *info)
{
	return 2 * halfcycle_ps(info);
}

/* Frequency in 1.(1)=10/9 MHz units. */
static unsigned frequency_11(struct raminfo *info)
{
	return (info->clock_speed_index + 3) * 120;
}

/* Frequency in 0.1 MHz units. */
static unsigned frequency_01(struct raminfo *info)
{
	return 100 * frequency_11(info) / 9;
}

/* Convert picoseconds to DDR half-cycles (rounded down). */
static unsigned ps_to_halfcycles(struct raminfo *info, unsigned int ps)
{
	return (frequency_11(info) * 2) * ps / 900000;
}

/* Convert nanoseconds to full clock cycles (rounded down). */
static unsigned ns_to_cycles(struct raminfo *info, unsigned int ns)
{
	return (frequency_11(info)) * ns / 900;
}
856
/* Derive secondary timing parameters ("some_delay_*", mode4030, avg4044,
   max4048) from silicon revision, clock speed and rank population.  The
   delay names are placeholders from reverse engineering; the arithmetic
   mirrors the vendor BIOS trace. */
static void compute_derived_timings(struct raminfo *info)
{
	unsigned channel, slot, rank;
	int extended_silicon_revision;
	int some_delay_1_ps;
	int some_delay_2_ps;
	int some_delay_2_halfcycles_ceil;
	int some_delay_2_halfcycles_floor;
	int some_delay_3_ps;
	int some_delay_3_halfcycles;
	int some_delay_3_ps_rounded;
	int some_delay_1_cycle_ceil;
	int some_delay_1_cycle_floor;

	some_delay_3_halfcycles = 0;
	some_delay_3_ps_rounded = 0;
	extended_silicon_revision = info->silicon_revision;
	/* Rev-0 silicon with a type-3 (SO-DIMM) module => ext revision 4. */
	if (!info->silicon_revision)
		for (channel = 0; channel < NUM_CHANNELS; channel++)
			for (slot = 0; slot < NUM_SLOTS; slot++)
				if ((info->
				     spd[channel][slot][MODULE_TYPE] & 0xF) ==
				    3)
					extended_silicon_revision = 4;
	if (info->board_lane_delay[7] < 5)
		info->board_lane_delay[7] = 5;
	/* revision_flag_1 is set only for silicon rev 0/1 with
	   northbridge revision >= 0x10. */
	info->revision_flag_1 = 2;
	if (info->silicon_revision == 2 || info->silicon_revision == 3)
		info->revision_flag_1 = 0;
	if (info->revision < 16)
		info->revision_flag_1 = 0;

	if (info->revision < 8)
		info->revision_flag_1 = 0;
	if (info->revision >= 8 && (info->silicon_revision == 0
				    || info->silicon_revision == 1))
		some_delay_2_ps = 735;
	else
		some_delay_2_ps = 750;

	if (info->revision >= 0x10 && (info->silicon_revision == 0
				       || info->silicon_revision == 1))
		some_delay_1_ps = 3929;
	else
		some_delay_1_ps = 3490;

	/* floor/ceil of delay 1 in clock cycles; on an exact multiple the
	   floor is decremented instead (as in the vendor trace). */
	some_delay_1_cycle_floor = some_delay_1_ps / cycle_ps(info);
	some_delay_1_cycle_ceil = some_delay_1_ps / cycle_ps(info);
	if (some_delay_1_ps % cycle_ps(info))
		some_delay_1_cycle_ceil++;
	else
		some_delay_1_cycle_floor--;
	info->some_delay_1_cycle_floor = some_delay_1_cycle_floor;
	if (info->revision_flag_1)
		some_delay_2_ps = halfcycle_ps(info) >> 6;
	some_delay_2_ps +=
	    max(some_delay_1_ps - 30,
		2 * halfcycle_ps(info) * (some_delay_1_cycle_ceil - 1) + 1000) +
	    375;
	some_delay_3_ps =
	    halfcycle_ps(info) - some_delay_2_ps % halfcycle_ps(info);
	if (info->revision_flag_1) {
		if (some_delay_3_ps < 150)
			some_delay_3_halfcycles = 0;
		else
			some_delay_3_halfcycles =
			    (some_delay_3_ps << 6) / halfcycle_ps(info);
		some_delay_3_ps_rounded =
		    halfcycle_ps(info) * some_delay_3_halfcycles >> 6;
	}
	some_delay_2_halfcycles_ceil =
	    (some_delay_2_ps + halfcycle_ps(info) - 1) / halfcycle_ps(info) -
	    2 * (some_delay_1_cycle_ceil - 1);
	if (info->revision_flag_1 && some_delay_3_ps < 150)
		some_delay_2_halfcycles_ceil++;
	some_delay_2_halfcycles_floor = some_delay_2_halfcycles_ceil;
	if (info->revision < 0x10)
		some_delay_2_halfcycles_floor =
		    some_delay_2_halfcycles_ceil - 1;
	if (!info->revision_flag_1)
		some_delay_2_halfcycles_floor++;
	info->some_delay_2_halfcycles_ceil = some_delay_2_halfcycles_ceil;
	info->some_delay_3_ps_rounded = some_delay_3_ps_rounded;
	/* Two slots count as "used" only when both slots of one channel
	   are populated. */
	if ((info->populated_ranks[0][0][0] && info->populated_ranks[0][1][0])
	    || (info->populated_ranks[1][0][0]
		&& info->populated_ranks[1][1][0]))
		info->max_slots_used_in_channel = 2;
	else
		info->max_slots_used_in_channel = 1;
	for (channel = 0; channel < 2; channel++)
		write_mchbar32(0x244 + (channel << 10),
			       ((info->revision < 8) ? 1 : 0x200)
			       | ((2 - info->max_slots_used_in_channel) << 17) |
			       (channel << 21) | (info->
						  some_delay_1_cycle_floor <<
						  18) | 0x9510);
	if (info->max_slots_used_in_channel == 1) {
		info->mode4030[0] = (count_ranks_in_channel(info, 0) == 2);
		info->mode4030[1] = (count_ranks_in_channel(info, 1) == 2);
	} else {
		info->mode4030[0] = ((count_ranks_in_channel(info, 0) == 1) || (count_ranks_in_channel(info, 0) == 2)) ? 2 : 3;	/* 2 if 1 or 2 ranks */
		info->mode4030[1] = ((count_ranks_in_channel(info, 1) == 1)
				     || (count_ranks_in_channel(info, 1) ==
					 2)) ? 2 : 3;
	}
	/* Per channel: average the table value "a" over first ranks
	   (avg4044) and track the worst-case headroom of unk1 over the
	   derived thresholds (max4048). */
	for (channel = 0; channel < NUM_CHANNELS; channel++) {
		int max_of_unk;
		int min_of_unk_2;

		int i, count;
		int sum;

		if (!info->populated_ranks_mask[channel])
			continue;

		max_of_unk = 0;
		min_of_unk_2 = 32767;

		sum = 0;
		count = 0;
		for (i = 0; i < 3; i++) {
			int unk1;
			if (info->revision < 8)
				unk1 =
				    u8_FFFD1891[0][channel][info->
							    clock_speed_index]
				    [i];
			else if (!
				 (info->revision >= 0x10
				  || info->revision_flag_1))
				unk1 =
				    u8_FFFD1891[1][channel][info->
							    clock_speed_index]
				    [i];
			else
				unk1 = 0;
			for (slot = 0; slot < NUM_SLOTS; slot++)
				for (rank = 0; rank < NUM_RANKS; rank++) {
					int a = 0;
					int b = 0;

					if (!info->
					    populated_ranks[channel][slot]
					    [rank])
						continue;
					if (extended_silicon_revision == 4
					    && (info->
						populated_ranks_mask[channel] &
						5) != 5) {
						if ((info->
						     spd[channel][slot]
						     [REFERENCE_RAW_CARD_USED] &
						     0x1F) == 3) {
							a = u16_ffd1178[0]
							    [info->
							     clock_speed_index];
							b = u16_fe0eb8[0][info->
									  clock_speed_index];
						} else
						    if ((info->
							 spd[channel][slot]
							 [REFERENCE_RAW_CARD_USED]
							 & 0x1F) == 5) {
							a = u16_ffd1178[1]
							    [info->
							     clock_speed_index];
							b = u16_fe0eb8[1][info->
									  clock_speed_index];
						}
					}
					min_of_unk_2 = min(min_of_unk_2, a);
					min_of_unk_2 = min(min_of_unk_2, b);
					if (rank == 0) {
						sum += a;
						count++;
					}
					{
						int t;
						t = b +
						    u8_FFFD0EF8[channel]
						    [extended_silicon_revision]
						    [info->
						     mode4030[channel]][info->
									clock_speed_index];
						if (unk1 >= t)
							max_of_unk =
							    max(max_of_unk,
								unk1 - t);
					}
				}
			{
				int t =
				    u8_FFFD17E0[channel]
				    [extended_silicon_revision][info->
								mode4030
								[channel]]
				    [info->clock_speed_index] + min_of_unk_2;
				if (unk1 >= t)
					max_of_unk = max(max_of_unk, unk1 - t);
			}
		}

		info->avg4044[channel] = sum / count;
		info->max4048[channel] = max_of_unk;
	}
}
1063
/* Issue one JEDEC MRS command to a rank: the mode-register number goes in
   MCHBAR 0x271/0x671 (ADDR3), the register value is encoded in the read
   address (VALUE << 3 within the rank's 256 MiB window).  Odd ranks with
   SPD address mirroring get their address/bank bits swapped per the DDR3
   mirroring rules. */
static void jedec_read(struct raminfo *info,
		       int channel, int slot, int rank,
		       int total_rank, u8 addr3, unsigned int value)
{
	/* Handle mirrored mapping. */
	if ((rank & 1) && (info->spd[channel][slot][RANK1_ADDRESS_MAPPING] & 1))
		addr3 =
		    (addr3 & 0xCF) | ((addr3 & 0x10) << 1) | ((addr3 >> 1) &
							      0x10);
	write_mchbar8(0x271, addr3 | (read_mchbar8(0x271) & 0xC1));
	write_mchbar8(0x671, addr3 | (read_mchbar8(0x671) & 0xC1));

	/* Handle mirrored mapping. */
	if ((rank & 1) && (info->spd[channel][slot][RANK1_ADDRESS_MAPPING] & 1))
		value =
		    (value & ~0x1f8) | ((value >> 1) & 0xa8) | ((value & 0xa8)
								<< 1);

	/* The read itself carries the MRS payload to the DIMM. */
	read32((value << 3) | (total_rank << 28));

	write_mchbar8(0x271, (read_mchbar8(0x271) & 0xC3) | 2);
	write_mchbar8(0x671, (read_mchbar8(0x671) & 0xC3) | 2);

	read32(total_rank << 28);
}
1089
/* DDR3 MR1 bits: Rtt_Nom termination and output driver strength. */
enum {
	MR1_RZQ12 = 512,	/* Rtt_Nom = RZQ/12 */
	MR1_RZQ2 = 64,		/* Rtt_Nom = RZQ/2 */
	MR1_RZQ4 = 4,		/* Rtt_Nom = RZQ/4 */
	MR1_ODS34OHM = 2	/* output driver strength RZQ/7 (34 ohm) */
};

/* DDR3 MR0 bits. */
enum {
	MR0_BT_INTERLEAVED = 8,	/* interleaved burst type */
	MR0_DLL_RESET_ON = 256
};

/* DDR3 MR2 bits: dynamic ODT (Rtt_WR). */
enum {
	MR2_RTT_WR_DISABLED = 0,
	MR2_RZQ2 = 1 << 10	/* Rtt_WR = RZQ/2 */
};
1106
/* JEDEC DDR3 initialization: derive write recovery, ASR/SRT and DLL
   settings from SPD and silicon revision, program the per-channel ODT
   (0x588..0x590) registers, then issue the MR2/MR3/MR1/MR0 mode-register
   set sequence to every populated rank. */
static void jedec_init(struct raminfo *info)
{
	int write_recovery;
	int channel, slot, rank;
	int total_rank;
	int dll_on;
	int self_refresh_temperature;
	int auto_self_refresh;

	auto_self_refresh = 1;
	self_refresh_temperature = 1;
	/* Write recovery (cycles) derived from board lane delay 3. */
	if (info->board_lane_delay[3] <= 10) {
		if (info->board_lane_delay[3] <= 8)
			write_recovery = info->board_lane_delay[3] - 4;
		else
			write_recovery = 5;
	} else {
		write_recovery = 6;
	}
	/* ASR/SRT are only usable if every populated module supports them. */
	FOR_POPULATED_RANKS {
		auto_self_refresh &=
		    (info->spd[channel][slot][THERMAL_AND_REFRESH] >> 2) & 1;
		self_refresh_temperature &=
		    info->spd[channel][slot][THERMAL_AND_REFRESH] & 1;
	}
	if (auto_self_refresh == 1)
		self_refresh_temperature = 0;

	dll_on = ((info->silicon_revision != 2 && info->silicon_revision != 3)
		  || (info->populated_ranks[0][0][0]
		      && info->populated_ranks[0][1][0])
		  || (info->populated_ranks[1][0][0]
		      && info->populated_ranks[1][1][0]));

	total_rank = 0;

	for (channel = NUM_CHANNELS - 1; channel >= 0; channel--) {
		int rtt, rtt_wr = MR2_RTT_WR_DISABLED;
		int rzq_reg58e;

		/* Termination strengths depend on silicon revision, clock
		   and which ranks are populated. */
		if (info->silicon_revision == 2 || info->silicon_revision == 3) {
			rzq_reg58e = 64;
			rtt = MR1_RZQ2;
			if (info->clock_speed_index != 0) {
				rzq_reg58e = 4;
				if (info->populated_ranks_mask[channel] == 3)
					rtt = MR1_RZQ4;
			}
		} else {
			if ((info->populated_ranks_mask[channel] & 5) == 5) {
				rtt = MR1_RZQ12;
				rzq_reg58e = 64;
				rtt_wr = MR2_RZQ2;
			} else {
				rzq_reg58e = 4;
				rtt = MR1_RZQ4;
			}
		}

		write_mchbar16(0x588 + (channel << 10), 0x0);
		write_mchbar16(0x58a + (channel << 10), 0x4);
		write_mchbar16(0x58c + (channel << 10), rtt | MR1_ODS34OHM);
		write_mchbar16(0x58e + (channel << 10), rzq_reg58e | 0x82);
		write_mchbar16(0x590 + (channel << 10), 0x1282);

		/* MRS order per JEDEC: MR2 (0x28), MR3 (0x38), MR1 (0x18),
		   then MR0 (6) with DLL reset. */
		for (slot = 0; slot < NUM_SLOTS; slot++)
			for (rank = 0; rank < NUM_RANKS; rank++)
				if (info->populated_ranks[channel][slot][rank]) {
					jedec_read(info, channel, slot, rank,
						   total_rank, 0x28,
						   rtt_wr | (info->
							     clock_speed_index
							     << 3)
						   | (auto_self_refresh << 6) |
						   (self_refresh_temperature <<
						    7));
					jedec_read(info, channel, slot, rank,
						   total_rank, 0x38, 0);
					jedec_read(info, channel, slot, rank,
						   total_rank, 0x18,
						   rtt | MR1_ODS34OHM);
					jedec_read(info, channel, slot, rank,
						   total_rank, 6,
						   (dll_on << 12) |
						   (write_recovery << 9)
						   | ((info->cas_latency - 4) <<
						      4) | MR0_BT_INTERLEAVED |
						   MR0_DLL_RESET_ON);
					total_rank++;
				}
	}
}
1199
/*
 * Program the per-rank and per-channel memory map registers.
 *
 * When pre_jedec is set a fixed dummy geometry (256 MiB per rank) is
 * used so the JEDEC init sequence can address every rank; afterwards
 * the real sizes derived from SPD density / module width are written.
 * Also computes total, interleaved and non-interleaved sizes in
 * info->*_mb and programs the channel split at MCHBAR 0x100/0x104.
 */
static void program_modules_memory_map(struct raminfo *info, int pre_jedec)
{
	unsigned channel, slot, rank;
	unsigned int total_mb[2] = { 0, 0 };	/* total memory per channel in MB */
	unsigned int channel_0_non_interleaved;

	FOR_ALL_RANKS {
		if (info->populated_ranks[channel][slot][rank]) {
			/* Rank size: 256 MiB << density, halved for x16. */
			total_mb[channel] +=
			    pre_jedec ? 256 : (256 << info->
					       density[channel][slot] >> info->
					       is_x16_module[channel][slot]);
			write_mchbar8(0x208 + rank + 2 * slot + (channel << 10),
				      (pre_jedec ? (1 | ((1 + 1) << 1))
				       : (info->
					  is_x16_module[channel][slot] |
					  ((info->density[channel][slot] +
					    1) << 1))) | 0x80);
		}
		/* Running upper bound for this rank, in 64 MiB units. */
		write_mchbar16(0x200 + (channel << 10) + 4 * slot + 2 * rank,
			       total_mb[channel] >> 6);
	}

	info->total_memory_mb = total_mb[0] + total_mb[1];

	/* Interleaving covers twice the smaller channel; the remainder is
	   mapped linearly. */
	info->interleaved_part_mb =
	    pre_jedec ? 0 : 2 * min(total_mb[0], total_mb[1]);
	info->non_interleaved_part_mb =
	    total_mb[0] + total_mb[1] - info->interleaved_part_mb;
	channel_0_non_interleaved = total_mb[0] - info->interleaved_part_mb / 2;
	write_mchbar32(0x100,
		       channel_0_non_interleaved | (info->
						    non_interleaved_part_mb <<
						    16));
	if (!pre_jedec)
		write_mchbar16(0x104, info->interleaved_part_mb);
}
1237
/*
 * Program board- and timing-dependent delay registers.
 *
 * Derives various latency values (CAS-latency based fields, the
 * board_lane_delay[] table, FSB/DDR frequency ratios) and writes them
 * into the per-channel MCHBAR timing registers.  Most register offsets
 * and constants come from reverse engineering; they are written in the
 * original capture order, which must be preserved.
 */
static void program_board_delay(struct raminfo *info)
{
	int cas_latency_shift;
	int some_delay_ns;
	int some_delay_3_half_cycles;

	unsigned channel, i;
	int high_multiplier;
	int lane_3_delay;
	int cas_latency_derived;

	high_multiplier = 0;
	some_delay_ns = 200;
	some_delay_3_half_cycles = 4;
	/* Older steppings get an extra CAS-latency adjustment; very old
	   revisions (< 8) use a longer delay and no shift. */
	cas_latency_shift = info->silicon_revision == 0
	    || info->silicon_revision == 1 ? 1 : 0;
	if (info->revision < 8) {
		some_delay_ns = 600;
		cas_latency_shift = 0;
	}
	{
		int speed_bit;
		speed_bit =
		    ((info->clock_speed_index > 1
		      || (info->silicon_revision != 2
			  && info->silicon_revision != 3))) ^ (info->revision >=
							       0x10);
		write_500(info, 0, speed_bit | ((!info->use_ecc) << 1), 0x60e,
			  3, 1);
		write_500(info, 1, speed_bit | ((!info->use_ecc) << 1), 0x60e,
			  3, 1);
		if (info->revision >= 0x10 && info->clock_speed_index <= 1
		    && (info->silicon_revision == 2
			|| info->silicon_revision == 3))
			rmw_1d0(0x116, 5, 2, 4, 1);
	}
	write_mchbar32(0x120,
		       (1 << (info->max_slots_used_in_channel + 28)) |
		       0x188e7f9f);

	/* frequency_01() appears to be in some kHz-like unit; round up to
	   the next thousand — TODO confirm units. */
	write_mchbar8(0x124,
		      info->board_lane_delay[4] +
		      ((frequency_01(info) + 999) / 1000));
	write_mchbar16(0x125, 0x1360);
	write_mchbar8(0x127, 0x40);
	/* Slow FSB relative to DRAM clock: enable the high multiplier and
	   compute an interpolated delay (in half cycles, capped at 7). */
	if (info->fsb_frequency < frequency_11(info) / 2) {
		unsigned some_delay_2_half_cycles;
		high_multiplier = 1;
		some_delay_2_half_cycles = ps_to_halfcycles(info,
							    ((3 *
							      fsbcycle_ps(info))
							     >> 1) +
							    (halfcycle_ps(info)
							     *
							     reg178_min[info->
									clock_speed_index]
							     >> 6)
							    +
							    4 *
							    halfcycle_ps(info)
							    + 2230);
		some_delay_3_half_cycles =
		    min((some_delay_2_half_cycles +
			 (frequency_11(info) * 2) * (28 -
						     some_delay_2_half_cycles) /
			 (frequency_11(info) * 2 -
			  4 * (info->fsb_frequency))) >> 3, 7);
	}
	if (read_mchbar8(0x2ca9) & 1)
		some_delay_3_half_cycles = 3;
	for (channel = 0; channel < NUM_CHANNELS; channel++) {
		write_mchbar32(0x220 + (channel << 10),
			       read_mchbar32(0x220 +
					     (channel << 10)) | 0x18001117);
		/* Packed CAS-latency-derived fields; exact meaning of each
		   bitfield is unknown (reverse engineered). */
		write_mchbar32(0x224 + (channel << 10),
			       (info->max_slots_used_in_channel - 1)
			       |
			       ((info->cas_latency - 5 -
				 info->clock_speed_index) << 21)
			       |
			       ((info->max_slots_used_in_channel +
				 info->cas_latency - cas_latency_shift -
				 4) << 16)
			       | ((info->cas_latency - cas_latency_shift - 4) <<
				  26)
			       |
			       ((info->cas_latency - info->clock_speed_index +
				 info->max_slots_used_in_channel - 6) << 8));
		write_mchbar32(0x228 + (channel << 10),
			       info->max_slots_used_in_channel);
		write_mchbar8(0x239 + (channel << 10), 32);
		write_mchbar32(0x248 + (channel << 10),
			       (high_multiplier << 24) |
			       (some_delay_3_half_cycles << 25) | 0x840000);
		write_mchbar32(0x278 + (channel << 10), 0xc362042);
		write_mchbar32(0x27c + (channel << 10), 0x8b000062);
		write_mchbar32(0x24c + (channel << 10),
			       ((! !info->
				 clock_speed_index) << 17) | (((2 +
							       info->
							       clock_speed_index
							       -
							       (! !info->
								clock_speed_index)))
							      << 12) | 0x10200);

		write_mchbar8(0x267 + (channel << 10), 0x4);
		write_mchbar16(0x272 + (channel << 10), 0x155);
		write_mchbar32(0x2bc + (channel << 10),
			       (read_mchbar32(0x2bc + (channel << 10)) &
				0xFF000000)
			       | 0x707070);

		/* Bitmask of *unpopulated* ranks for this channel. */
		write_500(info, channel,
			  ((!info->populated_ranks[channel][1][1])
			   | (!info->populated_ranks[channel][1][0] << 1)
			   | (!info->populated_ranks[channel][0][1] << 2)
			   | (!info->populated_ranks[channel][0][0] << 3)),
			  0x4c9, 4, 1);
	}

	write_mchbar8(0x2c4, ((1 + (info->clock_speed_index != 0)) << 6) | 0xC);
	{
		/* Select a clock divisor from the FSB:DDR frequency ratio. */
		u8 freq_divisor = 2;
		if (info->fsb_frequency == frequency_11(info))
			freq_divisor = 3;
		else if (2 * info->fsb_frequency < 3 * (frequency_11(info) / 2))
			freq_divisor = 1;
		else
			freq_divisor = 2;
		write_mchbar32(0x2c0, (freq_divisor << 11) | 0x6009c400);
	}

	/* Clamp board lane delay 3 to the supported range. */
	if (info->board_lane_delay[3] <= 10) {
		if (info->board_lane_delay[3] <= 8)
			lane_3_delay = info->board_lane_delay[3];
		else
			lane_3_delay = 10;
	} else {
		lane_3_delay = 12;
	}
	cas_latency_derived = info->cas_latency - info->clock_speed_index + 2;
	if (info->clock_speed_index > 1)
		cas_latency_derived++;
	for (channel = 0; channel < NUM_CHANNELS; channel++) {
		write_mchbar32(0x240 + (channel << 10),
			       ((info->clock_speed_index ==
				 0) * 0x11000) | 0x1002100 | ((2 +
							       info->
							       clock_speed_index)
							      << 4) | (info->
								       cas_latency
								       - 3));
		write_500(info, channel, (info->clock_speed_index << 1) | 1,
			  0x609, 6, 1);
		write_500(info, channel,
			  info->clock_speed_index + 2 * info->cas_latency - 7,
			  0x601, 6, 1);

		/* Pack board_lane_delay[] entries into the 0x250..0x25c
		   per-channel delay registers. */
		write_mchbar32(0x250 + (channel << 10),
			       ((lane_3_delay + info->clock_speed_index +
				 9) << 6)
			       | (info->board_lane_delay[7] << 2) | (info->
								     board_lane_delay
								     [4] << 16)
			       | (info->board_lane_delay[1] << 25) | (info->
								      board_lane_delay
								      [1] << 29)
			       | 1);
		write_mchbar32(0x254 + (channel << 10),
			       (info->
				board_lane_delay[1] >> 3) | ((info->
							      board_lane_delay
							      [8] +
							      4 *
							      info->
							      use_ecc) << 6) |
			       0x80 | (info->board_lane_delay[6] << 1) | (info->
									  board_lane_delay
									  [2] <<
									  28) |
			       (cas_latency_derived << 16) | 0x4700000);
		write_mchbar32(0x258 + (channel << 10),
			       ((info->board_lane_delay[5] +
				 info->clock_speed_index +
				 9) << 12) | ((info->clock_speed_index -
					       info->cas_latency + 12) << 8)
			       | (info->board_lane_delay[2] << 17) | (info->
								      board_lane_delay
								      [4] << 24)
			       | 0x47);
		write_mchbar32(0x25c + (channel << 10),
			       (info->board_lane_delay[1] << 1) | (info->
								   board_lane_delay
								   [0] << 8) |
			       0x1da50000);
		write_mchbar8(0x264 + (channel << 10), 0xff);
		write_mchbar8(0x5f8 + (channel << 10),
			      (cas_latency_shift << 3) | info->use_ecc);
	}

	/* Temporary memory map so JEDEC init can address all ranks. */
	program_modules_memory_map(info, 1);

	write_mchbar16(0x610,
		       (min(ns_to_cycles(info, some_delay_ns) / 2, 127) << 9)
		       | (read_mchbar16(0x610) & 0x1C3) | 0x3C);
	write_mchbar16(0x612, read_mchbar16(0x612) | 0x100);
	write_mchbar16(0x214, read_mchbar16(0x214) | 0x3E00);
	/* Provisional QPI memory-map entries; rewritten later by
	   program_total_memory_map(). */
	for (i = 0; i < 8; i++) {
		pcie_write_config32(PCI_DEV (QUICKPATH_BUS, 0, 1), 0x80 + 4 * i,
				    (info->total_memory_mb - 64) | !i | 2);
		pcie_write_config32(PCI_DEV (QUICKPATH_BUS, 0, 1), 0xc0 + 4 * i, 0);
	}
}
1452
1453#define BETTER_MEMORY_MAP 0
1454
/*
 * Compute and program the final system address map:
 * TOM (top of memory), TOLUD (top of low usable DRAM), TOUUD (top of
 * upper usable DRAM), the >4G remap window, IGD/GTT UMA stolen-memory
 * bases, TSEG, and the QuickPath memory-map entries.
 */
static void program_total_memory_map(struct raminfo *info)
{
	unsigned int TOM, TOLUD, TOUUD;
	unsigned int quickpath_reserved;
	unsigned int REMAPbase;
	unsigned int uma_base_igd;
	unsigned int uma_base_gtt;
	int memory_remap;
	unsigned int memory_map[8];
	int i;
	unsigned int current_limit;
	unsigned int tseg_base;
	int uma_size_igd = 0, uma_size_gtt = 0;

	memset(memory_map, 0, sizeof(memory_map));

#if REAL
	/* Decode the IGD/GTT stolen-memory sizes (in MiB) from the GGC
	   graphics control register. */
	if (info->uma_enabled) {
		u16 t = pcie_read_config16(NORTHBRIDGE, D0F0_GGC);
		gav(t);
		const int uma_sizes_gtt[16] =
		    { 0, 1, 0, 2, 0, 0, 0, 0, 0, 2, 3, 4, 42, 42, 42, 42 };
		/* Igd memory */
		const int uma_sizes_igd[16] = {
			0, 0, 0, 0, 0, 32, 48, 64, 128, 256, 96, 160, 224, 352,
			256, 512
		};

		uma_size_igd = uma_sizes_igd[(t >> 4) & 0xF];
		uma_size_gtt = uma_sizes_gtt[(t >> 8) & 0xF];
	}
#endif

	TOM = info->total_memory_mb;
	if (TOM == 4096)
		TOM = 4032;	/* quirk: 4 GiB is reported as 4032 MiB */
	TOUUD = ALIGN_DOWN(TOM - info->memory_reserved_for_heci_mb, 64);
	TOLUD = ALIGN_DOWN(min(3072 + ALIGN_UP(uma_size_igd + uma_size_gtt, 64)
			       , TOUUD), 64);
	memory_remap = 0;
	/* Memory shadowed by the PCI hole gets remapped above 4 GiB. */
	if (TOUUD - TOLUD > 64) {
		memory_remap = 1;
		REMAPbase = max(4096, TOUUD);
		TOUUD = TOUUD - TOLUD + 4096;
	}
	if (TOUUD > 4096)
		memory_map[2] = TOUUD | 1;
	quickpath_reserved = 0;

	{
		u32 t;

		gav(t = pcie_read_config32(PCI_DEV(QUICKPATH_BUS, 0, 1), 0x68));
		if (t & 0x800)
			quickpath_reserved =
			    (1 << find_lowest_bit_set32(t >> 20));
	}
	if (memory_remap)
		TOUUD -= quickpath_reserved;

#if !REAL
	/* Mirror of the REAL GGC decode above, kept for the RE/debug
	   build (see the REAL/!REAL note at the top of the file). */
	if (info->uma_enabled) {
		u16 t = pcie_read_config16(NORTHBRIDGE, D0F0_GGC);
		gav(t);
		const int uma_sizes_gtt[16] =
		    { 0, 1, 0, 2, 0, 0, 0, 0, 0, 2, 3, 4, 42, 42, 42, 42 };
		/* Igd memory */
		const int uma_sizes_igd[16] = {
			0, 0, 0, 0, 0, 32, 48, 64, 128, 256, 96, 160, 224, 352,
			256, 512
		};

		uma_size_igd = uma_sizes_igd[(t >> 4) & 0xF];
		uma_size_gtt = uma_sizes_gtt[(t >> 8) & 0xF];
	}
#endif

	/* Stolen regions are carved from the top of low memory:
	   TOLUD | IGD | GTT | TSEG (downwards). */
	uma_base_igd = TOLUD - uma_size_igd;
	uma_base_gtt = uma_base_igd - uma_size_gtt;
	tseg_base = ALIGN_DOWN(uma_base_gtt, 64) - (CONFIG_SMM_TSEG_SIZE >> 20);
	if (!memory_remap)
		tseg_base -= quickpath_reserved;
	tseg_base = ALIGN_DOWN(tseg_base, 8);

	pcie_write_config16(NORTHBRIDGE, D0F0_TOLUD, TOLUD << 4);
	pcie_write_config16(NORTHBRIDGE, D0F0_TOM, TOM >> 6);
	if (memory_remap) {
		pcie_write_config16(NORTHBRIDGE, D0F0_REMAPBASE, REMAPbase >> 6);
		pcie_write_config16(NORTHBRIDGE, D0F0_REMAPLIMIT, (TOUUD - 64) >> 6);
	}
	pcie_write_config16(NORTHBRIDGE, D0F0_TOUUD, TOUUD);

	if (info->uma_enabled) {
		pcie_write_config32(NORTHBRIDGE, D0F0_IGD_BASE, uma_base_igd << 20);
		pcie_write_config32(NORTHBRIDGE, D0F0_GTT_BASE, uma_base_gtt << 20);
	}
	pcie_write_config32(NORTHBRIDGE, TSEG, tseg_base << 20);

	/* Program the QPI memory-map limit registers with cumulative
	   limits (64 MiB granularity, bit 0 = entry valid). */
	current_limit = 0;
	memory_map[0] = ALIGN_DOWN(uma_base_gtt, 64) | 1;
	memory_map[1] = 4096;
	for (i = 0; i < ARRAY_SIZE(memory_map); i++) {
		current_limit = max(current_limit, memory_map[i] & ~1);
		pcie_write_config32(PCI_DEV(QUICKPATH_BUS, 0, 1), 4 * i + 0x80,
				    (memory_map[i] & 1) | ALIGN_DOWN(current_limit -
								     1, 64) | 2);
		pcie_write_config32(PCI_DEV(QUICKPATH_BUS, 0, 1), 4 * i + 0xc0, 0);
	}
}
1564
/*
 * Gather platform information needed by raminit: HECI BAR and ME UMA
 * size, northbridge capability registers (CAPID0), chip revision, UMA
 * enablement, maximum supported memory clock, and the derived
 * silicon_revision used throughout this file.
 */
static void collect_system_info(struct raminfo *info)
{
	u32 capid0[3];
	int i;
	unsigned channel;

	/* Wait for some bit, maybe TXT clear. */
	while (!(read8(0xfed40000) & (1 << 7))) ;

	if (!info->heci_bar)
		gav(info->heci_bar =
		    pcie_read_config32(HECIDEV, HECIBAR) & 0xFFFFFFF8);
	if (!info->memory_reserved_for_heci_mb) {
		/* Wait for ME to be ready */
		intel_early_me_init();
		info->memory_reserved_for_heci_mb = intel_early_me_uma_size();
	}

	/* Read the three CAPID0 capability dwords. */
	for (i = 0; i < 3; i++)
		gav(capid0[i] =
		    pcie_read_config32(NORTHBRIDGE, D0F0_CAPID0 | (i << 2)));
	gav(info->revision = pcie_read_config8(NORTHBRIDGE, PCI_REVISION_ID));
	info->max_supported_clock_speed_index = (~capid0[1] & 7);

	/* CAPID0 bit 11 set means internal graphics is fused off. */
	if ((capid0[1] >> 11) & 1)
		info->uma_enabled = 0;
	else
		gav(info->uma_enabled =
		    pcie_read_config8(NORTHBRIDGE, D0F0_DEVEN) & 8);
	/* Unrecognised: [0000:fffd3d2d] 37f81.37f82 ! CPUID: eax: 00000001; ecx: 00000e00 => 00020655.00010800.029ae3ff.bfebfbff */
	info->silicon_revision = 0;

	/* Derive silicon_revision from capability bits, DIMM type and
	   device ID (reverse engineered mapping). */
	if (capid0[2] & 2) {
		info->silicon_revision = 0;
		info->max_supported_clock_speed_index = 2;
		for (channel = 0; channel < NUM_CHANNELS; channel++)
			if (info->populated_ranks[channel][0][0]
			    && (info->spd[channel][0][MODULE_TYPE] & 0xf) ==
			    3) {
				/* SO-DIMM present. */
				info->silicon_revision = 2;
				info->max_supported_clock_speed_index = 1;
			}
	} else {
		switch (((capid0[2] >> 18) & 1) + 2 * ((capid0[1] >> 3) & 1)) {
		case 1:
		case 2:
			info->silicon_revision = 3;
			break;
		case 3:
			info->silicon_revision = 0;
			break;
		case 0:
			info->silicon_revision = 2;
			break;
		}
		switch (pcie_read_config16(NORTHBRIDGE, PCI_DEVICE_ID)) {
		case 0x40:
			info->silicon_revision = 0;
			break;
		case 0x48:
			info->silicon_revision = 1;
			break;
		}
	}
}
1630
/*
 * Restore cached training results (from a previous boot) into the
 * hardware: all per-lane timing registers plus registers 0x178 and
 * 0x10b.  Only supported on revision >= 8 silicon.
 */
static void write_training_data(struct raminfo *info)
{
	int tm, channel, slot, rank, lane;
	if (info->revision < 8)
		return;

	/* 4 timing register sets x channels x slots x ranks x 9 lanes. */
	for (tm = 0; tm < 4; tm++)
		for (channel = 0; channel < NUM_CHANNELS; channel++)
			for (slot = 0; slot < NUM_SLOTS; slot++)
				for (rank = 0; rank < NUM_RANKS; rank++)
					for (lane = 0; lane < 9; lane++)
						write_500(info, channel,
							  info->
							  cached_training->
							  lane_timings[tm]
							  [channel][slot][rank]
							  [lane],
							  get_timing_register_addr
							  (lane, tm, slot,
							   rank), 9, 0);
	write_1d0(info->cached_training->reg_178, 0x178, 7, 1);
	write_1d0(info->cached_training->reg_10b, 0x10b, 6, 1);
}
1654
/*
 * Debug helper: print, for every populated rank and lane, the four
 * timing register values currently in hardware alongside the values
 * recorded in info->training, then the 0x178/0x10b registers.
 */
static void dump_timings(struct raminfo *info)
{
#if REAL
	int channel, slot, rank, lane, i;
	printk(BIOS_DEBUG, "Timings:\n");
	FOR_POPULATED_RANKS {
		printk(BIOS_DEBUG, "channel %d, slot %d, rank %d\n", channel,
		       slot, rank);
		for (lane = 0; lane < 9; lane++) {
			printk(BIOS_DEBUG, "lane %d: ", lane);
			for (i = 0; i < 4; i++) {
				/* "hardware (cached)" value pairs. */
				printk(BIOS_DEBUG, "%x (%x) ",
				       read_500(info, channel,
						get_timing_register_addr
						(lane, i, slot, rank),
						9),
				       info->training.
				       lane_timings[i][channel][slot][rank]
				       [lane]);
			}
			printk(BIOS_DEBUG, "\n");
		}
	}
	printk(BIOS_DEBUG, "[178] = %x (%x)\n", read_1d0(0x178, 7),
	       info->training.reg_178);
	printk(BIOS_DEBUG, "[10b] = %x (%x)\n", read_1d0(0x10b, 6),
	       info->training.reg_10b);
#endif
}
1684
Vladimir Serbinenkof7a42de2014-01-09 11:10:04 +01001685/* Read timings and other registers that need to be restored verbatim and
1686 put them to CBMEM.
1687 */
Vladimir Serbinenkoc6f6be02013-11-12 22:32:08 +01001688static void save_timings(struct raminfo *info)
1689{
1690#if CONFIG_EARLY_CBMEM_INIT
1691 struct ram_training train;
1692 struct mrc_data_container *mrcdata;
1693 int output_len = ALIGN(sizeof(train), 16);
1694 int channel, slot, rank, lane, i;
1695
1696 train = info->training;
1697 FOR_POPULATED_RANKS for (lane = 0; lane < 9; lane++)
1698 for (i = 0; i < 4; i++)
1699 train.lane_timings[i][channel][slot][rank][lane] =
1700 read_500(info, channel,
1701 get_timing_register_addr(lane, i, slot,
1702 rank), 9);
1703 train.reg_178 = read_1d0(0x178, 7);
1704 train.reg_10b = read_1d0(0x10b, 6);
1705
Vladimir Serbinenkof7a42de2014-01-09 11:10:04 +01001706 for (channel = 0; channel < NUM_CHANNELS; channel++) {
1707 u32 reg32;
1708 reg32 = read_mchbar32 ((channel << 10) + 0x274);
1709 train.reg274265[channel][0] = reg32 >> 16;
1710 train.reg274265[channel][1] = reg32 & 0xffff;
1711 train.reg274265[channel][2] = read_mchbar16 ((channel << 10) + 0x265) >> 8;
1712 }
1713 train.reg2ca9_bit0 = read_mchbar8(0x2ca9) & 1;
1714 train.reg_6dc = read_mchbar32 (0x6dc);
1715 train.reg_6e8 = read_mchbar32 (0x6e8);
1716
1717 printk (BIOS_SPEW, "[6dc] = %x\n", train.reg_6dc);
1718 printk (BIOS_SPEW, "[6e8] = %x\n", train.reg_6e8);
1719
Vladimir Serbinenkoc6f6be02013-11-12 22:32:08 +01001720 /* Save the MRC S3 restore data to cbmem */
Kyösti Mälkki2d8520b2014-01-06 17:20:31 +02001721 cbmem_recovery(0);
Vladimir Serbinenkoc6f6be02013-11-12 22:32:08 +01001722 mrcdata = cbmem_add
1723 (CBMEM_ID_MRCDATA, output_len + sizeof(struct mrc_data_container));
1724
1725 printk(BIOS_DEBUG, "Relocate MRC DATA from %p to %p (%u bytes)\n",
1726 &train, mrcdata, output_len);
1727
1728 mrcdata->mrc_signature = MRC_DATA_SIGNATURE;
1729 mrcdata->mrc_data_size = output_len;
1730 mrcdata->reserved = 0;
1731 memcpy(mrcdata->mrc_data, &train, sizeof(train));
1732
1733 /* Zero the unused space in aligned buffer. */
1734 if (output_len > sizeof(train))
1735 memset(mrcdata->mrc_data + sizeof(train), 0,
1736 output_len - sizeof(train));
1737
1738 mrcdata->mrc_checksum = compute_ip_checksum(mrcdata->mrc_data,
1739 mrcdata->mrc_data_size);
1740#endif
1741}
1742
#if REAL
/* Look up RAM training results saved by a previous boot in the MRC
   cache.  Returns NULL when no cached data is available. */
static const struct ram_training *get_cached_training(void)
{
	struct mrc_data_container *container = find_current_mrc_cache();

	if (container == NULL)
		return NULL;

	return (const struct ram_training *)container->mrc_data;
}
#endif
1753
/* FIXME: add timeout. */
/* Spin until the ME side of the HECI link reports ready (ME CSR bit 3),
   then clear host reset and set the host ready/interrupt-enable bits. */
static void wait_heci_ready(void)
{
	while (!(read32(DEFAULT_HECIBAR | 0xc) & 8)) ;	// = 0x8000000c
	write32((DEFAULT_HECIBAR | 0x4),
		(read32(DEFAULT_HECIBAR | 0x4) & ~0x10) | 0xc);
}
1761
/* FIXME: add timeout. */
/* Spin until the host circular buffer has room for `len` dwords
   (free space = depth - (write_ptr - read_ptr)). */
static void wait_heci_cb_avail(int len)
{
	union {
		struct mei_csr csr;
		u32 raw;
	} csr;

	/* First wait for the ME to be ready at all. */
	while (!(read32(DEFAULT_HECIBAR | 0xc) & 8)) ;

	do
		csr.raw = read32(DEFAULT_HECIBAR | 0x4);
	while (len >
	       csr.csr.buffer_depth - (csr.csr.buffer_write_ptr -
				       csr.csr.buffer_read_ptr));
}
1778
1779static void send_heci_packet(struct mei_header *head, u32 * payload)
1780{
1781 int len = (head->length + 3) / 4;
1782 int i;
1783
1784 wait_heci_cb_avail(len + 1);
1785
1786 /* FIXME: handle leftovers correctly. */
1787 write32(DEFAULT_HECIBAR | 0, *(u32 *) head);
1788 for (i = 0; i < len - 1; i++)
1789 write32(DEFAULT_HECIBAR | 0, payload[i]);
1790
1791 write32(DEFAULT_HECIBAR | 0, payload[i] & ((1 << (8 * len)) - 1));
1792 write32(DEFAULT_HECIBAR | 0x4, read32(DEFAULT_HECIBAR | 0x4) | 0x4);
1793}
1794
/*
 * Send a HECI message of arbitrary length, fragmenting it into packets
 * no larger than the circular buffer (minus the header dword).  Only
 * the final fragment carries is_complete = 1.
 */
static void
send_heci_message(u8 * msg, int len, u8 hostaddress, u8 clientaddress)
{
	struct mei_header head;
	int maxlen;

	wait_heci_ready();
	/* Buffer depth (CSR bits 31:24) in dwords -> bytes, minus header. */
	maxlen = (read32(DEFAULT_HECIBAR | 0x4) >> 24) * 4 - 4;

	while (len) {
		int cur = len;
		if (cur > maxlen) {
			cur = maxlen;
			head.is_complete = 0;
		} else
			head.is_complete = 1;
		head.length = cur;
		head.reserved = 0;
		head.client_address = clientaddress;
		head.host_address = hostaddress;
		send_heci_packet(&head, (u32 *) msg);
		len -= cur;
		msg += cur;
	}
}
1820
1821/* FIXME: Add timeout. */
1822static int
1823recv_heci_packet(struct raminfo *info, struct mei_header *head, u32 * packet,
1824 u32 * packet_size)
1825{
1826 union {
1827 struct mei_csr csr;
1828 u32 raw;
1829 } csr;
1830 int i = 0;
1831
1832 write32(DEFAULT_HECIBAR | 0x4, read32(DEFAULT_HECIBAR | 0x4) | 2);
1833 do {
1834 csr.raw = read32(DEFAULT_HECIBAR | 0xc);
1835#if !REAL
1836 if (i++ > 346)
1837 return -1;
1838#endif
1839 }
1840 while (csr.csr.buffer_write_ptr == csr.csr.buffer_read_ptr);
1841 *(u32 *) head = read32(DEFAULT_HECIBAR | 0x8);
1842 if (!head->length) {
1843 write32(DEFAULT_HECIBAR | 0x4,
1844 read32(DEFAULT_HECIBAR | 0x4) | 2);
1845 *packet_size = 0;
1846 return 0;
1847 }
1848 if (head->length + 4 > 4 * csr.csr.buffer_depth
1849 || head->length > *packet_size) {
1850 *packet_size = 0;
1851 return -1;
1852 }
1853
1854 do
1855 csr.raw = read32(DEFAULT_HECIBAR | 0xc);
1856 while ((head->length + 3) >> 2 >
1857 csr.csr.buffer_write_ptr - csr.csr.buffer_read_ptr);
1858
1859 for (i = 0; i < (head->length + 3) >> 2; i++)
1860 packet[i++] = read32(DEFAULT_HECIBAR | 0x8);
1861 *packet_size = head->length;
1862 if (!csr.csr.ready)
1863 *packet_size = 0;
1864 write32(DEFAULT_HECIBAR | 0x4, read32(DEFAULT_HECIBAR | 0x4) | 4);
1865 return 0;
1866}
1867
/* FIXME: Add timeout. */
/*
 * Receive a complete (possibly fragmented) HECI message.  *message_size
 * holds the buffer capacity on entry and the received byte count on
 * success.  Returns 0 on success, -1 on error (with *message_size = 0).
 */
static int
recv_heci_message(struct raminfo *info, u32 * message, u32 * message_size)
{
	struct mei_header head;
	int current_position;

	current_position = 0;
	while (1) {
		u32 current_size;
		current_size = *message_size - current_position;
		if (recv_heci_packet
		    (info, &head, message + (current_position >> 2),
		     &current_size) == -1)
			break;
		if (!current_size)
			break;
		current_position += current_size;
		/* Last fragment of the message. */
		if (head.is_complete) {
			*message_size = current_position;
			return 0;
		}

		if (current_position >= *message_size)
			break;
	}
	*message_size = 0;
	return -1;
}
1897
/*
 * Tell the ME where its UMA region lives: send the MKHI SET_UMA
 * command with the reserved base address and size, then wait for the
 * acknowledgement (command | response bit).  Dies on a bad reply.
 */
static void send_heci_uma_message(struct raminfo *info)
{
	struct uma_reply {
		u8 group_id;
		u8 command;
		u8 reserved;
		u8 result;
		u8 field2;
		u8 unk3[0x48 - 4 - 1];
	} __attribute__ ((packed)) reply;
	struct uma_message {
		u8 group_id;
		u8 cmd;
		u8 reserved;
		u8 result;
		u32 c2;
		u64 heci_uma_addr;
		u32 memory_reserved_for_heci_mb;
		u16 c3;
	} __attribute__ ((packed)) msg = {
	0, MKHI_SET_UMA, 0, 0,
		    0x82,
		    info->heci_uma_addr, info->memory_reserved_for_heci_mb, 0};
	u32 reply_size;

	/* Host address 0, ME client 7 (MKHI). */
	send_heci_message((u8 *) & msg, sizeof(msg), 0, 7);

	reply_size = sizeof(reply);
	if (recv_heci_message(info, (u32 *) & reply, &reply_size) == -1)
		return;

	/* Bit 7 in the command byte marks the response. */
	if (reply.command != (MKHI_SET_UMA | (1 << 7)))
		die("HECI init failed\n");
}
1932
/*
 * Reserve UMA memory for the ME at the top of DRAM, notify the ME via
 * the MKHI SET_UMA message, and finally hide the HECI PCI function.
 * Does nothing when the ME does not request UMA (register 0x44).
 */
static void setup_heci_uma(struct raminfo *info)
{
	u32 reg44;

	reg44 = pcie_read_config32(HECIDEV, 0x44);	// = 0x80010020
	info->memory_reserved_for_heci_mb = 0;
	info->heci_uma_addr = 0;
	/* Bail out unless the ME requests UMA and is not already set up. */
	if (!((reg44 & 0x10000) && !(pcie_read_config32(HECIDEV, 0x40) & 0x20)))
		return;

	info->heci_bar = pcie_read_config32(HECIDEV, 0x10) & 0xFFFFFFF0;
	info->memory_reserved_for_heci_mb = reg44 & 0x3f;
	/* UMA sits directly below TOM. */
	info->heci_uma_addr =
	    ((u64)
	     ((((u64) pcie_read_config16(NORTHBRIDGE, D0F0_TOM)) << 6) -
	      info->memory_reserved_for_heci_mb)) << 20;

	pcie_read_config32(NORTHBRIDGE, DMIBAR);
	if (info->memory_reserved_for_heci_mb) {
		/* DMI/RCBA link retraining dance before handing the region
		   to the ME (register meanings reverse engineered). */
		write32(DEFAULT_DMIBAR | 0x14,
			read32(DEFAULT_DMIBAR | 0x14) & ~0x80);
		write32(DEFAULT_RCBA | 0x14,
			read32(DEFAULT_RCBA | 0x14) & ~0x80);
		write32(DEFAULT_DMIBAR | 0x20,
			read32(DEFAULT_DMIBAR | 0x20) & ~0x80);
		write32(DEFAULT_RCBA | 0x20,
			read32(DEFAULT_RCBA | 0x20) & ~0x80);
		write32(DEFAULT_DMIBAR | 0x2c,
			read32(DEFAULT_DMIBAR | 0x2c) & ~0x80);
		write32(DEFAULT_RCBA | 0x30,
			read32(DEFAULT_RCBA | 0x30) & ~0x80);
		write32(DEFAULT_DMIBAR | 0x38,
			read32(DEFAULT_DMIBAR | 0x38) & ~0x80);
		write32(DEFAULT_RCBA | 0x40,
			read32(DEFAULT_RCBA | 0x40) & ~0x80);

		write32(DEFAULT_RCBA | 0x40, 0x87000080);	// OK
		write32(DEFAULT_DMIBAR | 0x38, 0x87000080);	// OK
		while (read16(DEFAULT_RCBA | 0x46) & 2
		       && read16(DEFAULT_DMIBAR | 0x3e) & 2) ;
	}

	write_mchbar32(0x24, 0x10000 + info->memory_reserved_for_heci_mb);

	send_heci_uma_message(info);

	/* Clear the HECI BAR and disable the PCI function. */
	pcie_write_config32(HECIDEV, 0x10, 0x0);
	pcie_write_config8(HECIDEV, 0x4, 0x0);

}
1983
1984static int have_match_ranks(struct raminfo *info, int channel, int ranks)
1985{
1986 int ranks_in_channel;
1987 ranks_in_channel = info->populated_ranks[channel][0][0]
1988 + info->populated_ranks[channel][0][1]
1989 + info->populated_ranks[channel][1][0]
1990 + info->populated_ranks[channel][1][1];
1991
1992 /* empty channel */
1993 if (ranks_in_channel == 0)
1994 return 1;
1995
1996 if (ranks_in_channel != ranks)
1997 return 0;
1998 /* single slot */
1999 if (info->populated_ranks[channel][0][0] !=
2000 info->populated_ranks[channel][1][0])
2001 return 1;
2002 if (info->populated_ranks[channel][0][1] !=
2003 info->populated_ranks[channel][1][1])
2004 return 1;
2005 if (info->is_x16_module[channel][0] != info->is_x16_module[channel][1])
2006 return 0;
2007 if (info->density[channel][0] != info->density[channel][1])
2008 return 0;
2009 return 1;
2010}
2011
Vladimir Serbinenkoc6f6be02013-11-12 22:32:08 +01002012static void read_4090(struct raminfo *info)
2013{
2014 int i, channel, slot, rank, lane;
2015 for (i = 0; i < 2; i++)
2016 for (slot = 0; slot < NUM_SLOTS; slot++)
2017 for (rank = 0; rank < NUM_RANKS; rank++)
2018 for (lane = 0; lane < 9; lane++)
2019 info->training.
2020 lane_timings[0][i][slot][rank][lane]
2021 = 32;
2022
2023 for (i = 1; i < 4; i++)
2024 for (channel = 0; channel < NUM_CHANNELS; channel++)
2025 for (slot = 0; slot < NUM_SLOTS; slot++)
2026 for (rank = 0; rank < NUM_RANKS; rank++)
2027 for (lane = 0; lane < 9; lane++) {
2028 info->training.
2029 lane_timings[i][channel]
2030 [slot][rank][lane] =
2031 read_500(info, channel,
2032 get_timing_register_addr
2033 (lane, i, slot,
2034 rank), 9)
2035 + (i == 1) * 11; // !!!!
2036 }
2037
2038}
2039
/*
 * Compute the expected 32-bit test pattern for position `addr` of the
 * memory self-test, optionally inverted by `flip`.  The address is
 * split into a 480-entry tile (selecting a walking 0x1010101 base
 * pattern) and a position within the tile (selecting inversion from
 * the invmask bit table).
 */
static u32 get_etalon2(int flip, u32 addr)
{
	const u16 invmask[] = {
		0xaaaa, 0x6db6, 0x4924, 0xeeee, 0xcccc, 0x8888, 0x7bde, 0x739c,
		0x6318, 0x4210, 0xefbe, 0xcf3c, 0x8e38, 0x0c30, 0x0820
	};
	u32 tile = addr / 480;
	u32 pos = addr % 480;
	u32 bit_idx = pos & 0xf;	/* bit within the invmask entry */
	u32 half = (pos >> 4) & 1;	/* extra inversion toggle */
	u32 row = pos >> 5;		/* invmask table row */
	u32 pattern;

	/* Walking-byte base pattern; tile 0 is all zeros. */
	pattern = tile ? (u32) 0x1010101 << (tile - 1) : 0;

	if (flip ^ (((invmask[row] >> bit_idx) ^ half) & 1))
		pattern = ~pattern;

	return pattern;
}
2062
/* Disable the scratch variable-range MTRR (slot 3) used by the memory
   test helpers by clearing its base and mask (valid bit included). */
static void disable_cache(void)
{
	msr_t msr = {.lo = 0, .hi = 0 };

	wrmsr(MTRRphysBase_MSR(3), msr);
	wrmsr(MTRRphysMask_MSR(3), msr);
}
2070
/* Map [base, base+size) write-protect cacheable through variable MTRR
   slot 3 so the memory test reads are cached.  The mask rounds `size`
   up to the next 4 KiB boundary. */
static void enable_cache(unsigned int base, unsigned int size)
{
	msr_t msr;
	msr.lo = base | MTRR_TYPE_WRPROT;
	msr.hi = 0;
	wrmsr(MTRRphysBase_MSR(3), msr);
	msr.lo = ((~(ALIGN_DOWN(size + 4096, 4096) - 1) | MTRRdefTypeEn)
		  & 0xffffffff);
	msr.hi = 0x0000000f;
	wrmsr(MTRRphysMask_MSR(3), msr);
}
2082
/* Flush the CPU cache for [start, start+size), one 64-byte cache line
   at a time (size is rounded up to the next 4 KiB boundary). */
static void flush_cache(u32 start, u32 size)
{
	u32 end;
	u32 addr;

	end = start + (ALIGN_DOWN(size + 4096, 4096));
	for (addr = start; addr < end; addr += 64)
		clflush(addr);
}
2092
/* Clear the northbridge memory-test error status (config reg 0xc0). */
static void clear_errors(void)
{
	pcie_write_config8(NORTHBRIDGE, 0xc0, 0x01);
}
2097
/*
 * Fill the test window of rank `totalrank` (mapped at totalrank<<28)
 * with the get_etalon2() pattern; each pattern dword is written twice
 * (16 bytes per 2-step offset).  Error status is cleared every 320
 * writes to avoid saturating the error logic.
 */
static void write_testing(struct raminfo *info, int totalrank, int flip)
{
	int nwrites = 0;
	/* in 8-byte units. */
	u32 offset;
	u32 base;

	base = totalrank << 28;
	for (offset = 0; offset < 9 * 480; offset += 2) {
		write32(base + offset * 8, get_etalon2(flip, offset));
		write32(base + offset * 8 + 4, get_etalon2(flip, offset));
		write32(base + offset * 8 + 8, get_etalon2(flip, offset + 1));
		write32(base + offset * 8 + 12, get_etalon2(flip, offset + 1));
		nwrites += 4;
		if (nwrites >= 320) {
			clear_errors();
			nwrites = 0;
		}
	}
}
2118
/*
 * Read back the pattern written by write_testing() and compare against
 * get_etalon2().  Returns a bitmask of failing byte lanes (bit i set =
 * byte lane i saw at least one mismatch); reads go through the
 * temporary MTRR mapping which is torn down and flushed afterwards.
 */
static u8 check_testing(struct raminfo *info, u8 total_rank, int flip)
{
	u8 failmask = 0;
	int i;
	int comp1, comp2, comp3;
	u32 failxor[2] = { 0, 0 };

	enable_cache((total_rank << 28), 1728 * 5 * 4);

	for (comp3 = 0; comp3 < 9 && failmask != 0xff; comp3++) {
		for (comp1 = 0; comp1 < 4; comp1++)
			for (comp2 = 0; comp2 < 60; comp2++) {
				u32 re[4];
				u32 curroffset =
				    comp3 * 8 * 60 + 2 * comp1 + 8 * comp2;
				read128((total_rank << 28) | (curroffset << 3),
					(u64 *) re);
				/* Accumulate mismatch bits per 64-bit half. */
				failxor[0] |=
				    get_etalon2(flip, curroffset) ^ re[0];
				failxor[1] |=
				    get_etalon2(flip, curroffset) ^ re[1];
				failxor[0] |=
				    get_etalon2(flip, curroffset | 1) ^ re[2];
				failxor[1] |=
				    get_etalon2(flip, curroffset | 1) ^ re[3];
			}
		/* Fold the xor accumulators into one bit per byte lane. */
		for (i = 0; i < 8; i++)
			if ((0xff << (8 * (i % 4))) & failxor[i / 4])
				failmask |= 1 << i;
	}
	disable_cache();
	flush_cache((total_rank << 28), 1728 * 5 * 4);
	return failmask;
}
2153
/* Pseudo-random bit source for get_etalon() test patterns; 24 entries
   indexed by address bits 16..20 (values from the original RE dump). */
const u32 seed1[0x18] = {
	0x3a9d5ab5, 0x576cb65b, 0x555773b6, 0x2ab772ee,
	0x555556ee, 0x3a9d5ab5, 0x576cb65b, 0x555773b6,
	0x2ab772ee, 0x555556ee, 0x5155a555, 0x5155a555,
	0x5155a555, 0x5155a555, 0x3a9d5ab5, 0x576cb65b,
	0x555773b6, 0x2ab772ee, 0x555556ee, 0x55d6b4a5,
	0x366d6b3a, 0x2ae5ddbb, 0x3b9ddbb7, 0x55d6b4a5,
};
2162
/*
 * Secondary seed table lookup for get_etalon(): selects one of five
 * seed words from `a` (with a skip after index 9) and inverts it when
 * `b` is nonzero.
 */
static u32 get_seed2(int a, int b)
{
	const u32 seed2[5] = {
		0x55555555, 0x33333333, 0x2e555a55, 0x55555555,
		0x5b6db6db,
	};
	u32 value = seed2[(a + (a >= 10)) / 5];

	if (b)
		value = ~value;
	return value;
}
2173
/*
 * Compute a 5-bit shift amount for the pattern generators: start from
 * comp2 and subtract one when bit (x & 7) of the seed3 entry selected
 * by comp5 is set, wrapping modulo 32.
 */
static int make_shift(int comp2, int comp5, int x)
{
	const u8 seed3[32] = {
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x38, 0x1c, 0x3c, 0x18, 0x38, 0x38,
		0x38, 0x38, 0x38, 0x38, 0x0f, 0x0f, 0x0f, 0x0f,
		0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
	};
	int adjust = (seed3[comp5] >> (x & 7)) & 1;

	return (comp2 - adjust) & 0x1f;
}
2185
2186static u32 get_etalon(int flip, u32 addr)
2187{
2188 u32 mask_byte = 0;
2189 int comp1 = (addr >> 1) & 1;
2190 int comp2 = (addr >> 3) & 0x1f;
2191 int comp3 = (addr >> 8) & 0xf;
2192 int comp4 = (addr >> 12) & 0xf;
2193 int comp5 = (addr >> 16) & 0x1f;
2194 u32 mask_bit = ~(0x10001 << comp3);
2195 u32 part1;
2196 u32 part2;
2197 int byte;
2198
2199 part2 =
2200 ((seed1[comp5] >>
2201 make_shift(comp2, comp5,
2202 (comp3 >> 3) | (comp1 << 2) | 2)) & 1) ^ flip;
2203 part1 =
2204 ((seed1[comp5] >>
2205 make_shift(comp2, comp5,
2206 (comp3 >> 3) | (comp1 << 2) | 0)) & 1) ^ flip;
2207
2208 for (byte = 0; byte < 4; byte++)
2209 if ((get_seed2(comp5, comp4) >>
2210 make_shift(comp2, comp5, (byte | (comp1 << 2)))) & 1)
2211 mask_byte |= 0xff << (8 * byte);
2212
2213 return (mask_bit & mask_byte) | (part1 << comp3) | (part2 <<
2214 (comp3 + 16));
2215}
2216
/*
 * Fill one 8 KiB test block (selected by totalrank/region/block) with
 * the get_etalon() pattern, one dword per address step.
 */
static void
write_testing_type2(struct raminfo *info, u8 totalrank, u8 region, u8 block,
		    char flip)
{
	int i;
	for (i = 0; i < 2048; i++)
		write32((totalrank << 28) | (region << 25) | (block << 16) |
			(i << 2), get_etalon(flip, (block << 16) | (i << 2)));
}
2226
/*
 * Read back a type-2 test region and compare it against the expected
 * get_etalon() pattern.  Returns a bitmask with one bit per byte lane
 * (bit i set == lane i mismatched somewhere in the region).
 */
static u8
check_testing_type2(struct raminfo *info, u8 totalrank, u8 region, u8 block,
		    char flip)
{
	u8 failmask = 0;
	u32 failxor[2];
	int i;
	int comp1, comp2, comp3;

	failxor[0] = 0;
	failxor[1] = 0;

	/* 134217728 == 128 MiB: make the whole rank window cacheable. */
	enable_cache(totalrank << 28, 134217728);
	for (comp3 = 0; comp3 < 2 && failmask != 0xff; comp3++) {
		/* Accumulate XOR of observed vs expected, split by comp1 parity. */
		for (comp1 = 0; comp1 < 16; comp1++)
			for (comp2 = 0; comp2 < 64; comp2++) {
				u32 addr =
				    (totalrank << 28) | (region << 25) | (block
									  << 16)
				    | (comp3 << 12) | (comp2 << 6) | (comp1 <<
								      2);
				failxor[comp1 & 1] |=
				    read32(addr) ^ get_etalon(flip, addr);
			}
		/* Collapse the XOR words into one fail bit per byte lane. */
		for (i = 0; i < 8; i++)
			if ((0xff << (8 * (i % 4))) & failxor[i / 4])
				failmask |= 1 << i;
	}
	disable_cache();
	flush_cache((totalrank << 28) | (region << 25) | (block << 16), 16384);
	return failmask;
}
2259
2260static int check_bounded(unsigned short *vals, u16 bound)
2261{
2262 int i;
2263
2264 for (i = 0; i < 8; i++)
2265 if (vals[i] < bound)
2266 return 0;
2267 return 1;
2268}
2269
/*
 * Per-lane search state used by do_fsm() while sweeping a timing value:
 * BEFORE_USABLE = still failing, AT_USABLE = passing but margin not yet
 * proven, AT_MARGIN = lower bound recorded, COMPLETE = both bounds found.
 */
enum state {
	BEFORE_USABLE = 0, AT_USABLE = 1, AT_MARGIN = 2, COMPLETE = 3
};
2273
2274static int validate_state(enum state *in)
2275{
2276 int i;
2277 for (i = 0; i < 8; i++)
2278 if (in[i] != COMPLETE)
2279 return 0;
2280 return 1;
2281}
2282
2283static void
2284do_fsm(enum state *state, u16 * counter,
2285 u8 fail_mask, int margin, int uplimit,
2286 u8 * res_low, u8 * res_high, u8 val)
2287{
2288 int lane;
2289
2290 for (lane = 0; lane < 8; lane++) {
2291 int is_fail = (fail_mask >> lane) & 1;
2292 switch (state[lane]) {
2293 case BEFORE_USABLE:
2294 if (!is_fail) {
2295 counter[lane] = 1;
2296 state[lane] = AT_USABLE;
2297 break;
2298 }
2299 counter[lane] = 0;
2300 state[lane] = BEFORE_USABLE;
2301 break;
2302 case AT_USABLE:
2303 if (!is_fail) {
2304 ++counter[lane];
2305 if (counter[lane] >= margin) {
2306 state[lane] = AT_MARGIN;
2307 res_low[lane] = val - margin + 1;
2308 break;
2309 }
2310 state[lane] = 1;
2311 break;
2312 }
2313 counter[lane] = 0;
2314 state[lane] = BEFORE_USABLE;
2315 break;
2316 case AT_MARGIN:
2317 if (is_fail) {
2318 state[lane] = COMPLETE;
2319 res_high[lane] = val - 1;
2320 } else {
2321 counter[lane]++;
2322 state[lane] = AT_MARGIN;
2323 if (val == uplimit) {
2324 state[lane] = COMPLETE;
2325 res_high[lane] = uplimit;
2326 }
2327 }
2328 break;
2329 case COMPLETE:
2330 break;
2331 }
2332 }
2333}
2334
/*
 * Train one rank's lane timings at a fixed value of register 0x178.
 *
 * Sweeps registers 0x1b3/0x1a3 over 0..0x2f, running the pattern test at
 * each step and feeding the per-lane fail bits to do_fsm() to find each
 * lane's usable [smallest, largest] window; the bounds are stored into
 * timings[reg_178][channel][slot][rank][lane].
 *
 * When first_run == 0 the previously recorded window is additionally
 * tightened in-system: the rank is re-tested at its smallest bound
 * (incrementing it on failure) and then at its largest bound (decrementing
 * on failure), requiring 2 resp. 3 consecutive clean iterations per lane
 * before accepting.  Finally the original lane timings are restored and
 * degenerate windows are zeroed out.
 */
static void
train_ram_at_178(struct raminfo *info, u8 channel, int slot, int rank,
		 u8 total_rank, u8 reg_178, int first_run, int niter,
		 timing_bounds_t * timings)
{
	int lane;
	enum state state[8];
	u16 count[8];
	u8 lower_usable[8];
	u8 upper_usable[8];
	unsigned short num_sucessfully_checked[8];
	u8 secondary_total_rank;
	u8 reg1b3;

	/* NOTE(review): secondary_total_rank is computed here but never
	   read below — possibly a leftover from reverse engineering. */
	if (info->populated_ranks_mask[1]) {
		if (channel == 1)
			secondary_total_rank =
			    info->populated_ranks[1][0][0] +
			    info->populated_ranks[1][0][1]
			    + info->populated_ranks[1][1][0] +
			    info->populated_ranks[1][1][1];
		else
			secondary_total_rank = 0;
	} else
		secondary_total_rank = total_rank;

	{
		int i;
		for (i = 0; i < 8; i++)
			state[i] = BEFORE_USABLE;
	}

	/* On re-runs, a lane whose recorded window collapsed to a single
	   value is reset; if every lane still has a window, skip the sweep. */
	if (!first_run) {
		int is_all_ok = 1;
		for (lane = 0; lane < 8; lane++)
			if (timings[reg_178][channel][slot][rank][lane].
			    smallest ==
			    timings[reg_178][channel][slot][rank][lane].
			    largest) {
				timings[reg_178][channel][slot][rank][lane].
				    smallest = 0;
				timings[reg_178][channel][slot][rank][lane].
				    largest = 0;
				is_all_ok = 0;
			}
		if (is_all_ok) {
			int i;
			for (i = 0; i < 8; i++)
				state[i] = COMPLETE;
		}
	}

	/* Sweep 0x1b3/0x1a3 (offset by 32) and track pass/fail per lane. */
	for (reg1b3 = 0; reg1b3 < 0x30 && !validate_state(state); reg1b3++) {
		u8 failmask = 0;
		write_1d0(reg1b3 ^ 32, 0x1b3, 6, 1);
		write_1d0(reg1b3 ^ 32, 0x1a3, 6, 1);
		failmask = check_testing(info, total_rank, 0);
		write_mchbar32(0xfb0, read_mchbar32(0xfb0) | 0x00030000);
		do_fsm(state, count, failmask, 5, 47, lower_usable,
		       upper_usable, reg1b3);
	}

	/* Translate the sweep results into absolute lane timings
	   (rebased on lane_timings[0] - 32). */
	if (reg1b3) {
		write_1d0(0, 0x1b3, 6, 1);
		write_1d0(0, 0x1a3, 6, 1);
		for (lane = 0; lane < 8; lane++) {
			if (state[lane] == COMPLETE) {
				timings[reg_178][channel][slot][rank][lane].
				    smallest =
				    lower_usable[lane] +
				    (info->training.
				     lane_timings[0][channel][slot][rank][lane]
				     & 0x3F) - 32;
				timings[reg_178][channel][slot][rank][lane].
				    largest =
				    upper_usable[lane] +
				    (info->training.
				     lane_timings[0][channel][slot][rank][lane]
				     & 0x3F) - 32;
			}
		}
	}

	if (!first_run) {
		/* Phase 1: verify/raise the smallest bound.  Program each
		   completed lane at its smallest value; lanes that are not
		   COMPLETE are marked done with the 0xffff sentinel. */
		for (lane = 0; lane < 8; lane++)
			if (state[lane] == COMPLETE) {
				write_500(info, channel,
					  timings[reg_178][channel][slot][rank]
					  [lane].smallest,
					  get_timing_register_addr(lane, 0,
								   slot, rank),
					  9, 1);
				write_500(info, channel,
					  timings[reg_178][channel][slot][rank]
					  [lane].smallest +
					  info->training.
					  lane_timings[1][channel][slot][rank]
					  [lane]
					  -
					  info->training.
					  lane_timings[0][channel][slot][rank]
					  [lane], get_timing_register_addr(lane,
									   1,
									   slot,
									   rank),
					  9, 1);
				num_sucessfully_checked[lane] = 0;
			} else
				num_sucessfully_checked[lane] = -1;

		/* Re-test until every lane has passed 2 consecutive rounds;
		   on failure, bump smallest up and restart that lane. */
		do {
			u8 failmask = 0;
			int i;
			for (i = 0; i < niter; i++) {
				if (failmask == 0xFF)
					break;
				failmask |=
				    check_testing_type2(info, total_rank, 2, i,
							0);
				failmask |=
				    check_testing_type2(info, total_rank, 3, i,
							1);
			}
			write_mchbar32(0xfb0,
				       read_mchbar32(0xfb0) | 0x00030000);
			for (lane = 0; lane < 8; lane++)
				if (num_sucessfully_checked[lane] != 0xffff) {
					if ((1 << lane) & failmask) {
						if (timings[reg_178][channel]
						    [slot][rank][lane].
						    largest <=
						    timings[reg_178][channel]
						    [slot][rank][lane].smallest)
							/* Window exhausted: give up on this lane. */
							num_sucessfully_checked
							    [lane] = -1;
						else {
							num_sucessfully_checked
							    [lane] = 0;
							timings[reg_178]
							    [channel][slot]
							    [rank][lane].
							    smallest++;
							write_500(info, channel,
								  timings
								  [reg_178]
								  [channel]
								  [slot][rank]
								  [lane].
								  smallest,
								  get_timing_register_addr
								  (lane, 0,
								   slot, rank),
								  9, 1);
							write_500(info, channel,
								  timings
								  [reg_178]
								  [channel]
								  [slot][rank]
								  [lane].
								  smallest +
								  info->
								  training.
								  lane_timings
								  [1][channel]
								  [slot][rank]
								  [lane]
								  -
								  info->
								  training.
								  lane_timings
								  [0][channel]
								  [slot][rank]
								  [lane],
								  get_timing_register_addr
								  (lane, 1,
								   slot, rank),
								  9, 1);
						}
					} else
						num_sucessfully_checked[lane]++;
				}
		}
		while (!check_bounded(num_sucessfully_checked, 2));

		/* Phase 2: verify/lower the largest bound the same way,
		   this time requiring 3 consecutive clean rounds. */
		for (lane = 0; lane < 8; lane++)
			if (state[lane] == COMPLETE) {
				write_500(info, channel,
					  timings[reg_178][channel][slot][rank]
					  [lane].largest,
					  get_timing_register_addr(lane, 0,
								   slot, rank),
					  9, 1);
				write_500(info, channel,
					  timings[reg_178][channel][slot][rank]
					  [lane].largest +
					  info->training.
					  lane_timings[1][channel][slot][rank]
					  [lane]
					  -
					  info->training.
					  lane_timings[0][channel][slot][rank]
					  [lane], get_timing_register_addr(lane,
									   1,
									   slot,
									   rank),
					  9, 1);
				num_sucessfully_checked[lane] = 0;
			} else
				num_sucessfully_checked[lane] = -1;

		do {
			int failmask = 0;
			int i;
			for (i = 0; i < niter; i++) {
				if (failmask == 0xFF)
					break;
				failmask |=
				    check_testing_type2(info, total_rank, 2, i,
							0);
				failmask |=
				    check_testing_type2(info, total_rank, 3, i,
							1);
			}

			write_mchbar32(0xfb0,
				       read_mchbar32(0xfb0) | 0x00030000);
			for (lane = 0; lane < 8; lane++) {
				if (num_sucessfully_checked[lane] != 0xffff) {
					if ((1 << lane) & failmask) {
						if (timings[reg_178][channel]
						    [slot][rank][lane].
						    largest <=
						    timings[reg_178][channel]
						    [slot][rank][lane].
						    smallest) {
							num_sucessfully_checked
							    [lane] = -1;
						} else {
							num_sucessfully_checked
							    [lane] = 0;
							timings[reg_178]
							    [channel][slot]
							    [rank][lane].
							    largest--;
							write_500(info, channel,
								  timings
								  [reg_178]
								  [channel]
								  [slot][rank]
								  [lane].
								  largest,
								  get_timing_register_addr
								  (lane, 0,
								   slot, rank),
								  9, 1);
							write_500(info, channel,
								  timings
								  [reg_178]
								  [channel]
								  [slot][rank]
								  [lane].
								  largest +
								  info->
								  training.
								  lane_timings
								  [1][channel]
								  [slot][rank]
								  [lane]
								  -
								  info->
								  training.
								  lane_timings
								  [0][channel]
								  [slot][rank]
								  [lane],
								  get_timing_register_addr
								  (lane, 1,
								   slot, rank),
								  9, 1);
						}
					} else
						num_sucessfully_checked[lane]++;
				}
			}
		}
		while (!check_bounded(num_sucessfully_checked, 3));

		/* Restore the original timings; zero out any window that
		   collapsed during refinement. */
		for (lane = 0; lane < 8; lane++) {
			write_500(info, channel,
				  info->training.
				  lane_timings[0][channel][slot][rank][lane],
				  get_timing_register_addr(lane, 0, slot, rank),
				  9, 1);
			write_500(info, channel,
				  info->training.
				  lane_timings[1][channel][slot][rank][lane],
				  get_timing_register_addr(lane, 1, slot, rank),
				  9, 1);
			if (timings[reg_178][channel][slot][rank][lane].
			    largest <=
			    timings[reg_178][channel][slot][rank][lane].
			    smallest) {
				timings[reg_178][channel][slot][rank][lane].
				    largest = 0;
				timings[reg_178][channel][slot][rank][lane].
				    smallest = 0;
			}
		}
	}
}
2645
/*
 * Set register 0x10b (6-bit field) and compensate every populated rank's
 * timing register 0 by the clock-speed-dependent lut16 offset: subtract
 * when turning the bit on (clamped at 0), add when turning it off.
 * No-op if the register already holds `val`.  Iterates 9 lanes (8 data +
 * ECC).
 */
static void set_10b(struct raminfo *info, u8 val)
{
	int channel;
	int slot, rank;
	int lane;

	if (read_1d0(0x10b, 6) == val)
		return;

	write_1d0(val, 0x10b, 6, 1);

	FOR_POPULATED_RANKS_BACKWARDS for (lane = 0; lane < 9; lane++) {
		u16 reg_500;
		reg_500 = read_500(info, channel,
				   get_timing_register_addr(lane, 0, slot,
							    rank), 9);
		if (val == 1) {
			if (lut16[info->clock_speed_index] <= reg_500)
				reg_500 -= lut16[info->clock_speed_index];
			else
				reg_500 = 0;
		} else {
			reg_500 += lut16[info->clock_speed_index];
		}
		write_500(info, channel, reg_500,
			  get_timing_register_addr(lane, 0, slot, rank), 9, 1);
	}
}
2674
2675static void set_ecc(int onoff)
2676{
2677 int channel;
2678 for (channel = 0; channel < NUM_CHANNELS; channel++) {
2679 u8 t;
2680 t = read_mchbar8((channel << 10) + 0x5f8);
2681 if (onoff)
2682 t |= 1;
2683 else
2684 t &= ~1;
2685 write_mchbar8((channel << 10) + 0x5f8, t);
2686 }
2687}
2688
2689static void set_178(u8 val)
2690{
2691 if (val >= 31)
2692 val = val - 31;
2693 else
2694 val = 63 - val;
2695
2696 write_1d0(2 * val, 0x178, 7, 1);
2697}
2698
/*
 * Program all 8 data lanes of one rank from the saved
 * lane_timings[type] table (timing register `type`, no read-back).
 */
static void
write_500_timings_type(struct raminfo *info, int channel, int slot, int rank,
		       int type)
{
	int lane;

	for (lane = 0; lane < 8; lane++)
		write_500(info, channel,
			  info->training.
			  lane_timings[type][channel][slot][rank][lane],
			  get_timing_register_addr(lane, type, slot, rank), 9,
			  0);
}
2712
/*
 * Find, per lane, the usable window of the 0x1bb timing offset for one
 * rank: sweep 0..63 (offset by 32), run the alternating-pattern test,
 * and feed the fail bits to do_fsm() (margin 10, upper limit 63).  The
 * lane's timing register 3 is then centered in its window (with a small
 * silicon-revision-dependent bias) and the bounds/offset are recorded in
 * info->training for later cached-training validation.
 * Dies if any lane never produced a valid window.
 */
static void
try_timing_offsets(struct raminfo *info, int channel,
		   int slot, int rank, int totalrank)
{
	u16 count[8];
	enum state state[8];
	u8 lower_usable[8], upper_usable[8];
	int lane;
	int i;
	int flip = 1;
	int timing_offset;

	for (i = 0; i < 8; i++)
		state[i] = BEFORE_USABLE;

	memset(count, 0, sizeof(count));

	for (lane = 0; lane < 8; lane++)
		write_500(info, channel,
			  info->training.
			  lane_timings[2][channel][slot][rank][lane] + 32,
			  get_timing_register_addr(lane, 3, slot, rank), 9, 1);

	for (timing_offset = 0; !validate_state(state) && timing_offset < 64;
	     timing_offset++) {
		u8 failmask;
		write_1d0(timing_offset ^ 32, 0x1bb, 6, 1);
		failmask = 0;
		/* Test with both pattern polarities. */
		for (i = 0; i < 2 && failmask != 0xff; i++) {
			flip = !flip;
			write_testing(info, totalrank, flip);
			failmask |= check_testing(info, totalrank, flip);
		}
		do_fsm(state, count, failmask, 10, 63, lower_usable,
		       upper_usable, timing_offset);
	}
	write_1d0(0, 0x1bb, 6, 1);
	dump_timings(info);
	if (!validate_state(state))
		die("Couldn't discover DRAM timings (1)\n");

	for (lane = 0; lane < 8; lane++) {
		u8 bias = 0;

		/* On newer silicon, bias the center slightly when the window
		   is wide enough (>= 20 steps); bias is capped at 2. */
		if (info->silicon_revision) {
			int usable_length;

			usable_length = upper_usable[lane] - lower_usable[lane];
			if (usable_length >= 20) {
				bias = usable_length / 2 - 10;
				if (bias >= 2)
					bias = 2;
			}
		}
		write_500(info, channel,
			  info->training.
			  lane_timings[2][channel][slot][rank][lane] +
			  (upper_usable[lane] + lower_usable[lane]) / 2 - bias,
			  get_timing_register_addr(lane, 3, slot, rank), 9, 1);
		info->training.timing2_bounds[channel][slot][rank][lane][0] =
		    info->training.lane_timings[2][channel][slot][rank][lane] +
		    lower_usable[lane];
		info->training.timing2_bounds[channel][slot][rank][lane][1] =
		    info->training.lane_timings[2][channel][slot][rank][lane] +
		    upper_usable[lane];
		info->training.timing2_offset[channel][slot][rank][lane] =
		    info->training.lane_timings[2][channel][slot][rank][lane];
	}
}
2782
/*
 * Pick the final timing value for one lane: take a weighted average of the
 * window midpoints measured at reg_178 = center-span, center, center+span,
 * weighting the center measurement by central_weight and the side ones by
 * side_weight (both tweaked for specific silicon/population corner cases).
 * Windows narrower than 5 steps are ignored.  The result is then nudged to
 * keep at least ~10 steps of margin to each side of the center window.
 * Dies if no measurement had a usable window.
 */
static u8
choose_training(struct raminfo *info, int channel, int slot, int rank,
		int lane, timing_bounds_t * timings, u8 center_178)
{
	u16 central_weight;
	u16 side_weight;
	unsigned int sum = 0, count = 0;
	u8 span;
	u8 lower_margin, upper_margin;
	u8 reg_178;
	u8 result;

	span = 12;
	central_weight = 20;
	side_weight = 20;
	/* Special-casing observed during reverse engineering: rev-1 silicon
	   with an asymmetric channel-1 population uses a wider span. */
	if (info->silicon_revision == 1 && channel == 1) {
		central_weight = 5;
		side_weight = 20;
		if ((info->
		     populated_ranks_mask[1] ^ (info->
						populated_ranks_mask[1] >> 2)) &
		    1)
			span = 18;
	}
	if ((info->populated_ranks_mask[0] & 5) == 5) {
		central_weight = 20;
		side_weight = 20;
	}
	if (info->clock_speed_index >= 2
	    && (info->populated_ranks_mask[0] & 5) == 5 && slot == 1) {
		if (info->silicon_revision == 1) {
			switch (channel) {
			case 0:
				if (lane == 1) {
					central_weight = 10;
					side_weight = 20;
				}
				break;
			case 1:
				if (lane == 6) {
					side_weight = 5;
					central_weight = 20;
				}
				break;
			}
		}
		if (info->silicon_revision == 0 && channel == 0 && lane == 0) {
			side_weight = 5;
			central_weight = 20;
		}
	}
	for (reg_178 = center_178 - span; reg_178 <= center_178 + span;
	     reg_178 += span) {
		u8 smallest;
		u8 largest;
		largest = timings[reg_178][channel][slot][rank][lane].largest;
		smallest = timings[reg_178][channel][slot][rank][lane].smallest;
		if (largest - smallest + 1 >= 5) {
			unsigned int weight;
			if (reg_178 == center_178)
				weight = central_weight;
			else
				weight = side_weight;
			/* (largest + smallest) is twice the midpoint; the
			   division by 2 happens once, below. */
			sum += weight * (largest + smallest);
			count += weight;
		}
	}
	dump_timings(info);
	if (count == 0)
		die("Couldn't discover DRAM timings (2)\n");
	result = sum / (2 * count);
	lower_margin =
	    result - timings[center_178][channel][slot][rank][lane].smallest;
	upper_margin =
	    timings[center_178][channel][slot][rank][lane].largest - result;
	if (upper_margin < 10 && lower_margin > 10)
		result -= min(lower_margin - 10, 10 - upper_margin);
	if (upper_margin > 10 && lower_margin < 10)
		result += min(upper_margin - 10, 10 - lower_margin);
	return result;
}
2864
2865#define STANDARD_MIN_MARGIN 5
2866
/*
 * Choose the center value for register 0x178: for every candidate setting,
 * compute the worst-case (minimum) lane window across all populated ranks,
 * then return the margin-weighted average of candidates whose worst-case
 * window is at least STANDARD_MIN_MARGIN.  Also records the smallest and
 * largest "good" settings (shifted in/out by one or two steps) into
 * info->training, using the largest threshold (30 down to 5) that still
 * yields at least 0x21 of usable range.
 * Dies if no setting had enough margin.
 */
static u8 choose_reg178(struct raminfo *info, timing_bounds_t * timings)
{
	u16 margin[64];
	int lane, rank, slot, channel;
	u8 reg178;
	int count = 0, sum = 0;

	for (reg178 = reg178_min[info->clock_speed_index];
	     reg178 < reg178_max[info->clock_speed_index];
	     reg178 += reg178_step[info->clock_speed_index]) {
		/* -1 wraps to 0xffff in the u16: "no margin yet" sentinel. */
		margin[reg178] = -1;
		FOR_POPULATED_RANKS_BACKWARDS for (lane = 0; lane < 8; lane++) {
			int curmargin =
			    timings[reg178][channel][slot][rank][lane].largest -
			    timings[reg178][channel][slot][rank][lane].
			    smallest + 1;
			if (curmargin < margin[reg178])
				margin[reg178] = curmargin;
		}
		if (margin[reg178] >= STANDARD_MIN_MARGIN) {
			u16 weight;
			weight = margin[reg178] - STANDARD_MIN_MARGIN;
			sum += weight * reg178;
			count += weight;
		}
	}
	dump_timings(info);
	if (count == 0)
		die("Couldn't discover DRAM timings (3)\n");

	u8 threshold;

	for (threshold = 30; threshold >= 5; threshold--) {
		int usable_length = 0;
		/* sic: "fount" — flags that reg178_smallest has been set. */
		int smallest_fount = 0;
		for (reg178 = reg178_min[info->clock_speed_index];
		     reg178 < reg178_max[info->clock_speed_index];
		     reg178 += reg178_step[info->clock_speed_index])
			if (margin[reg178] >= threshold) {
				usable_length +=
				    reg178_step[info->clock_speed_index];
				info->training.reg178_largest =
				    reg178 -
				    2 * reg178_step[info->clock_speed_index];

				if (!smallest_fount) {
					smallest_fount = 1;
					info->training.reg178_smallest =
					    reg178 +
					    reg178_step[info->
							clock_speed_index];
				}
			}
		if (usable_length >= 0x21)
			break;
	}

	return sum / count;
}
2926
/*
 * Sanity-check the cached training data against the freshly estimated
 * timings: every cached lane_timings[1] value inside the plausible range
 * [0x18, 0x1E7] must be within +/-24 of the current estimate.  Returns 1
 * if the cache looks usable, 0 otherwise (or if there is no cache).
 */
static int check_cached_sanity(struct raminfo *info)
{
	int lane;
	int slot, rank;
	int channel;

	if (!info->cached_training)
		return 0;

	for (channel = 0; channel < NUM_CHANNELS; channel++)
		for (slot = 0; slot < NUM_SLOTS; slot++)
			for (rank = 0; rank < NUM_RANKS; rank++)
				for (lane = 0; lane < 8 + info->use_ecc; lane++) {
					u16 cached_value, estimation_value;
					cached_value =
					    info->cached_training->
					    lane_timings[1][channel][slot][rank]
					    [lane];
					if (cached_value >= 0x18
					    && cached_value <= 0x1E7) {
						estimation_value =
						    info->training.
						    lane_timings[1][channel]
						    [slot][rank][lane];
						if (estimation_value <
						    cached_value - 24)
							return 0;
						if (estimation_value >
						    cached_value + 24)
							return 0;
					}
				}
	return 1;
}
2961
/*
 * Attempt to reuse cached (e.g. suspend/resume) training results instead
 * of a full retrain.  After a sanity check, the cached timing windows are
 * verified in-system: at both the smallest and largest cached reg178
 * setting, each rank is programmed exactly on its recorded bounds (j == 0,
 * must pass) and just outside them (j == 1, must fail).  Any deviation
 * aborts to the fail path, which restores timings and returns 0 so the
 * caller falls back to do_ram_training().  Returns 1 on success.
 */
static int try_cached_training(struct raminfo *info)
{
	u8 saved_243[2];
	u8 tm;

	int channel, slot, rank, lane;
	int flip = 1;
	int i, j;

	if (!check_cached_sanity(info))
		return 0;

	info->training.reg178_center = info->cached_training->reg178_center;
	info->training.reg178_smallest = info->cached_training->reg178_smallest;
	info->training.reg178_largest = info->cached_training->reg178_largest;
	memcpy(&info->training.timing_bounds,
	       &info->cached_training->timing_bounds,
	       sizeof(info->training.timing_bounds));
	memcpy(&info->training.timing_offset,
	       &info->cached_training->timing_offset,
	       sizeof(info->training.timing_offset));

	write_1d0(2, 0x142, 3, 1);
	saved_243[0] = read_mchbar8(0x243);
	saved_243[1] = read_mchbar8(0x643);
	write_mchbar8(0x243, saved_243[0] | 2);
	write_mchbar8(0x643, saved_243[1] | 2);
	set_ecc(0);
	pcie_write_config16(NORTHBRIDGE, 0xc8, 3);
	if (read_1d0(0x10b, 6) & 1)
		set_10b(info, 0);
	for (tm = 0; tm < 2; tm++) {
		int totalrank;

		set_178(tm ? info->cached_training->reg178_largest : info->
			cached_training->reg178_smallest);

		totalrank = 0;
		/* Check timing ranges. With i == 0 we check smallest one and with
		   i == 1 the largest bound. With j == 0 we check that on the bound
		   it still works whereas with j == 1 we check that just outside of
		   bound we fail.
		 */
		FOR_POPULATED_RANKS_BACKWARDS {
			for (i = 0; i < 2; i++) {
				for (lane = 0; lane < 8; lane++) {
					write_500(info, channel,
						  info->cached_training->
						  timing2_bounds[channel][slot]
						  [rank][lane][i],
						  get_timing_register_addr(lane,
									   3,
									   slot,
									   rank),
						  9, 1);

					if (!i)
						write_500(info, channel,
							  info->
							  cached_training->
							  timing2_offset
							  [channel][slot][rank]
							  [lane],
							  get_timing_register_addr
							  (lane, 2, slot, rank),
							  9, 1);
					write_500(info, channel,
						  i ? info->cached_training->
						  timing_bounds[tm][channel]
						  [slot][rank][lane].
						  largest : info->
						  cached_training->
						  timing_bounds[tm][channel]
						  [slot][rank][lane].smallest,
						  get_timing_register_addr(lane,
									   0,
									   slot,
									   rank),
						  9, 1);
					write_500(info, channel,
						  info->cached_training->
						  timing_offset[channel][slot]
						  [rank][lane] +
						  (i ? info->cached_training->
						   timing_bounds[tm][channel]
						   [slot][rank][lane].
						   largest : info->
						   cached_training->
						   timing_bounds[tm][channel]
						   [slot][rank][lane].
						   smallest) - 64,
						  get_timing_register_addr(lane,
									   1,
									   slot,
									   rank),
						  9, 1);
				}
				for (j = 0; j < 2; j++) {
					u8 failmask;
					u8 expected_failmask;
					char reg1b3;

					reg1b3 = (j == 1) + 4;
					reg1b3 =
					    j == i ? reg1b3 : (-reg1b3) & 0x3f;
					write_1d0(reg1b3, 0x1bb, 6, 1);
					write_1d0(reg1b3, 0x1b3, 6, 1);
					write_1d0(reg1b3, 0x1a3, 6, 1);

					flip = !flip;
					write_testing(info, totalrank, flip);
					failmask =
					    check_testing(info, totalrank,
							  flip);
					expected_failmask =
					    j == 0 ? 0x00 : 0xff;
					if (failmask != expected_failmask)
						goto fail;
				}
			}
			totalrank++;
		}
	}

	set_178(info->cached_training->reg178_center);
	if (info->use_ecc)
		set_ecc(1);
	write_training_data(info);
	/* NOTE(review): 322 decimal == 0x142, the register written with 2
	   at the top of this function — decimal/hex inconsistency only. */
	write_1d0(0, 322, 3, 1);
	info->training = *info->cached_training;

	write_1d0(0, 0x1bb, 6, 1);
	write_1d0(0, 0x1b3, 6, 1);
	write_1d0(0, 0x1a3, 6, 1);
	write_mchbar8(0x243, saved_243[0]);
	write_mchbar8(0x643, saved_243[1]);

	return 1;

fail:
	/* Cached data disproved: restore saved lane timings and registers. */
	FOR_POPULATED_RANKS {
		write_500_timings_type(info, channel, slot, rank, 1);
		write_500_timings_type(info, channel, slot, rank, 2);
		write_500_timings_type(info, channel, slot, rank, 3);
	}

	write_1d0(0, 0x1bb, 6, 1);
	write_1d0(0, 0x1b3, 6, 1);
	write_1d0(0, 0x1a3, 6, 1);
	write_mchbar8(0x243, saved_243[0]);
	write_mchbar8(0x643, saved_243[1]);

	return 0;
}
3116
/*
 * Full DRAM training sequence:
 *  1. Write test patterns into every populated rank.
 *  2. First pass: train every rank at each candidate reg178 setting.
 *  3. Pick the best center (choose_reg178).
 *  4. Second pass: retrain around the center (+/-12, or +/-18 for the
 *     rev-1 asymmetric-channel-1 case, which trains the channels at
 *     different spans).
 *  5. Program each lane's final timing (choose_training) and then find
 *     the per-rank timing offsets (try_timing_offsets).
 * The number of test iterations scales with the memory clock.
 */
static void do_ram_training(struct raminfo *info)
{
	u8 saved_243[2];
	int totalrank = 0;
	u8 reg_178;
	int niter;

	timing_bounds_t timings[64];
	int lane, rank, slot, channel;
	u8 reg178_center;

	write_1d0(2, 0x142, 3, 1);
	saved_243[0] = read_mchbar8(0x243);
	saved_243[1] = read_mchbar8(0x643);
	write_mchbar8(0x243, saved_243[0] | 2);
	write_mchbar8(0x643, saved_243[1] | 2);
	/* More iterations at higher clocks. */
	switch (info->clock_speed_index) {
	case 0:
		niter = 5;
		break;
	case 1:
		niter = 10;
		break;
	default:
		niter = 19;
		break;
	}
	set_ecc(0);

	/* Step 1: fill each populated rank with the test patterns. */
	FOR_POPULATED_RANKS_BACKWARDS {
		int i;

		write_500_timings_type(info, channel, slot, rank, 0);

		write_testing(info, totalrank, 0);
		for (i = 0; i < niter; i++) {
			write_testing_type2(info, totalrank, 2, i, 0);
			write_testing_type2(info, totalrank, 3, i, 1);
		}
		pcie_write_config8(NORTHBRIDGE, 0xc0, 0x01);
		totalrank++;
	}

	if (reg178_min[info->clock_speed_index] <
	    reg178_max[info->clock_speed_index])
		memset(timings[reg178_min[info->clock_speed_index]], 0,
		       sizeof(timings[0]) *
		       (reg178_max[info->clock_speed_index] -
			reg178_min[info->clock_speed_index]));
	/* Step 2: coarse sweep over all candidate reg178 settings. */
	for (reg_178 = reg178_min[info->clock_speed_index];
	     reg_178 < reg178_max[info->clock_speed_index];
	     reg_178 += reg178_step[info->clock_speed_index]) {
		totalrank = 0;
		set_178(reg_178);
		for (channel = NUM_CHANNELS - 1; channel >= 0; channel--)
			for (slot = 0; slot < NUM_SLOTS; slot++)
				for (rank = 0; rank < NUM_RANKS; rank++) {
					memset(&timings[reg_178][channel][slot]
					       [rank][0].smallest, 0, 16);
					if (info->
					    populated_ranks[channel][slot]
					    [rank]) {
						train_ram_at_178(info, channel,
								 slot, rank,
								 totalrank,
								 reg_178, 1,
								 niter,
								 timings);
						totalrank++;
					}
				}
	}

	/* Step 3. */
	reg178_center = choose_reg178(info, timings);

	FOR_POPULATED_RANKS_BACKWARDS for (lane = 0; lane < 8; lane++) {
		info->training.timing_bounds[0][channel][slot][rank][lane].
		    smallest =
		    timings[info->training.
			    reg178_smallest][channel][slot][rank][lane].
		    smallest;
		info->training.timing_bounds[0][channel][slot][rank][lane].
		    largest =
		    timings[info->training.
			    reg178_smallest][channel][slot][rank][lane].largest;
		info->training.timing_bounds[1][channel][slot][rank][lane].
		    smallest =
		    timings[info->training.
			    reg178_largest][channel][slot][rank][lane].smallest;
		info->training.timing_bounds[1][channel][slot][rank][lane].
		    largest =
		    timings[info->training.
			    reg178_largest][channel][slot][rank][lane].largest;
		info->training.timing_offset[channel][slot][rank][lane] =
		    info->training.lane_timings[1][channel][slot][rank][lane]
		    -
		    info->training.lane_timings[0][channel][slot][rank][lane] +
		    64;
	}

	/* Step 4: refine around the center.  Rev-1 silicon with asymmetric
	   channel-1 population trains channel 1 at span 18 and channel 0 at
	   span 12; otherwise all channels use span 12. */
	if (info->silicon_revision == 1
	    && (info->
		populated_ranks_mask[1] ^ (info->
					   populated_ranks_mask[1] >> 2)) & 1) {
		int ranks_after_channel1;

		totalrank = 0;
		for (reg_178 = reg178_center - 18;
		     reg_178 <= reg178_center + 18; reg_178 += 18) {
			totalrank = 0;
			set_178(reg_178);
			for (slot = 0; slot < NUM_SLOTS; slot++)
				for (rank = 0; rank < NUM_RANKS; rank++) {
					if (info->
					    populated_ranks[1][slot][rank]) {
						train_ram_at_178(info, 1, slot,
								 rank,
								 totalrank,
								 reg_178, 0,
								 niter,
								 timings);
						totalrank++;
					}
				}
		}
		ranks_after_channel1 = totalrank;

		for (reg_178 = reg178_center - 12;
		     reg_178 <= reg178_center + 12; reg_178 += 12) {
			totalrank = ranks_after_channel1;
			set_178(reg_178);
			for (slot = 0; slot < NUM_SLOTS; slot++)
				for (rank = 0; rank < NUM_RANKS; rank++)
					if (info->
					    populated_ranks[0][slot][rank]) {
						train_ram_at_178(info, 0, slot,
								 rank,
								 totalrank,
								 reg_178, 0,
								 niter,
								 timings);
						totalrank++;
					}

		}
	} else {
		for (reg_178 = reg178_center - 12;
		     reg_178 <= reg178_center + 12; reg_178 += 12) {
			totalrank = 0;
			set_178(reg_178);
			FOR_POPULATED_RANKS_BACKWARDS {
				train_ram_at_178(info, channel, slot, rank,
						 totalrank, reg_178, 0, niter,
						 timings);
				totalrank++;
			}
		}
	}

	/* Step 5: commit final per-lane timings. */
	set_178(reg178_center);
	FOR_POPULATED_RANKS_BACKWARDS for (lane = 0; lane < 8; lane++) {
		u16 tm0;

		tm0 =
		    choose_training(info, channel, slot, rank, lane, timings,
				    reg178_center);
		write_500(info, channel, tm0,
			  get_timing_register_addr(lane, 0, slot, rank), 9, 1);
		write_500(info, channel,
			  tm0 +
			  info->training.
			  lane_timings[1][channel][slot][rank][lane] -
			  info->training.
			  lane_timings[0][channel][slot][rank][lane],
			  get_timing_register_addr(lane, 1, slot, rank), 9, 1);
	}

	totalrank = 0;
	FOR_POPULATED_RANKS_BACKWARDS {
		try_timing_offsets(info, channel, slot, rank, totalrank);
		totalrank++;
	}
	write_mchbar8(0x243, saved_243[0]);
	write_mchbar8(0x643, saved_243[1]);
	write_1d0(0, 0x142, 3, 1);
	info->training.reg178_center = reg178_center;
}
3304
/*
 * Top-level training entry: mask 0xfc4 during training, try the cached
 * results first and fall back to a full retrain, then apply the 0x10b
 * adjustment for the silicon revisions / clocks that need it.
 */
static void ram_training(struct raminfo *info)
{
	u16 saved_fc4;

	saved_fc4 = read_mchbar16(0xfc4);
	write_mchbar16(0xfc4, 0xffff);

	if (info->revision >= 8)
		read_4090(info);

	if (!try_cached_training(info))
		do_ram_training(info);
	if ((info->silicon_revision == 2 || info->silicon_revision == 3)
	    && info->clock_speed_index < 2)
		set_10b(info, 1);
	write_mchbar16(0xfc4, saved_fc4);
}
3322
static unsigned gcd(unsigned a, unsigned b)
{
	/*
	 * Euclid's algorithm.  No initial ordering is needed: if a > b the
	 * first iteration effectively swaps them (b % a == b when b < a).
	 * gcd(x, 0) == gcd(0, x) == x; gcd(0, 0) == 0.
	 */
	while (a) {
		unsigned r = b % a;
		b = a;
		a = r;
	}
	return b;
}
3339
/* Integer ceiling division: smallest integer >= a/b, for a >= 0, b > 0. */
static inline int div_roundup(int a, int b)
{
	return (a + b - 1) / b;
}
3344
/*
 * Least common multiple.  Divide before multiplying: gcd(a, b) divides a,
 * so (a / gcd) * b equals (a * b) / gcd without the risk of the a * b
 * intermediate overflowing `unsigned` (the operands here are frequencies
 * in the thousands, whose product approaches the 32-bit limit).
 */
static unsigned lcm(unsigned a, unsigned b)
{
	return (a / gcd(a, b)) * b;
}
3349
/*
 * Result of compute_frequence_ratios(): bookkeeping for expressing two
 * derived cycle counts (freq3, freq4) relative to a pair of clock
 * frequencies reduced by their gcd.  Field meanings follow from the
 * computation in compute_frequence_ratios(); names are from reverse
 * engineering.
 */
struct stru1 {
	/* 1 if freq2_reduced < freq1_reduced (see freqs comparison). */
	u8 freqs_reversed;
	/* freq_max_reduced - freq_min_reduced. */
	u8 freq_diff_reduced;
	u8 freq_min_reduced;
	/* freq4 / freq_max_reduced (optionally rounded up). */
	u8 divisor_f4_to_fmax;
	/* freq3 / freq_max_reduced (optionally rounded up). */
	u8 divisor_f3_to_fmax;
	/* Negated remainder terms; zero when round_it was requested. */
	u8 freq4_to_max_remainder;
	u8 freq3_to_2_remainder;
	u8 freq3_to_2_remaindera;
	u8 freq4_to_2_remainder;
	int divisor_f3_to_f1, divisor_f4_to_f2;
	/* Common time base: div_roundup(900000, lcm(freq1, freq2)). */
	int common_time_unit_ps;
	int freq_max_reduced;
};
3364
/*
 * Express the cycle counts num_cycles_1/num_cycles_2 (picoseconds) in a
 * time base common to the two clocks freq1/freq2, producing the divisors
 * and (negated, deliberately narrowed) remainders stored in *result.
 * With round_it the remainder fields are zeroed and the fmax divisors are
 * rounded up instead; with add_freqs one reduced frequency is added to
 * each derived count.  The asymmetric freq1/freq2 branches mirror each
 * other with the roles of the two frequencies exchanged.
 */
static void
compute_frequence_ratios(struct raminfo *info, u16 freq1, u16 freq2,
			 int num_cycles_2, int num_cycles_1, int round_it,
			 int add_freqs, struct stru1 *result)
{
	int g;
	int common_time_unit_ps;
	int freq1_reduced, freq2_reduced;
	int freq_min_reduced;
	int freq_max_reduced;
	int freq3, freq4;

	/* Reduce both frequencies by their gcd. */
	g = gcd(freq1, freq2);
	freq1_reduced = freq1 / g;
	freq2_reduced = freq2 / g;
	freq_min_reduced = min(freq1_reduced, freq2_reduced);
	freq_max_reduced = max(freq1_reduced, freq2_reduced);

	common_time_unit_ps = div_roundup(900000, lcm(freq1, freq2));
	freq3 = div_roundup(num_cycles_2, common_time_unit_ps) - 1;
	freq4 = div_roundup(num_cycles_1, common_time_unit_ps) - 1;
	if (add_freqs) {
		freq3 += freq2_reduced;
		freq4 += freq1_reduced;
	}

	if (round_it) {
		result->freq3_to_2_remainder = 0;
		result->freq3_to_2_remaindera = 0;
		result->freq4_to_max_remainder = 0;
		result->divisor_f4_to_f2 = 0;
		result->divisor_f3_to_f1 = 0;
	} else {
		if (freq2_reduced < freq1_reduced) {
			result->freq3_to_2_remainder =
			    result->freq3_to_2_remaindera =
			    freq3 % freq1_reduced - freq1_reduced + 1;
			result->freq4_to_max_remainder =
			    -(freq4 % freq1_reduced);
			result->divisor_f3_to_f1 = freq3 / freq1_reduced;
			result->divisor_f4_to_f2 =
			    (freq4 -
			     (freq1_reduced - freq2_reduced)) / freq2_reduced;
			/* The (char)/(u8) casts intentionally truncate to the
			   register field width before negation. */
			result->freq4_to_2_remainder =
			    -(char)((freq1_reduced - freq2_reduced) +
				    ((u8) freq4 -
				     (freq1_reduced -
				      freq2_reduced)) % (u8) freq2_reduced);
		} else {
			if (freq2_reduced > freq1_reduced) {
				result->freq4_to_max_remainder =
				    (freq4 % freq2_reduced) - freq2_reduced + 1;
				result->freq4_to_2_remainder =
				    freq4 % freq_max_reduced -
				    freq_max_reduced + 1;
			} else {
				result->freq4_to_max_remainder =
				    -(freq4 % freq2_reduced);
				result->freq4_to_2_remainder =
				    -(char)(freq4 % freq_max_reduced);
			}
			result->divisor_f4_to_f2 = freq4 / freq2_reduced;
			result->divisor_f3_to_f1 =
			    (freq3 -
			     (freq2_reduced - freq1_reduced)) / freq1_reduced;
			result->freq3_to_2_remainder = -(freq3 % freq2_reduced);
			result->freq3_to_2_remaindera =
			    -(char)((freq_max_reduced - freq_min_reduced) +
				    (freq3 -
				     (freq_max_reduced -
				      freq_min_reduced)) % freq1_reduced);
		}
	}
	result->divisor_f3_to_fmax = freq3 / freq_max_reduced;
	result->divisor_f4_to_fmax = freq4 / freq_max_reduced;
	if (round_it) {
		/* Round the divisor up on the side whose frequency is lower. */
		if (freq2_reduced > freq1_reduced) {
			if (freq3 % freq_max_reduced)
				result->divisor_f3_to_fmax++;
		}
		if (freq2_reduced < freq1_reduced) {
			if (freq4 % freq_max_reduced)
				result->divisor_f4_to_fmax++;
		}
	}
	result->freqs_reversed = (freq2_reduced < freq1_reduced);
	result->freq_diff_reduced = freq_max_reduced - freq_min_reduced;
	result->freq_min_reduced = freq_min_reduced;
	result->common_time_unit_ps = common_time_unit_ps;
	result->freq_max_reduced = freq_max_reduced;
}
3456
3457static void
3458set_2d5x_reg(struct raminfo *info, u16 reg, u16 freq1, u16 freq2,
3459 int num_cycles_2, int num_cycles_1, int num_cycles_3,
3460 int num_cycles_4, int reverse)
3461{
3462 struct stru1 vv;
3463 char multiplier;
3464
3465 compute_frequence_ratios(info, freq1, freq2, num_cycles_2, num_cycles_1,
3466 0, 1, &vv);
3467
3468 multiplier =
3469 div_roundup(max
3470 (div_roundup(num_cycles_2, vv.common_time_unit_ps) +
3471 div_roundup(num_cycles_3, vv.common_time_unit_ps),
3472 div_roundup(num_cycles_1,
3473 vv.common_time_unit_ps) +
3474 div_roundup(num_cycles_4, vv.common_time_unit_ps))
3475 + vv.freq_min_reduced - 1, vv.freq_max_reduced) - 1;
3476
3477 u32 y =
3478 (u8) ((vv.freq_max_reduced - vv.freq_min_reduced) +
3479 vv.freq_max_reduced * multiplier)
3480 | (vv.
3481 freqs_reversed << 8) | ((u8) (vv.freq_min_reduced *
3482 multiplier) << 16) | ((u8) (vv.
3483 freq_min_reduced
3484 *
3485 multiplier)
3486 << 24);
3487 u32 x =
3488 vv.freq3_to_2_remaindera | (vv.freq4_to_2_remainder << 8) | (vv.
3489 divisor_f3_to_f1
3490 << 16)
3491 | (vv.divisor_f4_to_f2 << 20) | (vv.freq_min_reduced << 24);
3492 if (reverse) {
3493 write_mchbar32(reg, y);
3494 write_mchbar32(reg + 4, x);
3495 } else {
3496 write_mchbar32(reg + 4, y);
3497 write_mchbar32(reg, x);
3498 }
3499}
3500
3501static void
3502set_6d_reg(struct raminfo *info, u16 reg, u16 freq1, u16 freq2,
3503 int num_cycles_1, int num_cycles_2, int num_cycles_3,
3504 int num_cycles_4)
3505{
3506 struct stru1 ratios1;
3507 struct stru1 ratios2;
3508
3509 compute_frequence_ratios(info, freq1, freq2, num_cycles_1, num_cycles_2,
3510 0, 1, &ratios2);
3511 compute_frequence_ratios(info, freq1, freq2, num_cycles_3, num_cycles_4,
3512 0, 1, &ratios1);
Vladimir Serbinenkof7a42de2014-01-09 11:10:04 +01003513 printk (BIOS_SPEW, "[%x] <= %x\n", reg,
3514 ratios1.freq4_to_max_remainder | (ratios2.
3515 freq4_to_max_remainder
3516 << 8)
3517 | (ratios1.divisor_f4_to_fmax << 16) | (ratios2.
3518 divisor_f4_to_fmax
3519 << 20));
Vladimir Serbinenkoc6f6be02013-11-12 22:32:08 +01003520 write_mchbar32(reg,
3521 ratios1.freq4_to_max_remainder | (ratios2.
3522 freq4_to_max_remainder
3523 << 8)
3524 | (ratios1.divisor_f4_to_fmax << 16) | (ratios2.
3525 divisor_f4_to_fmax
3526 << 20));
3527}
3528
3529static void
3530set_2dx8_reg(struct raminfo *info, u16 reg, u8 mode, u16 freq1, u16 freq2,
3531 int num_cycles_2, int num_cycles_1, int round_it, int add_freqs)
3532{
3533 struct stru1 ratios;
3534
3535 compute_frequence_ratios(info, freq1, freq2, num_cycles_2, num_cycles_1,
3536 round_it, add_freqs, &ratios);
3537 switch (mode) {
3538 case 0:
3539 write_mchbar32(reg + 4,
3540 ratios.freq_diff_reduced | (ratios.
3541 freqs_reversed <<
3542 8));
3543 write_mchbar32(reg,
3544 ratios.freq3_to_2_remainder | (ratios.
3545 freq4_to_max_remainder
3546 << 8)
3547 | (ratios.divisor_f3_to_fmax << 16) | (ratios.
3548 divisor_f4_to_fmax
3549 << 20) |
3550 (ratios.freq_min_reduced << 24));
3551 break;
3552
3553 case 1:
3554 write_mchbar32(reg,
3555 ratios.freq3_to_2_remainder | (ratios.
3556 divisor_f3_to_fmax
3557 << 16));
3558 break;
3559
3560 case 2:
3561 write_mchbar32(reg,
3562 ratios.freq3_to_2_remainder | (ratios.
3563 freq4_to_max_remainder
3564 << 8) | (ratios.
3565 divisor_f3_to_fmax
3566 << 16) |
3567 (ratios.divisor_f4_to_fmax << 20));
3568 break;
3569
3570 case 4:
3571 write_mchbar32(reg, (ratios.divisor_f3_to_fmax << 4)
3572 | (ratios.divisor_f4_to_fmax << 8) | (ratios.
3573 freqs_reversed
3574 << 12) |
3575 (ratios.freq_min_reduced << 16) | (ratios.
3576 freq_diff_reduced
3577 << 24));
3578 break;
3579 }
3580}
3581
/*
 * Program the full series of 0x2dxx / 0x6xx cross-clock timing registers.
 *
 * Each call below configures one register (pair) from a combination of the
 * FSB frequency, the memory frequency (frequency_11) and fixed cycle
 * constants obtained by reverse engineering.  On S3 resume, the 0x6dc and
 * 0x6e8 values are restored verbatim from the cached training data instead
 * of being recomputed from delay46_ps/delay54_ps (which set_274265()
 * derives on a cold boot).
 */
static void set_2dxx_series(struct raminfo *info, int s3resume)
{
	set_2dx8_reg(info, 0x2d00, 0, 0x78, frequency_11(info) / 2, 1359, 1005,
		     0, 1);
	set_2dx8_reg(info, 0x2d08, 0, 0x78, 0x78, 3273, 5033, 1, 1);
	set_2dx8_reg(info, 0x2d10, 0, 0x78, info->fsb_frequency, 1475, 1131, 0,
		     1);
	set_2dx8_reg(info, 0x2d18, 0, 2 * info->fsb_frequency,
		     frequency_11(info), 1231, 1524, 0, 1);
	set_2dx8_reg(info, 0x2d20, 0, 2 * info->fsb_frequency,
		     frequency_11(info) / 2, 1278, 2008, 0, 1);
	set_2dx8_reg(info, 0x2d28, 0, info->fsb_frequency, frequency_11(info),
		     1167, 1539, 0, 1);
	set_2dx8_reg(info, 0x2d30, 0, info->fsb_frequency,
		     frequency_11(info) / 2, 1403, 1318, 0, 1);
	set_2dx8_reg(info, 0x2d38, 0, info->fsb_frequency, 0x78, 3460, 5363, 1,
		     1);
	set_2dx8_reg(info, 0x2d40, 0, info->fsb_frequency, 0x3c, 2792, 5178, 1,
		     1);
	set_2dx8_reg(info, 0x2d48, 0, 2 * info->fsb_frequency, 0x78, 2738, 4610,
		     1, 1);
	set_2dx8_reg(info, 0x2d50, 0, info->fsb_frequency, 0x78, 2819, 5932, 1,
		     1);
	set_2dx8_reg(info, 0x6d4, 1, info->fsb_frequency,
		     frequency_11(info) / 2, 4000, 0, 0, 0);
	set_2dx8_reg(info, 0x6d8, 2, info->fsb_frequency,
		     frequency_11(info) / 2, 4000, 4000, 0, 0);

	/* 0x6dc: channel 0 delays; restore cached value on S3 resume. */
	if (s3resume) {
		printk (BIOS_SPEW, "[6dc] <= %x\n", info->cached_training->reg_6dc);
		write_mchbar32(0x6dc, info->cached_training->reg_6dc);
	} else
		set_6d_reg(info, 0x6dc, 2 * info->fsb_frequency, frequency_11(info), 0,
			   info->delay46_ps[0], 0,
			   info->delay54_ps[0]);
	set_2dx8_reg(info, 0x6e0, 1, 2 * info->fsb_frequency,
		     frequency_11(info), 2500, 0, 0, 0);
	set_2dx8_reg(info, 0x6e4, 1, 2 * info->fsb_frequency,
		     frequency_11(info) / 2, 3500, 0, 0, 0);
	/* 0x6e8: channel 1 delays; restore cached value on S3 resume. */
	if (s3resume) {
		printk (BIOS_SPEW, "[6e8] <= %x\n", info->cached_training->reg_6e8);
		write_mchbar32(0x6e8, info->cached_training->reg_6e8);
	} else
		set_6d_reg(info, 0x6e8, 2 * info->fsb_frequency, frequency_11(info), 0,
			   info->delay46_ps[1], 0,
			   info->delay54_ps[1]);
	set_2d5x_reg(info, 0x2d58, 0x78, 0x78, 864, 1195, 762, 786, 0);
	set_2d5x_reg(info, 0x2d60, 0x195, info->fsb_frequency, 1352, 725, 455,
		     470, 0);
	set_2d5x_reg(info, 0x2d68, 0x195, 0x3c, 2707, 5632, 3277, 2207, 0);
	set_2d5x_reg(info, 0x2d70, 0x195, frequency_11(info) / 2, 1276, 758,
		     454, 459, 0);
	set_2d5x_reg(info, 0x2d78, 0x195, 0x78, 1021, 799, 510, 513, 0);
	set_2d5x_reg(info, 0x2d80, info->fsb_frequency, 0xe1, 0, 2862, 2579,
		     2588, 0);
	set_2d5x_reg(info, 0x2d88, info->fsb_frequency, 0xe1, 0, 2690, 2405,
		     2405, 0);
	set_2d5x_reg(info, 0x2da0, 0x78, 0xe1, 0, 2560, 2264, 2251, 0);
	set_2d5x_reg(info, 0x2da8, 0x195, frequency_11(info), 1060, 775, 484,
		     480, 0);
	set_2d5x_reg(info, 0x2db0, 0x195, 0x78, 4183, 6023, 2217, 2048, 0);
	/* Direct frequency encodings (exact field meaning unknown). */
	write_mchbar32(0x2dbc, ((frequency_11(info) / 2) - 1) | 0xe00000);
	write_mchbar32(0x2db8, ((info->fsb_frequency - 1) << 16) | 0x77);
}
3646
3647static u16 get_max_timing(struct raminfo *info, int channel)
3648{
3649 int slot, rank, lane;
3650 u16 ret = 0;
3651
3652 if ((read_mchbar8(0x2ca8) >> 2) < 1)
3653 return 384;
3654
3655 if (info->revision < 8)
3656 return 256;
3657
3658 for (slot = 0; slot < NUM_SLOTS; slot++)
3659 for (rank = 0; rank < NUM_RANKS; rank++)
3660 if (info->populated_ranks[channel][slot][rank])
3661 for (lane = 0; lane < 8 + info->use_ecc; lane++)
3662 ret = max(ret, read_500(info, channel,
3663 get_timing_register_addr
3664 (lane, 0, slot,
3665 rank), 9));
3666 return ret;
3667}
3668
/*
 * Derive the per-channel 0x274/0x265 timing registers and the
 * delay46_ps/delay54_ps values (later consumed by set_2dxx_series() for
 * registers 0x6dc/0x6e8).
 *
 * All delays are in picoseconds unless named *_cycles.  The exact physical
 * meaning of delays a-f is unknown (reverse engineered); the structure is:
 * delay_d is built up from the worst trained timing plus several
 * revision/CAS-dependent correction terms, delay_c = delay_d + 1800,
 * and delays e/f split the remainder into whole-cycle and sub-cycle parts.
 * Results are cached in info->training for S3 resume (restore_274265()).
 */
static void set_274265(struct raminfo *info)
{
	int delay_a_ps, delay_b_ps, delay_c_ps, delay_d_ps;
	int delay_e_ps, delay_e_cycles, delay_f_cycles;
	int delay_e_over_cycle_ps;
	int cycletime_ps;
	int channel;

	delay_a_ps = 4 * halfcycle_ps(info) + 6 * fsbcycle_ps(info);
	info->training.reg2ca9_bit0 = 0;
	for (channel = 0; channel < NUM_CHANNELS; channel++) {
		/* Common period of FSB and memory clocks, scaled by 900000.
		   NOTE(review): units of this constant are unclear. */
		cycletime_ps =
		    900000 / lcm(2 * info->fsb_frequency, frequency_11(info));
		/* Start from the worst trained timing (9.6 fixed point,
		   hence >> 6). */
		delay_d_ps =
		    (halfcycle_ps(info) * get_max_timing(info, channel) >> 6)
		    - info->some_delay_3_ps_rounded + 200;
		if (!
		    ((info->silicon_revision == 0
		      || info->silicon_revision == 1)
		     && (info->revision >= 8)))
			delay_d_ps += halfcycle_ps(info) * 2;
		/* Correction terms scaled by the half-cycle time. */
		delay_d_ps +=
		    halfcycle_ps(info) * (!info->revision_flag_1 +
					  info->some_delay_2_halfcycles_ceil +
					  2 * info->some_delay_1_cycle_floor +
					  info->clock_speed_index +
					  2 * info->cas_latency - 7 + 11);
		delay_d_ps += info->revision >= 8 ? 2758 : 4428;

		write_mchbar32(0x140,
			       (read_mchbar32(0x140) & 0xfaffffff) | 0x2000000);
		write_mchbar32(0x138,
			       (read_mchbar32(0x138) & 0xfaffffff) | 0x2000000);
		if ((read_mchbar8(0x144) & 0x1f) > 0x13)
			delay_d_ps += 650;
		delay_c_ps = delay_d_ps + 1800;
		if (delay_c_ps <= delay_a_ps)
			delay_e_ps = 0;
		else
			/* Round the excess up to a whole common cycle. */
			delay_e_ps =
			    cycletime_ps * div_roundup(delay_c_ps - delay_a_ps,
						       cycletime_ps);

		/* Split delay_e into whole memory cycles and remainder. */
		delay_e_over_cycle_ps = delay_e_ps % (2 * halfcycle_ps(info));
		delay_e_cycles = delay_e_ps / (2 * halfcycle_ps(info));
		delay_f_cycles =
		    div_roundup(2500 - delay_e_over_cycle_ps,
				2 * halfcycle_ps(info));
		if (delay_f_cycles > delay_e_cycles) {
			info->delay46_ps[channel] = delay_e_ps;
			delay_e_cycles = 0;
		} else {
			info->delay46_ps[channel] =
			    delay_e_over_cycle_ps +
			    2 * halfcycle_ps(info) * delay_f_cycles;
			delay_e_cycles -= delay_f_cycles;
		}

		/* Clamp to a 2500 ps floor; record the clamp in bit 0 of
		   0x2ca9 (applied after the loop). */
		if (info->delay46_ps[channel] < 2500) {
			info->delay46_ps[channel] = 2500;
			info->training.reg2ca9_bit0 = 1;
		}
		delay_b_ps = halfcycle_ps(info) + delay_c_ps;
		if (delay_b_ps <= delay_a_ps)
			delay_b_ps = 0;
		else
			delay_b_ps -= delay_a_ps;
		info->delay54_ps[channel] =
		    cycletime_ps * div_roundup(delay_b_ps,
					       cycletime_ps) -
		    2 * halfcycle_ps(info) * delay_e_cycles;
		/* Same 2500 ps floor as delay46. */
		if (info->delay54_ps[channel] < 2500)
			info->delay54_ps[channel] = 2500;
		info->training.reg274265[channel][0] = delay_e_cycles;
		if (delay_d_ps + 7 * halfcycle_ps(info) <=
		    24 * halfcycle_ps(info))
			info->training.reg274265[channel][1] = 0;
		else
			info->training.reg274265[channel][1] =
			    div_roundup(delay_d_ps + 7 * halfcycle_ps(info),
					4 * halfcycle_ps(info)) - 6;
		/* Per-channel register block is 0x400 bytes apart. */
		write_mchbar32((channel << 10) + 0x274,
			       info->training.reg274265[channel][1]
			       | (info->training.reg274265[channel][0] << 16));
		info->training.reg274265[channel][2] =
		    div_roundup(delay_c_ps + 3 * fsbcycle_ps(info),
				4 * halfcycle_ps(info)) + 1;
		write_mchbar16((channel << 10) + 0x265,
			       info->training.reg274265[channel][2] << 8);
	}
	if (info->training.reg2ca9_bit0)
		write_mchbar8(0x2ca9, read_mchbar8(0x2ca9) | 1);
	else
		write_mchbar8(0x2ca9, read_mchbar8(0x2ca9) & ~1);
}
3764
3765static void restore_274265(struct raminfo *info)
3766{
3767 int channel;
3768
3769 for (channel = 0; channel < NUM_CHANNELS; channel++) {
3770 write_mchbar32((channel << 10) + 0x274,
Vladimir Serbinenkof7a42de2014-01-09 11:10:04 +01003771 (info->cached_training->reg274265[channel][0] << 16)
3772 | info->cached_training->reg274265[channel][1]);
Vladimir Serbinenkoc6f6be02013-11-12 22:32:08 +01003773 write_mchbar16((channel << 10) + 0x265,
Vladimir Serbinenkof7a42de2014-01-09 11:10:04 +01003774 info->cached_training->reg274265[channel][2] << 8);
Vladimir Serbinenkoc6f6be02013-11-12 22:32:08 +01003775 }
Vladimir Serbinenkof7a42de2014-01-09 11:10:04 +01003776 if (info->cached_training->reg2ca9_bit0)
Vladimir Serbinenkoc6f6be02013-11-12 22:32:08 +01003777 write_mchbar8(0x2ca9, read_mchbar8(0x2ca9) | 1);
3778 else
3779 write_mchbar8(0x2ca9, read_mchbar8(0x2ca9) & ~1);
3780}
3781
3782#if REAL
/*
 * Early DMI link and GPIO setup, performed before RAM init proper.
 *
 * Pokes DMIBAR, MCHBAR 0x48 and GPIOBASE registers with values obtained by
 * tracing the vendor BIOS; the gav() wrappers log the values read for
 * reverse-engineering purposes.  Exact register meanings are unknown.
 */
static void dmi_setup(void)
{
	gav(read8(DEFAULT_DMIBAR | 0x254));
	write8(DEFAULT_DMIBAR | 0x254, 0x1);
	write16(DEFAULT_DMIBAR | 0x1b8, 0x18f2);
	read_mchbar16(0x48);
	write_mchbar16(0x48, 0x2);

	write32(DEFAULT_DMIBAR | 0xd68, read32(DEFAULT_DMIBAR | 0xd68) | 0x08000000);

	/* Read-modify-write a GPIO config word: clear bits 18/20, set 22. */
	outl((gav(inl(DEFAULT_GPIOBASE | 0x38)) & ~0x140000) | 0x400000,
	     DEFAULT_GPIOBASE | 0x38);
	gav(inb(DEFAULT_GPIOBASE | 0xe));	// = 0xfdcaff6e (observed on reference hw)
}
3797#endif
3798
/*
 * Early chipset initialization run before raminit().
 *
 * Detects an interrupted previous RAM init via MCHBAR 0x2ca8 and forces a
 * full reset (0xcf9) if found, then performs DMI setup and a sequence of
 * northbridge/GMA/RCBA register writes traced from the vendor BIOS.  The
 * "// = 0xNN" comments record the values observed during tracing.
 */
void chipset_init(const int s3resume)
{
	u8 x2ca8;

	x2ca8 = read_mchbar8(0x2ca8);
	/* Bit 0 set, or value 8 on a non-resume boot, marks an interrupted
	   init: clear the flag and issue a full (0xe) CF9 reset. */
	if ((x2ca8 & 1) || (x2ca8 == 8 && !s3resume)) {
		printk(BIOS_DEBUG, "soft reset detected, rebooting properly\n");
		write_mchbar8(0x2ca8, 0);
		outb(0xe, 0xcf9);
#if REAL
		/* Wait for the reset to take effect. */
		while (1) {
			asm volatile ("hlt");
		}
#else
		printf("CP5\n");
		exit(0);
#endif
	}
#if !REAL
	if (!s3resume) {
		pre_raminit_3(x2ca8);
	}
	pre_raminit_4a(x2ca8);
#endif

	dmi_setup();

	write_mchbar16(0x1170, 0xa880);
	write_mchbar8(0x11c1, 0x1);
	write_mchbar16(0x1170, 0xb880);
	read_mchbar8(0x1210);
	write_mchbar8(0x1210, 0x84);
	/* Toggle the graphics memory control register before programming
	   its final traced value. */
	pcie_read_config8(NORTHBRIDGE, D0F0_GGC);	// = 0x52
	pcie_write_config8(NORTHBRIDGE, D0F0_GGC, 0x2);
	pcie_read_config8(NORTHBRIDGE, D0F0_GGC);	// = 0x2
	pcie_write_config8(NORTHBRIDGE, D0F0_GGC, 0x52);
	pcie_read_config16(NORTHBRIDGE, D0F0_GGC);	// = 0xb52

	pcie_write_config16(NORTHBRIDGE, D0F0_GGC, 0xb52);

	u16 deven;
	deven = pcie_read_config16(NORTHBRIDGE, D0F0_DEVEN);	// = 0x3

	/* DEVEN bit 3: internal graphics device enabled. */
	if (deven & 8) {
		write_mchbar8(0x2c30, 0x20);
		pcie_read_config8(NORTHBRIDGE, 0x8);	// = 0x18
		write_mchbar16(0x2c30, read_mchbar16(0x2c30) | 0x200);
		write_mchbar16(0x2c32, 0x434);
		read_mchbar32(0x2c44);
		write_mchbar32(0x2c44, 0x1053687);
		pcie_read_config8(GMA, 0x62);	// = 0x2
		pcie_write_config8(GMA, 0x62, 0x2);
		read8(DEFAULT_RCBA | 0x2318);
		write8(DEFAULT_RCBA | 0x2318, 0x47);
		read8(DEFAULT_RCBA | 0x2320);
		write8(DEFAULT_RCBA | 0x2320, 0xfc);
	}

	read_mchbar32(0x30);
	write_mchbar32(0x30, 0x40);

	pcie_read_config8(SOUTHBRIDGE, 0x8);	// = 0x6
	pcie_read_config16(NORTHBRIDGE, D0F0_GGC);	// = 0xb52
	pcie_write_config16(NORTHBRIDGE, D0F0_GGC, 0xb50);
	gav(read32(DEFAULT_RCBA | 0x3428));
	write32(DEFAULT_RCBA | 0x3428, 0x1d);
}
Vladimir Serbinenkoc6f6be02013-11-12 22:32:08 +01003866
Vladimir Serbinenko9817a372014-02-19 22:07:12 +01003867void raminit(const int s3resume, const u8 *spd_addrmap)
3868{
3869 unsigned channel, slot, lane, rank;
3870 int i;
3871 struct raminfo info;
3872 u8 x2ca8;
3873 u16 deven;
3874
3875 x2ca8 = read_mchbar8(0x2ca8);
3876 deven = pcie_read_config16(NORTHBRIDGE, D0F0_DEVEN);
Vladimir Serbinenkoc6f6be02013-11-12 22:32:08 +01003877
3878 memset(&info, 0x5a, sizeof(info));
3879
3880 info.last_500_command[0] = 0;
3881 info.last_500_command[1] = 0;
3882
3883 info.fsb_frequency = 135 * 2;
3884 info.board_lane_delay[0] = 0x14;
3885 info.board_lane_delay[1] = 0x07;
3886 info.board_lane_delay[2] = 0x07;
3887 info.board_lane_delay[3] = 0x08;
3888 info.board_lane_delay[4] = 0x56;
3889 info.board_lane_delay[5] = 0x04;
3890 info.board_lane_delay[6] = 0x04;
3891 info.board_lane_delay[7] = 0x05;
3892 info.board_lane_delay[8] = 0x10;
3893
3894 info.training.reg_178 = 0;
3895 info.training.reg_10b = 0;
3896
3897 info.heci_bar = 0;
3898 info.memory_reserved_for_heci_mb = 0;
3899
3900 /* before SPD */
3901 timestamp_add_now(101);
3902
3903 if (!s3resume || REAL) {
3904 pcie_read_config8(SOUTHBRIDGE, GEN_PMCON_2); // = 0x80
3905
3906 collect_system_info(&info);
3907
3908#if REAL
3909 /* Enable SMBUS. */
3910 enable_smbus();
3911#endif
3912
3913 memset(&info.populated_ranks, 0, sizeof(info.populated_ranks));
3914
3915 info.use_ecc = 1;
3916 for (channel = 0; channel < NUM_CHANNELS; channel++)
Vladimir Serbinenko2ab8ec72014-02-20 14:34:56 +01003917 for (slot = 0; slot < NUM_SLOTS; slot++) {
Vladimir Serbinenkoc6f6be02013-11-12 22:32:08 +01003918 int v;
3919 int try;
3920 int addr;
3921 const u8 useful_addresses[] = {
3922 DEVICE_TYPE,
3923 MODULE_TYPE,
3924 DENSITY,
3925 RANKS_AND_DQ,
3926 MEMORY_BUS_WIDTH,
3927 TIMEBASE_DIVIDEND,
3928 TIMEBASE_DIVISOR,
3929 CYCLETIME,
3930 CAS_LATENCIES_LSB,
3931 CAS_LATENCIES_MSB,
3932 CAS_LATENCY_TIME,
3933 0x11, 0x12, 0x13, 0x14, 0x15,
3934 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b,
3935 0x1c, 0x1d,
3936 THERMAL_AND_REFRESH,
3937 0x20,
3938 REFERENCE_RAW_CARD_USED,
3939 RANK1_ADDRESS_MAPPING,
3940 0x75, 0x76, 0x77, 0x78,
3941 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e,
3942 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84,
3943 0x85, 0x86, 0x87, 0x88,
3944 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
3945 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94,
3946 0x95
3947 };
Vladimir Serbinenko902626c2014-02-16 17:22:26 +01003948 if (!spd_addrmap[2 * channel + slot])
Vladimir Serbinenkoc6f6be02013-11-12 22:32:08 +01003949 continue;
3950 for (try = 0; try < 5; try++) {
Vladimir Serbinenko902626c2014-02-16 17:22:26 +01003951 v = smbus_read_byte(spd_addrmap[2 * channel + slot],
Vladimir Serbinenkoc6f6be02013-11-12 22:32:08 +01003952 DEVICE_TYPE);
3953 if (v >= 0)
3954 break;
3955 }
3956 if (v < 0)
3957 continue;
3958 for (addr = 0;
3959 addr <
3960 sizeof(useful_addresses) /
3961 sizeof(useful_addresses[0]); addr++)
3962 gav(info.
3963 spd[channel][0][useful_addresses
3964 [addr]] =
Vladimir Serbinenko902626c2014-02-16 17:22:26 +01003965 smbus_read_byte(spd_addrmap[2 * channel + slot],
Vladimir Serbinenkoc6f6be02013-11-12 22:32:08 +01003966 useful_addresses
3967 [addr]));
3968 if (info.spd[channel][0][DEVICE_TYPE] != 11)
3969 die("Only DDR3 is supported");
3970
3971 v = info.spd[channel][0][RANKS_AND_DQ];
3972 info.populated_ranks[channel][0][0] = 1;
3973 info.populated_ranks[channel][0][1] =
3974 ((v >> 3) & 7);
3975 if (((v >> 3) & 7) > 1)
3976 die("At most 2 ranks are supported");
3977 if ((v & 7) == 0 || (v & 7) > 2)
3978 die("Only x8 and x16 modules are supported");
3979 if ((info.
3980 spd[channel][slot][MODULE_TYPE] & 0xF) != 2
3981 && (info.
3982 spd[channel][slot][MODULE_TYPE] & 0xF)
3983 != 3)
3984 die("Registered memory is not supported");
3985 info.is_x16_module[channel][0] = (v & 7) - 1;
3986 info.density[channel][slot] =
3987 info.spd[channel][slot][DENSITY] & 0xF;
3988 if (!
3989 (info.
3990 spd[channel][slot][MEMORY_BUS_WIDTH] &
3991 0x18))
3992 info.use_ecc = 0;
3993 }
3994
3995 gav(0x55);
3996
3997 for (channel = 0; channel < NUM_CHANNELS; channel++) {
3998 int v = 0;
3999 for (slot = 0; slot < NUM_SLOTS; slot++)
4000 for (rank = 0; rank < NUM_RANKS; rank++)
4001 v |= info.
4002 populated_ranks[channel][slot][rank]
4003 << (2 * slot + rank);
4004 info.populated_ranks_mask[channel] = v;
4005 }
4006
4007 gav(0x55);
4008
4009 gav(pcie_read_config32(NORTHBRIDGE, D0F0_CAPID0 + 4));
4010 }
4011
4012 /* after SPD */
4013 timestamp_add_now(102);
4014
4015 write_mchbar8(0x2ca8, read_mchbar8(0x2ca8) & 0xfc);
4016#if !REAL
4017 rdmsr (MTRRphysMask_MSR (3));
4018#endif
4019
4020 collect_system_info(&info);
4021 calculate_timings(&info);
4022
4023#if !REAL
4024 pcie_write_config8(NORTHBRIDGE, 0xdf, 0x82);
4025#endif
4026
4027 if (!s3resume) {
4028 u8 reg8 = pcie_read_config8(SOUTHBRIDGE, GEN_PMCON_2);
4029 if (x2ca8 == 0 && (reg8 & 0x80)) {
4030 /* Don't enable S4-assertion stretch. Makes trouble on roda/rk9.
4031 reg8 = pci_read_config8(PCI_DEV(0, 0x1f, 0), 0xa4);
4032 pci_write_config8(PCI_DEV(0, 0x1f, 0), 0xa4, reg8 | 0x08);
4033 */
4034
4035 /* Clear bit7. */
4036
4037 pci_write_config8(SOUTHBRIDGE, GEN_PMCON_2,
4038 (reg8 & ~(1 << 7)));
4039
4040 printk(BIOS_INFO,
4041 "Interrupted RAM init, reset required.\n");
4042 outb(0x6, 0xcf9);
4043#if REAL
4044 while (1) {
4045 asm volatile ("hlt");
4046 }
4047#endif
4048 }
4049 }
4050#if !REAL
4051 gav(read_mchbar8(0x2ca8)); ///!!!!
4052#endif
4053
4054 if (!s3resume && x2ca8 == 0)
4055 pcie_write_config8(SOUTHBRIDGE, GEN_PMCON_2,
4056 pcie_read_config8(SOUTHBRIDGE, GEN_PMCON_2) | 0x80);
4057
4058 compute_derived_timings(&info);
4059
4060 if (x2ca8 == 0) {
4061 gav(read_mchbar8(0x164));
4062 write_mchbar8(0x164, 0x26);
4063 write_mchbar16(0x2c20, 0x10);
4064 }
4065
4066 write_mchbar32(0x18b4, read_mchbar32(0x18b4) | 0x210000); /* OK */
4067 write_mchbar32(0x1890, read_mchbar32(0x1890) | 0x2000000); /* OK */
4068 write_mchbar32(0x18b4, read_mchbar32(0x18b4) | 0x8000);
4069
4070 gav(pcie_read_config32(PCI_DEV(0xff, 2, 1), 0x50)); // !!!!
4071 pcie_write_config8(PCI_DEV(0xff, 2, 1), 0x54, 0x12);
4072
4073 gav(read_mchbar16(0x2c10)); // !!!!
4074 write_mchbar16(0x2c10, 0x412);
4075 gav(read_mchbar16(0x2c10)); // !!!!
4076 write_mchbar16(0x2c12, read_mchbar16(0x2c12) | 0x100); /* OK */
4077
4078 gav(read_mchbar8(0x2ca8)); // !!!!
4079 write_mchbar32(0x1804,
4080 (read_mchbar32(0x1804) & 0xfffffffc) | 0x8400080);
4081
4082 pcie_read_config32(PCI_DEV(0xff, 2, 1), 0x6c); // !!!!
4083 pcie_write_config32(PCI_DEV(0xff, 2, 1), 0x6c, 0x40a0a0);
4084 gav(read_mchbar32(0x1c04)); // !!!!
4085 gav(read_mchbar32(0x1804)); // !!!!
4086
4087 if (x2ca8 == 0) {
4088 write_mchbar8(0x2ca8, read_mchbar8(0x2ca8) | 1);
4089 }
4090
4091 write_mchbar32(0x18d8, 0x120000);
4092 write_mchbar32(0x18dc, 0x30a484a);
4093 pcie_write_config32(PCI_DEV(0xff, 2, 1), 0xe0, 0x0);
4094 pcie_write_config32(PCI_DEV(0xff, 2, 1), 0xf4, 0x9444a);
4095 write_mchbar32(0x18d8, 0x40000);
4096 write_mchbar32(0x18dc, 0xb000000);
4097 pcie_write_config32(PCI_DEV(0xff, 2, 1), 0xe0, 0x60000);
4098 pcie_write_config32(PCI_DEV(0xff, 2, 1), 0xf4, 0x0);
4099 write_mchbar32(0x18d8, 0x180000);
4100 write_mchbar32(0x18dc, 0xc0000142);
4101 pcie_write_config32(PCI_DEV(0xff, 2, 1), 0xe0, 0x20000);
4102 pcie_write_config32(PCI_DEV(0xff, 2, 1), 0xf4, 0x142);
4103 write_mchbar32(0x18d8, 0x1e0000);
4104
4105 gav(read_mchbar32(0x18dc)); // !!!!
4106 write_mchbar32(0x18dc, 0x3);
4107 gav(read_mchbar32(0x18dc)); // !!!!
4108
4109 if (x2ca8 == 0) {
4110 write_mchbar8(0x2ca8, read_mchbar8(0x2ca8) | 1); // guess
4111 }
4112
4113 write_mchbar32(0x188c, 0x20bc09);
4114 pcie_write_config32(PCI_DEV(0xff, 2, 1), 0xd0, 0x40b0c09);
4115 write_mchbar32(0x1a10, 0x4200010e);
4116 write_mchbar32(0x18b8, read_mchbar32(0x18b8) | 0x200);
4117 gav(read_mchbar32(0x1918)); // !!!!
4118 write_mchbar32(0x1918, 0x332);
4119
4120 gav(read_mchbar32(0x18b8)); // !!!!
4121 write_mchbar32(0x18b8, 0xe00);
4122 gav(read_mchbar32(0x182c)); // !!!!
4123 write_mchbar32(0x182c, 0x10202);
4124 gav(pcie_read_config32(PCI_DEV(0xff, 2, 1), 0x94)); // !!!!
4125 pcie_write_config32(PCI_DEV(0xff, 2, 1), 0x94, 0x10202);
4126 write_mchbar32(0x1a1c, read_mchbar32(0x1a1c) & 0x8fffffff);
4127 write_mchbar32(0x1a70, read_mchbar32(0x1a70) | 0x100000);
4128
4129 write_mchbar32(0x18b4, read_mchbar32(0x18b4) & 0xffff7fff);
4130 gav(read_mchbar32(0x1a68)); // !!!!
4131 write_mchbar32(0x1a68, 0x343800);
4132 gav(read_mchbar32(0x1e68)); // !!!!
4133 gav(read_mchbar32(0x1a68)); // !!!!
4134
4135 if (x2ca8 == 0) {
4136 write_mchbar8(0x2ca8, read_mchbar8(0x2ca8) | 1); // guess
4137 }
4138
4139 pcie_read_config32(PCI_DEV(0xff, 2, 0), 0x048); // !!!!
4140 pcie_write_config32(PCI_DEV(0xff, 2, 0), 0x048, 0x140000);
4141 pcie_read_config32(PCI_DEV(0xff, 2, 0), 0x058); // !!!!
4142 pcie_write_config32(PCI_DEV(0xff, 2, 0), 0x058, 0x64555);
4143 pcie_read_config32(PCI_DEV(0xff, 2, 0), 0x058); // !!!!
4144 pcie_read_config32(PCI_DEV (0xff, 0, 0), 0xd0); // !!!!
4145 pcie_write_config32(PCI_DEV (0xff, 0, 0), 0xd0, 0x180);
4146 gav(read_mchbar32(0x1af0)); // !!!!
4147 gav(read_mchbar32(0x1af0)); // !!!!
4148 write_mchbar32(0x1af0, 0x1f020003);
4149 gav(read_mchbar32(0x1af0)); // !!!!
4150
4151 if (((x2ca8 == 0))) {
4152 write_mchbar8(0x2ca8, read_mchbar8(0x2ca8) | 1); // guess
4153 }
4154
4155 gav(read_mchbar32(0x1890)); // !!!!
4156 write_mchbar32(0x1890, 0x80102);
4157 gav(read_mchbar32(0x18b4)); // !!!!
4158 write_mchbar32(0x18b4, 0x216000);
4159 write_mchbar32(0x18a4, 0x22222222);
4160 write_mchbar32(0x18a8, 0x22222222);
4161 write_mchbar32(0x18ac, 0x22222);
4162
4163 udelay(1000);
4164
Vladimir Serbinenkof7a42de2014-01-09 11:10:04 +01004165 info.cached_training = get_cached_training();
Vladimir Serbinenkoc6f6be02013-11-12 22:32:08 +01004166
Vladimir Serbinenkof7a42de2014-01-09 11:10:04 +01004167 if (x2ca8 == 0) {
4168 int j;
4169 if (s3resume && info.cached_training) {
4170 restore_274265(&info);
4171 printk(BIOS_DEBUG, "reg2ca9_bit0 = %x\n",
4172 info.cached_training->reg2ca9_bit0);
4173 for (i = 0; i < 2; i++)
4174 for (j = 0; j < 3; j++)
4175 printk(BIOS_DEBUG, "reg274265[%d][%d] = %x\n",
4176 i, j, info.cached_training->reg274265[i][j]);
4177 } else {
4178 set_274265(&info);
4179 printk(BIOS_DEBUG, "reg2ca9_bit0 = %x\n",
4180 info.training.reg2ca9_bit0);
4181 for (i = 0; i < 2; i++)
4182 for (j = 0; j < 3; j++)
4183 printk(BIOS_DEBUG, "reg274265[%d][%d] = %x\n",
4184 i, j, info.training.reg274265[i][j]);
4185 }
4186
4187 set_2dxx_series(&info, s3resume);
Vladimir Serbinenkoc6f6be02013-11-12 22:32:08 +01004188
4189 if (!(deven & 8)) {
4190 read_mchbar32(0x2cb0);
4191 write_mchbar32(0x2cb0, 0x40);
4192 }
4193
4194 udelay(1000);
4195
4196 if (deven & 8) {
4197 write_mchbar32(0xff8, 0x1800 | read_mchbar32(0xff8));
4198 read_mchbar32(0x2cb0);
4199 write_mchbar32(0x2cb0, 0x00);
4200 pcie_read_config8(PCI_DEV (0, 0x2, 0x0), 0x4c);
4201 pcie_read_config8(PCI_DEV (0, 0x2, 0x0), 0x4c);
4202 pcie_read_config8(PCI_DEV (0, 0x2, 0x0), 0x4e);
4203
4204 read_mchbar8(0x1150);
4205 read_mchbar8(0x1151);
4206 read_mchbar8(0x1022);
4207 read_mchbar8(0x16d0);
4208 write_mchbar32(0x1300, 0x60606060);
4209 write_mchbar32(0x1304, 0x60606060);
4210 write_mchbar32(0x1308, 0x78797a7b);
4211 write_mchbar32(0x130c, 0x7c7d7e7f);
4212 write_mchbar32(0x1310, 0x60606060);
4213 write_mchbar32(0x1314, 0x60606060);
4214 write_mchbar32(0x1318, 0x60606060);
4215 write_mchbar32(0x131c, 0x60606060);
4216 write_mchbar32(0x1320, 0x50515253);
4217 write_mchbar32(0x1324, 0x54555657);
4218 write_mchbar32(0x1328, 0x58595a5b);
4219 write_mchbar32(0x132c, 0x5c5d5e5f);
4220 write_mchbar32(0x1330, 0x40414243);
4221 write_mchbar32(0x1334, 0x44454647);
4222 write_mchbar32(0x1338, 0x48494a4b);
4223 write_mchbar32(0x133c, 0x4c4d4e4f);
4224 write_mchbar32(0x1340, 0x30313233);
4225 write_mchbar32(0x1344, 0x34353637);
4226 write_mchbar32(0x1348, 0x38393a3b);
4227 write_mchbar32(0x134c, 0x3c3d3e3f);
4228 write_mchbar32(0x1350, 0x20212223);
4229 write_mchbar32(0x1354, 0x24252627);
4230 write_mchbar32(0x1358, 0x28292a2b);
4231 write_mchbar32(0x135c, 0x2c2d2e2f);
4232 write_mchbar32(0x1360, 0x10111213);
4233 write_mchbar32(0x1364, 0x14151617);
4234 write_mchbar32(0x1368, 0x18191a1b);
4235 write_mchbar32(0x136c, 0x1c1d1e1f);
4236 write_mchbar32(0x1370, 0x10203);
4237 write_mchbar32(0x1374, 0x4050607);
4238 write_mchbar32(0x1378, 0x8090a0b);
4239 write_mchbar32(0x137c, 0xc0d0e0f);
4240 write_mchbar8(0x11cc, 0x4e);
4241 write_mchbar32(0x1110, 0x73970404);
4242 write_mchbar32(0x1114, 0x72960404);
4243 write_mchbar32(0x1118, 0x6f950404);
4244 write_mchbar32(0x111c, 0x6d940404);
4245 write_mchbar32(0x1120, 0x6a930404);
4246 write_mchbar32(0x1124, 0x68a41404);
4247 write_mchbar32(0x1128, 0x66a21404);
4248 write_mchbar32(0x112c, 0x63a01404);
4249 write_mchbar32(0x1130, 0x609e1404);
4250 write_mchbar32(0x1134, 0x5f9c1404);
4251 write_mchbar32(0x1138, 0x5c961404);
4252 write_mchbar32(0x113c, 0x58a02404);
4253 write_mchbar32(0x1140, 0x54942404);
4254 write_mchbar32(0x1190, 0x900080a);
4255 write_mchbar16(0x11c0, 0xc40b);
4256 write_mchbar16(0x11c2, 0x303);
4257 write_mchbar16(0x11c4, 0x301);
4258 read_mchbar32(0x1190);
4259 write_mchbar32(0x1190, 0x8900080a);
4260 write_mchbar32(0x11b8, 0x70c3000);
4261 write_mchbar8(0x11ec, 0xa);
4262 write_mchbar16(0x1100, 0x800);
4263 read_mchbar32(0x11bc);
4264 write_mchbar32(0x11bc, 0x1e84800);
4265 write_mchbar16(0x11ca, 0xfa);
4266 write_mchbar32(0x11e4, 0x4e20);
4267 write_mchbar8(0x11bc, 0xf);
4268 write_mchbar16(0x11da, 0x19);
4269 write_mchbar16(0x11ba, 0x470c);
4270 write_mchbar32(0x1680, 0xe6ffe4ff);
4271 write_mchbar32(0x1684, 0xdeffdaff);
4272 write_mchbar32(0x1688, 0xd4ffd0ff);
4273 write_mchbar32(0x168c, 0xccffc6ff);
4274 write_mchbar32(0x1690, 0xc0ffbeff);
4275 write_mchbar32(0x1694, 0xb8ffb0ff);
4276 write_mchbar32(0x1698, 0xa8ff0000);
4277 write_mchbar32(0x169c, 0xc00);
4278 write_mchbar32(0x1290, 0x5000000);
4279 }
4280
4281 write_mchbar32(0x124c, 0x15040d00);
4282 write_mchbar32(0x1250, 0x7f0000);
4283 write_mchbar32(0x1254, 0x1e220004);
4284 write_mchbar32(0x1258, 0x4000004);
4285 write_mchbar32(0x1278, 0x0);
4286 write_mchbar32(0x125c, 0x0);
4287 write_mchbar32(0x1260, 0x0);
4288 write_mchbar32(0x1264, 0x0);
4289 write_mchbar32(0x1268, 0x0);
4290 write_mchbar32(0x126c, 0x0);
4291 write_mchbar32(0x1270, 0x0);
4292 write_mchbar32(0x1274, 0x0);
4293 }
4294
4295 if ((deven & 8) && x2ca8 == 0) {
4296 write_mchbar16(0x1214, 0x320);
4297 write_mchbar32(0x1600, 0x40000000);
4298 read_mchbar32(0x11f4);
4299 write_mchbar32(0x11f4, 0x10000000);
4300 read_mchbar16(0x1230);
4301 write_mchbar16(0x1230, 0x8000);
4302 write_mchbar32(0x1400, 0x13040020);
4303 write_mchbar32(0x1404, 0xe090120);
4304 write_mchbar32(0x1408, 0x5120220);
4305 write_mchbar32(0x140c, 0x5120330);
4306 write_mchbar32(0x1410, 0xe090220);
4307 write_mchbar32(0x1414, 0x1010001);
4308 write_mchbar32(0x1418, 0x1110000);
4309 write_mchbar32(0x141c, 0x9020020);
4310 write_mchbar32(0x1420, 0xd090220);
4311 write_mchbar32(0x1424, 0x2090220);
4312 write_mchbar32(0x1428, 0x2090330);
4313 write_mchbar32(0x142c, 0xd090220);
4314 write_mchbar32(0x1430, 0x1010001);
4315 write_mchbar32(0x1434, 0x1110000);
4316 write_mchbar32(0x1438, 0x11040020);
4317 write_mchbar32(0x143c, 0x4030220);
4318 write_mchbar32(0x1440, 0x1060220);
4319 write_mchbar32(0x1444, 0x1060330);
4320 write_mchbar32(0x1448, 0x4030220);
4321 write_mchbar32(0x144c, 0x1010001);
4322 write_mchbar32(0x1450, 0x1110000);
4323 write_mchbar32(0x1454, 0x4010020);
4324 write_mchbar32(0x1458, 0xb090220);
4325 write_mchbar32(0x145c, 0x1090220);
4326 write_mchbar32(0x1460, 0x1090330);
4327 write_mchbar32(0x1464, 0xb090220);
4328 write_mchbar32(0x1468, 0x1010001);
4329 write_mchbar32(0x146c, 0x1110000);
4330 write_mchbar32(0x1470, 0xf040020);
4331 write_mchbar32(0x1474, 0xa090220);
4332 write_mchbar32(0x1478, 0x1120220);
4333 write_mchbar32(0x147c, 0x1120330);
4334 write_mchbar32(0x1480, 0xa090220);
4335 write_mchbar32(0x1484, 0x1010001);
4336 write_mchbar32(0x1488, 0x1110000);
4337 write_mchbar32(0x148c, 0x7020020);
4338 write_mchbar32(0x1490, 0x1010220);
4339 write_mchbar32(0x1494, 0x10210);
4340 write_mchbar32(0x1498, 0x10320);
4341 write_mchbar32(0x149c, 0x1010220);
4342 write_mchbar32(0x14a0, 0x1010001);
4343 write_mchbar32(0x14a4, 0x1110000);
4344 write_mchbar32(0x14a8, 0xd040020);
4345 write_mchbar32(0x14ac, 0x8090220);
4346 write_mchbar32(0x14b0, 0x1111310);
4347 write_mchbar32(0x14b4, 0x1111420);
4348 write_mchbar32(0x14b8, 0x8090220);
4349 write_mchbar32(0x14bc, 0x1010001);
4350 write_mchbar32(0x14c0, 0x1110000);
4351 write_mchbar32(0x14c4, 0x3010020);
4352 write_mchbar32(0x14c8, 0x7090220);
4353 write_mchbar32(0x14cc, 0x1081310);
4354 write_mchbar32(0x14d0, 0x1081420);
4355 write_mchbar32(0x14d4, 0x7090220);
4356 write_mchbar32(0x14d8, 0x1010001);
4357 write_mchbar32(0x14dc, 0x1110000);
4358 write_mchbar32(0x14e0, 0xb040020);
4359 write_mchbar32(0x14e4, 0x2030220);
4360 write_mchbar32(0x14e8, 0x1051310);
4361 write_mchbar32(0x14ec, 0x1051420);
4362 write_mchbar32(0x14f0, 0x2030220);
4363 write_mchbar32(0x14f4, 0x1010001);
4364 write_mchbar32(0x14f8, 0x1110000);
4365 write_mchbar32(0x14fc, 0x5020020);
4366 write_mchbar32(0x1500, 0x5090220);
4367 write_mchbar32(0x1504, 0x2071310);
4368 write_mchbar32(0x1508, 0x2071420);
4369 write_mchbar32(0x150c, 0x5090220);
4370 write_mchbar32(0x1510, 0x1010001);
4371 write_mchbar32(0x1514, 0x1110000);
4372 write_mchbar32(0x1518, 0x7040120);
4373 write_mchbar32(0x151c, 0x2090220);
4374 write_mchbar32(0x1520, 0x70b1210);
4375 write_mchbar32(0x1524, 0x70b1310);
4376 write_mchbar32(0x1528, 0x2090220);
4377 write_mchbar32(0x152c, 0x1010001);
4378 write_mchbar32(0x1530, 0x1110000);
4379 write_mchbar32(0x1534, 0x1010110);
4380 write_mchbar32(0x1538, 0x1081310);
4381 write_mchbar32(0x153c, 0x5041200);
4382 write_mchbar32(0x1540, 0x5041310);
4383 write_mchbar32(0x1544, 0x1081310);
4384 write_mchbar32(0x1548, 0x1010001);
4385 write_mchbar32(0x154c, 0x1110000);
4386 write_mchbar32(0x1550, 0x1040120);
4387 write_mchbar32(0x1554, 0x4051210);
4388 write_mchbar32(0x1558, 0xd051200);
4389 write_mchbar32(0x155c, 0xd051200);
4390 write_mchbar32(0x1560, 0x4051210);
4391 write_mchbar32(0x1564, 0x1010001);
4392 write_mchbar32(0x1568, 0x1110000);
4393 write_mchbar16(0x1222, 0x220a);
4394 write_mchbar16(0x123c, 0x1fc0);
4395 write_mchbar16(0x1220, 0x1388);
4396 }
4397
4398 read_mchbar32(0x2c80); // !!!!
4399 write_mchbar32(0x2c80, 0x1053688);
4400 read_mchbar32(0x1c04); // !!!!
4401 write_mchbar32(0x1804, 0x406080);
4402
4403 read_mchbar8(0x2ca8);
4404
4405 if (x2ca8 == 0) {
4406 write_mchbar8(0x2ca8, read_mchbar8(0x2ca8) & ~3);
4407 write_mchbar8(0x2ca8, read_mchbar8(0x2ca8) + 4);
4408 write_mchbar32(0x1af0, read_mchbar32(0x1af0) | 0x10);
4409#if REAL
4410 while (1) {
4411 asm volatile ("hlt");
4412 }
4413#else
4414 printf("CP5\n");
4415 exit(0);
4416#endif
4417 }
4418
4419 write_mchbar8(0x2ca8, read_mchbar8(0x2ca8));
4420 read_mchbar32(0x2c80); // !!!!
4421 write_mchbar32(0x2c80, 0x53688);
4422 pcie_write_config32(PCI_DEV (0xff, 0, 0), 0x60, 0x20220);
4423 read_mchbar16(0x2c20); // !!!!
4424 read_mchbar16(0x2c10); // !!!!
4425 read_mchbar16(0x2c00); // !!!!
4426 write_mchbar16(0x2c00, 0x8c0);
4427 udelay(1000);
4428 write_1d0(0, 0x33d, 0, 0);
4429 write_500(&info, 0, 0, 0xb61, 0, 0);
4430 write_500(&info, 1, 0, 0xb61, 0, 0);
4431 write_mchbar32(0x1a30, 0x0);
4432 write_mchbar32(0x1a34, 0x0);
4433 write_mchbar16(0x614,
4434 0xb5b | (info.populated_ranks[1][0][0] *
4435 0x404) | (info.populated_ranks[0][0][0] *
4436 0xa0));
4437 write_mchbar16(0x616, 0x26a);
4438 write_mchbar32(0x134, 0x856000);
4439 write_mchbar32(0x160, 0x5ffffff);
4440 read_mchbar32(0x114); // !!!!
4441 write_mchbar32(0x114, 0xc2024440);
4442 read_mchbar32(0x118); // !!!!
4443 write_mchbar32(0x118, 0x4);
4444 for (channel = 0; channel < NUM_CHANNELS; channel++)
4445 write_mchbar32(0x260 + (channel << 10),
4446 0x30809ff |
4447 ((info.
4448 populated_ranks_mask[channel] & 3) << 20));
4449 for (channel = 0; channel < NUM_CHANNELS; channel++) {
4450 write_mchbar16(0x31c + (channel << 10), 0x101);
4451 write_mchbar16(0x360 + (channel << 10), 0x909);
4452 write_mchbar16(0x3a4 + (channel << 10), 0x101);
4453 write_mchbar16(0x3e8 + (channel << 10), 0x101);
4454 write_mchbar32(0x320 + (channel << 10), 0x29002900);
4455 write_mchbar32(0x324 + (channel << 10), 0x0);
4456 write_mchbar32(0x368 + (channel << 10), 0x32003200);
4457 write_mchbar16(0x352 + (channel << 10), 0x505);
4458 write_mchbar16(0x354 + (channel << 10), 0x3c3c);
4459 write_mchbar16(0x356 + (channel << 10), 0x1040);
4460 write_mchbar16(0x39a + (channel << 10), 0x73e4);
4461 write_mchbar16(0x3de + (channel << 10), 0x77ed);
4462 write_mchbar16(0x422 + (channel << 10), 0x1040);
4463 }
4464
4465 write_1d0(0x4, 0x151, 4, 1);
4466 write_1d0(0, 0x142, 3, 1);
4467 rdmsr(0x1ac); // !!!!
4468 write_500(&info, 1, 1, 0x6b3, 4, 1);
4469 write_500(&info, 1, 1, 0x6cf, 4, 1);
4470
4471 rmw_1d0(0x21c, 0x38, 0, 6, 1);
4472
4473 write_1d0(((!info.populated_ranks[1][0][0]) << 1) | ((!info.
4474 populated_ranks[0]
4475 [0][0]) << 0),
4476 0x1d1, 3, 1);
4477 for (channel = 0; channel < NUM_CHANNELS; channel++) {
4478 write_mchbar16(0x38e + (channel << 10), 0x5f5f);
4479 write_mchbar16(0x3d2 + (channel << 10), 0x5f5f);
4480 }
4481
4482 set_334(0);
4483
4484 program_base_timings(&info);
4485
4486 write_mchbar8(0x5ff, read_mchbar8(0x5ff) | 0x80); /* OK */
4487
4488 write_1d0(0x2, 0x1d5, 2, 1);
4489 write_1d0(0x20, 0x166, 7, 1);
4490 write_1d0(0x0, 0xeb, 3, 1);
4491 write_1d0(0x0, 0xf3, 6, 1);
4492
4493 for (channel = 0; channel < NUM_CHANNELS; channel++)
4494 for (lane = 0; lane < 9; lane++) {
4495 u16 addr = 0x125 + get_lane_offset(0, 0, lane);
4496 u8 a;
4497 a = read_500(&info, channel, addr, 6); // = 0x20040080 //!!!!
4498 write_500(&info, channel, a, addr, 6, 1);
4499 }
4500
4501 udelay(1000);
4502
4503	if (s3resume) {
4504 if (info.cached_training == NULL) {
4505 u32 reg32;
4506 printk(BIOS_ERR,
4507 "Couldn't find training data. Rebooting\n");
4508 reg32 = inl(DEFAULT_PMBASE + 0x04);
4509 outl(reg32 & ~(7 << 10), DEFAULT_PMBASE + 0x04);
4510 outb(0xe, 0xcf9);
4511
4512#if REAL
4513 while (1) {
4514 asm volatile ("hlt");
4515 }
4516#else
4517 printf("CP5\n");
4518 exit(0);
4519#endif
4520 }
4521 int tm;
4522 info.training = *info.cached_training;
4523 for (tm = 0; tm < 4; tm++)
4524 for (channel = 0; channel < NUM_CHANNELS; channel++)
4525 for (slot = 0; slot < NUM_SLOTS; slot++)
4526 for (rank = 0; rank < NUM_RANKS; rank++)
4527 for (lane = 0; lane < 9; lane++)
4528 write_500(&info,
4529 channel,
4530 info.training.
4531 lane_timings
4532 [tm][channel]
4533 [slot][rank]
4534 [lane],
4535 get_timing_register_addr
4536 (lane, tm,
4537 slot, rank),
4538 9, 0);
4539 write_1d0(info.cached_training->reg_178, 0x178, 7, 1);
4540 write_1d0(info.cached_training->reg_10b, 0x10b, 6, 1);
4541 }
4542
4543 read_mchbar32(0x1f4); // !!!!
4544 write_mchbar32(0x1f4, 0x20000);
4545 write_mchbar32(0x1f0, 0x1d000200);
4546 read_mchbar8(0x1f0); // !!!!
4547 write_mchbar8(0x1f0, 0x1);
4548 read_mchbar8(0x1f0); // !!!!
4549
4550 program_board_delay(&info);
4551
4552 write_mchbar8(0x5ff, 0x0); /* OK */
4553 write_mchbar8(0x5ff, 0x80); /* OK */
4554 write_mchbar8(0x5f4, 0x1); /* OK */
4555
4556 write_mchbar32(0x130, read_mchbar32(0x130) & 0xfffffffd); // | 2 when ?
4557 while (read_mchbar32(0x130) & 1) ;
4558 gav(read_1d0(0x14b, 7)); // = 0x81023100
4559 write_1d0(0x30, 0x14b, 7, 1);
4560 read_1d0(0xd6, 6); // = 0xfa008080 // !!!!
4561 write_1d0(7, 0xd6, 6, 1);
4562 read_1d0(0x328, 6); // = 0xfa018080 // !!!!
4563 write_1d0(7, 0x328, 6, 1);
4564
4565 for (channel = 0; channel < NUM_CHANNELS; channel++)
4566 set_4cf(&info, channel,
4567 info.populated_ranks[channel][0][0] ? 8 : 0);
4568
4569 read_1d0(0x116, 4); // = 0x4040432 // !!!!
4570 write_1d0(2, 0x116, 4, 1);
4571 read_1d0(0xae, 6); // = 0xe8088080 // !!!!
4572 write_1d0(0, 0xae, 6, 1);
4573 read_1d0(0x300, 4); // = 0x48088080 // !!!!
4574 write_1d0(0, 0x300, 6, 1);
4575 read_mchbar16(0x356); // !!!!
4576 write_mchbar16(0x356, 0x1040);
4577 read_mchbar16(0x756); // !!!!
4578 write_mchbar16(0x756, 0x1040);
4579 write_mchbar32(0x140, read_mchbar32(0x140) & ~0x07000000);
4580 write_mchbar32(0x138, read_mchbar32(0x138) & ~0x07000000);
4581 write_mchbar32(0x130, 0x31111301);
4582 while (read_mchbar32(0x130) & 1) ;
4583
4584 {
4585 u32 t;
4586 u8 val_a1;
4587 val_a1 = read_1d0(0xa1, 6); // = 0x1cf4040 // !!!!
4588 t = read_1d0(0x2f3, 6); // = 0x10a4040 // !!!!
4589 rmw_1d0(0x320, 0x07,
4590 (t & 4) | ((t & 8) >> 2) | ((t & 0x10) >> 4), 6, 1);
4591 rmw_1d0(0x14b, 0x78,
4592 ((((val_a1 >> 2) & 4) | (val_a1 & 8)) >> 2) | (val_a1 &
4593 4), 7,
4594 1);
4595 rmw_1d0(0xce, 0x38,
4596 ((((val_a1 >> 2) & 4) | (val_a1 & 8)) >> 2) | (val_a1 &
4597 4), 6,
4598 1);
4599 }
4600
4601 for (channel = 0; channel < NUM_CHANNELS; channel++)
4602 set_4cf(&info, channel,
4603 info.populated_ranks[channel][0][0] ? 9 : 1);
4604
4605 rmw_1d0(0x116, 0xe, 1, 4, 1); // = 0x4040432 // !!!!
4606 read_mchbar32(0x144); // !!!!
4607 write_1d0(2, 0xae, 6, 1);
4608 write_1d0(2, 0x300, 6, 1);
4609 write_1d0(2, 0x121, 3, 1);
4610 read_1d0(0xd6, 6); // = 0xfa00c0c7 // !!!!
4611 write_1d0(4, 0xd6, 6, 1);
4612 read_1d0(0x328, 6); // = 0xfa00c0c7 // !!!!
4613 write_1d0(4, 0x328, 6, 1);
4614
4615 for (channel = 0; channel < NUM_CHANNELS; channel++)
4616 set_4cf(&info, channel,
4617 info.populated_ranks[channel][0][0] ? 9 : 0);
4618
4619 write_mchbar32(0x130,
4620 0x11111301 | (info.
4621 populated_ranks[1][0][0] << 30) | (info.
4622 populated_ranks
4623 [0][0]
4624 [0] <<
4625 29));
4626 while (read_mchbar8(0x130) & 1) ; // !!!!
4627 read_1d0(0xa1, 6); // = 0x1cf4054 // !!!!
4628 read_1d0(0x2f3, 6); // = 0x10a4054 // !!!!
4629 read_1d0(0x21c, 6); // = 0xafa00c0 // !!!!
4630 write_1d0(0, 0x21c, 6, 1);
4631 read_1d0(0x14b, 7); // = 0x810231b0 // !!!!
4632 write_1d0(0x35, 0x14b, 7, 1);
4633
4634 for (channel = 0; channel < NUM_CHANNELS; channel++)
4635 set_4cf(&info, channel,
4636 info.populated_ranks[channel][0][0] ? 0xb : 0x2);
4637
4638 set_334(1);
4639
4640 write_mchbar8(0x1e8, 0x4); /* OK */
4641
4642 for (channel = 0; channel < NUM_CHANNELS; channel++) {
4643 write_500(&info, channel,
4644 0x3 & ~(info.populated_ranks_mask[channel]), 0x6b7, 2,
4645 1);
4646 write_500(&info, channel, 0x3, 0x69b, 2, 1);
4647 }
4648 write_mchbar32(0x2d0, (read_mchbar32(0x2d0) & 0xff2c01ff) | 0x200000); /* OK */
4649 write_mchbar16(0x6c0, 0x14a0); /* OK */
4650 write_mchbar32(0x6d0, (read_mchbar32(0x6d0) & 0xff0080ff) | 0x8000); /* OK */
4651 write_mchbar16(0x232, 0x8);
4652 write_mchbar32(0x234, (read_mchbar32(0x234) & 0xfffbfffb) | 0x40004); /* 0x40004 or 0 depending on ? */
4653 write_mchbar32(0x34, (read_mchbar32(0x34) & 0xfffffffd) | 5); /* OK */
4654 write_mchbar32(0x128, 0x2150d05);
4655 write_mchbar8(0x12c, 0x1f); /* OK */
4656 write_mchbar8(0x12d, 0x56); /* OK */
4657 write_mchbar8(0x12e, 0x31);
4658 write_mchbar8(0x12f, 0x0); /* OK */
4659 write_mchbar8(0x271, 0x2); /* OK */
4660 write_mchbar8(0x671, 0x2); /* OK */
4661 write_mchbar8(0x1e8, 0x4); /* OK */
4662 for (channel = 0; channel < NUM_CHANNELS; channel++)
4663 write_mchbar32(0x294 + (channel << 10),
4664 (info.populated_ranks_mask[channel] & 3) << 16);
4665 write_mchbar32(0x134, (read_mchbar32(0x134) & 0xfc01ffff) | 0x10000); /* OK */
4666 write_mchbar32(0x134, (read_mchbar32(0x134) & 0xfc85ffff) | 0x850000); /* OK */
4667 for (channel = 0; channel < NUM_CHANNELS; channel++)
4668 write_mchbar32(0x260 + (channel << 10),
4669 (read_mchbar32(0x260 + (channel << 10)) &
4670 ~0xf00000) | 0x8000000 | ((info.
4671 populated_ranks_mask
4672 [channel] & 3) <<
4673 20));
4674
4675 if (!s3resume)
4676 jedec_init(&info);
4677
4678 int totalrank = 0;
4679 for (channel = 0; channel < NUM_CHANNELS; channel++)
4680 for (slot = 0; slot < NUM_SLOTS; slot++)
4681 for (rank = 0; rank < NUM_RANKS; rank++)
4682 if (info.populated_ranks[channel][slot][rank]) {
4683 jedec_read(&info, channel, slot, rank,
4684 totalrank, 0xa, 0x400);
4685 totalrank++;
4686 }
4687
4688 write_mchbar8(0x12c, 0x9f);
4689
4690 read_mchbar8(0x271); // 2 // !!!!
4691 write_mchbar8(0x271, 0xe);
4692 read_mchbar8(0x671); // !!!!
4693 write_mchbar8(0x671, 0xe);
4694
4695 if (!s3resume) {
4696 for (channel = 0; channel < NUM_CHANNELS; channel++) {
4697 write_mchbar32(0x294 + (channel << 10),
4698 (info.
4699 populated_ranks_mask[channel] & 3) <<
4700 16);
4701 write_mchbar16(0x298 + (channel << 10),
4702 (info.
4703 populated_ranks[channel][0][0]) | (info.
4704 populated_ranks
4705 [channel]
4706 [0]
4707 [1]
4708 <<
4709 5));
4710 write_mchbar32(0x29c + (channel << 10), 0x77a);
4711 }
4712 read_mchbar32(0x2c0); /// !!!
4713 write_mchbar32(0x2c0, 0x6009cc00);
4714
4715 {
4716 u8 a, b;
4717 a = read_mchbar8(0x243); // !!!!
4718 b = read_mchbar8(0x643); // !!!!
4719 write_mchbar8(0x243, a | 2);
4720 write_mchbar8(0x643, b | 2);
4721 }
4722
4723 write_1d0(7, 0x19b, 3, 1);
4724 write_1d0(7, 0x1c0, 3, 1);
4725 write_1d0(4, 0x1c6, 4, 1);
4726 write_1d0(4, 0x1cc, 4, 1);
4727 read_1d0(0x151, 4); // = 0x408c6d74 // !!!!
4728 write_1d0(4, 0x151, 4, 1);
4729 write_mchbar32(0x584, 0xfffff);
4730 write_mchbar32(0x984, 0xfffff);
4731
4732 for (channel = 0; channel < NUM_CHANNELS; channel++)
4733 for (slot = 0; slot < NUM_SLOTS; slot++)
4734 for (rank = 0; rank < NUM_RANKS; rank++)
4735 if (info.
4736 populated_ranks[channel][slot]
4737 [rank])
4738 config_rank(&info, s3resume,
4739 channel, slot,
4740 rank);
4741
4742 write_mchbar8(0x243, 0x1);
4743 write_mchbar8(0x643, 0x1);
4744 }
4745
4746 /* was == 1 but is common */
4747 pcie_write_config16(NORTHBRIDGE, 0xc8, 3);
4748 write_26c(0, 0x820);
4749 write_26c(1, 0x820);
4750 write_mchbar32(0x130, read_mchbar32(0x130) | 2);
4751 /* end */
4752
4753 if (s3resume) {
4754 for (channel = 0; channel < NUM_CHANNELS; channel++) {
4755 write_mchbar32(0x294 + (channel << 10),
4756 (info.
4757 populated_ranks_mask[channel] & 3) <<
4758 16);
4759 write_mchbar16(0x298 + (channel << 10),
4760 (info.
4761 populated_ranks[channel][0][0]) | (info.
4762 populated_ranks
4763 [channel]
4764 [0]
4765 [1]
4766 <<
4767 5));
4768 write_mchbar32(0x29c + (channel << 10), 0x77a);
4769 }
4770 read_mchbar32(0x2c0); /// !!!
4771 write_mchbar32(0x2c0, 0x6009cc00);
4772 }
4773
4774 write_mchbar32(0xfa4, read_mchbar32(0xfa4) & ~0x01000002);
4775 write_mchbar32(0xfb0, 0x2000e019);
4776
4777#if !REAL
4778 printf("CP16\n");
4779#endif
4780
4781 /* Before training. */
4782 timestamp_add_now(103);
4783
4784 if (!s3resume)
4785 ram_training(&info);
4786
4787 /* After training. */
4788 timestamp_add_now (104);
4789
4790 dump_timings(&info);
4791
4792#if 0
4793 ram_check(0x100000, 0x200000);
4794#endif
4795 program_modules_memory_map(&info, 0);
4796 program_total_memory_map(&info);
4797
4798 if (info.non_interleaved_part_mb != 0 && info.interleaved_part_mb != 0)
4799 write_mchbar8(0x111, 0x20 | (0 << 2) | (1 << 6) | (0 << 7));
4800 else if (have_match_ranks(&info, 0, 4) && have_match_ranks(&info, 1, 4))
4801 write_mchbar8(0x111, 0x20 | (3 << 2) | (0 << 6) | (1 << 7));
4802 else if (have_match_ranks(&info, 0, 2) && have_match_ranks(&info, 1, 2))
4803 write_mchbar8(0x111, 0x20 | (3 << 2) | (0 << 6) | (0 << 7));
4804 else
4805 write_mchbar8(0x111, 0x20 | (3 << 2) | (1 << 6) | (0 << 7));
4806
4807 write_mchbar32(0xfac, read_mchbar32(0xfac) & ~0x80000000); // OK
4808 write_mchbar32(0xfb4, 0x4800); // OK
4809 write_mchbar32(0xfb8, (info.revision < 8) ? 0x20 : 0x0); // OK
4810 write_mchbar32(0xe94, 0x7ffff); // OK
4811 write_mchbar32(0xfc0, 0x80002040); // OK
4812 write_mchbar32(0xfc4, 0x701246); // OK
4813 write_mchbar8(0xfc8, read_mchbar8(0xfc8) & ~0x70); // OK
4814 write_mchbar32(0xe5c, 0x1000000 | read_mchbar32(0xe5c)); // OK
4815 write_mchbar32(0x1a70, (read_mchbar32(0x1a70) | 0x00200000) & ~0x00100000); // OK
4816 write_mchbar32(0x50, 0x700b0); // OK
4817 write_mchbar32(0x3c, 0x10); // OK
4818 write_mchbar8(0x1aa8, (read_mchbar8(0x1aa8) & ~0x35) | 0xa); // OK
4819 write_mchbar8(0xff4, read_mchbar8(0xff4) | 0x2); // OK
4820 write_mchbar32(0xff8, (read_mchbar32(0xff8) & ~0xe008) | 0x1020); // OK
4821
4822#if REAL
4823 write_mchbar32(0xd00, IOMMU_BASE2 | 1);
4824 write_mchbar32(0xd40, IOMMU_BASE1 | 1);
4825 write_mchbar32(0xdc0, IOMMU_BASE4 | 1);
4826
4827 write32(IOMMU_BASE1 | 0xffc, 0x80000000);
4828 write32(IOMMU_BASE2 | 0xffc, 0xc0000000);
4829 write32(IOMMU_BASE4 | 0xffc, 0x80000000);
4830
4831#else
4832 {
4833 u32 eax;
4834 eax = read32(0xffc + (read_mchbar32(0xd00) & ~1)) | 0x08000000; // = 0xe911714b// OK
4835 write32(0xffc + (read_mchbar32(0xd00) & ~1), eax); // OK
4836 eax = read32(0xffc + (read_mchbar32(0xdc0) & ~1)) | 0x40000000; // = 0xe911714b// OK
4837 write32(0xffc + (read_mchbar32(0xdc0) & ~1), eax); // OK
4838 }
4839#endif
4840
4841 {
4842 u32 eax;
4843
4844 eax = info.fsb_frequency / 9;
4845 write_mchbar32(0xfcc, (read_mchbar32(0xfcc) & 0xfffc0000) | (eax * 0x280) | (eax * 0x5000) | eax | 0x40000); // OK
4846 write_mchbar32(0x20, 0x33001); //OK
4847 }
4848
4849 for (channel = 0; channel < NUM_CHANNELS; channel++) {
4850 write_mchbar32(0x220 + (channel << 10), read_mchbar32(0x220 + (channel << 10)) & ~0x7770); //OK
4851 if (info.max_slots_used_in_channel == 1)
4852 write_mchbar16(0x237 + (channel << 10), (read_mchbar16(0x237 + (channel << 10)) | 0x0201)); //OK
4853 else
4854 write_mchbar16(0x237 + (channel << 10), (read_mchbar16(0x237 + (channel << 10)) & ~0x0201)); //OK
4855
4856 write_mchbar8(0x241 + (channel << 10), read_mchbar8(0x241 + (channel << 10)) | 1); // OK
4857
4858 if (info.clock_speed_index <= 1
4859 && (info.silicon_revision == 2
4860 || info.silicon_revision == 3))
4861 write_mchbar32(0x248 + (channel << 10), (read_mchbar32(0x248 + (channel << 10)) | 0x00102000)); // OK
4862 else
4863 write_mchbar32(0x248 + (channel << 10), (read_mchbar32(0x248 + (channel << 10)) & ~0x00102000)); // OK
4864 }
4865
4866 write_mchbar32(0x115, read_mchbar32(0x115) | 0x1000000); // OK
4867
4868 {
4869 u8 al;
4870 al = 0xd;
4871 if (!(info.silicon_revision == 0 || info.silicon_revision == 1))
4872 al += 2;
4873 al |= ((1 << (info.max_slots_used_in_channel - 1)) - 1) << 4;
4874 write_mchbar32(0x210, (al << 16) | 0x20); // OK
4875 }
4876
4877 for (channel = 0; channel < NUM_CHANNELS; channel++) {
4878 write_mchbar32(0x288 + (channel << 10), 0x70605040); // OK
4879 write_mchbar32(0x28c + (channel << 10), 0xfffec080); // OK
4880 write_mchbar32(0x290 + (channel << 10), 0x282091c | ((info.max_slots_used_in_channel - 1) << 0x16)); // OK
4881 }
4882 u32 reg1c;
4883 pcie_read_config32(NORTHBRIDGE, 0x40); // = DEFAULT_EPBAR | 0x001 // OK
4884 reg1c = read32(DEFAULT_EPBAR | 0x01c); // = 0x8001 // OK
4885 pcie_read_config32(NORTHBRIDGE, 0x40); // = DEFAULT_EPBAR | 0x001 // OK
4886 write32(DEFAULT_EPBAR | 0x01c, reg1c); // OK
4887 read_mchbar8(0xe08); // = 0x0
4888 pcie_read_config32(NORTHBRIDGE, 0xe4); // = 0x316126
4889 write_mchbar8(0x1210, read_mchbar8(0x1210) | 2); // OK
4890 write_mchbar32(0x1200, 0x8800440); // OK
4891 write_mchbar32(0x1204, 0x53ff0453); // OK
4892 write_mchbar32(0x1208, 0x19002043); // OK
4893 write_mchbar16(0x1214, 0x320); // OK
4894
4895 if (info.revision == 0x10 || info.revision == 0x11) {
4896 write_mchbar16(0x1214, 0x220); // OK
4897 write_mchbar8(0x1210, read_mchbar8(0x1210) | 0x40); // OK
4898 }
4899
4900 write_mchbar8(0x1214, read_mchbar8(0x1214) | 0x4); // OK
4901 write_mchbar8(0x120c, 0x1); // OK
4902 write_mchbar8(0x1218, 0x3); // OK
4903 write_mchbar8(0x121a, 0x3); // OK
4904 write_mchbar8(0x121c, 0x3); // OK
4905 write_mchbar16(0xc14, 0x0); // OK
4906 write_mchbar16(0xc20, 0x0); // OK
4907 write_mchbar32(0x1c, 0x0); // OK
4908
4909 /* revision dependent here. */
4910
4911 write_mchbar16(0x1230, read_mchbar16(0x1230) | 0x1f07); // OK
4912
4913 if (info.uma_enabled)
4914 write_mchbar32(0x11f4, read_mchbar32(0x11f4) | 0x10000000); // OK
4915
4916 write_mchbar16(0x1230, read_mchbar16(0x1230) | 0x8000); // OK
4917 write_mchbar8(0x1214, read_mchbar8(0x1214) | 1); // OK
4918
4919 u8 bl, ebpb;
4920 u16 reg_1020;
4921
4922 reg_1020 = read_mchbar32(0x1020); // = 0x6c733c // OK
4923 write_mchbar8(0x1070, 0x1); // OK
4924
4925 write_mchbar32(0x1000, 0x100); // OK
4926 write_mchbar8(0x1007, 0x0); // OK
4927
4928 if (reg_1020 != 0) {
4929 write_mchbar16(0x1018, 0x0); // OK
4930 bl = reg_1020 >> 8;
4931 ebpb = reg_1020 & 0xff;
4932 } else {
4933 ebpb = 0;
4934 bl = 8;
4935 }
4936
4937 rdmsr(0x1a2);
4938
4939 write_mchbar32(0x1014, 0xffffffff); // OK
4940
4941 write_mchbar32(0x1010, ((((ebpb + 0x7d) << 7) / bl) & 0xff) * (! !reg_1020)); // OK
4942
4943 write_mchbar8(0x101c, 0xb8); // OK
4944
4945 write_mchbar8(0x123e, (read_mchbar8(0x123e) & 0xf) | 0x60); // OK
4946 if (reg_1020 != 0) {
4947 write_mchbar32(0x123c, (read_mchbar32(0x123c) & ~0x00900000) | 0x600000); // OK
4948 write_mchbar8(0x101c, 0xb8); // OK
4949 }
4950
4951 setup_heci_uma(&info);
4952
4953 if (info.uma_enabled) {
4954 u16 ax;
4955 write_mchbar32(0x11b0, read_mchbar32(0x11b0) | 0x4000); // OK
4956 write_mchbar32(0x11b4, read_mchbar32(0x11b4) | 0x4000); // OK
4957 write_mchbar16(0x1190, read_mchbar16(0x1190) | 0x4000); // OK
4958
4959 ax = read_mchbar16(0x1190) & 0xf00; // = 0x480a // OK
4960 write_mchbar16(0x1170, ax | (read_mchbar16(0x1170) & 0x107f) | 0x4080); // OK
4961 write_mchbar16(0x1170, read_mchbar16(0x1170) | 0x1000); // OK
4962#if REAL
4963 udelay(1000);
4964#endif
4965 u16 ecx;
4966 for (ecx = 0xffff; ecx && (read_mchbar16(0x1170) & 0x1000); ecx--) ; // OK
4967 write_mchbar16(0x1190, read_mchbar16(0x1190) & ~0x4000); // OK
4968 }
4969
4970 pcie_write_config8(SOUTHBRIDGE, GEN_PMCON_2,
4971 pcie_read_config8(SOUTHBRIDGE, GEN_PMCON_2) & ~0x80);
4972 udelay(10000);
4973 write_mchbar16(0x2ca8, 0x0);
4974
4975#if REAL
4976 udelay(1000);
4977 dump_timings(&info);
4978 if (!s3resume)
4979 save_timings(&info);
4980#endif
4981}
4982
4983#if REAL
4984unsigned long get_top_of_ram(void)
4985{
4986 /* Base of TSEG is top of usable DRAM */
4987 u32 tom = pci_read_config32(PCI_DEV(0, 0, 0), TSEG);
4988 return (unsigned long)tom;
4989}
4990#endif