blob: 70bb7aff7f60e9ce1f4b286413dc6917ed6f6511 [file] [log] [blame]
Patrick Rudolphfd5fa2a2016-11-11 18:22:33 +01001/*
2 * This file is part of the coreboot project.
3 *
4 * Copyright (C) 2014 Damien Zammit <damien@zamaudio.com>
5 * Copyright (C) 2014 Vladimir Serbinenko <phcoder@gmail.com>
6 * Copyright (C) 2016 Patrick Rudolph <siro@das-labor.org>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; version 2 of the License.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 */
17
18#include <console/console.h>
19#include <console/usb.h>
20#include <string.h>
21#include <arch/io.h>
22#include <cbmem.h>
23#include <arch/cbfs.h>
24#include <cbfs.h>
25#include <northbridge/intel/sandybridge/chip.h>
26#include <device/pci_def.h>
27#include <delay.h>
28#include <arch/cpu.h>
29#include <cpu/x86/msr.h>
30#include "raminit_native.h"
31#include "raminit_common.h"
32#include "sandybridge.h"
33
34/* FIXME: no ECC support. */
35/* FIXME: no support for 3-channel chipsets. */
36
37/*
38 * Register description:
39 * Intel provides a command queue of depth four.
40 * Every command is configured by using multiple registers.
41 * On executing the command queue you have to provide the depth used.
42 *
43 * Known registers:
44 * Channel X = [0, 1]
45 * Command queue index Y = [0, 1, 2, 3]
46 *
47 * DEFAULT_MCHBAR + 0x4220 + 0x400 * X + 4 * Y: command io register
48 * Controls the DRAM command signals
49 * Bit 0: !RAS
50 * Bit 1: !CAS
51 * Bit 2: !WE
52 *
53 * DEFAULT_MCHBAR + 0x4200 + 0x400 * X + 4 * Y: addr bankslot io register
54 * Controls the address, bank address and slotrank signals
55 * Bit 0-15 : Address
56 * Bit 20-22: Bank Address
57 * Bit 24-25: slotrank
58 *
59 * DEFAULT_MCHBAR + 0x4230 + 0x400 * X + 4 * Y: idle register
60 * Controls the idle time after issuing this DRAM command
 61 * Bit 16-32: number of clock-cycles to idle
62 *
63 * DEFAULT_MCHBAR + 0x4284 + 0x400 * channel: execute command queue
64 * Starts to execute all queued commands
65 * Bit 0 : start DRAM command execution
66 * Bit 16-20: (number of queued commands - 1) * 4
67 */
68
/* Store fence: serialize all pending stores (x86 SFENCE) before the
 * following MMIO accesses. */
static void sfence(void)
{
	asm volatile ("sfence");
}
73
74static void toggle_io_reset(void) {
75 /* toggle IO reset bit */
76 u32 r32 = read32(DEFAULT_MCHBAR + 0x5030);
77 write32(DEFAULT_MCHBAR + 0x5030, r32 | 0x20);
78 udelay(1);
79 write32(DEFAULT_MCHBAR + 0x5030, r32 & ~0x20);
80 udelay(1);
81}
82
83static u32 get_XOVER_CLK(u8 rankmap)
84{
85 return rankmap << 24;
86}
87
88static u32 get_XOVER_CMD(u8 rankmap)
89{
90 u32 reg;
91
92 // enable xover cmd
93 reg = 0x4000;
94
95 // enable xover ctl
96 if (rankmap & 0x3)
97 reg |= 0x20000;
98
99 if (rankmap & 0xc)
100 reg |= 0x4000000;
101
102 return reg;
103}
104
105/* CAS write latency. To be programmed in MR2.
106 * See DDR3 SPEC for MR2 documentation. */
107u8 get_CWL(u32 tCK)
108{
109 /* Get CWL based on tCK using the following rule: */
110 switch (tCK) {
111 case TCK_1333MHZ:
112 return 12;
113 case TCK_1200MHZ:
114 case TCK_1100MHZ:
115 return 11;
116 case TCK_1066MHZ:
117 case TCK_1000MHZ:
118 return 10;
119 case TCK_933MHZ:
120 case TCK_900MHZ:
121 return 9;
122 case TCK_800MHZ:
123 case TCK_700MHZ:
124 return 8;
125 case TCK_666MHZ:
126 return 7;
127 case TCK_533MHZ:
128 return 6;
129 default:
130 return 5;
131 }
132}
133
134void dram_find_common_params(ramctr_timing *ctrl)
135{
136 size_t valid_dimms;
137 int channel, slot;
138 dimm_info *dimms = &ctrl->info;
139
140 ctrl->cas_supported = (1 << (MAX_CAS - MIN_CAS + 1)) - 1;
141 valid_dimms = 0;
142 FOR_ALL_CHANNELS for (slot = 0; slot < 2; slot++) {
143 const dimm_attr *dimm = &dimms->dimm[channel][slot];
144 if (dimm->dram_type != SPD_MEMORY_TYPE_SDRAM_DDR3)
145 continue;
146 valid_dimms++;
147
148 /* Find all possible CAS combinations */
149 ctrl->cas_supported &= dimm->cas_supported;
150
151 /* Find the smallest common latencies supported by all DIMMs */
152 ctrl->tCK = MAX(ctrl->tCK, dimm->tCK);
153 ctrl->tAA = MAX(ctrl->tAA, dimm->tAA);
154 ctrl->tWR = MAX(ctrl->tWR, dimm->tWR);
155 ctrl->tRCD = MAX(ctrl->tRCD, dimm->tRCD);
156 ctrl->tRRD = MAX(ctrl->tRRD, dimm->tRRD);
157 ctrl->tRP = MAX(ctrl->tRP, dimm->tRP);
158 ctrl->tRAS = MAX(ctrl->tRAS, dimm->tRAS);
159 ctrl->tRFC = MAX(ctrl->tRFC, dimm->tRFC);
160 ctrl->tWTR = MAX(ctrl->tWTR, dimm->tWTR);
161 ctrl->tRTP = MAX(ctrl->tRTP, dimm->tRTP);
162 ctrl->tFAW = MAX(ctrl->tFAW, dimm->tFAW);
163 }
164
165 if (!ctrl->cas_supported)
166 die("Unsupported DIMM combination. "
167 "DIMMS do not support common CAS latency");
168 if (!valid_dimms)
169 die("No valid DIMMs found");
170}
171
172void dram_xover(ramctr_timing * ctrl)
173{
174 u32 reg;
175 int channel;
176
177 FOR_ALL_CHANNELS {
178 // enable xover clk
179 reg = get_XOVER_CLK(ctrl->rankmap[channel]);
180 printram("XOVER CLK [%x] = %x\n", channel * 0x100 + 0xc14,
181 reg);
182 MCHBAR32(channel * 0x100 + 0xc14) = reg;
183
184 // enable xover ctl & xover cmd
185 reg = get_XOVER_CMD(ctrl->rankmap[channel]);
186 printram("XOVER CMD [%x] = %x\n", 0x100 * channel + 0x320c,
187 reg);
188 MCHBAR32(0x100 * channel + 0x320c) = reg;
189 }
190}
191
192void dram_timing_regs(ramctr_timing * ctrl)
193{
194 u32 reg, addr, val32, cpu, stretch;
195 struct cpuid_result cpures;
196 int channel;
197
198 FOR_ALL_CHANNELS {
199 // DBP
200 reg = 0;
201 reg |= ctrl->tRCD;
202 reg |= (ctrl->tRP << 4);
203 reg |= (ctrl->CAS << 8);
204 reg |= (ctrl->CWL << 12);
205 reg |= (ctrl->tRAS << 16);
206 printram("DBP [%x] = %x\n", 0x400 * channel + 0x4000, reg);
207 MCHBAR32(0x400 * channel + 0x4000) = reg;
208
209 // RAP
210 reg = 0;
211 reg |= ctrl->tRRD;
212 reg |= (ctrl->tRTP << 4);
213 reg |= (ctrl->tCKE << 8);
214 reg |= (ctrl->tWTR << 12);
215 reg |= (ctrl->tFAW << 16);
216 reg |= (ctrl->tWR << 24);
217 reg |= (3 << 30);
218 printram("RAP [%x] = %x\n", 0x400 * channel + 0x4004, reg);
219 MCHBAR32(0x400 * channel + 0x4004) = reg;
220
221 // OTHP
222 addr = 0x400 * channel + 0x400c;
223 reg = 0;
224 reg |= ctrl->tXPDLL;
225 reg |= (ctrl->tXP << 5);
226 reg |= (ctrl->tAONPD << 8);
227 reg |= 0xa0000;
228 printram("OTHP [%x] = %x\n", addr, reg);
229 MCHBAR32(addr) = reg;
230
231 MCHBAR32(0x400 * channel + 0x4014) = 0;
232
233 MCHBAR32(addr) |= 0x00020000;
234
235 // ODT stretch
236 reg = 0;
237
238 cpures = cpuid(1);
239 cpu = cpures.eax;
240 if (IS_IVY_CPU(cpu)
241 || (IS_SANDY_CPU(cpu) && IS_SANDY_CPU_D2(cpu))) {
242 stretch = 2;
243 addr = 0x400 * channel + 0x400c;
244 printram("ODT stretch [%x] = %x\n",
245 0x400 * channel + 0x400c, reg);
246 reg = MCHBAR32(addr);
247
248 if (((ctrl->rankmap[channel] & 3) == 0)
249 || (ctrl->rankmap[channel] & 0xc) == 0) {
250
251 // Rank 0 - operate on rank 2
252 reg = (reg & ~0xc0000) | (stretch << 18);
253
254 // Rank 2 - operate on rank 0
255 reg = (reg & ~0x30000) | (stretch << 16);
256
257 printram("ODT stretch [%x] = %x\n", addr, reg);
258 MCHBAR32(addr) = reg;
259 }
260
261 } else if (IS_SANDY_CPU(cpu) && IS_SANDY_CPU_C(cpu)) {
262 stretch = 3;
263 addr = 0x400 * channel + 0x401c;
264 reg = MCHBAR32(addr);
265
266 if (((ctrl->rankmap[channel] & 3) == 0)
267 || (ctrl->rankmap[channel] & 0xc) == 0) {
268
269 // Rank 0 - operate on rank 2
270 reg = (reg & ~0x3000) | (stretch << 12);
271
272 // Rank 2 - operate on rank 0
273 reg = (reg & ~0xc00) | (stretch << 10);
274
275 printram("ODT stretch [%x] = %x\n", addr, reg);
276 MCHBAR32(addr) = reg;
277 }
278 } else {
279 stretch = 0;
280 }
281
282 // REFI
283 reg = 0;
284 val32 = ctrl->tREFI;
285 reg = (reg & ~0xffff) | val32;
286 val32 = ctrl->tRFC;
287 reg = (reg & ~0x1ff0000) | (val32 << 16);
288 val32 = (u32) (ctrl->tREFI * 9) / 1024;
289 reg = (reg & ~0xfe000000) | (val32 << 25);
290 printram("REFI [%x] = %x\n", 0x400 * channel + 0x4298,
291 reg);
292 MCHBAR32(0x400 * channel + 0x4298) = reg;
293
294 MCHBAR32(0x400 * channel + 0x4294) |= 0xff;
295
296 // SRFTP
297 reg = 0;
298 val32 = tDLLK;
299 reg = (reg & ~0xfff) | val32;
300 val32 = ctrl->tXSOffset;
301 reg = (reg & ~0xf000) | (val32 << 12);
302 val32 = tDLLK - ctrl->tXSOffset;
303 reg = (reg & ~0x3ff0000) | (val32 << 16);
304 val32 = ctrl->tMOD - 8;
305 reg = (reg & ~0xf0000000) | (val32 << 28);
306 printram("SRFTP [%x] = %x\n", 0x400 * channel + 0x42a4,
307 reg);
308 MCHBAR32(0x400 * channel + 0x42a4) = reg;
309 }
310}
311
/*
 * Compute the MAD-DIMM register value for each channel and cache it in
 * ctrl->mad_dimm[]. The larger DIMM of a channel always becomes "dimm A";
 * size (in 256MB units), rank count and chip width of both DIMMs are
 * packed into the register, and rank/enhanced interleave are enabled.
 * Empty channels get 0.
 */
void dram_dimm_mapping(ramctr_timing *ctrl)
{
	u32 reg, val32;
	int channel;
	dimm_info *info = &ctrl->info;

	FOR_ALL_CHANNELS {
		dimm_attr *dimmA = 0;
		dimm_attr *dimmB = 0;
		reg = 0;
		val32 = 0;
		if (info->dimm[channel][0].size_mb >=
		    info->dimm[channel][1].size_mb) {
			// dimm 0 is bigger, set it to dimmA
			dimmA = &info->dimm[channel][0];
			dimmB = &info->dimm[channel][1];
			reg |= (0 << 16);	// bit 16: dimm A select = slot 0
		} else {
			// dimm 1 is bigger, set it to dimmA
			dimmA = &info->dimm[channel][1];
			dimmB = &info->dimm[channel][0];
			reg |= (1 << 16);	// bit 16: dimm A select = slot 1
		}
		// dimmA: bits 0-7 size/256MB, bit 17 ranks-1, bit 19 width/8-1
		if (dimmA && (dimmA->ranks > 0)) {
			val32 = dimmA->size_mb / 256;
			reg = (reg & ~0xff) | val32;
			val32 = dimmA->ranks - 1;
			reg = (reg & ~0x20000) | (val32 << 17);
			val32 = (dimmA->width / 8) - 1;
			reg = (reg & ~0x80000) | (val32 << 19);
		}
		// dimmB: bits 8-15 size/256MB, bit 18 ranks-1, bit 20 width/8-1
		if (dimmB && (dimmB->ranks > 0)) {
			val32 = dimmB->size_mb / 256;
			reg = (reg & ~0xff00) | (val32 << 8);
			val32 = dimmB->ranks - 1;
			reg = (reg & ~0x40000) | (val32 << 18);
			val32 = (dimmB->width / 8) - 1;
			reg = (reg & ~0x100000) | (val32 << 20);
		}
		reg = (reg & ~0x200000) | (1 << 21);	// rank interleave
		reg = (reg & ~0x400000) | (1 << 22);	// enhanced interleave

		// Save MAD-DIMM register; 0 when the channel has no DIMMs
		if ((dimmA && (dimmA->ranks > 0))
		    || (dimmB && (dimmB->ranks > 0))) {
			ctrl->mad_dimm[channel] = reg;
		} else {
			ctrl->mad_dimm[channel] = 0;
		}
	}
}
365
366void dram_dimm_set_mapping(ramctr_timing * ctrl)
367{
368 int channel;
369 FOR_ALL_CHANNELS {
370 MCHBAR32(0x5004 + channel * 4) = ctrl->mad_dimm[channel];
371 }
372}
373
374void dram_zones(ramctr_timing * ctrl, int training)
375{
376 u32 reg, ch0size, ch1size;
377 u8 val;
378 reg = 0;
379 val = 0;
380 if (training) {
381 ch0size = ctrl->channel_size_mb[0] ? 256 : 0;
382 ch1size = ctrl->channel_size_mb[1] ? 256 : 0;
383 } else {
384 ch0size = ctrl->channel_size_mb[0];
385 ch1size = ctrl->channel_size_mb[1];
386 }
387
388 if (ch0size >= ch1size) {
389 reg = MCHBAR32(0x5014);
390 val = ch1size / 256;
391 reg = (reg & ~0xff000000) | val << 24;
392 reg = (reg & ~0xff0000) | (2 * val) << 16;
393 MCHBAR32(0x5014) = reg;
394 MCHBAR32(0x5000) = 0x24;
395 } else {
396 reg = MCHBAR32(0x5014);
397 val = ch0size / 256;
398 reg = (reg & ~0xff000000) | val << 24;
399 reg = (reg & ~0xff0000) | (2 * val) << 16;
400 MCHBAR32(0x5014) = reg;
401 MCHBAR32(0x5000) = 0x21;
402 }
403}
404
405#define HOST_BRIDGE PCI_DEVFN(0, 0)
406#define DEFAULT_TCK TCK_800MHZ
407
408unsigned int get_mem_min_tck(void)
409{
410 u32 reg32;
411 u8 rev;
412 const struct device *dev;
413 const struct northbridge_intel_sandybridge_config *cfg = NULL;
414
415 dev = dev_find_slot(0, HOST_BRIDGE);
416 if (dev)
417 cfg = dev->chip_info;
418
419 /* If this is zero, it just means devicetree.cb didn't set it */
420 if (!cfg || cfg->max_mem_clock_mhz == 0) {
421 rev = pci_read_config8(PCI_DEV(0, 0, 0), PCI_DEVICE_ID);
422
423 if ((rev & BASE_REV_MASK) == BASE_REV_SNB) {
424 /* read Capabilities A Register DMFC bits */
425 reg32 = pci_read_config32(PCI_DEV(0, 0, 0), CAPID0_A);
426 reg32 &= 0x7;
427
428 switch (reg32) {
429 case 7: return TCK_533MHZ;
430 case 6: return TCK_666MHZ;
431 case 5: return TCK_800MHZ;
432 /* reserved: */
433 default:
434 break;
435 }
436 } else {
437 /* read Capabilities B Register DMFC bits */
438 reg32 = pci_read_config32(PCI_DEV(0, 0, 0), CAPID0_B);
439 reg32 = (reg32 >> 4) & 0x7;
440
441 switch (reg32) {
442 case 7: return TCK_533MHZ;
443 case 6: return TCK_666MHZ;
444 case 5: return TCK_800MHZ;
445 case 4: return TCK_933MHZ;
446 case 3: return TCK_1066MHZ;
447 case 2: return TCK_1200MHZ;
448 case 1: return TCK_1333MHZ;
449 /* reserved: */
450 default:
451 break;
452 }
453 }
454 return DEFAULT_TCK;
455 } else {
456 if (cfg->max_mem_clock_mhz >= 1066)
457 return TCK_1066MHZ;
458 else if (cfg->max_mem_clock_mhz >= 933)
459 return TCK_933MHZ;
460 else if (cfg->max_mem_clock_mhz >= 800)
461 return TCK_800MHZ;
462 else if (cfg->max_mem_clock_mhz >= 666)
463 return TCK_666MHZ;
464 else if (cfg->max_mem_clock_mhz >= 533)
465 return TCK_533MHZ;
466 else
467 return TCK_400MHZ;
468 }
469}
470
471#define DEFAULT_PCI_MMIO_SIZE 2048
472
473static unsigned int get_mmio_size(void)
474{
475 const struct device *dev;
476 const struct northbridge_intel_sandybridge_config *cfg = NULL;
477
478 dev = dev_find_slot(0, HOST_BRIDGE);
479 if (dev)
480 cfg = dev->chip_info;
481
482 /* If this is zero, it just means devicetree.cb didn't set it */
483 if (!cfg || cfg->pci_mmio_size == 0)
484 return DEFAULT_PCI_MMIO_SIZE;
485 else
486 return cfg->pci_mmio_size;
487}
488
/*
 * Compute the system memory map (all values in MiB) and program it into
 * the host-bridge PCI configuration space: TOM, TOLUD, TOUUD,
 * REMAP BASE/LIMIT, TSEG, GFX/GTT stolen memory and the ME stolen
 * region. me_uma_size is the amount of memory (MiB) reserved for the ME.
 */
void dram_memorymap(ramctr_timing * ctrl, int me_uma_size)
{
	u32 reg, val, reclaim;
	u32 tom, gfxstolen, gttsize;
	size_t tsegsize, mmiosize, toludbase, touudbase, gfxstolenbase, gttbase,
	    tsegbase, mestolenbase;
	size_t tsegbasedelta, remapbase, remaplimit;
	uint16_t ggc;

	mmiosize = get_mmio_size();

	/* GGC bit 1 clear = IGD enabled: read stolen graphics/GTT sizes */
	ggc = pci_read_config16(NORTHBRIDGE, GGC);
	if (!(ggc & 2)) {
		gfxstolen = ((ggc >> 3) & 0x1f) * 32;
		gttsize = ((ggc >> 8) & 0x3);
	} else {
		gfxstolen = 0;
		gttsize = 0;
	}

	tsegsize = CONFIG_SMM_TSEG_SIZE >> 20;

	tom = ctrl->channel_size_mb[0] + ctrl->channel_size_mb[1];

	mestolenbase = tom - me_uma_size;

	/* Stack the stolen regions below the 4GiB MMIO hole (or below
	 * the ME region if total memory is smaller). */
	toludbase = MIN(4096 - mmiosize + gfxstolen + gttsize + tsegsize,
			tom - me_uma_size);
	gfxstolenbase = toludbase - gfxstolen;
	gttbase = gfxstolenbase - gttsize;

	tsegbase = gttbase - tsegsize;

	// Round tsegbase down to nearest address aligned to tsegsize
	tsegbasedelta = tsegbase & (tsegsize - 1);
	tsegbase &= ~(tsegsize - 1);

	/* Shift everything above TSEG down by the alignment slack. */
	gttbase -= tsegbasedelta;
	gfxstolenbase -= tsegbasedelta;
	toludbase -= tsegbasedelta;

	// Test if it is possible to reclaim a hole in the RAM addressing
	if (tom - me_uma_size > toludbase) {
		// Reclaim is possible
		reclaim = 1;
		remapbase = MAX(4096, tom - me_uma_size);
		remaplimit =
		    remapbase + MIN(4096, tom - me_uma_size) - toludbase - 1;
		touudbase = remaplimit + 1;
	} else {
		// Reclaim not possible
		reclaim = 0;
		touudbase = tom - me_uma_size;
	}

	// Update memory map in pci-e configuration space
	printk(BIOS_DEBUG, "Update PCI-E configuration space:\n");

	// TOM (top of memory): low bits into [31:20], high bits into next reg
	reg = pcie_read_config32(PCI_DEV(0, 0, 0), 0xa0);
	val = tom & 0xfff;
	reg = (reg & ~0xfff00000) | (val << 20);
	printk(BIOS_DEBUG, "PCI(0, 0, 0)[%x] = %x\n", 0xa0, reg);
	pcie_write_config32(PCI_DEV(0, 0, 0), 0xa0, reg);

	reg = pcie_read_config32(PCI_DEV(0, 0, 0), 0xa4);
	val = tom & 0xfffff000;
	reg = (reg & ~0x000fffff) | (val >> 12);
	printk(BIOS_DEBUG, "PCI(0, 0, 0)[%x] = %x\n", 0xa4, reg);
	pcie_write_config32(PCI_DEV(0, 0, 0), 0xa4, reg);

	// TOLUD (top of low used dram)
	reg = pcie_read_config32(PCI_DEV(0, 0, 0), 0xbc);
	val = toludbase & 0xfff;
	reg = (reg & ~0xfff00000) | (val << 20);
	printk(BIOS_DEBUG, "PCI(0, 0, 0)[%x] = %x\n", 0xbc, reg);
	pcie_write_config32(PCI_DEV(0, 0, 0), 0xbc, reg);

	// TOUUD LSB (top of upper usable dram)
	reg = pcie_read_config32(PCI_DEV(0, 0, 0), 0xa8);
	val = touudbase & 0xfff;
	reg = (reg & ~0xfff00000) | (val << 20);
	printk(BIOS_DEBUG, "PCI(0, 0, 0)[%x] = %x\n", 0xa8, reg);
	pcie_write_config32(PCI_DEV(0, 0, 0), 0xa8, reg);

	// TOUUD MSB
	reg = pcie_read_config32(PCI_DEV(0, 0, 0), 0xac);
	val = touudbase & 0xfffff000;
	reg = (reg & ~0x000fffff) | (val >> 12);
	printk(BIOS_DEBUG, "PCI(0, 0, 0)[%x] = %x\n", 0xac, reg);
	pcie_write_config32(PCI_DEV(0, 0, 0), 0xac, reg);

	if (reclaim) {
		// REMAP BASE
		pcie_write_config32(PCI_DEV(0, 0, 0), 0x90, remapbase << 20);
		pcie_write_config32(PCI_DEV(0, 0, 0), 0x94, remapbase >> 12);

		// REMAP LIMIT
		pcie_write_config32(PCI_DEV(0, 0, 0), 0x98, remaplimit << 20);
		pcie_write_config32(PCI_DEV(0, 0, 0), 0x9c, remaplimit >> 12);
	}
	// TSEG
	reg = pcie_read_config32(PCI_DEV(0, 0, 0), 0xb8);
	val = tsegbase & 0xfff;
	reg = (reg & ~0xfff00000) | (val << 20);
	printk(BIOS_DEBUG, "PCI(0, 0, 0)[%x] = %x\n", 0xb8, reg);
	pcie_write_config32(PCI_DEV(0, 0, 0), 0xb8, reg);

	// GFX stolen memory
	reg = pcie_read_config32(PCI_DEV(0, 0, 0), 0xb0);
	val = gfxstolenbase & 0xfff;
	reg = (reg & ~0xfff00000) | (val << 20);
	printk(BIOS_DEBUG, "PCI(0, 0, 0)[%x] = %x\n", 0xb0, reg);
	pcie_write_config32(PCI_DEV(0, 0, 0), 0xb0, reg);

	// GTT stolen memory
	reg = pcie_read_config32(PCI_DEV(0, 0, 0), 0xb4);
	val = gttbase & 0xfff;
	reg = (reg & ~0xfff00000) | (val << 20);
	printk(BIOS_DEBUG, "PCI(0, 0, 0)[%x] = %x\n", 0xb4, reg);
	pcie_write_config32(PCI_DEV(0, 0, 0), 0xb4, reg);

	if (me_uma_size) {
		reg = pcie_read_config32(PCI_DEV(0, 0, 0), 0x7c);
		val = (0x80000 - me_uma_size) & 0xfffff000;
		reg = (reg & ~0x000fffff) | (val >> 12);
		printk(BIOS_DEBUG, "PCI(0, 0, 0)[%x] = %x\n", 0x7c, reg);
		pcie_write_config32(PCI_DEV(0, 0, 0), 0x7c, reg);

		// ME base
		reg = pcie_read_config32(PCI_DEV(0, 0, 0), 0x70);
		val = mestolenbase & 0xfff;
		reg = (reg & ~0xfff00000) | (val << 20);
		printk(BIOS_DEBUG, "PCI(0, 0, 0)[%x] = %x\n", 0x70, reg);
		pcie_write_config32(PCI_DEV(0, 0, 0), 0x70, reg);

		reg = pcie_read_config32(PCI_DEV(0, 0, 0), 0x74);
		val = mestolenbase & 0xfffff000;
		reg = (reg & ~0x000fffff) | (val >> 12);
		printk(BIOS_DEBUG, "PCI(0, 0, 0)[%x] = %x\n", 0x74, reg);
		pcie_write_config32(PCI_DEV(0, 0, 0), 0x74, reg);

		// ME mask
		reg = pcie_read_config32(PCI_DEV(0, 0, 0), 0x78);
		val = (0x80000 - me_uma_size) & 0xfff;
		reg = (reg & ~0xfff00000) | (val << 20);
		reg = (reg & ~0x400) | (1 << 10);	// set lockbit on ME mem

		reg = (reg & ~0x800) | (1 << 11);	// set ME memory enable
		printk(BIOS_DEBUG, "PCI(0, 0, 0)[%x] = %x\n", 0x78, reg);
		pcie_write_config32(PCI_DEV(0, 0, 0), 0x78, reg);
	}
}
642
/* Spin until the channel's command/status register at MCHBAR
 * 0x428c + 0x400 * channel reports bit 4 or bit 6 set.
 * NOTE(review): there is no timeout -- this hangs forever if the
 * hardware never sets those bits; confirm that is acceptable here. */
static void wait_428c(int channel)
{
	while (1) {
		if (read32(DEFAULT_MCHBAR + 0x428c + (channel << 10)) & 0x50)
			return;
	}
}
650
/* Issue a single ZQCS command on one populated channel/rank through the
 * command queue, used as part of the JEDEC reset sequence. */
static void write_reset(ramctr_timing * ctrl)
{
	int channel, slotrank;

	/* choose a populated channel. */
	channel = (ctrl->rankmap[0]) ? 0 : 1;

	wait_428c(channel);

	/* choose a populated rank. */
	slotrank = (ctrl->rankmap[channel] & 1) ? 0 : 2;

	/* DRAM command ZQCS */
	write32(DEFAULT_MCHBAR + 0x4220 + 0x400 * channel, 0x0f003);
	write32(DEFAULT_MCHBAR + 0x4230 + 0x400 * channel, 0x80c01);

	write32(DEFAULT_MCHBAR + 0x4200 + 0x400 * channel,
		(slotrank << 24) | 0x60000);

	write32(DEFAULT_MCHBAR + 0x4210 + 0x400 * channel, 0);

	/* execute the one-deep command queue and wait for completion */
	write32(DEFAULT_MCHBAR + 0x4284 + 0x400 * channel, 0x400001);
	wait_428c(channel);
}
675
/*
 * Perform the JEDEC-mandated DDR3 reset/initialization sequence:
 * wait for the controller to be ready, toggle the DIMM reset signal
 * with the required delays, enable DCLK, assert CKE on all populated
 * ranks and issue the first command (via write_reset).
 */
void dram_jedecreset(ramctr_timing * ctrl)
{
	u32 reg, addr;
	int channel;

	/* Wait until the controller reports ready at 0x5084/0x428c */
	while (!(MCHBAR32(0x5084) & 0x10000));
	do {
		reg = MCHBAR32(0x428c);
	} while ((reg & 0x14) == 0);

	// Set state of memory controller
	reg = 0x112;
	MCHBAR32(0x5030) = reg;
	MCHBAR32(0x4ea0) = 0;
	reg |= 2;		//ddr reset
	MCHBAR32(0x5030) = reg;

	// Assert dimm reset signal
	reg = MCHBAR32(0x5030);
	reg &= ~0x2;
	MCHBAR32(0x5030) = reg;

	// Wait 200us
	udelay(200);

	// Deassert dimm reset signal
	MCHBAR32(0x5030) |= 2;

	// Wait 500us
	udelay(500);

	// Enable DCLK
	MCHBAR32(0x5030) |= 4;

	// XXX Wait 20ns
	udelay(1);

	FOR_ALL_CHANNELS {
		// Set valid rank CKE
		reg = 0;
		reg = (reg & ~0xf) | ctrl->rankmap[channel];
		addr = 0x400 * channel + 0x42a0;
		MCHBAR32(addr) = reg;

		// Wait 10ns for ranks to settle
		//udelay(0.01);

		reg = (reg & ~0xf0) | (ctrl->rankmap[channel] << 4);
		MCHBAR32(addr) = reg;

		// Write reset using a NOP
		write_reset(ctrl);
	}
}
730
731static odtmap get_ODT(ramctr_timing *ctrl, u8 rank, int channel)
732{
733 /* Get ODT based on rankmap: */
734 int dimms_per_ch = (ctrl->rankmap[channel] & 1)
735 + ((ctrl->rankmap[channel] >> 2) & 1);
736
737 if (dimms_per_ch == 1) {
738 return (const odtmap){60, 60};
739 } else {
740 return (const odtmap){120, 30};
741 }
742}
743
/*
 * Write a DDR3 mode register (MR0..MR3) on one slotrank by issuing three
 * MRS commands through the command queue. For mirrored ranks the address
 * and bank-address bits are swizzled before transmission.
 */
static void write_mrreg(ramctr_timing *ctrl, int channel, int slotrank,
			int reg, u32 val)
{
	wait_428c(channel);

	if (ctrl->rank_mirror[channel][slotrank]) {
		/* DDR3 Rank1 Address mirror
		 * swap the following pins:
		 * A3<->A4, A5<->A6, A7<->A8, BA0<->BA1 */
		reg = ((reg >> 1) & 1) | ((reg << 1) & 2);
		val = (val & ~0x1f8) | ((val >> 1) & 0xa8)
		    | ((val & 0xa8) << 1);
	}

	/* DRAM command MRS */
	write32(DEFAULT_MCHBAR + 0x4220 + 0x400 * channel, 0x0f000);
	write32(DEFAULT_MCHBAR + 0x4230 + 0x400 * channel, 0x41001);
	write32(DEFAULT_MCHBAR + 0x4200 + 0x400 * channel,
		(slotrank << 24) | (reg << 20) | val | 0x60000);
	write32(DEFAULT_MCHBAR + 0x4210 + 0x400 * channel, 0);

	/* DRAM command MRS */
	write32(DEFAULT_MCHBAR + 0x4224 + 0x400 * channel, 0x1f000);
	write32(DEFAULT_MCHBAR + 0x4234 + 0x400 * channel, 0x41001);
	write32(DEFAULT_MCHBAR + 0x4204 + 0x400 * channel,
		(slotrank << 24) | (reg << 20) | val | 0x60000);
	write32(DEFAULT_MCHBAR + 0x4214 + 0x400 * channel, 0);

	/* DRAM command MRS, idle time covers tMOD */
	write32(DEFAULT_MCHBAR + 0x4228 + 0x400 * channel, 0x0f000);
	write32(DEFAULT_MCHBAR + 0x4238 + 0x400 * channel,
		0x1001 | (ctrl->tMOD << 16));
	write32(DEFAULT_MCHBAR + 0x4208 + 0x400 * channel,
		(slotrank << 24) | (reg << 20) | val | 0x60000);
	write32(DEFAULT_MCHBAR + 0x4218 + 0x400 * channel, 0);
	/* execute the queued commands */
	write32(DEFAULT_MCHBAR + 0x4284 + 0x400 * channel, 0x80001);
}
781
782static u32 make_mr0(ramctr_timing * ctrl, u8 rank)
783{
784 u16 mr0reg, mch_cas, mch_wr;
785 static const u8 mch_wr_t[12] = { 1, 2, 3, 4, 0, 5, 0, 6, 0, 7, 0, 0 };
786
787 /* DLL Reset - self clearing - set after CLK frequency has been changed */
788 mr0reg = 0x100;
789
790 // Convert CAS to MCH register friendly
791 if (ctrl->CAS < 12) {
792 mch_cas = (u16) ((ctrl->CAS - 4) << 1);
793 } else {
794 mch_cas = (u16) (ctrl->CAS - 12);
795 mch_cas = ((mch_cas << 1) | 0x1);
796 }
797
798 // Convert tWR to MCH register friendly
799 mch_wr = mch_wr_t[ctrl->tWR - 5];
800
801 mr0reg = (mr0reg & ~0x4) | ((mch_cas & 0x1) << 2);
802 mr0reg = (mr0reg & ~0x70) | ((mch_cas & 0xe) << 3);
803 mr0reg = (mr0reg & ~0xe00) | (mch_wr << 9);
804
805 // Precharge PD - Fast (desktop) 0x1 or slow (mobile) 0x0 - mostly power-saving feature
806 mr0reg = (mr0reg & ~0x1000) | (!ctrl->mobile << 12);
807 return mr0reg;
808}
809
810static void dram_mr0(ramctr_timing *ctrl, u8 rank, int channel)
811{
812 write_mrreg(ctrl, channel, rank, 0,
813 make_mr0(ctrl, rank));
814}
815
816static u32 encode_odt(u32 odt)
817{
818 switch (odt) {
819 case 30:
820 return (1 << 9) | (1 << 2); // RZQ/8, RZQ/4
821 case 60:
822 return (1 << 2); // RZQ/4
823 case 120:
824 return (1 << 6); // RZQ/2
825 default:
826 case 0:
827 return 0;
828 }
829}
830
831static u32 make_mr1(ramctr_timing *ctrl, u8 rank, int channel)
832{
833 odtmap odt;
834 u32 mr1reg;
835
836 odt = get_ODT(ctrl, rank, channel);
837 mr1reg = 0x2;
838
839 mr1reg |= encode_odt(odt.rttnom);
840
841 return mr1reg;
842}
843
844static void dram_mr1(ramctr_timing *ctrl, u8 rank, int channel)
845{
846 u16 mr1reg;
847
848 mr1reg = make_mr1(ctrl, rank, channel);
849
850 write_mrreg(ctrl, channel, rank, 1, mr1reg);
851}
852
853static void dram_mr2(ramctr_timing *ctrl, u8 rank, int channel)
854{
855 u16 pasr, cwl, mr2reg;
856 odtmap odt;
857 int srt;
858
859 pasr = 0;
860 cwl = ctrl->CWL - 5;
861 odt = get_ODT(ctrl, rank, channel);
862
863 srt = ctrl->extended_temperature_range && !ctrl->auto_self_refresh;
864
865 mr2reg = 0;
866 mr2reg = (mr2reg & ~0x7) | pasr;
867 mr2reg = (mr2reg & ~0x38) | (cwl << 3);
868 mr2reg = (mr2reg & ~0x40) | (ctrl->auto_self_refresh << 6);
869 mr2reg = (mr2reg & ~0x80) | (srt << 7);
870 mr2reg |= (odt.rttwr / 60) << 9;
871
872 write_mrreg(ctrl, channel, rank, 2, mr2reg);
873}
874
/* Program MR3 with all-zero contents (MPR disabled, normal operation). */
static void dram_mr3(ramctr_timing *ctrl, u8 rank, int channel)
{
	write_mrreg(ctrl, channel, rank, 3, 0);
}
879
/*
 * Run the DDR3 mode-register programming sequence on every populated
 * rank (MR2, MR3, MR1, MR0 in that order), then issue NOP and ZQCL,
 * enable refresh, and finish each channel with a ZQCS.
 */
void dram_mrscommands(ramctr_timing * ctrl)
{
	u8 slotrank;
	u32 reg, addr;
	int channel;

	FOR_ALL_POPULATED_CHANNELS {
		FOR_ALL_POPULATED_RANKS {
			// MR2
			dram_mr2(ctrl, slotrank, channel);

			// MR3
			dram_mr3(ctrl, slotrank, channel);

			// MR1
			dram_mr1(ctrl, slotrank, channel);

			// MR0
			dram_mr0(ctrl, slotrank, channel);
		}
	}

	/* DRAM command NOP */
	write32(DEFAULT_MCHBAR + 0x4e20, 0x7);
	write32(DEFAULT_MCHBAR + 0x4e30, 0xf1001);
	write32(DEFAULT_MCHBAR + 0x4e00, 0x60002);
	write32(DEFAULT_MCHBAR + 0x4e10, 0);

	/* DRAM command ZQCL */
	write32(DEFAULT_MCHBAR + 0x4e24, 0x1f003);
	write32(DEFAULT_MCHBAR + 0x4e34, 0x1901001);
	write32(DEFAULT_MCHBAR + 0x4e04, 0x60400);
	write32(DEFAULT_MCHBAR + 0x4e14, 0x288);

	/* execute command queue on all channels ? */
	write32(DEFAULT_MCHBAR + 0x4e84, 0x40004);

	// Drain
	FOR_ALL_CHANNELS {
		// Wait for ref drained
		wait_428c(channel);
	}

	// Refresh enable
	MCHBAR32(0x5030) |= 8;

	FOR_ALL_POPULATED_CHANNELS {
		// Clear bit 21 of per-channel register 0x4020
		addr = 0x400 * channel + 0x4020;
		reg = MCHBAR32(addr);
		reg &= ~0x200000;
		MCHBAR32(addr) = reg;

		wait_428c(channel);

		// Pick a populated rank for the ZQCS
		slotrank = (ctrl->rankmap[channel] & 1) ? 0 : 2;

		// Drain
		wait_428c(channel);

		/* DRAM command ZQCS */
		write32(DEFAULT_MCHBAR + 0x4220 + 0x400 * channel, 0x0f003);
		write32(DEFAULT_MCHBAR + 0x4230 + 0x400 * channel, 0x659001);
		write32(DEFAULT_MCHBAR + 0x4200 + 0x400 * channel,
			(slotrank << 24) | 0x60000);
		write32(DEFAULT_MCHBAR + 0x4210 + 0x400 * channel, 0x3e0);
		write32(DEFAULT_MCHBAR + 0x4284 + 0x400 * channel, 0x1);

		// Drain
		wait_428c(channel);
	}
}
951
/* MCHBAR offsets of the per-lane DDR IO register blocks; the last entry
 * is presumably the ECC lane -- TODO confirm. */
static const u32 lane_registers[] = {
	0x0000, 0x0200, 0x0400, 0x0600,
	0x1000, 0x1200, 0x1400, 0x1600,
	0x0800
};
957
/*
 * Apply the trained timing values of one channel to the hardware:
 * command crossover (0x320c), clock crossover (0xc14/0xc18), the
 * per-rank registers 0x4024/0x4028, and the per-lane timA/timB/timC/
 * rising/falling fields in the lane register blocks. All per-rank
 * val_320c values are shifted by a common offset (full_shift) so none
 * is negative.
 */
void program_timings(ramctr_timing * ctrl, int channel)
{
	u32 reg32, reg_4024, reg_c14, reg_c18, reg_4028;
	int lane;
	int slotrank, slot;
	int full_shift = 0;
	u16 slot320c[NUM_SLOTS];

	/* full_shift = -(most negative val_320c), or 0 */
	FOR_ALL_POPULATED_RANKS {
		if (full_shift < -ctrl->timings[channel][slotrank].val_320c)
			full_shift = -ctrl->timings[channel][slotrank].val_320c;
	}

	/* Per-slot command delay: value of the single populated rank, or
	 * the average of both ranks; 0x7f for an empty slot. */
	for (slot = 0; slot < NUM_SLOTS; slot++)
		switch ((ctrl->rankmap[channel] >> (2 * slot)) & 3) {
		case 0:
		default:
			slot320c[slot] = 0x7f;
			break;
		case 1:
			slot320c[slot] =
			    ctrl->timings[channel][2 * slot + 0].val_320c +
			    full_shift;
			break;
		case 2:
			slot320c[slot] =
			    ctrl->timings[channel][2 * slot + 1].val_320c +
			    full_shift;
			break;
		case 3:
			slot320c[slot] =
			    (ctrl->timings[channel][2 * slot].val_320c +
			     ctrl->timings[channel][2 * slot +
						    1].val_320c) / 2 +
			    full_shift;
			break;
		}

	/* enable CMD XOVER */
	reg32 = get_XOVER_CMD(ctrl->rankmap[channel]);
	reg32 |= ((slot320c[0] & 0x3f) << 6) | ((slot320c[0] & 0x40) << 9);
	reg32 |= (slot320c[1] & 0x7f) << 18;
	reg32 |= (full_shift & 0x3f) | ((full_shift & 0x40) << 6);

	MCHBAR32(0x320c + 0x100 * channel) = reg32;

	/* enable CLK XOVER */
	reg_c14 = get_XOVER_CLK(ctrl->rankmap[channel]);
	reg_c18 = 0;

	FOR_ALL_POPULATED_RANKS {
		/* 7-bit clock phase per rank: low 6 bits in 0xc14,
		 * carry bit in 0xc18 */
		int shift =
		    ctrl->timings[channel][slotrank].val_320c + full_shift;
		int offset_val_c14;
		if (shift < 0)
			shift = 0;
		offset_val_c14 = ctrl->reg_c14_offset + shift;
		/* set CLK phase shift */
		reg_c14 |= (offset_val_c14 & 0x3f) << (6 * slotrank);
		reg_c18 |= ((offset_val_c14 >> 6) & 1) << slotrank;
	}

	MCHBAR32(0xc14 + channel * 0x100) = reg_c14;
	MCHBAR32(0xc18 + channel * 0x100) = reg_c18;

	/* Keep the upper half of 0x4028; rebuild the per-rank nibbles */
	reg_4028 = MCHBAR32(0x4028 + 0x400 * channel);
	reg_4028 &= 0xffff0000;

	reg_4024 = 0;

	FOR_ALL_POPULATED_RANKS {
		int post_timA_min_high = 7, post_timA_max_high = 0;
		int pre_timA_min_high = 7, pre_timA_max_high = 0;
		int shift_402x = 0;
		int shift =
		    ctrl->timings[channel][slotrank].val_320c + full_shift;

		if (shift < 0)
			shift = 0;

		/* min/max of the lanes' timA upper bits (>> 6), both
		 * before and after applying the shift */
		FOR_ALL_LANES {
			if (post_timA_min_high >
			    ((ctrl->timings[channel][slotrank].lanes[lane].
			      timA + shift) >> 6))
				post_timA_min_high =
				    ((ctrl->timings[channel][slotrank].
				      lanes[lane].timA + shift) >> 6);
			if (pre_timA_min_high >
			    (ctrl->timings[channel][slotrank].lanes[lane].
			     timA >> 6))
				pre_timA_min_high =
				    (ctrl->timings[channel][slotrank].
				     lanes[lane].timA >> 6);
			if (post_timA_max_high <
			    ((ctrl->timings[channel][slotrank].lanes[lane].
			      timA + shift) >> 6))
				post_timA_max_high =
				    ((ctrl->timings[channel][slotrank].
				      lanes[lane].timA + shift) >> 6);
			if (pre_timA_max_high <
			    (ctrl->timings[channel][slotrank].lanes[lane].
			     timA >> 6))
				pre_timA_max_high =
				    (ctrl->timings[channel][slotrank].
				     lanes[lane].timA >> 6);
		}

		/* Compensate 0x4024/0x4028 when the shift changed the
		 * spread of the lanes' upper timA bits */
		if (pre_timA_max_high - pre_timA_min_high <
		    post_timA_max_high - post_timA_min_high)
			shift_402x = +1;
		else if (pre_timA_max_high - pre_timA_min_high >
			 post_timA_max_high - post_timA_min_high)
			shift_402x = -1;

		reg_4028 |=
		    (ctrl->timings[channel][slotrank].val_4028 + shift_402x -
		     post_timA_min_high) << (4 * slotrank);
		reg_4024 |=
		    (ctrl->timings[channel][slotrank].val_4024 +
		     shift_402x) << (8 * slotrank);

		/* Per-lane registers: +0x10 holds timA/rising/falling,
		 * +0x20 holds timB/timC */
		FOR_ALL_LANES {
			MCHBAR32(lane_registers[lane] + 0x10 + 0x100 * channel +
				 4 * slotrank)
			    =
			    (((ctrl->timings[channel][slotrank].lanes[lane].
			       timA + shift) & 0x3f)
			     |
			     ((ctrl->timings[channel][slotrank].lanes[lane].
			       rising + shift) << 8)
			     |
			     (((ctrl->timings[channel][slotrank].lanes[lane].
				timA + shift -
				(post_timA_min_high << 6)) & 0x1c0) << 10)
			     | ((ctrl->timings[channel][slotrank].lanes[lane].
				falling + shift) << 20));

			MCHBAR32(lane_registers[lane] + 0x20 + 0x100 * channel +
				 4 * slotrank)
			    =
			    (((ctrl->timings[channel][slotrank].lanes[lane].
			       timC + shift) & 0x3f)
			     |
			     (((ctrl->timings[channel][slotrank].lanes[lane].
				timB + shift) & 0x3f) << 8)
			     |
			     (((ctrl->timings[channel][slotrank].lanes[lane].
				timB + shift) & 0x1c0) << 9)
			     |
			     (((ctrl->timings[channel][slotrank].lanes[lane].
				timC + shift) & 0x40) << 13));
		}
	}
	MCHBAR32(0x4024 + 0x400 * channel) = reg_4024;
	MCHBAR32(0x4028 + 0x400 * channel) = reg_4028;
}
1114
/*
 * Issue one timA test sequence on a slotrank: enable MPR mode via MR3,
 * perform two reads of the predefined pattern, then disable MPR.
 * Results are collected afterwards via does_lane_work().
 */
static void test_timA(ramctr_timing * ctrl, int channel, int slotrank)
{
	wait_428c(channel);

	/* DRAM command MRS
	 * write MR3 MPR enable
	 * in this mode only RD and RDA are allowed
	 * all reads return a predefined pattern */
	write32(DEFAULT_MCHBAR + 0x4220 + 0x400 * channel, 0x1f000);
	write32(DEFAULT_MCHBAR + 0x4230 + 0x400 * channel,
		(0xc01 | (ctrl->tMOD << 16)));
	write32(DEFAULT_MCHBAR + 0x4200 + 0x400 * channel,
		(slotrank << 24) | 0x360004);
	write32(DEFAULT_MCHBAR + 0x4210 + 0x400 * channel, 0);

	/* DRAM command RD */
	write32(DEFAULT_MCHBAR + 0x4224 + 0x400 * channel, 0x1f105);
	write32(DEFAULT_MCHBAR + 0x4234 + 0x400 * channel, 0x4040c01);
	write32(DEFAULT_MCHBAR + 0x4204 + 0x400 * channel, (slotrank << 24));
	write32(DEFAULT_MCHBAR + 0x4214 + 0x400 * channel, 0);

	/* DRAM command RD */
	write32(DEFAULT_MCHBAR + 0x4228 + 0x400 * channel, 0x1f105);
	write32(DEFAULT_MCHBAR + 0x4238 + 0x400 * channel,
		0x100f | ((ctrl->CAS + 36) << 16));
	write32(DEFAULT_MCHBAR + 0x4208 + 0x400 * channel,
		(slotrank << 24) | 0x60000);
	write32(DEFAULT_MCHBAR + 0x4218 + 0x400 * channel, 0);

	/* DRAM command MRS
	 * write MR3 MPR disable */
	write32(DEFAULT_MCHBAR + 0x422c + 0x400 * channel, 0x1f000);
	write32(DEFAULT_MCHBAR + 0x423c + 0x400 * channel,
		(0xc01 | (ctrl->tMOD << 16)));
	write32(DEFAULT_MCHBAR + 0x420c + 0x400 * channel,
		(slotrank << 24) | 0x360000);
	write32(DEFAULT_MCHBAR + 0x421c + 0x400 * channel, 0);

	/* execute the 4-deep command queue and wait for completion */
	write32(DEFAULT_MCHBAR + 0x4284 + 0x400 * channel, 0xc0001);

	wait_428c(channel);
}
1157
1158static int does_lane_work(ramctr_timing * ctrl, int channel, int slotrank,
1159 int lane)
1160{
1161 u32 timA = ctrl->timings[channel][slotrank].lanes[lane].timA;
1162 return ((read32
1163 (DEFAULT_MCHBAR + lane_registers[lane] + channel * 0x100 + 4 +
1164 ((timA / 32) & 1) * 4)
1165 >> (timA % 32)) & 1);
1166}
1167
/* Description of a run of zero entries in a circular sequence. */
struct run {
	int middle;	/* index of the run's midpoint */
	int end;	/* index of the run's last element */
	int start;	/* index of the run's first element */
	int all;	/* set if no nonzero element terminated any run */
	int length;	/* number of elements in the run */
};

/*
 * Find the longest run of zero entries in seq[0..sz-1], treating the
 * sequence as circular (a run may wrap from the end to the beginning).
 *
 * If the sequence contains no nonzero-terminated run (bl stays 0),
 * .all is set and the whole sequence is reported as the run.
 */
static struct run get_longest_zero_run(int *seq, int sz)
{
	int i, ls;
	int bl = 0, bs = 0;	/* best length, best start (unwrapped) */
	struct run ret;

	/* Iterate twice over the sequence so wrapped runs are seen whole. */
	ls = 0;
	for (i = 0; i < 2 * sz; i++)
		if (seq[i % sz]) {
			if (i - ls > bl) {
				bl = i - ls;
				bs = ls;
			}
			ls = i + 1;
		}
	if (bl == 0) {
		ret.middle = sz / 2;
		ret.start = 0;
		ret.end = sz;
		/* FIX: .length was left uninitialized in this branch. */
		ret.length = sz;
		ret.all = 1;
		return ret;
	}

	ret.start = bs % sz;
	ret.end = (bs + bl - 1) % sz;
	ret.middle = (bs + (bl - 1) / 2) % sz;
	ret.length = bl;
	ret.all = 0;

	return ret;
}
1207
/*
 * Coarse-tune the DQS receive delay (timA) of every lane of one slotrank.
 * Sweeps timA over two full phases (128 steps), tests the MPR readback at
 * each step, and centers timA in the longest working window per lane.
 * upperA[] receives each lane's (unwrapped) upper window edge for the
 * later fine-tuning pass.
 */
static void discover_timA_coarse(ramctr_timing * ctrl, int channel,
				 int slotrank, int *upperA)
{
	int timA;
	int statistics[NUM_LANES][128];
	int lane;

	for (timA = 0; timA < 128; timA++) {
		FOR_ALL_LANES {
			ctrl->timings[channel][slotrank].lanes[lane].timA = timA;
		}
		program_timings(ctrl, channel);

		test_timA(ctrl, channel, slotrank);

		FOR_ALL_LANES {
			/* Inverted: 0 in the statistics means "lane works". */
			statistics[lane][timA] =
			    !does_lane_work(ctrl, channel, slotrank, lane);
			printram("Astat: %d, %d, %d: %x, %x\n",
				 channel, slotrank, lane, timA,
				 statistics[lane][timA]);
		}
	}
	FOR_ALL_LANES {
		struct run rn = get_longest_zero_run(statistics[lane], 128);
		/* Middle of the working window gives maximum margin. */
		ctrl->timings[channel][slotrank].lanes[lane].timA = rn.middle;
		upperA[lane] = rn.end;
		/* The run may wrap around; unwrap the upper edge. */
		if (upperA[lane] < rn.middle)
			upperA[lane] += 128;
		printram("Aval: %d, %d, %d: %x\n", channel, slotrank,
			 lane, ctrl->timings[channel][slotrank].lanes[lane].timA);
		printram("Aend: %d, %d, %d: %x\n", channel, slotrank,
			 lane, upperA[lane]);
	}
}
1243
/*
 * Fine-tune timA around each lane's coarse upper window edge.
 * Each candidate delay (upperA - 25 .. upperA + 25, offset by one full
 * phase of 0x40) is tested 100 times; timA is then placed halfway
 * between the last always-failing and the first always-passing setting.
 */
static void discover_timA_fine(ramctr_timing * ctrl, int channel, int slotrank,
			       int *upperA)
{
	int timA_delta;
	int statistics[NUM_LANES][51];
	int lane, i;

	memset(statistics, 0, sizeof(statistics));

	for (timA_delta = -25; timA_delta <= 25; timA_delta++) {
		FOR_ALL_LANES ctrl->timings[channel][slotrank].lanes[lane].
		    timA = upperA[lane] + timA_delta + 0x40;
		program_timings(ctrl, channel);

		/* Repeat to catch marginal (intermittently failing) settings. */
		for (i = 0; i < 100; i++) {
			test_timA(ctrl, channel, slotrank);
			FOR_ALL_LANES {
				statistics[lane][timA_delta + 25] +=
				    does_lane_work(ctrl, channel, slotrank,
						   lane);
			}
		}
	}
	FOR_ALL_LANES {
		int last_zero, first_all;

		/* Last setting that never worked over 100 runs ... */
		for (last_zero = -25; last_zero <= 25; last_zero++)
			if (statistics[lane][last_zero + 25])
				break;
		last_zero--;
		/* ... and first setting that worked on every run. */
		for (first_all = -25; first_all <= 25; first_all++)
			if (statistics[lane][first_all + 25] == 100)
				break;

		printram("lane %d: %d, %d\n", lane, last_zero,
			 first_all);

		/* Center timA in the transition region. */
		ctrl->timings[channel][slotrank].lanes[lane].timA =
		    (last_zero + first_all) / 2 + upperA[lane];
		printram("Aval: %d, %d, %d: %x\n", channel, slotrank,
			 lane, ctrl->timings[channel][slotrank].lanes[lane].timA);
	}
}
1287
/*
 * Adjust the slotrank's val_4024 and val_4028 latencies until the MPR
 * pattern is received correctly on all lanes:
 * - no lane works: lower val_4024 by 2 and retry;
 * - only some lanes work: raise val_4028 by 2 and push the already
 *   working lanes' timA (and their recorded window edge) up by a full
 *   phase of 128 steps.
 * Returns 0 on success, MAKE_ERR if the search space is exhausted.
 */
static int discover_402x(ramctr_timing *ctrl, int channel, int slotrank,
			 int *upperA)
{
	int works[NUM_LANES];
	int lane;
	while (1) {
		int all_works = 1, some_works = 0;
		program_timings(ctrl, channel);
		test_timA(ctrl, channel, slotrank);
		FOR_ALL_LANES {
			works[lane] =
			    !does_lane_work(ctrl, channel, slotrank, lane);
			if (works[lane])
				some_works = 1;
			else
				all_works = 0;
		}
		if (all_works)
			return 0;
		if (!some_works) {
			/* Cannot go below 0: bail out. */
			if (ctrl->timings[channel][slotrank].val_4024 < 2) {
				printk(BIOS_EMERG, "402x discovery failed (1): %d, %d\n",
				       channel, slotrank);
				return MAKE_ERR;
			}
			ctrl->timings[channel][slotrank].val_4024 -= 2;
			printram("4024 -= 2;\n");
			continue;
		}
		ctrl->timings[channel][slotrank].val_4028 += 2;
		printram("4028 += 2;\n");
		/* val_4028 is a 4bit field per slotrank: bail out on overflow. */
		if (ctrl->timings[channel][slotrank].val_4028 >= 0x10) {
			printk(BIOS_EMERG, "402x discovery failed (2): %d, %d\n",
			       channel, slotrank);
			return MAKE_ERR;
		}
		FOR_ALL_LANES if (works[lane]) {
			ctrl->timings[channel][slotrank].lanes[lane].timA +=
			    128;
			upperA[lane] += 128;
			printram("increment %d, %d, %d\n", channel,
				 slotrank, lane);
		}
	}
	return 0;
}
1334
/* Smallest and largest "high" part of timA (timA >> 6, i.e. the number
 * of full 64-step phases) across all lanes of one slotrank. */
struct timA_minmax {
	int timA_min_high, timA_max_high;
};
1338
1339static void pre_timA_change(ramctr_timing * ctrl, int channel, int slotrank,
1340 struct timA_minmax *mnmx)
1341{
1342 int lane;
1343 mnmx->timA_min_high = 7;
1344 mnmx->timA_max_high = 0;
1345
1346 FOR_ALL_LANES {
1347 if (mnmx->timA_min_high >
1348 (ctrl->timings[channel][slotrank].lanes[lane].timA >> 6))
1349 mnmx->timA_min_high =
1350 (ctrl->timings[channel][slotrank].lanes[lane].
1351 timA >> 6);
1352 if (mnmx->timA_max_high <
1353 (ctrl->timings[channel][slotrank].lanes[lane].timA >> 6))
1354 mnmx->timA_max_high =
1355 (ctrl->timings[channel][slotrank].lanes[lane].
1356 timA >> 6);
1357 }
1358}
1359
1360static void post_timA_change(ramctr_timing * ctrl, int channel, int slotrank,
1361 struct timA_minmax *mnmx)
1362{
1363 struct timA_minmax post;
1364 int shift_402x = 0;
1365
1366 /* Get changed maxima. */
1367 pre_timA_change(ctrl, channel, slotrank, &post);
1368
1369 if (mnmx->timA_max_high - mnmx->timA_min_high <
1370 post.timA_max_high - post.timA_min_high)
1371 shift_402x = +1;
1372 else if (mnmx->timA_max_high - mnmx->timA_min_high >
1373 post.timA_max_high - post.timA_min_high)
1374 shift_402x = -1;
1375 else
1376 shift_402x = 0;
1377
1378 ctrl->timings[channel][slotrank].val_4028 += shift_402x;
1379 ctrl->timings[channel][slotrank].val_4024 += shift_402x;
1380 printram("4024 += %d;\n", shift_402x);
1381 printram("4028 += %d;\n", shift_402x);
1382}
1383
1384/* Compensate the skew between DQS and DQs.
 * To ease PCB design, a small skew between the data strobe (DQS)
 * signals and the data (DQ) signals is allowed.
1387 * The controller has to measure and compensate this skew for every byte-lane.
1388 * By delaying either all DQs signals or DQS signal, a full phase
1389 * shift can be introduced.
1390 * It is assumed that one byte-lane's DQs signals have the same routing delay.
1391 *
1392 * To measure the actual skew, the DRAM is placed in "read leveling" mode.
1393 * In read leveling mode the DRAM-chip outputs an alternating periodic pattern.
1394 * The memory controller iterates over all possible values to do a full phase shift
1395 * and issues read commands.
1396 * With DQS and DQs in phase the data read is expected to alternate on every byte:
1397 * 0xFF 0x00 0xFF ...
1398 * Once the controller has detected this pattern a bit in the result register is
1399 * set for the current phase shift.
1400 */
/*
 * Perform read (DQS receive) training for every populated slotrank:
 * precharge, enable read leveling, coarse- and fine-tune timA, adjust
 * the roundtrip/IO latencies (val_4024/val_4028), and finally normalize
 * the per-lane timA values so the smallest "high" phase is zero.
 * Returns 0 on success or a MAKE_ERR propagated from discovery.
 */
int read_training(ramctr_timing * ctrl)
{
	int channel, slotrank, lane;
	int err;

	FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS {
		int all_high, some_high;
		int upperA[NUM_LANES];
		struct timA_minmax mnmx;

		wait_428c(channel);

		/* DRAM command PREA */
		write32(DEFAULT_MCHBAR + 0x4220 + 0x400 * channel, 0x1f002);
		write32(DEFAULT_MCHBAR + 0x4230 + 0x400 * channel,
			0xc01 | (ctrl->tRP << 16));
		write32(DEFAULT_MCHBAR + 0x4200 + 0x400 * channel,
			(slotrank << 24) | 0x60400);
		write32(DEFAULT_MCHBAR + 0x4210 + 0x400 * channel, 0);
		write32(DEFAULT_MCHBAR + 0x4284 + 0x400 * channel, 1);

		/* Put the controller in read leveling mode for this rank. */
		write32(DEFAULT_MCHBAR + 0x3400, (slotrank << 2) | 0x8001);

		/* Start from known-safe latency values. */
		ctrl->timings[channel][slotrank].val_4028 = 4;
		ctrl->timings[channel][slotrank].val_4024 = 55;
		program_timings(ctrl, channel);

		discover_timA_coarse(ctrl, channel, slotrank, upperA);

		/* Check whether lanes ended up a full phase (0x40) high. */
		all_high = 1;
		some_high = 0;
		FOR_ALL_LANES {
			if (ctrl->timings[channel][slotrank].lanes[lane].
			    timA >= 0x40)
				some_high = 1;
			else
				all_high = 0;
		}

		if (all_high) {
			/* All lanes a phase high: shift everything down. */
			ctrl->timings[channel][slotrank].val_4028--;
			printram("4028--;\n");
			FOR_ALL_LANES {
				ctrl->timings[channel][slotrank].lanes[lane].
				    timA -= 0x40;
				upperA[lane] -= 0x40;

			}
		} else if (some_high) {
			/* Mixed: raise the latencies instead. */
			ctrl->timings[channel][slotrank].val_4024++;
			ctrl->timings[channel][slotrank].val_4028++;
			printram("4024++;\n");
			printram("4028++;\n");
		}

		program_timings(ctrl, channel);

		pre_timA_change(ctrl, channel, slotrank, &mnmx);

		err = discover_402x(ctrl, channel, slotrank, upperA);
		if (err)
			return err;

		post_timA_change(ctrl, channel, slotrank, &mnmx);
		pre_timA_change(ctrl, channel, slotrank, &mnmx);

		discover_timA_fine(ctrl, channel, slotrank, upperA);

		post_timA_change(ctrl, channel, slotrank, &mnmx);
		pre_timA_change(ctrl, channel, slotrank, &mnmx);

		/* Normalize: pull all lanes down by the smallest full phase. */
		FOR_ALL_LANES {
			ctrl->timings[channel][slotrank].lanes[lane].timA -= mnmx.timA_min_high * 0x40;
		}
		ctrl->timings[channel][slotrank].val_4028 -= mnmx.timA_min_high;
		printram("4028 -= %d;\n", mnmx.timA_min_high);

		post_timA_change(ctrl, channel, slotrank, &mnmx);

		printram("4/8: %d, %d, %x, %x\n", channel, slotrank,
			 ctrl->timings[channel][slotrank].val_4024,
			 ctrl->timings[channel][slotrank].val_4028);

		printram("final results:\n");
		FOR_ALL_LANES
		    printram("Aval: %d, %d, %d: %x\n", channel, slotrank,
			     lane,
			     ctrl->timings[channel][slotrank].lanes[lane].timA);

		/* Leave read leveling mode. */
		write32(DEFAULT_MCHBAR + 0x3400, 0);

		toggle_io_reset();
	}

	FOR_ALL_POPULATED_CHANNELS {
		program_timings(ctrl, channel);
	}
	/* Clear the per-lane result registers. */
	FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS FOR_ALL_LANES {
		write32(DEFAULT_MCHBAR + 0x4080 + 0x400 * channel
			+ 4 * lane, 0);
	}
	return 0;
}
1504
/*
 * Exercise the current timC (DQ output delay) setting of one slotrank:
 * clear the per-lane error counters, write a burst using the queued
 * ACT/NOP/WR/NOP sequence, then read it back with ACT/RD and precharge.
 * Errors accumulate in the 0x4340 per-lane registers read by
 * discover_timC().
 */
static void test_timC(ramctr_timing * ctrl, int channel, int slotrank)
{
	int lane;

	/* Reset the per-lane error counters before the test. */
	FOR_ALL_LANES {
		write32(DEFAULT_MCHBAR + 0x4340 + 0x400 * channel + 4 * lane, 0);
		read32(DEFAULT_MCHBAR + 0x4140 + 0x400 * channel + 4 * lane);
	}

	wait_428c(channel);

	/* DRAM command ACT */
	write32(DEFAULT_MCHBAR + 0x4220 + 0x400 * channel, 0x1f006);
	write32(DEFAULT_MCHBAR + 0x4230 + 0x400 * channel,
		(max((ctrl->tFAW >> 2) + 1, ctrl->tRRD) << 10)
		| 4 | (ctrl->tRCD << 16));

	write32(DEFAULT_MCHBAR + 0x4200 + 0x400 * channel,
		(slotrank << 24) | (6 << 16));

	write32(DEFAULT_MCHBAR + 0x4210 + 0x400 * channel, 0x244);

	/* DRAM command NOP */
	write32(DEFAULT_MCHBAR + 0x4224 + 0x400 * channel, 0x1f207);
	write32(DEFAULT_MCHBAR + 0x4234 + 0x400 * channel, 0x8041001);
	write32(DEFAULT_MCHBAR + 0x4204 + 0x400 * channel,
		(slotrank << 24) | 8);
	write32(DEFAULT_MCHBAR + 0x4214 + 0x400 * channel, 0x3e0);

	/* DRAM command WR */
	write32(DEFAULT_MCHBAR + 0x4228 + 0x400 * channel, 0x1f201);
	write32(DEFAULT_MCHBAR + 0x4238 + 0x400 * channel, 0x80411f4);
	write32(DEFAULT_MCHBAR + 0x4208 + 0x400 * channel, (slotrank << 24));
	write32(DEFAULT_MCHBAR + 0x4218 + 0x400 * channel, 0x242);

	/* DRAM command NOP */
	write32(DEFAULT_MCHBAR + 0x422c + 0x400 * channel, 0x1f207);
	write32(DEFAULT_MCHBAR + 0x423c + 0x400 * channel,
		0x8000c01 | ((ctrl->CWL + ctrl->tWTR + 5) << 16));
	write32(DEFAULT_MCHBAR + 0x420c + 0x400 * channel,
		(slotrank << 24) | 8);
	write32(DEFAULT_MCHBAR + 0x421c + 0x400 * channel, 0x3e0);

	/* Execute the write sequence. */
	write32(DEFAULT_MCHBAR + 0x4284 + 0x400 * channel, 0xc0001);

	wait_428c(channel);

	/* DRAM command PREA */
	write32(DEFAULT_MCHBAR + 0x4220 + 0x400 * channel, 0x1f002);
	write32(DEFAULT_MCHBAR + 0x4230 + 0x400 * channel,
		0xc01 | (ctrl->tRP << 16));
	write32(DEFAULT_MCHBAR + 0x4200 + 0x400 * channel,
		(slotrank << 24) | 0x60400);
	write32(DEFAULT_MCHBAR + 0x4210 + 0x400 * channel, 0x240);

	/* DRAM command ACT */
	write32(DEFAULT_MCHBAR + 0x4224 + 0x400 * channel, 0x1f006);
	write32(DEFAULT_MCHBAR + 0x4234 + 0x400 * channel,
		(max(ctrl->tRRD, (ctrl->tFAW >> 2) + 1) << 10)
		| 8 | (ctrl->CAS << 16));

	write32(DEFAULT_MCHBAR + 0x4204 + 0x400 * channel,
		(slotrank << 24) | 0x60000);

	write32(DEFAULT_MCHBAR + 0x4214 + 0x400 * channel, 0x244);

	/* DRAM command RD */
	write32(DEFAULT_MCHBAR + 0x4228 + 0x400 * channel, 0x1f105);
	write32(DEFAULT_MCHBAR + 0x4238 + 0x400 * channel,
		0x40011f4 | (max(ctrl->tRTP, 8) << 16));
	write32(DEFAULT_MCHBAR + 0x4208 + 0x400 * channel, (slotrank << 24));
	write32(DEFAULT_MCHBAR + 0x4218 + 0x400 * channel, 0x242);

	/* DRAM command PREA */
	write32(DEFAULT_MCHBAR + 0x422c + 0x400 * channel, 0x1f002);
	write32(DEFAULT_MCHBAR + 0x423c + 0x400 * channel,
		0xc01 | (ctrl->tRP << 16));
	write32(DEFAULT_MCHBAR + 0x420c + 0x400 * channel,
		(slotrank << 24) | 0x60400);
	write32(DEFAULT_MCHBAR + 0x421c + 0x400 * channel, 0x240);
	/* Execute the readback sequence. */
	write32(DEFAULT_MCHBAR + 0x4284 + 0x400 * channel, 0xc0001);
	wait_428c(channel);
}
1588
/*
 * Sweep timC (DQ output delay) over its full range for one slotrank,
 * run the write/readback test at each setting, and center timC in the
 * longest error-free window per lane.
 * Returns 0 on success, MAKE_ERR if a lane never passed.
 */
static int discover_timC(ramctr_timing *ctrl, int channel, int slotrank)
{
	int timC;
	int statistics[NUM_LANES][MAX_TIMC + 1];
	int lane;

	wait_428c(channel);

	/* DRAM command PREA */
	write32(DEFAULT_MCHBAR + 0x4220 + 0x400 * channel, 0x1f002);
	write32(DEFAULT_MCHBAR + 0x4230 + 0x400 * channel,
		0xc01 | (ctrl->tRP << 16));
	write32(DEFAULT_MCHBAR + 0x4200 + 0x400 * channel,
		(slotrank << 24) | 0x60400);
	write32(DEFAULT_MCHBAR + 0x4210 + 0x400 * channel, 0x240);
	write32(DEFAULT_MCHBAR + 0x4284 + 0x400 * channel, 1);

	for (timC = 0; timC <= MAX_TIMC; timC++) {
		FOR_ALL_LANES ctrl->timings[channel][slotrank].lanes[lane].
		    timC = timC;
		program_timings(ctrl, channel);

		test_timC(ctrl, channel, slotrank);

		FOR_ALL_LANES {
			/* Nonzero means errors were counted for this lane. */
			statistics[lane][timC] =
			    read32(DEFAULT_MCHBAR + 0x4340 + 4 * lane +
				   0x400 * channel);
			printram("Cstat: %d, %d, %d, %x, %x\n",
				 channel, slotrank, lane, timC,
				 statistics[lane][timC]);
		}
	}
	FOR_ALL_LANES {
		struct run rn =
		    get_longest_zero_run(statistics[lane], MAX_TIMC + 1);
		ctrl->timings[channel][slotrank].lanes[lane].timC = rn.middle;
		if (rn.all) {
			printk(BIOS_EMERG, "timC discovery failed: %d, %d, %d\n",
			       channel, slotrank, lane);
			return MAKE_ERR;
		}
		printram("Cval: %d, %d, %d: %x\n", channel, slotrank,
			 lane, ctrl->timings[channel][slotrank].lanes[lane].timC);
	}
	return 0;
}
1636
1637static int get_precedening_channels(ramctr_timing * ctrl, int target_channel)
1638{
1639 int channel, ret = 0;
1640 FOR_ALL_POPULATED_CHANNELS if (channel < target_channel)
1641 ret++;
1642 return ret;
1643}
1644
1645static void fill_pattern0(ramctr_timing * ctrl, int channel, u32 a, u32 b)
1646{
1647 unsigned j;
1648 unsigned channel_offset =
1649 get_precedening_channels(ctrl, channel) * 0x40;
1650 for (j = 0; j < 16; j++)
1651 write32((void *)(0x04000000 + channel_offset + 4 * j), j & 2 ? b : a);
1652 sfence();
1653}
1654
1655static int num_of_channels(const ramctr_timing * ctrl)
1656{
1657 int ret = 0;
1658 int channel;
1659 FOR_ALL_POPULATED_CHANNELS ret++;
1660 return ret;
1661}
1662
1663static void fill_pattern1(ramctr_timing * ctrl, int channel)
1664{
1665 unsigned j;
1666 unsigned channel_offset =
1667 get_precedening_channels(ctrl, channel) * 0x40;
1668 unsigned channel_step = 0x40 * num_of_channels(ctrl);
1669 for (j = 0; j < 16; j++)
1670 write32((void *)(0x04000000 + channel_offset + j * 4), 0xffffffff);
1671 for (j = 0; j < 16; j++)
1672 write32((void *)(0x04000000 + channel_offset + channel_step + j * 4), 0);
1673 sfence();
1674}
1675
1676static void precharge(ramctr_timing * ctrl)
1677{
1678 int channel, slotrank, lane;
1679
1680 FOR_ALL_POPULATED_CHANNELS {
1681 FOR_ALL_POPULATED_RANKS FOR_ALL_LANES {
1682 ctrl->timings[channel][slotrank].lanes[lane].falling =
1683 16;
1684 ctrl->timings[channel][slotrank].lanes[lane].rising =
1685 16;
1686 }
1687
1688 program_timings(ctrl, channel);
1689
1690 FOR_ALL_POPULATED_RANKS {
1691 wait_428c(channel);
1692
1693 /* DRAM command MRS
1694 * write MR3 MPR enable
1695 * in this mode only RD and RDA are allowed
1696 * all reads return a predefined pattern */
1697 write32(DEFAULT_MCHBAR + 0x4220 + 0x400 * channel,
1698 0x1f000);
1699 write32(DEFAULT_MCHBAR + 0x4230 + 0x400 * channel,
1700 0xc01 | (ctrl->tMOD << 16));
1701 write32(DEFAULT_MCHBAR + 0x4200 + 0x400 * channel,
1702 (slotrank << 24) | 0x360004);
1703 write32(DEFAULT_MCHBAR + 0x4210 + 0x400 * channel, 0);
1704
1705 /* DRAM command RD */
1706 write32(DEFAULT_MCHBAR + 0x4224 + 0x400 * channel,
1707 0x1f105);
1708 write32(DEFAULT_MCHBAR + 0x4234 + 0x400 * channel,
1709 0x4041003);
1710 write32(DEFAULT_MCHBAR + 0x4204 + 0x400 * channel,
1711 (slotrank << 24) | 0);
1712 write32(DEFAULT_MCHBAR + 0x4214 + 0x400 * channel, 0);
1713
1714 /* DRAM command RD */
1715 write32(DEFAULT_MCHBAR + 0x4228 + 0x400 * channel,
1716 0x1f105);
1717 write32(DEFAULT_MCHBAR + 0x4238 + 0x400 * channel,
1718 0x1001 | ((ctrl->CAS + 8) << 16));
1719 write32(DEFAULT_MCHBAR + 0x4208 + 0x400 * channel,
1720 (slotrank << 24) | 0x60000);
1721 write32(DEFAULT_MCHBAR + 0x4218 + 0x400 * channel, 0);
1722
1723 /* DRAM command MRS
1724 * write MR3 MPR disable */
1725 write32(DEFAULT_MCHBAR + 0x422c + 0x400 * channel,
1726 0x1f000);
1727 write32(DEFAULT_MCHBAR + 0x423c + 0x400 * channel,
1728 0xc01 | (ctrl->tMOD << 16));
1729 write32(DEFAULT_MCHBAR + 0x420c + 0x400 * channel,
1730 (slotrank << 24) | 0x360000);
1731 write32(DEFAULT_MCHBAR + 0x421c + 0x400 * channel, 0);
1732 write32(DEFAULT_MCHBAR + 0x4284 + 0x400 * channel,
1733 0xc0001);
1734
1735 wait_428c(channel);
1736 }
1737
1738 FOR_ALL_POPULATED_RANKS FOR_ALL_LANES {
1739 ctrl->timings[channel][slotrank].lanes[lane].falling =
1740 48;
1741 ctrl->timings[channel][slotrank].lanes[lane].rising =
1742 48;
1743 }
1744
1745 program_timings(ctrl, channel);
1746
1747 FOR_ALL_POPULATED_RANKS {
1748 wait_428c(channel);
1749 /* DRAM command MRS
1750 * write MR3 MPR enable
1751 * in this mode only RD and RDA are allowed
1752 * all reads return a predefined pattern */
1753 write32(DEFAULT_MCHBAR + 0x4220 + 0x400 * channel,
1754 0x1f000);
1755 write32(DEFAULT_MCHBAR + 0x4230 + 0x400 * channel,
1756 0xc01 | (ctrl->tMOD << 16));
1757 write32(DEFAULT_MCHBAR + 0x4200 + 0x400 * channel,
1758 (slotrank << 24) | 0x360004);
1759 write32(DEFAULT_MCHBAR + 0x4210 + 0x400 * channel, 0);
1760
1761 /* DRAM command RD */
1762 write32(DEFAULT_MCHBAR + 0x4224 + 0x400 * channel,
1763 0x1f105);
1764 write32(DEFAULT_MCHBAR + 0x4234 + 0x400 * channel,
1765 0x4041003);
1766 write32(DEFAULT_MCHBAR + 0x4204 + 0x400 * channel,
1767 (slotrank << 24) | 0);
1768 write32(DEFAULT_MCHBAR + 0x4214 + 0x400 * channel, 0);
1769
1770 /* DRAM command RD */
1771 write32(DEFAULT_MCHBAR + 0x4228 + 0x400 * channel,
1772 0x1f105);
1773 write32(DEFAULT_MCHBAR + 0x4238 + 0x400 * channel,
1774 0x1001 | ((ctrl->CAS + 8) << 16));
1775 write32(DEFAULT_MCHBAR + 0x4208 + 0x400 * channel,
1776 (slotrank << 24) | 0x60000);
1777 write32(DEFAULT_MCHBAR + 0x4218 + 0x400 * channel, 0);
1778
1779 /* DRAM command MRS
1780 * write MR3 MPR disable */
1781 write32(DEFAULT_MCHBAR + 0x422c + 0x400 * channel,
1782 0x1f000);
1783 write32(DEFAULT_MCHBAR + 0x423c + 0x400 * channel,
1784 0xc01 | (ctrl->tMOD << 16));
1785
1786 write32(DEFAULT_MCHBAR + 0x420c + 0x400 * channel,
1787 (slotrank << 24) | 0x360000);
1788 write32(DEFAULT_MCHBAR + 0x421c + 0x400 * channel, 0);
1789
1790 write32(DEFAULT_MCHBAR + 0x4284 + 0x400 * channel,
1791 0xc0001);
1792 wait_428c(channel);
1793 }
1794 }
1795}
1796
/*
 * Exercise the current timB (DQS transmit delay) setting via write
 * leveling: temporarily enable the rank's DQ outputs through MR1, issue
 * the two queued NOPs that generate a DQS pulse and capture the fed-back
 * CLK sample, then disable the DQ outputs again.
 */
static void test_timB(ramctr_timing * ctrl, int channel, int slotrank)
{
	/* enable DQs on this slotrank */
	write_mrreg(ctrl, channel, slotrank, 1,
		    0x80 | make_mr1(ctrl, slotrank, channel));

	wait_428c(channel);
	/* DRAM command NOP */
	write32(DEFAULT_MCHBAR + 0x4220 + 0x400 * channel, 0x1f207);
	write32(DEFAULT_MCHBAR + 0x4230 + 0x400 * channel,
		0x8000c01 | ((ctrl->CWL + ctrl->tWLO) << 16));
	write32(DEFAULT_MCHBAR + 0x4200 + 0x400 * channel,
		8 | (slotrank << 24));
	write32(DEFAULT_MCHBAR + 0x4210 + 0x400 * channel, 0);

	/* DRAM command NOP */
	write32(DEFAULT_MCHBAR + 0x4224 + 0x400 * channel, 0x1f107);
	write32(DEFAULT_MCHBAR + 0x4234 + 0x400 * channel,
		0x4000c01 | ((ctrl->CAS + 38) << 16));
	write32(DEFAULT_MCHBAR + 0x4204 + 0x400 * channel,
		(slotrank << 24) | 4);
	write32(DEFAULT_MCHBAR + 0x4214 + 0x400 * channel, 0);

	/* Execute the two queued commands and wait for completion. */
	write32(DEFAULT_MCHBAR + 0x400 * channel + 0x4284, 0x40001);
	wait_428c(channel);

	/* disable DQs on this slotrank */
	write_mrreg(ctrl, channel, slotrank, 1,
		    0x1080 | make_mr1(ctrl, slotrank, channel));
}
1827
/*
 * Determine timB (DQS transmit delay) for every lane of one slotrank via
 * write leveling: sweep timB over two full phases (128 steps), record
 * the CLK level the DRAM sampled at each DQS edge, and use the start of
 * the longest low window as the new timB.
 * Returns 0 on success, MAKE_ERR if a lane has no working window.
 */
static int discover_timB(ramctr_timing *ctrl, int channel, int slotrank)
{
	int timB;
	int statistics[NUM_LANES][128];
	int lane;

	/* Enable write leveling mode for this slotrank. */
	write32(DEFAULT_MCHBAR + 0x3400, 0x108052 | (slotrank << 2));

	for (timB = 0; timB < 128; timB++) {
		FOR_ALL_LANES {
			ctrl->timings[channel][slotrank].lanes[lane].timB = timB;
		}
		program_timings(ctrl, channel);

		test_timB(ctrl, channel, slotrank);

		FOR_ALL_LANES {
			/* Inverted: 0 in the statistics means "sampled high". */
			statistics[lane][timB] =
			    !((read32
			       (DEFAULT_MCHBAR + lane_registers[lane] +
				channel * 0x100 + 4 + ((timB / 32) & 1) * 4)
			       >> (timB % 32)) & 1);
			printram("Bstat: %d, %d, %d: %x, %x\n",
				 channel, slotrank, lane, timB,
				 statistics[lane][timB]);
		}
	}
	FOR_ALL_LANES {
		struct run rn = get_longest_zero_run(statistics[lane], 128);
		/* timC is a direct function of timB's 6 LSBs.
		 * Some tests increment the value of timB by a small value,
		 * which might cause the 6bit value to overflow if it's close
		 * to 0x3F. Increment the value by a small offset if it's
		 * likely to overflow, to make sure it won't overflow while
		 * running tests and brick the system due to a non-matching
		 * timC.
		 *
		 * TODO: find out why some tests (edge write discovery)
		 * increment timB. */
		if ((rn.start & 0x3F) == 0x3E)
			rn.start += 2;
		else if ((rn.start & 0x3F) == 0x3F)
			rn.start += 1;
		ctrl->timings[channel][slotrank].lanes[lane].timB = rn.start;
		if (rn.all) {
			printk(BIOS_EMERG, "timB discovery failed: %d, %d, %d\n",
			       channel, slotrank, lane);
			return MAKE_ERR;
		}
		printram("Bval: %d, %d, %d: %x\n", channel, slotrank,
			 lane, ctrl->timings[channel][slotrank].lanes[lane].timB);
	}
	return 0;
}
1881
1882static int get_timB_high_adjust(u64 val)
1883{
1884 int i;
1885
1886 /* good */
1887 if (val == 0xffffffffffffffffLL)
1888 return 0;
1889
1890 if (val >= 0xf000000000000000LL) {
1891 /* needs negative adjustment */
1892 for (i = 0; i < 8; i++)
1893 if (val << (8 * (7 - i) + 4))
1894 return -i;
1895 } else {
1896 /* needs positive adjustment */
1897 for (i = 0; i < 8; i++)
1898 if (val >> (8 * (7 - i) + 4))
1899 return i;
1900 }
1901 return 8;
1902}
1903
/*
 * Correct the coarse (full-phase) part of timB for every populated
 * slotrank: write the fill_pattern1 data (ones then zeros), read it back,
 * and use the 64bit per-lane feedback to move timB by whole phases
 * (64 steps) via get_timB_high_adjust().
 */
static void adjust_high_timB(ramctr_timing * ctrl)
{
	int channel, slotrank, lane, old;
	write32(DEFAULT_MCHBAR + 0x3400, 0x200);
	FOR_ALL_POPULATED_CHANNELS {
		fill_pattern1(ctrl, channel);
		write32(DEFAULT_MCHBAR + 0x4288 + (channel << 10), 1);
	}
	FOR_ALL_POPULATED_CHANNELS FOR_ALL_POPULATED_RANKS {

		write32(DEFAULT_MCHBAR + 0x4288 + 0x400 * channel, 0x10001);

		wait_428c(channel);

		/* DRAM command ACT */
		write32(DEFAULT_MCHBAR + 0x4220 + 0x400 * channel, 0x1f006);
		write32(DEFAULT_MCHBAR + 0x4230 + 0x400 * channel,
			0xc01 | (ctrl->tRCD << 16));
		write32(DEFAULT_MCHBAR + 0x4200 + 0x400 * channel,
			(slotrank << 24) | 0x60000);
		write32(DEFAULT_MCHBAR + 0x4210 + 0x400 * channel, 0);

		/* DRAM command NOP */
		write32(DEFAULT_MCHBAR + 0x4224 + 0x400 * channel, 0x1f207);
		write32(DEFAULT_MCHBAR + 0x4234 + 0x400 * channel, 0x8040c01);
		write32(DEFAULT_MCHBAR + 0x4204 + 0x400 * channel,
			(slotrank << 24) | 0x8);
		write32(DEFAULT_MCHBAR + 0x4214 + 0x400 * channel, 0x3e0);

		/* DRAM command WR */
		write32(DEFAULT_MCHBAR + 0x4228 + 0x400 * channel, 0x1f201);
		write32(DEFAULT_MCHBAR + 0x4238 + 0x400 * channel, 0x8041003);
		write32(DEFAULT_MCHBAR + 0x4208 + 0x400 * channel,
			(slotrank << 24));
		write32(DEFAULT_MCHBAR + 0x4218 + 0x400 * channel, 0x3e2);

		/* DRAM command NOP */
		write32(DEFAULT_MCHBAR + 0x422c + 0x400 * channel, 0x1f207);
		write32(DEFAULT_MCHBAR + 0x423c + 0x400 * channel,
			0x8000c01 | ((ctrl->CWL + ctrl->tWTR + 5) << 16));
		write32(DEFAULT_MCHBAR + 0x420c + 0x400 * channel,
			(slotrank << 24) | 0x8);
		write32(DEFAULT_MCHBAR + 0x421c + 0x400 * channel, 0x3e0);

		/* Execute the write sequence. */
		write32(DEFAULT_MCHBAR + 0x4284 + 0x400 * channel, 0xc0001);

		wait_428c(channel);

		/* DRAM command PREA */
		write32(DEFAULT_MCHBAR + 0x4220 + 0x400 * channel, 0x1f002);
		write32(DEFAULT_MCHBAR + 0x4230 + 0x400 * channel,
			0xc01 | ((ctrl->tRP) << 16));
		write32(DEFAULT_MCHBAR + 0x4200 + 0x400 * channel,
			(slotrank << 24) | 0x60400);
		write32(DEFAULT_MCHBAR + 0x4210 + 0x400 * channel, 0x240);

		/* DRAM command ACT */
		write32(DEFAULT_MCHBAR + 0x4224 + 0x400 * channel, 0x1f006);
		write32(DEFAULT_MCHBAR + 0x4234 + 0x400 * channel,
			0xc01 | ((ctrl->tRCD) << 16));
		write32(DEFAULT_MCHBAR + 0x4204 + 0x400 * channel,
			(slotrank << 24) | 0x60000);
		write32(DEFAULT_MCHBAR + 0x4214 + 0x400 * channel, 0);

		/* DRAM command RD */
		write32(DEFAULT_MCHBAR + 0x4228 + 0x400 * channel, 0x3f105);
		write32(DEFAULT_MCHBAR + 0x4238 + 0x400 * channel,
			0x4000c01 |
			((ctrl->tRP +
			  ctrl->timings[channel][slotrank].val_4024 +
			  ctrl->timings[channel][slotrank].val_4028) << 16));
		write32(DEFAULT_MCHBAR + 0x4208 + 0x400 * channel,
			(slotrank << 24) | 0x60008);
		write32(DEFAULT_MCHBAR + 0x4218 + 0x400 * channel, 0);

		/* Execute the readback sequence. */
		write32(DEFAULT_MCHBAR + 0x4284 + 0x400 * channel, 0x80001);
		wait_428c(channel);
		FOR_ALL_LANES {
			/* Assemble the 64bit feedback from two 32bit words. */
			u64 res =
			    read32(DEFAULT_MCHBAR + lane_registers[lane] +
				   0x100 * channel + 4);
			res |=
			    ((u64) read32(DEFAULT_MCHBAR + lane_registers[lane] +
					  0x100 * channel + 8)) << 32;
			old = ctrl->timings[channel][slotrank].lanes[lane].timB;
			/* Shift timB by whole phases (64 steps each). */
			ctrl->timings[channel][slotrank].lanes[lane].timB +=
			    get_timB_high_adjust(res) * 64;

			printram("High adjust %d:%016llx\n", lane, res);
			printram("Bval+: %d, %d, %d, %x -> %x\n", channel,
				 slotrank, lane, old,
				 ctrl->timings[channel][slotrank].lanes[lane].
				 timB);
		}
	}
	write32(DEFAULT_MCHBAR + 0x3400, 0);
}
2001
/*
 * Issue a single queued ACT command on one existing rank of the channel
 * and wait for completion. Used to kick the channel around refresh
 * enable/disable during write training.
 */
static void write_op(ramctr_timing * ctrl, int channel)
{
	int slotrank;

	wait_428c(channel);

	/* choose an existing rank. */
	slotrank = !(ctrl->rankmap[channel] & 1) ? 2 : 0;

	/* DRAM command ACT */
	write32(DEFAULT_MCHBAR + 0x4220 + 0x400 * channel, 0x0f003);
	write32(DEFAULT_MCHBAR + 0x4230 + 0x400 * channel, 0x41001);

	write32(DEFAULT_MCHBAR + 0x4200 + 0x400 * channel,
		(slotrank << 24) | 0x60000);

	write32(DEFAULT_MCHBAR + 0x4210 + 0x400 * channel, 0x3e0);

	/* Execute the single queued command. */
	write32(DEFAULT_MCHBAR + 0x4284 + 0x400 * channel, 1);
	wait_428c(channel);
}
2023
2024/* Compensate the skew between CMD/ADDR/CLK and DQ/DQS lanes.
 * DDR3 adopted the fly-by topology. The data and strobe signals reach
2026 * the chips at different times with respect to command, address and
2027 * clock signals.
2028 * By delaying either all DQ/DQs or all CMD/ADDR/CLK signals, a full phase
2029 * shift can be introduced.
2030 * It is assumed that the CLK/ADDR/CMD signals have the same routing delay.
2031 *
2032 * To find the required phase shift the DRAM is placed in "write leveling" mode.
2033 * In this mode the DRAM-chip samples the CLK on every DQS edge and feeds back the
2034 * sampled value on the data lanes (DQs).
2035 */
/*
 * Perform write training for all populated slotranks: with refresh
 * disabled, use DDR3 write leveling to find timB per lane, re-enable
 * refresh, position the read edges via precharge(), then sweep timC and
 * finally correct the coarse part of timB with adjust_high_timB().
 * Returns 0 on success or a MAKE_ERR propagated from discovery.
 */
int write_training(ramctr_timing * ctrl)
{
	int channel, slotrank, lane;
	int err;

	FOR_ALL_POPULATED_CHANNELS
	    write32(DEFAULT_MCHBAR + 0x4008 + 0x400 * channel,
		    read32(DEFAULT_MCHBAR + 0x4008 +
			   0x400 * channel) | 0x8000000);

	FOR_ALL_POPULATED_CHANNELS {
		write_op(ctrl, channel);
		write32(DEFAULT_MCHBAR + 0x4020 + 0x400 * channel,
			read32(DEFAULT_MCHBAR + 0x4020 +
			       0x400 * channel) | 0x200000);
	}

	/* refresh disable */
	write32(DEFAULT_MCHBAR + 0x5030, read32(DEFAULT_MCHBAR + 0x5030) & ~8);
	FOR_ALL_POPULATED_CHANNELS {
		write_op(ctrl, channel);
	}

	/* enable write leveling on all ranks
	 * disable all DQ outputs
	 * only NOP is allowed in this mode */
	FOR_ALL_CHANNELS
	    FOR_ALL_POPULATED_RANKS
	    write_mrreg(ctrl, channel, slotrank, 1,
			make_mr1(ctrl, slotrank, channel) | 0x1080);

	write32(DEFAULT_MCHBAR + 0x3400, 0x108052);

	toggle_io_reset();

	/* set any valid value for timB, it gets corrected later */
	FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS {
		err = discover_timB(ctrl, channel, slotrank);
		if (err)
			return err;
	}

	/* disable write leveling on all ranks */
	FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS
	    write_mrreg(ctrl, channel,
			slotrank, 1, make_mr1(ctrl, slotrank, channel));

	write32(DEFAULT_MCHBAR + 0x3400, 0);

	FOR_ALL_POPULATED_CHANNELS
	    wait_428c(channel);

	/* refresh enable */
	write32(DEFAULT_MCHBAR + 0x5030, read32(DEFAULT_MCHBAR + 0x5030) | 8);

	FOR_ALL_POPULATED_CHANNELS {
		write32(DEFAULT_MCHBAR + 0x4020 + 0x400 * channel,
			~0x00200000 & read32(DEFAULT_MCHBAR + 0x4020 +
					     0x400 * channel));
		read32(DEFAULT_MCHBAR + 0x428c + 0x400 * channel);
		wait_428c(channel);

		/* DRAM command ZQCS */
		write32(DEFAULT_MCHBAR + 0x4220 + 0x400 * channel, 0x0f003);
		write32(DEFAULT_MCHBAR + 0x4230 + 0x400 * channel, 0x659001);
		write32(DEFAULT_MCHBAR + 0x4200 + 0x400 * channel, 0x60000);
		write32(DEFAULT_MCHBAR + 0x4210 + 0x400 * channel, 0x3e0);

		/* Execute the single queued command. */
		write32(DEFAULT_MCHBAR + 0x4284 + 0x400 * channel, 1);
		wait_428c(channel);
	}

	toggle_io_reset();

	printram("CPE\n");
	precharge(ctrl);
	printram("CPF\n");

	/* Clear the per-lane result registers. */
	FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS FOR_ALL_LANES {
		read32(DEFAULT_MCHBAR + 0x4080 + 0x400 * channel + 4 * lane);
		write32(DEFAULT_MCHBAR + 0x4080 + 0x400 * channel + 4 * lane,
			0);
	}

	FOR_ALL_POPULATED_CHANNELS {
		fill_pattern0(ctrl, channel, 0xaaaaaaaa, 0x55555555);
		write32(DEFAULT_MCHBAR + 0x4288 + (channel << 10), 0);
	}

	FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS {
		err = discover_timC(ctrl, channel, slotrank);
		if (err)
			return err;
	}

	FOR_ALL_POPULATED_CHANNELS
	    program_timings(ctrl, channel);

	/* measure and adjust timB timings */
	adjust_high_timB(ctrl);

	FOR_ALL_POPULATED_CHANNELS
	    program_timings(ctrl, channel);

	/* Clear the per-lane result registers again. */
	FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS FOR_ALL_LANES {
		read32(DEFAULT_MCHBAR + 0x4080 + 0x400 * channel + 4 * lane);
		write32(DEFAULT_MCHBAR + 0x4080 + 0x400 * channel + 4 * lane,
			0);
	}
	return 0;
}
2147
static int test_320c(ramctr_timing * ctrl, int channel, int slotrank)
{
	/*
	 * Test the current command (320c) timing of one rank: sweep timC in a
	 * small window (+-5) around the trained per-lane values, issue an
	 * ACT/WR/RD/PRE burst through the IOSAV command queue and check the
	 * per-lane error registers.  Returns 0 if every lane passed for at
	 * least one timC offset, non-zero otherwise.  The rank's timings are
	 * restored to their saved values before returning.
	 */
	struct ram_rank_timings saved_rt = ctrl->timings[channel][slotrank];
	int timC_delta;
	int lanes_ok = 0;	/* bitmask of lanes that passed at least once */
	int ctr = 0;		/* address offset, advanced per iteration */
	int lane;

	for (timC_delta = -5; timC_delta <= 5; timC_delta++) {
		/* Apply the candidate timC offset to all lanes. */
		FOR_ALL_LANES {
			ctrl->timings[channel][slotrank].lanes[lane].timC =
			    saved_rt.lanes[lane].timC + timC_delta;
		}
		program_timings(ctrl, channel);

		/* Clear per-lane error counters before the test burst. */
		FOR_ALL_LANES {
			write32(DEFAULT_MCHBAR + 4 * lane + 0x4f40, 0);
		}

		write32(DEFAULT_MCHBAR + 0x4288 + 0x400 * channel, 0x1f);

		wait_428c(channel);
		/* DRAM command ACT */
		write32(DEFAULT_MCHBAR + 0x4220 + 0x400 * channel, 0x1f006);
		write32(DEFAULT_MCHBAR + 0x4230 + 0x400 * channel,
			((max(ctrl->tRRD, (ctrl->tFAW >> 2) + 1)) << 10)
			| 8 | (ctrl->tRCD << 16));

		write32(DEFAULT_MCHBAR + 0x4200 + 0x400 * channel,
			(slotrank << 24) | ctr | 0x60000);

		write32(DEFAULT_MCHBAR + 0x4210 + 0x400 * channel, 0x244);
		/* DRAM command WR */
		write32(DEFAULT_MCHBAR + 0x4224 + 0x400 * channel, 0x1f201);
		write32(DEFAULT_MCHBAR + 0x4234 + 0x400 * channel,
			0x8001020 | ((ctrl->CWL + ctrl->tWTR + 8) << 16));
		write32(DEFAULT_MCHBAR + 0x4204 + 0x400 * channel,
			(slotrank << 24));
		write32(DEFAULT_MCHBAR + 0x4244 + 0x400 * channel, 0x389abcd);
		write32(DEFAULT_MCHBAR + 0x4214 + 0x400 * channel, 0x20e42);

		/* DRAM command RD */
		write32(DEFAULT_MCHBAR + 0x4228 + 0x400 * channel, 0x1f105);
		write32(DEFAULT_MCHBAR + 0x4238 + 0x400 * channel,
			0x4001020 | (max(ctrl->tRTP, 8) << 16));
		write32(DEFAULT_MCHBAR + 0x4208 + 0x400 * channel,
			(slotrank << 24));
		write32(DEFAULT_MCHBAR + 0x4248 + 0x400 * channel, 0x389abcd);
		write32(DEFAULT_MCHBAR + 0x4218 + 0x400 * channel, 0x20e42);

		/* DRAM command PRE */
		write32(DEFAULT_MCHBAR + 0x422c + 0x400 * channel, 0x1f002);
		write32(DEFAULT_MCHBAR + 0x423c + 0x400 * channel, 0xf1001);
		write32(DEFAULT_MCHBAR + 0x420c + 0x400 * channel,
			(slotrank << 24) | 0x60400);
		write32(DEFAULT_MCHBAR + 0x421c + 0x400 * channel, 0x240);

		/* Execute the queued commands and wait for completion. */
		write32(DEFAULT_MCHBAR + 0x4284 + 0x400 * channel, 0xc0001);
		wait_428c(channel);
		FOR_ALL_LANES {
			/* 0x4340: per-lane error register; zero means pass. */
			u32 r32 =
			    read32(DEFAULT_MCHBAR + 0x4340 + 4 * lane +
				   0x400 * channel);

			if (r32 == 0)
				lanes_ok |= 1 << lane;
		}
		ctr++;
		/* Stop early once every lane has passed at some offset. */
		if (lanes_ok == ((1 << NUM_LANES) - 1))
			break;
	}

	ctrl->timings[channel][slotrank] = saved_rt;

	printram("3lanes: %x\n", lanes_ok);
	return lanes_ok != ((1 << NUM_LANES) - 1);
}
2224
2225#include "raminit_patterns.h"
2226
2227static void fill_pattern5(ramctr_timing * ctrl, int channel, int patno)
2228{
2229 unsigned i, j;
2230 unsigned channel_offset =
2231 get_precedening_channels(ctrl, channel) * 0x40;
2232 unsigned channel_step = 0x40 * num_of_channels(ctrl);
2233
2234 if (patno) {
2235 u8 base8 = 0x80 >> ((patno - 1) % 8);
2236 u32 base = base8 | (base8 << 8) | (base8 << 16) | (base8 << 24);
2237 for (i = 0; i < 32; i++) {
2238 for (j = 0; j < 16; j++) {
2239 u32 val = use_base[patno - 1][i] & (1 << (j / 2)) ? base : 0;
2240 if (invert[patno - 1][i] & (1 << (j / 2)))
2241 val = ~val;
2242 write32((void *)(0x04000000 + channel_offset + i * channel_step +
2243 j * 4), val);
2244 }
2245 }
2246
2247 } else {
2248 for (i = 0; i < sizeof(pattern) / sizeof(pattern[0]); i++) {
2249 for (j = 0; j < 16; j++)
2250 write32((void *)(0x04000000 + channel_offset + i * channel_step +
2251 j * 4), pattern[i][j]);
2252 }
2253 sfence();
2254 }
2255}
2256
/*
 * Re-initialize the DRAM after a command-rate (320c) change: drain both
 * command queues with ZQCS, disable refresh, run a full JEDEC reset and
 * MRS sequence, then toggle the IO reset.  Needed because changing the
 * command timing invalidates the DIMMs' training state.
 */
static void reprogram_320c(ramctr_timing * ctrl)
{
	int channel, slotrank;

	FOR_ALL_POPULATED_CHANNELS {
		wait_428c(channel);

		/* choose an existing rank. */
		slotrank = !(ctrl->rankmap[channel] & 1) ? 2 : 0;

		/* DRAM command ZQCS */
		write32(DEFAULT_MCHBAR + 0x4220 + 0x400 * channel, 0x0f003);
		write32(DEFAULT_MCHBAR + 0x4230 + 0x400 * channel, 0x41001);

		write32(DEFAULT_MCHBAR + 0x4200 + 0x400 * channel,
			(slotrank << 24) | 0x60000);

		write32(DEFAULT_MCHBAR + 0x4210 + 0x400 * channel, 0x3e0);

		/* Execute the single queued command. */
		write32(DEFAULT_MCHBAR + 0x4284 + 0x400 * channel, 1);
		wait_428c(channel);
		/* Bit 21: keep the command bus driven. */
		write32(DEFAULT_MCHBAR + 0x4020 + 0x400 * channel,
			read32(DEFAULT_MCHBAR + 0x4020 +
			       0x400 * channel) | 0x200000);
	}

	/* refresh disable */
	write32(DEFAULT_MCHBAR + 0x5030, read32(DEFAULT_MCHBAR + 0x5030) & ~8);
	FOR_ALL_POPULATED_CHANNELS {
		wait_428c(channel);

		/* choose an existing rank. */
		slotrank = !(ctrl->rankmap[channel] & 1) ? 2 : 0;

		/* DRAM command ZQCS */
		write32(DEFAULT_MCHBAR + 0x4220 + 0x400 * channel, 0x0f003);
		write32(DEFAULT_MCHBAR + 0x4230 + 0x400 * channel, 0x41001);

		write32(DEFAULT_MCHBAR + 0x4200 + 0x400 * channel,
			(slotrank << 24) | 0x60000);

		write32(DEFAULT_MCHBAR + 0x4210 + 0x400 * channel, 0x3e0);

		write32(DEFAULT_MCHBAR + 0x4284 + 0x400 * channel, 1);
		wait_428c(channel);
	}

	/* jedec reset */
	dram_jedecreset(ctrl);
	/* mrs commands. */
	dram_mrscommands(ctrl);

	toggle_io_reset();
}
2311
2312#define MIN_C320C_LEN 13
2313
/*
 * Try a command rate (cmd_stretch: 0 = 1T, 2 = 2T) on one channel.
 * Sweeps the command phase shift (val_320c) over -127..127, running
 * test_320c() on every populated rank at each setting, then centers each
 * rank on the longest passing window.  Returns 0 on success; on failure
 * (no window of at least MIN_C320C_LEN) restores the saved timings and
 * returns MAKE_ERR.
 */
static int try_cmd_stretch(ramctr_timing *ctrl, int channel, int cmd_stretch)
{
	struct ram_rank_timings saved_timings[NUM_CHANNELS][NUM_SLOTRANKS];
	int slotrank;
	int c320c;
	int stat[NUM_SLOTRANKS][256];
	int delta = 0;

	printram("Trying cmd_stretch %d on channel %d\n", cmd_stretch, channel);

	/* Save current timings so they can be restored on failure. */
	FOR_ALL_POPULATED_RANKS {
		saved_timings[channel][slotrank] =
		    ctrl->timings[channel][slotrank];
	}

	ctrl->cmd_stretch[channel] = cmd_stretch;

	/* Program the channel timing register including the new
	 * command-stretch field in bits 31:30. */
	MCHBAR32(0x4004 + 0x400 * channel) =
		ctrl->tRRD
		| (ctrl->tRTP << 4)
		| (ctrl->tCKE << 8)
		| (ctrl->tWTR << 12)
		| (ctrl->tFAW << 16)
		| (ctrl->tWR << 24)
		| (ctrl->cmd_stretch[channel] << 30);

	/* The command rate changes the effective round trip; compensate
	 * val_4024 accordingly. */
	if (ctrl->cmd_stretch[channel] == 2)
		delta = 2;
	else if (ctrl->cmd_stretch[channel] == 0)
		delta = 4;

	FOR_ALL_POPULATED_RANKS {
		ctrl->timings[channel][slotrank].val_4024 -= delta;
	}

	/* Sweep the command phase shift and record pass/fail per rank. */
	for (c320c = -127; c320c <= 127; c320c++) {
		FOR_ALL_POPULATED_RANKS {
			ctrl->timings[channel][slotrank].val_320c = c320c;
		}
		program_timings(ctrl, channel);
		reprogram_320c(ctrl);
		FOR_ALL_POPULATED_RANKS {
			stat[slotrank][c320c + 127] =
			    test_320c(ctrl, channel, slotrank);
			printram("3stat: %d, %d, %d: %x\n",
				 channel, slotrank, c320c,
				 stat[slotrank][c320c + 127]);
		}
	}
	/* Center each rank in its longest passing window. */
	FOR_ALL_POPULATED_RANKS {
		struct run rn =
		    get_longest_zero_run(stat[slotrank], 255);
		ctrl->timings[channel][slotrank].val_320c =
		    rn.middle - 127;
		printram("3val %d, %d: %d\n", channel,
			 slotrank,
			 ctrl->timings[channel][slotrank].val_320c);
		if (rn.all || rn.length < MIN_C320C_LEN) {
			/* Window too small: roll back all ranks and fail. */
			FOR_ALL_POPULATED_RANKS {
				ctrl->timings[channel][slotrank] =
				    saved_timings[channel][slotrank];
			}
			return MAKE_ERR;
		}
	}

	return 0;
}
2382
2383/* Adjust CMD phase shift and try multiple command rates.
2384 * A command rate of 2T doubles the time needed for address and
2385 * command decode. */
2386int command_training(ramctr_timing *ctrl)
2387{
2388 int channel;
2389 int err;
2390
2391 FOR_ALL_POPULATED_CHANNELS {
2392 fill_pattern5(ctrl, channel, 0);
2393 write32(DEFAULT_MCHBAR + 0x4288 + 0x400 * channel, 0x1f);
2394 }
2395
2396 FOR_ALL_POPULATED_CHANNELS {
2397 /* try command rate 1T and 2T */
2398 err = try_cmd_stretch(ctrl, channel, 0);
2399 if (err) {
2400 err = try_cmd_stretch(ctrl, channel, 2);
2401 if (err) {
2402 printk(BIOS_EMERG, "c320c discovery failed\n");
2403 return err;
2404 }
2405 printram("Using CMD rate 2T on channel %u\n", channel);
2406 } else
2407 printram("Using CMD rate 1T on channel %u\n", channel);
2408 }
2409
2410 FOR_ALL_POPULATED_CHANNELS
2411 program_timings(ctrl, channel);
2412
2413 reprogram_320c(ctrl);
2414 return 0;
2415}
2416
2417
/*
 * Sweep the DQS edge timing (0..MAX_EDGE_TIMING) for one rank using the
 * MPR (Multi Purpose Register) predefined read pattern, and record per-lane
 * error counts at each setting.  On success, fills edges[lane] with the
 * middle of each lane's longest passing window and returns 0; returns
 * MAKE_ERR if any lane never passes.
 */
static int discover_edges_real(ramctr_timing *ctrl, int channel, int slotrank,
			int *edges)
{
	int edge;
	int statistics[NUM_LANES][MAX_EDGE_TIMING + 1];
	int lane;

	for (edge = 0; edge <= MAX_EDGE_TIMING; edge++) {
		/* Apply the candidate edge to both rising and falling. */
		FOR_ALL_LANES {
			ctrl->timings[channel][slotrank].lanes[lane].rising =
			    edge;
			ctrl->timings[channel][slotrank].lanes[lane].falling =
			    edge;
		}
		program_timings(ctrl, channel);

		/* Clear error counters; dummy-read 0x4140 latches state. */
		FOR_ALL_LANES {
			write32(DEFAULT_MCHBAR + 0x4340 + 0x400 * channel +
				4 * lane, 0);
			read32(DEFAULT_MCHBAR + 0x400 * channel + 4 * lane +
			       0x4140);
		}

		wait_428c(channel);
		/* DRAM command MRS
		 * write MR3 MPR enable
		 * in this mode only RD and RDA are allowed
		 * all reads return a predefined pattern */
		write32(DEFAULT_MCHBAR + 0x4220 + 0x400 * channel, 0x1f000);
		write32(DEFAULT_MCHBAR + 0x4230 + 0x400 * channel,
			(0xc01 | (ctrl->tMOD << 16)));
		write32(DEFAULT_MCHBAR + 0x4200 + 0x400 * channel,
			(slotrank << 24) | 0x360004);
		write32(DEFAULT_MCHBAR + 0x4210 + 0x400 * channel, 0);

		/* DRAM command RD */
		write32(DEFAULT_MCHBAR + 0x4224 + 0x400 * channel, 0x1f105);
		write32(DEFAULT_MCHBAR + 0x4234 + 0x400 * channel, 0x40411f4);
		write32(DEFAULT_MCHBAR + 0x4204 + 0x400 * channel,
			(slotrank << 24));
		write32(DEFAULT_MCHBAR + 0x4214 + 0x400 * channel, 0);

		/* DRAM command RD */
		write32(DEFAULT_MCHBAR + 0x4228 + 0x400 * channel, 0x1f105);
		write32(DEFAULT_MCHBAR + 0x4238 + 0x400 * channel,
			0x1001 | ((ctrl->CAS + 8) << 16));
		write32(DEFAULT_MCHBAR + 0x4208 + 0x400 * channel,
			(slotrank << 24) | 0x60000);
		write32(DEFAULT_MCHBAR + 0x4218 + 0x400 * channel, 0);

		/* DRAM command MRS
		 * MR3 disable MPR */
		write32(DEFAULT_MCHBAR + 0x422c + 0x400 * channel, 0x1f000);
		write32(DEFAULT_MCHBAR + 0x423c + 0x400 * channel,
			(0xc01 | (ctrl->tMOD << 16)));
		write32(DEFAULT_MCHBAR + 0x420c + 0x400 * channel,
			(slotrank << 24) | 0x360000);
		write32(DEFAULT_MCHBAR + 0x421c + 0x400 * channel, 0);

		/* Run the 4-deep command queue. */
		write32(DEFAULT_MCHBAR + 0x4284 + 0x400 * channel, 0xc0001);

		wait_428c(channel);

		/* Collect per-lane error counts for this edge setting. */
		FOR_ALL_LANES {
			statistics[lane][edge] =
			    read32(DEFAULT_MCHBAR + 0x4340 + 0x400 * channel +
				   lane * 4);
		}
	}
	FOR_ALL_LANES {
		struct run rn =
		    get_longest_zero_run(statistics[lane], MAX_EDGE_TIMING + 1);
		edges[lane] = rn.middle;
		if (rn.all) {
			printk(BIOS_EMERG, "edge discovery failed: %d, %d, %d\n",
			       channel, slotrank, lane);
			return MAKE_ERR;
		}
		printram("eval %d, %d, %d: %02x\n", channel, slotrank,
			 lane, edges[lane]);
	}
	return 0;
}
2501
/*
 * Read DQS edge training: prepare the DQ logic with MPR reads at two fixed
 * edge settings (16 and 48), then run discover_edges_real() for falling and
 * rising edges separately (selected via register 0x4eb0) and program the
 * per-lane results.  Returns 0 on success or the error from
 * discover_edges_real().
 */
int discover_edges(ramctr_timing *ctrl)
{
	int falling_edges[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES];
	int rising_edges[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES];
	int channel, slotrank, lane;
	int err;

	write32(DEFAULT_MCHBAR + 0x3400, 0);

	toggle_io_reset();

	/* Clear per-lane deskew registers. */
	FOR_ALL_POPULATED_CHANNELS FOR_ALL_LANES {
		write32(DEFAULT_MCHBAR + 4 * lane +
			0x400 * channel + 0x4080, 0);
	}

	FOR_ALL_POPULATED_CHANNELS {
		fill_pattern0(ctrl, channel, 0, 0);
		write32(DEFAULT_MCHBAR + 0x4288 + (channel << 10), 0);
		FOR_ALL_LANES {
			read32(DEFAULT_MCHBAR + 0x400 * channel +
			       lane * 4 + 0x4140);
		}

		/* First preparation pass with both edges at 16. */
		FOR_ALL_POPULATED_RANKS FOR_ALL_LANES {
			ctrl->timings[channel][slotrank].lanes[lane].falling =
			    16;
			ctrl->timings[channel][slotrank].lanes[lane].rising =
			    16;
		}

		program_timings(ctrl, channel);

		FOR_ALL_POPULATED_RANKS {
			wait_428c(channel);

			/* DRAM command MRS
			 * MR3 enable MPR
			 * write MR3 MPR enable
			 * in this mode only RD and RDA are allowed
			 * all reads return a predefined pattern */
			write32(DEFAULT_MCHBAR + 0x4220 + 0x400 * channel,
				0x1f000);
			write32(DEFAULT_MCHBAR + 0x4230 + 0x400 * channel,
				0xc01 | (ctrl->tMOD << 16));
			write32(DEFAULT_MCHBAR + 0x4200 + 0x400 * channel,
				(slotrank << 24) | 0x360004);
			write32(DEFAULT_MCHBAR + 0x4210 + 0x400 * channel, 0);

			/* DRAM command RD */
			write32(DEFAULT_MCHBAR + 0x4224 + 0x400 * channel,
				0x1f105);
			write32(DEFAULT_MCHBAR + 0x4234 + 0x400 * channel,
				0x4041003);
			write32(DEFAULT_MCHBAR + 0x4204 + 0x400 * channel,
				(slotrank << 24) | 0);
			write32(DEFAULT_MCHBAR + 0x4214 + 0x400 * channel, 0);

			/* DRAM command RD */
			write32(DEFAULT_MCHBAR + 0x4228 + 0x400 * channel,
				0x1f105);
			write32(DEFAULT_MCHBAR + 0x4238 + 0x400 * channel,
				0x1001 | ((ctrl->CAS + 8) << 16));
			write32(DEFAULT_MCHBAR + 0x4208 + 0x400 * channel,
				(slotrank << 24) | 0x60000);
			write32(DEFAULT_MCHBAR + 0x4218 + 0x400 * channel, 0);

			/* DRAM command MRS
			 * MR3 disable MPR */
			write32(DEFAULT_MCHBAR + 0x422c + 0x400 * channel,
				0x1f000);
			write32(DEFAULT_MCHBAR + 0x423c + 0x400 * channel,
				0xc01 | (ctrl->tMOD << 16));
			write32(DEFAULT_MCHBAR + 0x420c + 0x400 * channel,
				(slotrank << 24) | 0x360000);
			write32(DEFAULT_MCHBAR + 0x421c + 0x400 * channel, 0);
			write32(DEFAULT_MCHBAR + 0x4284 + 0x400 * channel,
				0xc0001);

			wait_428c(channel);
		}

		/* XXX: check any measured value ? */

		/* Second preparation pass with both edges at 48. */
		FOR_ALL_POPULATED_RANKS FOR_ALL_LANES {
			ctrl->timings[channel][slotrank].lanes[lane].falling =
			    48;
			ctrl->timings[channel][slotrank].lanes[lane].rising =
			    48;
		}

		program_timings(ctrl, channel);

		FOR_ALL_POPULATED_RANKS {
			wait_428c(channel);

			/* DRAM command MRS
			 * MR3 enable MPR
			 * write MR3 MPR enable
			 * in this mode only RD and RDA are allowed
			 * all reads return a predefined pattern */
			write32(DEFAULT_MCHBAR + 0x4220 + 0x400 * channel,
				0x1f000);
			write32(DEFAULT_MCHBAR + 0x4230 + 0x400 * channel,
				0xc01 | (ctrl->tMOD << 16));
			write32(DEFAULT_MCHBAR + 0x4200 + 0x400 * channel,
				(slotrank << 24) | 0x360004);
			write32(DEFAULT_MCHBAR + 0x4210 + 0x400 * channel, 0);

			/* DRAM command RD */
			write32(DEFAULT_MCHBAR + 0x4224 + 0x400 * channel,
				0x1f105);
			write32(DEFAULT_MCHBAR + 0x4234 + 0x400 * channel,
				0x4041003);
			write32(DEFAULT_MCHBAR + 0x4204 + 0x400 * channel,
				(slotrank << 24) | 0);
			write32(DEFAULT_MCHBAR + 0x4214 + 0x400 * channel, 0);

			/* DRAM command RD */
			write32(DEFAULT_MCHBAR + 0x4228 + 0x400 * channel,
				0x1f105);
			write32(DEFAULT_MCHBAR + 0x4238 + 0x400 * channel,
				0x1001 | ((ctrl->CAS + 8) << 16));
			write32(DEFAULT_MCHBAR + 0x4208 + 0x400 * channel,
				(slotrank << 24) | 0x60000);
			write32(DEFAULT_MCHBAR + 0x4218 + 0x400 * channel, 0);

			/* DRAM command MRS
			 * MR3 disable MPR */
			write32(DEFAULT_MCHBAR + 0x422c + 0x400 * channel,
				0x1f000);
			write32(DEFAULT_MCHBAR + 0x423c + 0x400 * channel,
				0xc01 | (ctrl->tMOD << 16));
			write32(DEFAULT_MCHBAR + 0x420c + 0x400 * channel,
				(slotrank << 24) | 0x360000);
			write32(DEFAULT_MCHBAR + 0x421c + 0x400 * channel, 0);

			write32(DEFAULT_MCHBAR + 0x4284 + 0x400 * channel,
				0xc0001);
			wait_428c(channel);
		}

		/* XXX: check any measured value ? */

		/* Derive per-lane deskew from the measured 0x4040 values. */
		FOR_ALL_LANES {
			write32(DEFAULT_MCHBAR + 0x4080 + 0x400 * channel +
				lane * 4,
				~read32(DEFAULT_MCHBAR + 0x4040 +
					0x400 * channel + lane * 4) & 0xff);
		}

		fill_pattern0(ctrl, channel, 0, 0xffffffff);
		write32(DEFAULT_MCHBAR + 0x4288 + (channel << 10), 0);
	}

	/* FIXME: under some conditions (older chipsets?) vendor BIOS sets both edges to the same value. */
	write32(DEFAULT_MCHBAR + 0x4eb0, 0x300);
	printram("discover falling edges:\n[%x] = %x\n", 0x4eb0, 0x300);

	FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS {
		err = discover_edges_real(ctrl, channel, slotrank,
				    falling_edges[channel][slotrank]);
		if (err)
			return err;
	}

	write32(DEFAULT_MCHBAR + 0x4eb0, 0x200);
	printram("discover rising edges:\n[%x] = %x\n", 0x4eb0, 0x200);

	FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS {
		err = discover_edges_real(ctrl, channel, slotrank,
				    rising_edges[channel][slotrank]);
		if (err)
			return err;
	}

	write32(DEFAULT_MCHBAR + 0x4eb0, 0);

	/* Commit the discovered edge centers into the timing state. */
	FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS FOR_ALL_LANES {
		ctrl->timings[channel][slotrank].lanes[lane].falling =
		    falling_edges[channel][slotrank][lane];
		ctrl->timings[channel][slotrank].lanes[lane].rising =
		    rising_edges[channel][slotrank][lane];
	}

	FOR_ALL_POPULATED_CHANNELS {
		program_timings(ctrl, channel);
	}

	/* Clear the per-lane deskew registers again. */
	FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS FOR_ALL_LANES {
		write32(DEFAULT_MCHBAR + 0x4080 + 0x400 * channel + 4 * lane,
			0);
	}
	return 0;
}
2697
/*
 * Sweep the DQ edge timing for writes on one rank.  For each of three
 * settings of register 0x3000 bits 24+ (reg3000b24) and each test pattern,
 * the full edge range is swept with an ACT/WR/RD/PRE burst; per-lane
 * pass/fail bits are read from 0x436c.  Each lane's window is intersected
 * across all runs (shrunk by ctrl->edge_offset[i] on both sides) and
 * edges[lane] receives the window center.  Returns MAKE_ERR if any lane's
 * window collapses.
 */
static int discover_edges_write_real(ramctr_timing *ctrl, int channel,
				  int slotrank, int *edges)
{
	int edge;
	u32 raw_statistics[MAX_EDGE_TIMING + 1];
	int statistics[MAX_EDGE_TIMING + 1];
	const int reg3000b24[] = { 0, 0xc, 0x2c };
	int lane, i;
	int lower[NUM_LANES];
	int upper[NUM_LANES];
	int pat;

	/* Start with the widest possible window per lane. */
	FOR_ALL_LANES {
		lower[lane] = 0;
		upper[lane] = MAX_EDGE_TIMING;
	}

	for (i = 0; i < 3; i++) {
		write32(DEFAULT_MCHBAR + 0x3000 + 0x100 * channel,
			reg3000b24[i] << 24);
		printram("[%x] = 0x%08x\n",
		       0x3000 + 0x100 * channel, reg3000b24[i] << 24);
		for (pat = 0; pat < NUM_PATTERNS; pat++) {
			fill_pattern5(ctrl, channel, pat);
			write32(DEFAULT_MCHBAR + 0x4288 + 0x400 * channel, 0x1f);
			printram("using pattern %d\n", pat);
			for (edge = 0; edge <= MAX_EDGE_TIMING; edge++) {
				/* Apply the candidate edge to all lanes. */
				FOR_ALL_LANES {
					ctrl->timings[channel][slotrank].lanes[lane].
						rising = edge;
					ctrl->timings[channel][slotrank].lanes[lane].
						falling = edge;
				}
				program_timings(ctrl, channel);

				/* Clear error counters before the burst. */
				FOR_ALL_LANES {
					write32(DEFAULT_MCHBAR + 0x4340 +
						0x400 * channel + 4 * lane, 0);
					read32(DEFAULT_MCHBAR + 0x400 * channel +
					       4 * lane + 0x4140);
				}
				wait_428c(channel);

				/* DRAM command ACT */
				write32(DEFAULT_MCHBAR + 0x4220 + 0x400 * channel,
					0x1f006);
				write32(DEFAULT_MCHBAR + 0x4230 + 0x400 * channel,
					0x4 | (ctrl->tRCD << 16)
					| (max(ctrl->tRRD, (ctrl->tFAW >> 2) + 1) <<
					   10));
				write32(DEFAULT_MCHBAR + 0x4200 + 0x400 * channel,
					(slotrank << 24) | 0x60000);
				write32(DEFAULT_MCHBAR + 0x4210 + 0x400 * channel,
					0x240);

				/* DRAM command WR */
				write32(DEFAULT_MCHBAR + 0x4224 + 0x400 * channel,
					0x1f201);
				write32(DEFAULT_MCHBAR + 0x4234 + 0x400 * channel,
					0x8005020 | ((ctrl->tWTR + ctrl->CWL + 8) <<
						     16));
				write32(DEFAULT_MCHBAR + 0x4204 + 0x400 * channel,
					(slotrank << 24));
				write32(DEFAULT_MCHBAR + 0x4214 + 0x400 * channel,
					0x242);

				/* DRAM command RD */
				write32(DEFAULT_MCHBAR + 0x4228 + 0x400 * channel,
					0x1f105);
				write32(DEFAULT_MCHBAR + 0x4238 + 0x400 * channel,
					0x4005020 | (max(ctrl->tRTP, 8) << 16));
				write32(DEFAULT_MCHBAR + 0x4208 + 0x400 * channel,
					(slotrank << 24));
				write32(DEFAULT_MCHBAR + 0x4218 + 0x400 * channel,
					0x242);

				/* DRAM command PRE */
				write32(DEFAULT_MCHBAR + 0x422c + 0x400 * channel,
					0x1f002);
				write32(DEFAULT_MCHBAR + 0x423c + 0x400 * channel,
					0xc01 | (ctrl->tRP << 16));
				write32(DEFAULT_MCHBAR + 0x420c + 0x400 * channel,
					(slotrank << 24) | 0x60400);
				write32(DEFAULT_MCHBAR + 0x421c + 0x400 * channel, 0);

				/* Run the queue and latch the lane state. */
				write32(DEFAULT_MCHBAR + 0x4284 + 0x400 * channel,
					0xc0001);
				wait_428c(channel);
				FOR_ALL_LANES {
					read32(DEFAULT_MCHBAR + 0x4340 +
					       0x400 * channel + lane * 4);
				}

				/* 0x436c holds one pass/fail bit per lane. */
				raw_statistics[edge] =
					MCHBAR32(0x436c + 0x400 * channel);
			}
			FOR_ALL_LANES {
				struct run rn;
				for (edge = 0; edge <= MAX_EDGE_TIMING; edge++)
					statistics[edge] =
						! !(raw_statistics[edge] & (1 << lane));
				rn = get_longest_zero_run(statistics,
							  MAX_EDGE_TIMING + 1);
				printram("edges: %d, %d, %d: 0x%02x-0x%02x-0x%02x, 0x%02x-0x%02x\n",
					 channel, slotrank, i, rn.start, rn.middle,
					 rn.end, rn.start + ctrl->edge_offset[i],
					 rn.end - ctrl->edge_offset[i]);
				/* Intersect this run's window with the
				 * accumulated per-lane window. */
				lower[lane] =
				    max(rn.start + ctrl->edge_offset[i], lower[lane]);
				upper[lane] =
				    min(rn.end - ctrl->edge_offset[i], upper[lane]);
				edges[lane] = (lower[lane] + upper[lane]) / 2;
				if (rn.all || (lower[lane] > upper[lane])) {
					printk(BIOS_EMERG, "edge write discovery failed: %d, %d, %d\n",
					       channel, slotrank, lane);
					return MAKE_ERR;
				}
			}
		}
	}

	write32(DEFAULT_MCHBAR + 0x3000, 0);
	printram("CPA\n");
	return 0;
}
2823
/*
 * Write DQ edge training: run discover_edges_write_real() for falling and
 * rising edges (selected via register 0x4eb0), store the results in the
 * per-lane timing state and reprogram the channels.  Returns 0 on success
 * or the error from discover_edges_write_real().
 */
int discover_edges_write(ramctr_timing *ctrl)
{
	int falling_edges[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES];
	int rising_edges[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES];
	int channel, slotrank, lane;
	int err;

	/* FIXME: under some conditions (older chipsets?) vendor BIOS sets both edges to the same value. */
	write32(DEFAULT_MCHBAR + 0x4eb0, 0x300);
	printram("discover falling edges write:\n[%x] = %x\n", 0x4eb0, 0x300);

	FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS {
		err = discover_edges_write_real(ctrl, channel, slotrank,
					  falling_edges[channel][slotrank]);
		if (err)
			return err;
	}

	write32(DEFAULT_MCHBAR + 0x4eb0, 0x200);
	printram("discover rising edges write:\n[%x] = %x\n", 0x4eb0, 0x200);

	FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS {
		err = discover_edges_write_real(ctrl, channel, slotrank,
					  rising_edges[channel][slotrank]);
		if (err)
			return err;
	}

	write32(DEFAULT_MCHBAR + 0x4eb0, 0);

	/* Commit the discovered edge centers into the timing state. */
	FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS FOR_ALL_LANES {
		ctrl->timings[channel][slotrank].lanes[lane].falling =
		    falling_edges[channel][slotrank][lane];
		ctrl->timings[channel][slotrank].lanes[lane].rising =
		    rising_edges[channel][slotrank][lane];
	}

	FOR_ALL_POPULATED_CHANNELS
		program_timings(ctrl, channel);

	/* Clear the per-lane deskew registers. */
	FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS FOR_ALL_LANES {
		write32(DEFAULT_MCHBAR + 0x4080 + 0x400 * channel + 4 * lane,
			0);
	}
	return 0;
}
2870
/*
 * Issue one ACT/WR/RD/PRE test burst on the given rank through the IOSAV
 * command queue, used by discover_timC_write() to probe a candidate timC
 * value.  Results are read by the caller from 0x436c afterwards.
 */
static void test_timC_write(ramctr_timing *ctrl, int channel, int slotrank)
{
	wait_428c(channel);
	/* DRAM command ACT */
	write32(DEFAULT_MCHBAR + 0x4220 + 0x400 * channel, 0x1f006);
	write32(DEFAULT_MCHBAR + 0x4230 + 0x400 * channel,
		(max((ctrl->tFAW >> 2) + 1, ctrl->tRRD)
		 << 10) | (ctrl->tRCD << 16) | 4);
	write32(DEFAULT_MCHBAR + 0x4200 + 0x400 * channel,
		(slotrank << 24) | 0x60000);
	write32(DEFAULT_MCHBAR + 0x4210 + 0x400 * channel, 0x244);

	/* DRAM command WR */
	write32(DEFAULT_MCHBAR + 0x4224 + 0x400 * channel, 0x1f201);
	write32(DEFAULT_MCHBAR + 0x4234 + 0x400 * channel,
		0x80011e0 |
		((ctrl->tWTR + ctrl->CWL + 8) << 16));
	write32(DEFAULT_MCHBAR + 0x4204 +
		0x400 * channel, (slotrank << 24));
	write32(DEFAULT_MCHBAR + 0x4214 +
		0x400 * channel, 0x242);

	/* DRAM command RD */
	write32(DEFAULT_MCHBAR + 0x4228 +
		0x400 * channel, 0x1f105);
	write32(DEFAULT_MCHBAR + 0x4238 +
		0x400 * channel,
		0x40011e0 | (max(ctrl->tRTP, 8) << 16));
	write32(DEFAULT_MCHBAR + 0x4208 +
		0x400 * channel, (slotrank << 24));
	write32(DEFAULT_MCHBAR + 0x4218 +
		0x400 * channel, 0x242);

	/* DRAM command PRE */
	write32(DEFAULT_MCHBAR + 0x422c +
		0x400 * channel, 0x1f002);
	write32(DEFAULT_MCHBAR + 0x423c +
		0x400 * channel,
		0x1001 | (ctrl->tRP << 16));
	write32(DEFAULT_MCHBAR + 0x420c +
		0x400 * channel,
		(slotrank << 24) | 0x60400);
	write32(DEFAULT_MCHBAR + 0x421c +
		0x400 * channel, 0);

	/* Execute the queued commands and wait for completion. */
	write32(DEFAULT_MCHBAR + 0x4284 +
		0x400 * channel, 0xc0001);
	wait_428c(channel);
}
2920
/*
 * Write timC training: for three settings of register 0xe3c bits 24+
 * (rege3c_b24) and every test pattern, sweep timC over 0..MAX_TIMC-1 on
 * each populated rank, run test_timC_write() and collect per-lane pass/fail
 * bits from 0x436c.  Each lane's passing window is intersected across runs
 * (shrunk by ctrl->timC_offset[i]) and the window center becomes the final
 * timC.  Returns MAKE_ERR if any lane never passes.
 */
int discover_timC_write(ramctr_timing *ctrl)
{
	const u8 rege3c_b24[3] = { 0, 0xf, 0x2f };
	int i, pat;

	int lower[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES];
	int upper[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES];
	int channel, slotrank, lane;

	/* Start with the widest possible window per lane. */
	FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS FOR_ALL_LANES {
		lower[channel][slotrank][lane] = 0;
		upper[channel][slotrank][lane] = MAX_TIMC;
	}

	write32(DEFAULT_MCHBAR + 0x4ea8, 1);
	printram("discover timC write:\n");

	for (i = 0; i < 3; i++)
		FOR_ALL_POPULATED_CHANNELS {
			/* Update only bits 29:24 of 0xe3c. */
			write32(DEFAULT_MCHBAR + 0xe3c + (channel * 0x100),
				(rege3c_b24[i] << 24)
				| (read32(DEFAULT_MCHBAR + 0xe3c + (channel * 0x100))
				   & ~0x3f000000));
			udelay(2);
			for (pat = 0; pat < NUM_PATTERNS; pat++) {
				FOR_ALL_POPULATED_RANKS {
					int timC;
					u32 raw_statistics[MAX_TIMC + 1];
					int statistics[MAX_TIMC + 1];

					/* Make sure rn.start < rn.end */
					statistics[MAX_TIMC] = 1;

					fill_pattern5(ctrl, channel, pat);
					write32(DEFAULT_MCHBAR + 0x4288 + 0x400 * channel, 0x1f);
					for (timC = 0; timC < MAX_TIMC; timC++) {
						FOR_ALL_LANES
							ctrl->timings[channel][slotrank].lanes[lane].timC = timC;
						program_timings(ctrl, channel);

						test_timC_write (ctrl, channel, slotrank);

						/* 0x436c: one pass/fail bit per lane. */
						raw_statistics[timC] =
							MCHBAR32(0x436c + 0x400 * channel);
					}
					FOR_ALL_LANES {
						struct run rn;
						for (timC = 0; timC < MAX_TIMC; timC++)
							statistics[timC] =
								!!(raw_statistics[timC] &
								   (1 << lane));

						rn = get_longest_zero_run(statistics,
									  MAX_TIMC + 1);
						if (rn.all) {
							printk(BIOS_EMERG, "timC write discovery failed: %d, %d, %d\n",
							       channel, slotrank, lane);
							return MAKE_ERR;
						}
						printram("timC: %d, %d, %d: 0x%02x-0x%02x-0x%02x, 0x%02x-0x%02x\n",
							 channel, slotrank, i, rn.start,
							 rn.middle, rn.end,
							 rn.start + ctrl->timC_offset[i],
							 rn.end - ctrl->timC_offset[i]);
						/* Intersect with the accumulated
						 * per-lane window. */
						lower[channel][slotrank][lane] =
							max(rn.start + ctrl->timC_offset[i],
							    lower[channel][slotrank][lane]);
						upper[channel][slotrank][lane] =
							min(rn.end - ctrl->timC_offset[i],
							    upper[channel][slotrank][lane]);

					}
				}
			}
		}

	/* Restore bits 29:24 of 0xe3c to zero. */
	FOR_ALL_CHANNELS {
		write32(DEFAULT_MCHBAR + (channel * 0x100) + 0xe3c,
			0 | (read32(DEFAULT_MCHBAR + (channel * 0x100) + 0xe3c) &
			     ~0x3f000000));
		udelay(2);
	}

	write32(DEFAULT_MCHBAR + 0x4ea8, 0);

	printram("CPB\n");

	/* Program each lane's timC to the center of its window. */
	FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS FOR_ALL_LANES {
		printram("timC %d, %d, %d: %x\n", channel,
			 slotrank, lane,
			 (lower[channel][slotrank][lane] +
			  upper[channel][slotrank][lane]) / 2);
		ctrl->timings[channel][slotrank].lanes[lane].timC =
		    (lower[channel][slotrank][lane] +
		     upper[channel][slotrank][lane]) / 2;
	}
	FOR_ALL_POPULATED_CHANNELS {
		program_timings(ctrl, channel);
	}
	return 0;
}
3022
3023void normalize_training(ramctr_timing * ctrl)
3024{
3025 int channel, slotrank, lane;
3026 int mat = 0;
3027
3028 FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS {
3029 int delta;
3030 FOR_ALL_LANES mat =
3031 max(ctrl->timings[channel][slotrank].lanes[lane].timA, mat);
3032 delta = (mat >> 6) - ctrl->timings[channel][slotrank].val_4028;
3033 ctrl->timings[channel][slotrank].val_4024 += delta;
3034 ctrl->timings[channel][slotrank].val_4028 += delta;
3035 }
3036
3037 FOR_ALL_POPULATED_CHANNELS {
3038 program_timings(ctrl, channel);
3039 }
3040}
3041
/*
 * Mirror the MR0/MR1 mode-register values for every populated rank into the
 * memory controller's per-rank shadow registers (offsets 0x0004/0x0008 in
 * each rank's lane_registers block), so the controller knows what was
 * programmed into the DIMMs.
 */
void write_controller_mr(ramctr_timing * ctrl)
{
	int channel, slotrank;

	FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS {
		write32(DEFAULT_MCHBAR + 0x0004 + (channel << 8) +
			lane_registers[slotrank], make_mr0(ctrl, slotrank));
		write32(DEFAULT_MCHBAR + 0x0008 + (channel << 8) +
			lane_registers[slotrank],
			make_mr1(ctrl, slotrank, channel));
	}
}
3054
/*
 * Quick sanity test of the trained memory: first check each populated
 * channel's status register (0x42a0) for error bits, then write and read
 * back a fixed pattern on every populated rank and verify the per-lane
 * error registers stay zero.  Returns 0 on success, MAKE_ERR on the first
 * failure.
 */
int channel_test(ramctr_timing *ctrl)
{
	int channel, slotrank, lane;

	slotrank = 0;
	/* Stage 1: check channel status for pre-existing errors. */
	FOR_ALL_POPULATED_CHANNELS
		if (read32(DEFAULT_MCHBAR + 0x42a0 + (channel << 10)) & 0xa000) {
			printk(BIOS_EMERG, "Mini channel test failed (1): %d\n",
			       channel);
			return MAKE_ERR;
		}
	FOR_ALL_POPULATED_CHANNELS {
		fill_pattern0(ctrl, channel, 0x12345678, 0x98765432);

		write32(DEFAULT_MCHBAR + 0x4288 + (channel << 10), 0);
	}

	/* Stage 2: write/read test on every populated rank. */
	for (slotrank = 0; slotrank < 4; slotrank++)
		FOR_ALL_CHANNELS
			if (ctrl->rankmap[channel] & (1 << slotrank)) {
		FOR_ALL_LANES {
			/* Clear per-lane error counters. */
			write32(DEFAULT_MCHBAR + (0x4f40 + 4 * lane), 0);
			write32(DEFAULT_MCHBAR + (0x4d40 + 4 * lane), 0);
		}
		wait_428c(channel);
		/* DRAM command ACT */
		write32(DEFAULT_MCHBAR + 0x4220 + (channel << 10), 0x0001f006);
		write32(DEFAULT_MCHBAR + 0x4230 + (channel << 10), 0x0028a004);
		write32(DEFAULT_MCHBAR + 0x4200 + (channel << 10),
			0x00060000 | (slotrank << 24));
		write32(DEFAULT_MCHBAR + 0x4210 + (channel << 10), 0x00000244);
		/* DRAM command WR */
		write32(DEFAULT_MCHBAR + 0x4224 + (channel << 10), 0x0001f201);
		write32(DEFAULT_MCHBAR + 0x4234 + (channel << 10), 0x08281064);
		write32(DEFAULT_MCHBAR + 0x4204 + (channel << 10),
			0x00000000 | (slotrank << 24));
		write32(DEFAULT_MCHBAR + 0x4214 + (channel << 10), 0x00000242);
		/* DRAM command RD */
		write32(DEFAULT_MCHBAR + 0x4228 + (channel << 10), 0x0001f105);
		write32(DEFAULT_MCHBAR + 0x4238 + (channel << 10), 0x04281064);
		write32(DEFAULT_MCHBAR + 0x4208 + (channel << 10),
			0x00000000 | (slotrank << 24));
		write32(DEFAULT_MCHBAR + 0x4218 + (channel << 10), 0x00000242);
		/* DRAM command PRE */
		write32(DEFAULT_MCHBAR + 0x422c + (channel << 10), 0x0001f002);
		write32(DEFAULT_MCHBAR + 0x423c + (channel << 10), 0x00280c01);
		write32(DEFAULT_MCHBAR + 0x420c + (channel << 10),
			0x00060400 | (slotrank << 24));
		write32(DEFAULT_MCHBAR + 0x421c + (channel << 10), 0x00000240);
		/* Execute the queued commands. */
		write32(DEFAULT_MCHBAR + 0x4284 + (channel << 10), 0x000c0001);
		wait_428c(channel);
		FOR_ALL_LANES
			if (read32(DEFAULT_MCHBAR + 0x4340 + (channel << 10) + 4 * lane)) {
				printk(BIOS_EMERG, "Mini channel test failed (2): %d, %d, %d\n",
				       channel, slotrank, lane);
				return MAKE_ERR;
			}
	}
	return 0;
}
3115
3116void set_scrambling_seed(ramctr_timing * ctrl)
3117{
3118 int channel;
3119
3120 /* FIXME: we hardcode seeds. Do we need to use some PRNG for them?
3121 I don't think so. */
3122 static u32 seeds[NUM_CHANNELS][3] = {
3123 {0x00009a36, 0xbafcfdcf, 0x46d1ab68},
3124 {0x00028bfa, 0x53fe4b49, 0x19ed5483}
3125 };
3126 FOR_ALL_POPULATED_CHANNELS {
3127 MCHBAR32(0x4020 + 0x400 * channel) &= ~0x10000000;
3128 write32(DEFAULT_MCHBAR + 0x4034, seeds[channel][0]);
3129 write32(DEFAULT_MCHBAR + 0x403c, seeds[channel][1]);
3130 write32(DEFAULT_MCHBAR + 0x4038, seeds[channel][2]);
3131 }
3132}
3133
3134void set_4f8c(void)
3135{
3136 struct cpuid_result cpures;
3137 u32 cpu;
3138
3139 cpures = cpuid(1);
3140 cpu = (cpures.eax);
3141 if (IS_SANDY_CPU(cpu) && (IS_SANDY_CPU_D0(cpu) || IS_SANDY_CPU_D1(cpu))) {
3142 MCHBAR32(0x4f8c) = 0x141D1519;
3143 } else {
3144 MCHBAR32(0x4f8c) = 0x551D1519;
3145 }
3146}
3147
3148void prepare_training(ramctr_timing * ctrl)
3149{
3150 int channel;
3151
3152 FOR_ALL_POPULATED_CHANNELS {
3153 // Always drive command bus
3154 MCHBAR32(0x4004 + 0x400 * channel) |= 0x20000000;
3155 }
3156
3157 udelay(1);
3158
3159 FOR_ALL_POPULATED_CHANNELS {
3160 wait_428c(channel);
3161 }
3162}
3163
3164void set_4008c(ramctr_timing * ctrl)
3165{
3166 int channel, slotrank;
3167 u32 reg;
3168 FOR_ALL_POPULATED_CHANNELS {
3169 u32 b20, b4_8_12;
3170 int min_320c = 10000;
3171 int max_320c = -10000;
3172
3173 FOR_ALL_POPULATED_RANKS {
3174 max_320c = max(ctrl->timings[channel][slotrank].val_320c, max_320c);
3175 min_320c = min(ctrl->timings[channel][slotrank].val_320c, min_320c);
3176 }
3177
3178 if (max_320c - min_320c > 51)
3179 b20 = 0;
3180 else
3181 b20 = ctrl->ref_card_offset[channel];
3182
3183 if (ctrl->reg_320c_range_threshold < max_320c - min_320c)
3184 b4_8_12 = 0x3330;
3185 else
3186 b4_8_12 = 0x2220;
3187
3188 reg = read32(DEFAULT_MCHBAR + 0x400c + (channel << 10));
3189 write32(DEFAULT_MCHBAR + 0x400c + (channel << 10),
3190 (reg & 0xFFF0FFFF)
3191 | (ctrl->ref_card_offset[channel] << 16)
3192 | (ctrl->ref_card_offset[channel] << 18));
3193 write32(DEFAULT_MCHBAR + 0x4008 + (channel << 10),
3194 0x0a000000
3195 | (b20 << 20)
3196 | ((ctrl->ref_card_offset[channel] + 2) << 16)
3197 | b4_8_12);
3198 }
3199}
3200
3201void set_42a0(ramctr_timing * ctrl)
3202{
3203 int channel;
3204 FOR_ALL_POPULATED_CHANNELS {
3205 write32(DEFAULT_MCHBAR + (0x42a0 + 0x400 * channel),
3206 0x00001000 | ctrl->rankmap[channel]);
3207 MCHBAR32(0x4004 + 0x400 * channel) &= ~0x20000000; // OK
3208 }
3209}
3210
/* Convert a duration in nanoseconds into 500 ns units, rounding up. */
static int encode_5d10(int ns)
{
	int biased = ns + 499;
	return biased / 500;
}
3215
3216/* FIXME: values in this function should be hardware revision-dependent. */
/*
 * Program the memory controller's final, post-training register values
 * and derive the 0x5d10 timing encoding from hardware counters.
 * NOTE(review): most offsets/values here are undocumented and follow
 * reference code; the "// OK" markers are from the original author.
 */
void final_registers(ramctr_timing * ctrl)
{
	int channel;
	int t1_cycles = 0, t1_ns = 0, t2_ns;
	int t3_ns;
	u32 r32;

	write32(DEFAULT_MCHBAR + 0x4cd4, 0x00000046);

	/* Set bit 12, clear bit 13 in both channels' 0x400c/0x440c. */
	write32(DEFAULT_MCHBAR + 0x400c, (read32(DEFAULT_MCHBAR + 0x400c) & 0xFFFFCFFF) | 0x1000); // OK
	write32(DEFAULT_MCHBAR + 0x440c, (read32(DEFAULT_MCHBAR + 0x440c) & 0xFFFFCFFF) | 0x1000); // OK
	write32(DEFAULT_MCHBAR + 0x4cb0, 0x00000740);
	write32(DEFAULT_MCHBAR + 0x4380, 0x00000aaa); // OK
	write32(DEFAULT_MCHBAR + 0x4780, 0x00000aaa); // OK
	write32(DEFAULT_MCHBAR + 0x4f88, 0x5f7003ff); // OK
	write32(DEFAULT_MCHBAR + 0x5064, 0x00073000 | ctrl->reg_5064b0); // OK

	/* Per-channel 0x4384 value depends on the rank population. */
	FOR_ALL_CHANNELS {
		switch (ctrl->rankmap[channel]) {
		/* Unpopulated channel. */
		case 0:
			write32(DEFAULT_MCHBAR + 0x4384 + channel * 0x400, 0);
			break;
		/* Only single-ranked dimms. */
		case 1:
		case 4:
		case 5:
			write32(DEFAULT_MCHBAR + 0x4384 + channel * 0x400, 0x373131);
			break;
		/* Dual-ranked dimms present. */
		default:
			write32(DEFAULT_MCHBAR + 0x4384 + channel * 0x400, 0x9b6ea1);
			break;
		}
	}

	write32 (DEFAULT_MCHBAR + 0x5880, 0xca9171e5);
	write32 (DEFAULT_MCHBAR + 0x5888,
		 (read32 (DEFAULT_MCHBAR + 0x5888) & ~0xffffff) | 0xe4d5d0);
	write32 (DEFAULT_MCHBAR + 0x58a8, read32 (DEFAULT_MCHBAR + 0x58a8) & ~0x1f);
	write32 (DEFAULT_MCHBAR + 0x4294,
		 (read32 (DEFAULT_MCHBAR + 0x4294) & ~0x30000)
		 | (1 << 16));
	write32 (DEFAULT_MCHBAR + 0x4694,
		 (read32 (DEFAULT_MCHBAR + 0x4694) & ~0x30000)
		 | (1 << 16));

	MCHBAR32(0x5030) |= 1; // OK
	MCHBAR32(0x5030) |= 0x80; // OK
	MCHBAR32(0x5f18) = 0xfa; // OK

	/* Find a populated channel. */
	FOR_ALL_POPULATED_CHANNELS
		break;
	/* 'channel' now indexes the first populated channel (if any);
	   the counter reads below use that channel's registers. */

	/* Accumulate a cycle count from several hardware fields, then
	   convert to ns. NOTE(review): the /256 scaling and the +544/+500
	   constants are from reference code — units not independently
	   verified. */
	t1_cycles = ((read32(DEFAULT_MCHBAR + 0x4290 + channel * 0x400) >> 8) & 0xff);
	r32 = read32(DEFAULT_MCHBAR + 0x5064);
	if (r32 & 0x20000)
		t1_cycles += (r32 & 0xfff);
	t1_cycles += (read32(DEFAULT_MCHBAR + channel * 0x400 + 0x42a4) & 0xfff);
	t1_ns = t1_cycles * ctrl->tCK / 256 + 544;
	if (!(r32 & 0x20000))
		t1_ns += 500;

	/* t2 and t3 are read back in 10 ns units from 0x5f10/0x5f20/0x5f18;
	   bit 3 of 0x5f00 selects whether t3 is measured or defaulted. */
	t2_ns = 10 * ((read32(DEFAULT_MCHBAR + 0x5f10) >> 8) & 0xfff);
	if ( read32(DEFAULT_MCHBAR + 0x5f00) & 8 )
	{
		t3_ns = 10 * ((read32(DEFAULT_MCHBAR + 0x5f20) >> 8) & 0xfff);
		t3_ns += 10 * (read32(DEFAULT_MCHBAR + 0x5f18) & 0xff);
	}
	else
	{
		t3_ns = 500;
	}
	printk(BIOS_DEBUG, "t123: %d, %d, %d\n",
	       t1_ns, t2_ns, t3_ns);
	/* Pack cumulative 500 ns-unit encodings of t1, t1+t2 and t1+t2+t3
	   into bytes 1, 2 and 3 of 0x5d10, preserving the top two bits of
	   each byte and setting bits 2-3 of byte 0. */
	write32 (DEFAULT_MCHBAR + 0x5d10,
		 ((encode_5d10(t1_ns) + encode_5d10(t2_ns)) << 16)
		 | (encode_5d10(t1_ns) << 8)
		 | ((encode_5d10(t3_ns) + encode_5d10(t2_ns) + encode_5d10(t1_ns)) << 24)
		 | (read32(DEFAULT_MCHBAR + 0x5d10) & 0xC0C0C0C0)
		 | 0xc);
}
3300
/*
 * Reprogram the memory controller from a previously trained ramctr_timing
 * (e.g. on a resume path), bringing DRAM back up without re-training.
 * The register sequence is order-sensitive; do not reorder.
 */
void restore_timings(ramctr_timing * ctrl)
{
	int channel, slotrank, lane;

	/* Restore the per-channel timing register (tRRD/tRTP/tCKE/tWTR/
	   tFAW/tWR plus the command stretch field in bits 30-31). */
	FOR_ALL_POPULATED_CHANNELS
	    MCHBAR32(0x4004 + 0x400 * channel) =
	    ctrl->tRRD
	    | (ctrl->tRTP << 4)
	    | (ctrl->tCKE << 8)
	    | (ctrl->tWTR << 12)
	    | (ctrl->tFAW << 16)
	    | (ctrl->tWR << 24)
	    | (ctrl->cmd_stretch[channel] << 30);

	udelay(1);

	/* Wait for the command queues to go idle. */
	FOR_ALL_POPULATED_CHANNELS {
		wait_428c(channel);
	}

	/* Clear the per-lane 0x4080 registers on every channel. */
	FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS FOR_ALL_LANES {
		write32(DEFAULT_MCHBAR + 0x4080 + 0x400 * channel
			+ 4 * lane, 0);
	}

	/* Set bit 27 of each populated channel's 0x4008 register. */
	FOR_ALL_POPULATED_CHANNELS
	    write32(DEFAULT_MCHBAR + 0x4008 + 0x400 * channel,
		    read32(DEFAULT_MCHBAR + 0x4008 +
			   0x400 * channel) | 0x8000000);

	/* Set bit 21 of each populated channel's 0x4020 register. */
	FOR_ALL_POPULATED_CHANNELS {
		udelay (1);
		write32(DEFAULT_MCHBAR + 0x4020 + 0x400 * channel,
			read32(DEFAULT_MCHBAR + 0x4020 +
			       0x400 * channel) | 0x200000);
	}

	printram("CPE\n");

	write32(DEFAULT_MCHBAR + 0x3400, 0);
	write32(DEFAULT_MCHBAR + 0x4eb0, 0);

	printram("CP5b\n");

	/* Re-apply the saved per-lane/per-rank timings. */
	FOR_ALL_POPULATED_CHANNELS {
		program_timings(ctrl, channel);
	}

	u32 reg, addr;

	/* Spin until bit 16 of 0x5084 is set, then until 0x428c reports
	   bit 2 or bit 4. NOTE(review): no timeout — hangs if the
	   hardware never signals. */
	while (!(MCHBAR32(0x5084) & 0x10000));
	do {
		reg = MCHBAR32(0x428c);
	} while ((reg & 0x14) == 0);

	// Set state of memory controller
	MCHBAR32(0x5030) = 0x116;
	MCHBAR32(0x4ea0) = 0;

	// Wait 500us
	udelay(500);

	FOR_ALL_CHANNELS {
		// Set valid rank CKE
		reg = 0;
		reg = (reg & ~0xf) | ctrl->rankmap[channel];
		addr = 0x400 * channel + 0x42a0;
		MCHBAR32(addr) = reg;

		// Wait 10ns for ranks to settle
		//udelay(0.01);

		/* Mirror the rank map into bits 4-7 as well. */
		reg = (reg & ~0xf0) | (ctrl->rankmap[channel] << 4);
		MCHBAR32(addr) = reg;

		// Write reset using a NOP
		write_reset(ctrl);
	}

	/* mrs commands. */
	dram_mrscommands(ctrl);

	printram("CP5c\n");

	write32(DEFAULT_MCHBAR + 0x3000, 0);

	/* Clear bits 24-29 of each channel's 0xe3c register. */
	FOR_ALL_CHANNELS {
		write32(DEFAULT_MCHBAR + (channel * 0x100) + 0xe3c,
			0 | (read32(DEFAULT_MCHBAR + (channel * 0x100) + 0xe3c) &
			     ~0x3f000000));
		udelay(2);
	}

	write32(DEFAULT_MCHBAR + 0x4ea8, 0);
}