blob: 64131d89b80f2213ae1b95edaf46b4b5b08a8819 [file] [log] [blame]
/*
 * This file is part of the coreboot project.
 *
 * Copyright (C) 2017-2018 Arthur Heymans <arthur@aheymans.xyz>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of
 * the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 */
16
17#include <arch/io.h>
18#include <console/console.h>
Arthur Heymansb5170c32017-12-25 20:13:28 +010019#include <delay.h>
Arthur Heymans95c48cb2017-11-04 08:07:06 +010020#include <stdint.h>
21#include <string.h>
22#include <types.h>
23#include "x4x.h"
24#include "iomap.h"
25
26static void print_dll_setting(const struct dll_setting *dll_setting,
27 u8 default_verbose)
28{
29 u8 debug_level = default_verbose ? BIOS_DEBUG : RAM_DEBUG;
30
31 printk(debug_level, "%d.%d.%d.%d:%d.%d\n", dll_setting->coarse,
32 dll_setting->clk_delay, dll_setting->tap,
33 dll_setting->pi, dll_setting->db_en,
34 dll_setting->db_sel);
35}
36
37struct db_limit {
38 u8 tap0;
39 u8 tap1;
40 u8 pi0;
41 u8 pi1;
42};
43
44static void set_db(const struct sysinfo *s, struct dll_setting *dq_dqs_setting)
45{
Elyes HAOUAS88607a42018-10-05 10:36:45 +020046 struct db_limit limit;
Arthur Heymans95c48cb2017-11-04 08:07:06 +010047
48 switch (s->selected_timings.mem_clk) {
49 default:
50 case MEM_CLOCK_800MHz:
51 limit.tap0 = 3;
52 limit.tap1 = 10;
53 limit.pi0 = 2;
54 limit.pi1 = 3;
55 break;
56 case MEM_CLOCK_1066MHz:
57 limit.tap0 = 2;
58 limit.tap1 = 8;
59 limit.pi0 = 6;
60 limit.pi1 = 7;
61 break;
62 case MEM_CLOCK_1333MHz:
63 limit.tap0 = 3;
64 limit.tap1 = 11;
65 /* TO CHECK: Might be reverse since this makes little sense */
66 limit.pi0 = 6;
67 limit.pi1 = 4;
68 break;
69 }
70
71 if (dq_dqs_setting->tap < limit.tap0) {
72 dq_dqs_setting->db_en = 1;
73 dq_dqs_setting->db_sel = 1;
74 } else if ((dq_dqs_setting->tap == limit.tap0)
75 && (dq_dqs_setting->pi < limit.pi0)) {
76 dq_dqs_setting->db_en = 1;
77 dq_dqs_setting->db_sel = 1;
78 } else if (dq_dqs_setting->tap < limit.tap1) {
79 dq_dqs_setting->db_en = 0;
80 dq_dqs_setting->db_sel = 0;
81 } else if ((dq_dqs_setting->tap == limit.tap1)
82 && (dq_dqs_setting->pi < limit.pi1)) {
83 dq_dqs_setting->db_en = 0;
84 dq_dqs_setting->db_sel = 0;
85 } else {
86 dq_dqs_setting->db_en = 1;
87 dq_dqs_setting->db_sel = 0;
88 }
89}
90
91const static u8 max_tap[3] = {12, 10, 13};
92
93static int increment_dq_dqs(const struct sysinfo *s,
94 struct dll_setting *dq_dqs_setting)
95{
96 u8 max_tap_val = max_tap[s->selected_timings.mem_clk
97 - MEM_CLOCK_800MHz];
98
99 if (dq_dqs_setting->pi < 6) {
100 dq_dqs_setting->pi += 1;
101 } else if (dq_dqs_setting->tap < max_tap_val) {
102 dq_dqs_setting->pi = 0;
103 dq_dqs_setting->tap += 1;
104 } else if (dq_dqs_setting->clk_delay < 2) {
105 dq_dqs_setting->pi = 0;
106 dq_dqs_setting->tap = 0;
107 dq_dqs_setting->clk_delay += 1;
108 } else if (dq_dqs_setting->coarse < 1) {
109 dq_dqs_setting->pi = 0;
110 dq_dqs_setting->tap = 0;
111 dq_dqs_setting->clk_delay -= 1;
112 dq_dqs_setting->coarse += 1;
113 } else {
114 return CB_ERR;
115 }
116 set_db(s, dq_dqs_setting);
117 return CB_SUCCESS;
118}
119
Arthur Heymansb5170c32017-12-25 20:13:28 +0100120static int decrement_dq_dqs(const struct sysinfo *s,
121 struct dll_setting *dq_dqs_setting)
122{
123 u8 max_tap_val = max_tap[s->selected_timings.mem_clk
124 - MEM_CLOCK_800MHz];
125
126 if (dq_dqs_setting->pi > 0) {
127 dq_dqs_setting->pi -= 1;
128 } else if (dq_dqs_setting->tap > 0) {
129 dq_dqs_setting->pi = 6;
130 dq_dqs_setting->tap -= 1;
131 } else if (dq_dqs_setting->clk_delay > 0) {
132 dq_dqs_setting->pi = 6;
133 dq_dqs_setting->tap = max_tap_val;
134 dq_dqs_setting->clk_delay -= 1;
135 } else if (dq_dqs_setting->coarse > 0) {
136 dq_dqs_setting->pi = 6;
137 dq_dqs_setting->tap = max_tap_val;
138 dq_dqs_setting->clk_delay += 1;
139 dq_dqs_setting->coarse -= 1;
140 } else {
141 return CB_ERR;
142 }
143 set_db(s, dq_dqs_setting);
144 return CB_SUCCESS;
145}
146
147
Arthur Heymans95c48cb2017-11-04 08:07:06 +0100148#define WT_PATTERN_SIZE 80
149
150static const u32 write_training_schedule[WT_PATTERN_SIZE] = {
151 0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
152 0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
153 0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
154 0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
155 0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
156 0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
157 0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
158 0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
159 0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
160 0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
161 0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
162 0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
163 0x03030303, 0x04040404, 0x09090909, 0x10101010,
164 0x21212121, 0x40404040, 0x81818181, 0x00000000,
165 0x03030303, 0x04040404, 0x09090909, 0x10101010,
166 0x21212121, 0x40404040, 0x81818181, 0x00000000,
167 0xfdfdfdfd, 0xfafafafa, 0xf7f7f7f7, 0xeeeeeeee,
168 0xdfdfdfdf, 0xbebebebe, 0x7f7f7f7f, 0xfefefefe,
169 0xfdfdfdfd, 0xfafafafa, 0xf7f7f7f7, 0xeeeeeeee,
170 0xdfdfdfdf, 0xbebebebe, 0x7f7f7f7f, 0xfefefefe,
171};
172
/*
 * Outcome a limit search is looking for. The values are compared directly
 * against a per-lane error bit (0 = lane matched, 1 = lane mismatched), so
 * the numeric values matter.
 */
enum training_modes {
	SUCCEEDING = 0,	/* lane error bit clear */
	FAILING = 1	/* lane error bit set */
};
177
178static u8 test_dq_aligned(const struct sysinfo *s,
179 const u8 channel)
180{
181 u32 address;
182 int rank, lane;
183 u8 count, count1;
184 u8 data[8];
185 u8 lane_error = 0;
186
187 FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, channel, rank) {
188 address = test_address(channel, rank);
189 for (count = 0; count < WT_PATTERN_SIZE; count++) {
190 for (count1 = 0; count1 < WT_PATTERN_SIZE; count1++) {
191 if ((count1 % 16) == 0)
192 MCHBAR32(0xf90) = 1;
193 const u32 pattern =
194 write_training_schedule[count1];
195 write32((u32 *)address + 8 * count1, pattern);
196 write32((u32 *)address + 8 * count1 + 4,
197 pattern);
198 }
199
200 const u32 good = write_training_schedule[count];
201 write32(&data[0], read32((u32 *)address + 8 * count));
202 write32(&data[4],
203 read32((u32 *)address + 8 * count + 4));
204 FOR_EACH_BYTELANE(lane) {
205 u8 expected = (good >> ((lane % 4) * 8)) & 0xff;
206 if (data[lane] != expected)
207 lane_error |= 1 << lane;
208 }
209 }
210 }
211 return lane_error;
212}
213
214#define CONSISTENCY 10
215
216/*
217 * This function finds either failing or succeeding writes by increasing DQ.
218 * When it has found a failing or succeeding setting it will increase DQ
219 * another 10 times to make sure the result is consistent.
220 * This is probably done because lanes cannot be trained independent from
221 * each other.
222 */
223static int find_dq_limit(const struct sysinfo *s, const u8 channel,
224 struct dll_setting dq_setting[TOTAL_BYTELANES],
225 u8 dq_lim[TOTAL_BYTELANES],
226 const enum training_modes expected_result)
227{
228 int status = CB_SUCCESS;
229 int lane;
230 u8 test_result;
231 u8 pass_count[TOTAL_BYTELANES];
232 u8 succes_mask = 0xff;
233
234 printk(RAM_DEBUG, "Looking for %s writes on channel %d\n",
235 expected_result == FAILING ? "failing" : "succeeding", channel);
236 memset(pass_count, 0, sizeof(pass_count));
237
238 while(succes_mask) {
239 test_result = test_dq_aligned(s, channel);
240 FOR_EACH_BYTELANE(lane) {
241 if (((test_result >> lane) & 1) != expected_result) {
242 status = increment_dq_dqs(s, &dq_setting[lane]);
243 dqset(channel, lane, &dq_setting[lane]);
244 dq_lim[lane]++;
245 } else if (pass_count[lane] < CONSISTENCY) {
246 status = increment_dq_dqs(s, &dq_setting[lane]);
247 dqset(channel, lane, &dq_setting[lane]);
248 dq_lim[lane]++;
249 pass_count[lane]++;
250 } else if (pass_count[lane] == CONSISTENCY) {
251 succes_mask &= ~(1 << lane);
252 }
253 if (status == CB_ERR) {
254 printk(BIOS_CRIT, "Could not find a case of %s "
255 "writes on CH%d, lane %d\n",
256 expected_result == FAILING ? "failing"
257 : "succeeding", channel, lane);
258 return CB_ERR;
259 }
260 }
261 }
262 return CB_SUCCESS;
263}
264
265/*
266 * This attempts to find the ideal delay for DQ to account for the skew between
267 * the DQ and the DQS signal.
268 * The training works this way:
269 * - start from the DQS delay values (DQ is always later than DQS)
270 * - increment the DQ delay until a succeeding write is found on all bytelayes,
271 * on all ranks on a channel and save these values
272 * - again increment the DQ delay until write start to fail on all bytelanes and
273 * save that value
274 * - use the mean between the saved succeeding and failing value
275 * - note: bytelanes cannot be trained independently, so the delays need to be
276 * adjusted and tested for all of them at the same time
277 */
278int do_write_training(struct sysinfo *s)
279{
280 int i;
281 u8 channel, lane;
282 u8 dq_lower[TOTAL_BYTELANES];
283 u8 dq_upper[TOTAL_BYTELANES];
284 struct dll_setting dq_setting[TOTAL_BYTELANES];
285 u8 dq_average;
286 u32 dq_absolute;
287
288 printk(BIOS_DEBUG, "Starting DQ write training\n");
289
290 FOR_EACH_POPULATED_CHANNEL(s->dimms, channel) {
291 printk(BIOS_DEBUG, "Doing DQ write training on CH%d\n", channel);
292
293 dq_average = 0;
294 dq_absolute = 0;
295 /* Start all lanes at DQS values */
296 FOR_EACH_BYTELANE(lane) {
297 dqset(channel, lane, &s->dqs_settings[channel][lane]);
298 s->dq_settings[channel][lane] = s->dqs_settings[channel][lane];
299 }
300 memset(dq_lower, 0, sizeof(dq_lower));
301 /* Start from DQS settings */
302 memcpy(dq_setting, s->dqs_settings[channel], sizeof(dq_setting));
303
304 if (find_dq_limit(s, channel, dq_setting, dq_lower,
305 SUCCEEDING)) {
306 printk(BIOS_CRIT,
307 "Could not find working lower limit DQ setting\n");
308 return CB_ERR;
309 }
310
311 memcpy(dq_upper, dq_lower, sizeof(dq_lower));
312
313 if (find_dq_limit(s, channel, dq_setting, dq_upper,
314 FAILING)) {
315 printk(BIOS_WARNING,
316 "Could not find failing upper limit DQ setting\n");
317 return CB_ERR;
318 }
319
320 FOR_EACH_BYTELANE(lane) {
321 dq_lower[lane] -= CONSISTENCY - 1;
322 dq_upper[lane] -= CONSISTENCY - 1;
323 u8 dq_center = (dq_upper[lane] + dq_lower[lane]) / 2;
324
325 printk(RAM_DEBUG, "Centered value for DQ DLL:"
326 " ch%d, lane %d, #steps = %d\n",
327 channel, lane, dq_center);
328 for (i = 0; i < dq_center; i++) {
329 /* Should never happen */
330 if (increment_dq_dqs(s, &s->dq_settings[channel][lane])
331 == CB_ERR)
332 printk(BIOS_ERR,
333 "Huh? write training overflowed!!\n");
334 }
335 }
336
337 /* Reset DQ DLL settings and increment with centered value*/
338 printk(BIOS_DEBUG, "Final DQ timings on CH%d\n", channel);
339 FOR_EACH_BYTELANE(lane) {
340 printk(BIOS_DEBUG, "\tlane%d: ", lane);
341 print_dll_setting(&s->dq_settings[channel][lane], 1);
342 dqset(channel, lane, &s->dq_settings[channel][lane]);
343 }
344 }
345 printk(BIOS_DEBUG, "Done DQ write training\n");
346 return CB_SUCCESS;
347}
348
349#define RT_PATTERN_SIZE 40
350
351static const u32 read_training_schedule[RT_PATTERN_SIZE] = {
352 0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
353 0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
354 0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
355 0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
356 0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
357 0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
358 0x03030303, 0x04040404, 0x09090909, 0x10101010,
359 0x21212121, 0x40404040, 0x81818181, 0x00000000,
360 0xfdfdfdfd, 0xfafafafa, 0xf7f7f7f7, 0xeeeeeeee,
361 0xdfdfdfdf, 0xbebebebe, 0x7f7f7f7f, 0xfefefefe
362};
363
364static int rt_increment_dqs(struct rt_dqs_setting *setting)
365{
366 if (setting->pi < 7) {
367 setting->pi++;
368 } else if (setting->tap < 14) {
369 setting->pi = 0;
370 setting->tap++;
371 } else {
372 return CB_ERR;
373 }
374 return CB_SUCCESS;
375}
376
377static u8 test_dqs_aligned(const struct sysinfo *s, const u8 channel)
378{
379 int i, rank, lane;
380 volatile u8 data[8];
381 u32 address;
382 u8 bytelane_error = 0;
383
384 FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, channel, rank) {
385 address = test_address(channel, rank);
386 for (i = 0; i < RT_PATTERN_SIZE; i++) {
387 const u32 good = read_training_schedule[i];
388 write32(&data[0], read32((u32 *)address + i * 8));
389 write32(&data[4], read32((u32 *)address + i * 8 + 4));
390
391 FOR_EACH_BYTELANE(lane) {
392 if (data[lane] != (good & 0xff))
393 bytelane_error |= 1 << lane;
394 }
395 }
396 }
397 return bytelane_error;
398}
399
400static int rt_find_dqs_limit(struct sysinfo *s, u8 channel,
401 struct rt_dqs_setting dqs_setting[TOTAL_BYTELANES],
402 u8 dqs_lim[TOTAL_BYTELANES],
403 const enum training_modes expected_result)
404{
405 int lane;
406 u8 test_result;
407 int status = CB_SUCCESS;
408
409 FOR_EACH_BYTELANE(lane)
410 rt_set_dqs(channel, lane, 0, &dqs_setting[lane]);
411
412 while(status == CB_SUCCESS) {
413 test_result = test_dqs_aligned(s, channel);
414 if (test_result == (expected_result == SUCCEEDING ? 0 : 0xff))
415 return CB_SUCCESS;
416 FOR_EACH_BYTELANE(lane) {
417 if (((test_result >> lane) & 1) != expected_result) {
418 status = rt_increment_dqs(&dqs_setting[lane]);
419 dqs_lim[lane]++;
420 rt_set_dqs(channel, lane, 0, &dqs_setting[lane]);
421 }
422 }
423 }
424
425 if (expected_result == SUCCEEDING) {
426 printk(BIOS_CRIT,
427 "Could not find RT DQS setting\n");
428 return CB_ERR;
429 } else {
430 printk(RAM_DEBUG,
431 "Read succeeded over all DQS"
432 " settings, continuing\n");
433 return CB_SUCCESS;
434 }
435}
436
437#define RT_LOOPS 3
438
439/*
440 * This attempts to find the ideal delay for DQS on reads (rx).
441 * The training works this way:
442 * - start from the lowest possible delay (0) on all bytelanes
443 * - increment the DQS rx delays until a succeeding write is found on all
444 * bytelayes, on all ranks on a channel and save these values
445 * - again increment the DQS rx delay until write start to fail on all bytelanes
446 * and save that value
447 * - use the mean between the saved succeeding and failing value
448 * - note0: bytelanes cannot be trained independently, so the delays need to be
449 * adjusted and tested for all of them at the same time
450 * - note1: this memory controller appears to have per rank registers for these
451 * DQS rx delays, but only the one rank 0 seems to be used for all of them
452 */
453int do_read_training(struct sysinfo *s)
454{
455 int loop, channel, i, lane, rank;
456 u32 address, content;
457 u8 dqs_lower[TOTAL_BYTELANES];
458 u8 dqs_upper[TOTAL_BYTELANES];
459 struct rt_dqs_setting dqs_setting[TOTAL_BYTELANES];
460 u16 saved_dqs_center[TOTAL_CHANNELS][TOTAL_BYTELANES];
461
462 memset(saved_dqs_center, 0 , sizeof(saved_dqs_center));
463
464 printk(BIOS_DEBUG, "Starting DQS read training\n");
465
466 for (loop = 0; loop < RT_LOOPS; loop++) {
467 FOR_EACH_POPULATED_CHANNEL(s->dimms, channel) {
468 printk(RAM_DEBUG, "Doing DQS read training on CH%d\n",
469 channel);
470
471 /* Write pattern to strobe address */
472 FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, channel, rank) {
473 address = test_address(channel, rank);
474 for (i = 0; i < RT_PATTERN_SIZE; i++) {
475 content = read_training_schedule[i];
476 write32((u32 *)address + 8 * i, content);
477 write32((u32 *)address + 8 * i + 4, content);
478 }
479 }
480
481 memset(dqs_lower, 0, sizeof(dqs_lower));
482 memset(&dqs_setting, 0, sizeof(dqs_setting));
483 if (rt_find_dqs_limit(s, channel, dqs_setting, dqs_lower,
484 SUCCEEDING)) {
485 printk(BIOS_CRIT,
486 "Could not find working lower limit DQS setting\n");
487 return CB_ERR;
488 }
489
490 FOR_EACH_BYTELANE(lane)
491 dqs_upper[lane] = dqs_lower[lane];
492
493 if (rt_find_dqs_limit(s, channel, dqs_setting, dqs_upper,
494 FAILING)) {
495 printk(BIOS_CRIT,
496 "Could not find failing upper limit DQ setting\n");
497 return CB_ERR;
498 }
499
500 printk(RAM_DEBUG, "Centered values, loop %d:\n", loop);
501 FOR_EACH_BYTELANE(lane) {
502 u8 center = (dqs_lower[lane] + dqs_upper[lane]) / 2;
503 printk(RAM_DEBUG, "\t lane%d: #%d\n", lane, center);
504 saved_dqs_center[channel][lane] += center;
505 }
506 } /* END FOR_EACH_POPULATED_CHANNEL */
507 } /* end RT_LOOPS */
508
509 memset(s->rt_dqs, 0, sizeof(s->rt_dqs));
510
511 FOR_EACH_POPULATED_CHANNEL(s->dimms, channel) {
512 printk(BIOS_DEBUG, "Final timings on CH%d:\n", channel);
513 FOR_EACH_BYTELANE(lane) {
514 saved_dqs_center[channel][lane] /= RT_LOOPS;
515 while (saved_dqs_center[channel][lane]--) {
516 if(rt_increment_dqs(&s->rt_dqs[channel][lane])
517 == CB_ERR)
518 /* Should never happen */
519 printk(BIOS_ERR,
520 "Huh? read training overflowed!!\n");
521 }
522 FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, channel, rank)
523 rt_set_dqs(channel, lane, rank,
524 &s->rt_dqs[channel][lane]);
525 printk(BIOS_DEBUG, "\tlane%d: %d.%d\n",
526 lane, s->rt_dqs[channel][lane].tap,
527 s->rt_dqs[channel][lane].pi);
528 }
529 }
530 printk(BIOS_DEBUG, "Done DQS read training\n");
531 return CB_SUCCESS;
532}
Arthur Heymansb5170c32017-12-25 20:13:28 +0100533
534/* Enable write leveling on selected rank and disable output on other ranks */
535static void set_rank_write_level(struct sysinfo *s, u8 channel, u8 config,
536 u8 config_rank, u8 target_rank, int wl_enable)
537{
538 u32 emrs1;
539
540 /* Is shifted by bits 2 later so u8 can be used to reduce size */
541 const static u8 emrs1_lut[8][4][4]={ /* [Config][Leveling Rank][Rank] */
542 { /* Config 0: 2R2R */
543 {0x11, 0x00, 0x91, 0x00},
544 {0x00, 0x11, 0x91, 0x00},
545 {0x91, 0x00, 0x11, 0x00},
546 {0x91, 0x00, 0x00, 0x11}
547 },
548 { // Config 1: 2R1R
549 {0x11, 0x00, 0x91, 0x00},
550 {0x00, 0x11, 0x91, 0x00},
551 {0x91, 0x00, 0x11, 0x00},
552 {0x00, 0x00, 0x00, 0x00}
553 },
554 { // Config 2: 1R2R
555 {0x11, 0x00, 0x91, 0x00},
556 {0x00, 0x00, 0x00, 0x00},
557 {0x91, 0x00, 0x11, 0x00},
558 {0x91, 0x00, 0x00, 0x11}
559 },
560 { // Config 3: 1R1R
561 {0x11, 0x00, 0x91, 0x00},
562 {0x00, 0x00, 0x00, 0x00},
563 {0x91, 0x00, 0x11, 0x00},
564 {0x00, 0x00, 0x00, 0x00}
565 },
566 { // Config 4: 2R0R
567 {0x11, 0x00, 0x00, 0x00},
568 {0x00, 0x11, 0x00, 0x00},
569 {0x00, 0x00, 0x00, 0x00},
570 {0x00, 0x00, 0x00, 0x00}
571 },
572 { // Config 5: 0R2R
573 {0x00, 0x00, 0x00, 0x00},
574 {0x00, 0x00, 0x00, 0x00},
575 {0x00, 0x00, 0x11, 0x00},
576 {0x00, 0x00, 0x00, 0x11}
577 },
578 { // Config 6: 1R0R
579 {0x11, 0x00, 0x00, 0x00},
580 {0x00, 0x00, 0x00, 0x00},
581 {0x00, 0x00, 0x00, 0x00},
582 {0x00, 0x00, 0x00, 0x00}
583 },
584 { // Config 7: 0R1R
585 {0x00, 0x00, 0x00, 0x00},
586 {0x00, 0x00, 0x00, 0x00},
587 {0x00, 0x00, 0x11, 0x00},
588 {0x00, 0x00, 0x00, 0x00}
589 }
590 };
591
592 if (wl_enable) {
593 printk(RAM_DEBUG, "Entering WL mode\n");
594 printk(RAM_DEBUG, "Using WL ODT values\n");
595 emrs1 = emrs1_lut[config][target_rank][config_rank];
596 } else {
597 printk(RAM_DEBUG, "Exiting WL mode\n");
598 emrs1 = ddr3_emrs1_rtt_nom_config[s->dimm_config[channel]][config_rank];
599 }
600 printk(RAM_DEBUG, "Setting ODT for rank%d to ", config_rank);
601 switch (emrs1) {
602 case 0:
603 printk(RAM_DEBUG, "High-Z\n");
604 break;
605 case 0x11:
606 printk(RAM_DEBUG, "40 Ohm\n");
607 break;
608 case 0x81:
609 printk(RAM_DEBUG, "30 Ohm\n");
610 break;
611 case 0x80:
612 printk(RAM_DEBUG, "20 Ohm\n");
613 break;
614 case 0x10:
615 printk(RAM_DEBUG, "120 Ohm\n");
616 break;
617 case 0x01:
618 printk(RAM_DEBUG, "60 Ohm\n");
619 break;
620 default:
621 printk(BIOS_WARNING, "ODT value Undefined!\n");
622 break;
623 }
624
625 emrs1 <<= 2;
626 /* Set output drive strength to 34 Ohm during write levelling */
627 emrs1 |= (1 << 1);
628
629 if (wl_enable && (target_rank != config_rank)) {
630 printk(RAM_DEBUG, "Disabling output for rank%d\n", config_rank);
631 emrs1 |= (1 << 12);
632 }
633 if (wl_enable && (target_rank == config_rank)) {
634 printk(RAM_DEBUG, "Enabling WL for rank%d\n", config_rank);
635 emrs1 |= (1 << 7);
636 }
637 send_jedec_cmd(s, config_rank, channel, EMRS1_CMD, emrs1);
638}
639
640#define N_SAMPLES 5
641
642static void sample_dq(const struct sysinfo *s, u8 channel, u8 rank,
643 u8 high_found[8]) {
644 u32 address = test_address(channel, rank);
645 int samples, lane;
646
647 memset(high_found, 0, TOTAL_BYTELANES * sizeof(high_found[0]));
648 for (samples = 0; samples < N_SAMPLES; samples++) {
649 write32((u32 *)address, 0x12341234);
650 write32((u32 *)address + 4, 0x12341234);
651 udelay(5);
652 FOR_EACH_BYTELANE(lane) {
653 u8 dq_high = (MCHBAR8(0x561 + 0x400 * channel
654 + (lane * 4)) >> 7) & 1;
655 high_found[lane] += dq_high;
656 }
657 }
658}
659
660static enum cb_err increment_to_dqs_edge(struct sysinfo *s, u8 channel, u8 rank)
661{
662 int lane;
663 u8 saved_24d;
664 struct dll_setting dqs_setting[TOTAL_BYTELANES];
665 u8 bytelane_ok = 0;
666 u8 dq_sample[TOTAL_BYTELANES];
667
668 memcpy(dqs_setting, s->dqs_settings[channel], sizeof(dqs_setting));
669 FOR_EACH_BYTELANE(lane)
670 dqsset(channel, lane, &dqs_setting[lane]);
671
672 saved_24d = MCHBAR8(0x24d + 0x400 * channel);
673
674 /* Loop 0: Find DQ sample low, by decreasing */
675 while (bytelane_ok != 0xff) {
676 sample_dq(s, channel, rank, dq_sample);
677 FOR_EACH_BYTELANE(lane) {
678 if (bytelane_ok & (1 << lane))
679 continue;
680
681 printk(RAM_SPEW, "%d, %d, %02d, %d,"
682 " lane%d sample: %d\n",
683 dqs_setting[lane].coarse,
684 dqs_setting[lane].clk_delay,
685 dqs_setting[lane].tap,
686 dqs_setting[lane].pi,
687 lane,
688 dq_sample[lane]);
689
690 if (dq_sample[lane] > 0) {
691 if (decrement_dq_dqs(s, &dqs_setting[lane])) {
692 printk(BIOS_EMERG,
693 "DQS setting channel%d, "
694 "lane %d reached a minimum!\n",
695 channel, lane);
696 return CB_ERR;
697 }
698 } else {
699 bytelane_ok |= (1 << lane);
700 }
701 dqsset(channel, lane, &dqs_setting[lane]);
702 }
703 }
704
705 printk(RAM_DEBUG, "DQS settings on PASS #0:\n");
706 FOR_EACH_BYTELANE(lane) {
707 printk(RAM_DEBUG, "lane %d: ", lane);
708 print_dll_setting(&dqs_setting[lane], 0);
709 }
710
711 /* Loop 1: Find DQ sample high, by increasing */
712 bytelane_ok = 0;
713 while (bytelane_ok != 0xff) {
714 sample_dq(s, channel, rank, dq_sample);
715 FOR_EACH_BYTELANE(lane) {
716 if (bytelane_ok & (1 << lane))
717 continue;
718
719 printk(RAM_SPEW, "%d, %d, %02d, %d, lane%d sample: %d\n",
720 dqs_setting[lane].coarse,
721 dqs_setting[lane].clk_delay,
722 dqs_setting[lane].tap,
723 dqs_setting[lane].pi,
724 lane,
725 dq_sample[lane]);
726
727 if (dq_sample[lane] == N_SAMPLES) {
728 bytelane_ok |= (1 << lane);
729 } else {
730 if (increment_dq_dqs(s, &dqs_setting[lane])) {
731 printk(BIOS_EMERG,
732 "DQS setting channel%d, "
733 "lane %d reached a maximum!\n",
734 channel, lane);
735 return CB_ERR;
736 }
737 }
738 dqsset(channel, lane, &dqs_setting[lane]);
739 }
740 }
741
742 printk(RAM_DEBUG, "DQS settings on PASS #1:\n");
743 FOR_EACH_BYTELANE(lane) {
744 printk(RAM_DEBUG, "lane %d: ", lane);
745 print_dll_setting(&dqs_setting[lane], 0);
746 }
747
748 printk(BIOS_DEBUG, "final WL DQS settings on CH%d\n", channel);
749 FOR_EACH_BYTELANE(lane) {
750 printk(BIOS_DEBUG, "\tlane%d: ", lane);
751 print_dll_setting(&dqs_setting[lane], 1);
752 s->dqs_settings[channel][lane] = dqs_setting[lane];
753 }
754
755 MCHBAR8(0x24d + 0x400 * channel) = saved_24d;
756 return CB_SUCCESS;
757}
758
759/*
760 * DDR3 uses flyby topology where the clock signal takes a different path
761 * than the data signal, to allow for better signal intergrity.
762 * Therefore the delay on the data signals needs to account for this.
763 * This is done by by sampleling the the DQS write (tx) signal back over
764 * the DQ signal and looking for delay values where the sample transitions
765 * from high to low.
766 * Here the following is done:
767 * - enable write levelling on the first populated rank
768 * - disable output on other populated ranks
769 * - start from safe DQS (tx) delays (other transitions can be
770 * found at different starting values but are generally bad)
771 * - loop0: decrease DQS (tx) delays until low is sampled,
772 * loop1: increase DQS (tx) delays until high is sampled,
773 * That way we are sure to hit a low-high transition
774 * - put all ranks in normal mode of operation again
775 * - note: All ranks need to be leveled together
776 */
777void search_write_leveling(struct sysinfo *s)
778{
779 int i, ch, count;
780 u8 config, rank0, rank1, lane;
781 struct dll_setting dq_setting;
782
783 u8 chanconfig_lut[16]={0, 6, 4, 6, 7, 3, 1, 3, 5, 2, 0, 2, 7, 3, 1, 3};
784
785 u8 odt_force[8][4] = { /* [Config][leveling rank] */
786 {0x5, 0x6, 0x5, 0x9},
787 {0x5, 0x6, 0x5, 0x0},
788 {0x5, 0x0, 0x5, 0x9},
789 {0x5, 0x0, 0x5, 0x0},
790 {0x1, 0x2, 0x0, 0x0},
791 {0x0, 0x0, 0x4, 0x8},
792 {0x1, 0x0, 0x0, 0x0},
793 {0x0, 0x0, 0x4, 0x0}
794 };
795
796 printk(BIOS_DEBUG, "Starting write levelling.\n");
797
798 FOR_EACH_POPULATED_CHANNEL(s->dimms, ch) {
799 printk(BIOS_DEBUG, "\tCH%d\n", ch);
800 config = chanconfig_lut[s->dimm_config[ch]];
801
802 MCHBAR8(0x5d8 + 0x400 * ch) =
803 MCHBAR8(0x5d8 + 0x400 * ch) & ~0x0e;
804 MCHBAR16(0x5c4 + 0x400 * ch) = (MCHBAR16(0x5c4 + 0x400 * ch) &
805 ~0x3fff) | 0x3fff;
806 MCHBAR8(0x265 + 0x400 * ch) =
807 MCHBAR8(0x265 + 0x400 * ch) & ~0x1f;
808 /* find the first populated rank */
809 FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, ch, rank0)
810 break;
811
812 /* Enable WL for the first populated rank and disable output
813 for others */
814 FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, ch, rank1)
815 set_rank_write_level(s, ch, config, rank1, rank0, 1);
816
817 MCHBAR8(0x298 + 2 + 0x400 * ch) =
818 (MCHBAR8(0x298 + 2 + 0x400 * ch) & ~0x0f)
819 | odt_force[config][rank0];
820 MCHBAR8(0x271 + 0x400 * ch) = (MCHBAR8(0x271 + 0x400 * ch)
821 & ~0x7e) | 0x4e;
822 MCHBAR8(0x5d9 + 0x400 * ch) =
823 (MCHBAR8(0x5d9 + 0x400 * ch) & ~0x04) | 0x04;
824 MCHBAR32(0x1a0) = (MCHBAR32(0x1a0) & ~0x07ffffff)
825 | 0x00014000;
826
827 if (increment_to_dqs_edge(s, ch, rank0))
828 die("Write Leveling failed!");
829
830 MCHBAR8(0x298 + 2 + 0x400 * ch) =
831 MCHBAR8(0x298 + 2 + 0x400 * ch) & ~0x0f;
832 MCHBAR8(0x271 + 0x400 * ch) =
833 (MCHBAR8(0x271 + 0x400 * ch) & ~0x7e)
834 | 0x0e;
835 MCHBAR8(0x5d9 + 0x400 * ch) =
836 (MCHBAR8(0x5d9 + 0x400 * ch) & ~0x04);
837 MCHBAR32(0x1a0) = (MCHBAR32(0x1a0)
838 & ~0x07ffffff) | 0x00555801;
839
840 /* Disable WL on the trained rank */
841 set_rank_write_level(s, ch, config, rank0, rank0, 0);
842 send_jedec_cmd(s, rank0, ch, NORMALOP_CMD, 1 << 12);
843
844 MCHBAR8(0x5d8 + 0x400 * ch) = (MCHBAR8(0x5d8 + 0x400 * ch)
845 & ~0x0e) | 0x0e;
846 MCHBAR16(0x5c4 + 0x400 * ch) = (MCHBAR16(0x5c4 + 0x400 * ch)
847 & ~0x3fff) | 0x1807;
848 MCHBAR8(0x265 + 0x400 * ch) = MCHBAR8(0x265 + 0x400 * ch) & ~0x1f;
849
850 /* Disable write level mode for all ranks */
851 FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, ch, rank0)
852 set_rank_write_level(s, ch, config, rank0, rank0, 0);
853 }
854
855 MCHBAR8(0x5dc) = (MCHBAR8(0x5dc) & ~0x80) | 0x80;
856
857 /* Increment DQ (rx) dll setting by a standard amount past DQS,
858 This is further trained in write training. */
859 switch (s->selected_timings.mem_clk) {
860 default:
861 case MEM_CLOCK_800MHz:
862 count = 39;
863 break;
864 case MEM_CLOCK_1066MHz:
865 count = 32;
866 break;
867 case MEM_CLOCK_1333MHz:
868 count = 42;
869 break;
870 }
871
872 FOR_EACH_POPULATED_CHANNEL_AND_BYTELANE(s->dimms, ch, lane) {
873 dq_setting = s->dqs_settings[ch][lane];
874 for (i = 0; i < count; i++)
875 if (increment_dq_dqs(s, &dq_setting))
876 die("Can't further increase DQ past DQS delay");
877 dqset(ch, lane, &dq_setting);
878 }
879
880 printk(BIOS_DEBUG, "Done write levelling.\n");
881}