blob: ce184405319093af7b173d9f1bc685655c0169c1 [file] [log] [blame]
Arthur Heymans95c48cb2017-11-04 08:07:06 +01001/*
2 * This file is part of the coreboot project.
3 *
4 * Copyright (C) 2017-2018 Arthur Heymans <arthur@aheymans.xyz>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as
8 * published by the Free Software Foundation; either version 2 of
9 * the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 */
16
Kyösti Mälkki13f66502019-03-03 08:01:05 +020017#include <device/mmio.h>
Arthur Heymans95c48cb2017-11-04 08:07:06 +010018#include <console/console.h>
Arthur Heymansb5170c32017-12-25 20:13:28 +010019#include <delay.h>
Arthur Heymans95c48cb2017-11-04 08:07:06 +010020#include <stdint.h>
21#include <string.h>
22#include <types.h>
23#include "x4x.h"
24#include "iomap.h"
25
26static void print_dll_setting(const struct dll_setting *dll_setting,
27 u8 default_verbose)
28{
29 u8 debug_level = default_verbose ? BIOS_DEBUG : RAM_DEBUG;
30
31 printk(debug_level, "%d.%d.%d.%d:%d.%d\n", dll_setting->coarse,
32 dll_setting->clk_delay, dll_setting->tap,
33 dll_setting->pi, dll_setting->db_en,
34 dll_setting->db_sel);
35}
36
37struct db_limit {
38 u8 tap0;
39 u8 tap1;
40 u8 pi0;
41 u8 pi1;
42};
43
44static void set_db(const struct sysinfo *s, struct dll_setting *dq_dqs_setting)
45{
Elyes HAOUAS88607a42018-10-05 10:36:45 +020046 struct db_limit limit;
Arthur Heymans95c48cb2017-11-04 08:07:06 +010047
48 switch (s->selected_timings.mem_clk) {
49 default:
50 case MEM_CLOCK_800MHz:
51 limit.tap0 = 3;
52 limit.tap1 = 10;
53 limit.pi0 = 2;
54 limit.pi1 = 3;
55 break;
56 case MEM_CLOCK_1066MHz:
57 limit.tap0 = 2;
58 limit.tap1 = 8;
59 limit.pi0 = 6;
60 limit.pi1 = 7;
61 break;
62 case MEM_CLOCK_1333MHz:
63 limit.tap0 = 3;
64 limit.tap1 = 11;
65 /* TO CHECK: Might be reverse since this makes little sense */
66 limit.pi0 = 6;
67 limit.pi1 = 4;
68 break;
69 }
70
71 if (dq_dqs_setting->tap < limit.tap0) {
72 dq_dqs_setting->db_en = 1;
73 dq_dqs_setting->db_sel = 1;
74 } else if ((dq_dqs_setting->tap == limit.tap0)
75 && (dq_dqs_setting->pi < limit.pi0)) {
76 dq_dqs_setting->db_en = 1;
77 dq_dqs_setting->db_sel = 1;
78 } else if (dq_dqs_setting->tap < limit.tap1) {
79 dq_dqs_setting->db_en = 0;
80 dq_dqs_setting->db_sel = 0;
81 } else if ((dq_dqs_setting->tap == limit.tap1)
82 && (dq_dqs_setting->pi < limit.pi1)) {
83 dq_dqs_setting->db_en = 0;
84 dq_dqs_setting->db_sel = 0;
85 } else {
86 dq_dqs_setting->db_en = 1;
87 dq_dqs_setting->db_sel = 0;
88 }
89}
90
91const static u8 max_tap[3] = {12, 10, 13};
92
93static int increment_dq_dqs(const struct sysinfo *s,
94 struct dll_setting *dq_dqs_setting)
95{
96 u8 max_tap_val = max_tap[s->selected_timings.mem_clk
97 - MEM_CLOCK_800MHz];
98
99 if (dq_dqs_setting->pi < 6) {
100 dq_dqs_setting->pi += 1;
101 } else if (dq_dqs_setting->tap < max_tap_val) {
102 dq_dqs_setting->pi = 0;
103 dq_dqs_setting->tap += 1;
104 } else if (dq_dqs_setting->clk_delay < 2) {
105 dq_dqs_setting->pi = 0;
106 dq_dqs_setting->tap = 0;
107 dq_dqs_setting->clk_delay += 1;
108 } else if (dq_dqs_setting->coarse < 1) {
109 dq_dqs_setting->pi = 0;
110 dq_dqs_setting->tap = 0;
111 dq_dqs_setting->clk_delay -= 1;
112 dq_dqs_setting->coarse += 1;
113 } else {
114 return CB_ERR;
115 }
116 set_db(s, dq_dqs_setting);
117 return CB_SUCCESS;
118}
119
Arthur Heymansb5170c32017-12-25 20:13:28 +0100120static int decrement_dq_dqs(const struct sysinfo *s,
121 struct dll_setting *dq_dqs_setting)
122{
123 u8 max_tap_val = max_tap[s->selected_timings.mem_clk
124 - MEM_CLOCK_800MHz];
125
126 if (dq_dqs_setting->pi > 0) {
127 dq_dqs_setting->pi -= 1;
128 } else if (dq_dqs_setting->tap > 0) {
129 dq_dqs_setting->pi = 6;
130 dq_dqs_setting->tap -= 1;
131 } else if (dq_dqs_setting->clk_delay > 0) {
132 dq_dqs_setting->pi = 6;
133 dq_dqs_setting->tap = max_tap_val;
134 dq_dqs_setting->clk_delay -= 1;
135 } else if (dq_dqs_setting->coarse > 0) {
136 dq_dqs_setting->pi = 6;
137 dq_dqs_setting->tap = max_tap_val;
138 dq_dqs_setting->clk_delay += 1;
139 dq_dqs_setting->coarse -= 1;
140 } else {
141 return CB_ERR;
142 }
143 set_db(s, dq_dqs_setting);
144 return CB_SUCCESS;
145}
146
147
Arthur Heymans95c48cb2017-11-04 08:07:06 +0100148#define WT_PATTERN_SIZE 80
149
150static const u32 write_training_schedule[WT_PATTERN_SIZE] = {
151 0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
152 0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
153 0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
154 0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
155 0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
156 0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
157 0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
158 0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
159 0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
160 0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
161 0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
162 0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
163 0x03030303, 0x04040404, 0x09090909, 0x10101010,
164 0x21212121, 0x40404040, 0x81818181, 0x00000000,
165 0x03030303, 0x04040404, 0x09090909, 0x10101010,
166 0x21212121, 0x40404040, 0x81818181, 0x00000000,
167 0xfdfdfdfd, 0xfafafafa, 0xf7f7f7f7, 0xeeeeeeee,
168 0xdfdfdfdf, 0xbebebebe, 0x7f7f7f7f, 0xfefefefe,
169 0xfdfdfdfd, 0xfafafafa, 0xf7f7f7f7, 0xeeeeeeee,
170 0xdfdfdfdf, 0xbebebebe, 0x7f7f7f7f, 0xfefefefe,
171};
172
/*
 * Test outcome a limit search is looking for. The values are compared
 * directly against a per-lane error bit (0 = no error, 1 = error).
 */
enum training_modes {
	SUCCEEDING = 0,	/* lane error bit clear */
	FAILING = 1	/* lane error bit set */
};
177
178static u8 test_dq_aligned(const struct sysinfo *s,
179 const u8 channel)
180{
181 u32 address;
182 int rank, lane;
183 u8 count, count1;
184 u8 data[8];
185 u8 lane_error = 0;
186
187 FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, channel, rank) {
188 address = test_address(channel, rank);
189 for (count = 0; count < WT_PATTERN_SIZE; count++) {
190 for (count1 = 0; count1 < WT_PATTERN_SIZE; count1++) {
191 if ((count1 % 16) == 0)
192 MCHBAR32(0xf90) = 1;
193 const u32 pattern =
194 write_training_schedule[count1];
195 write32((u32 *)address + 8 * count1, pattern);
196 write32((u32 *)address + 8 * count1 + 4,
197 pattern);
198 }
199
200 const u32 good = write_training_schedule[count];
201 write32(&data[0], read32((u32 *)address + 8 * count));
202 write32(&data[4],
203 read32((u32 *)address + 8 * count + 4));
204 FOR_EACH_BYTELANE(lane) {
205 u8 expected = (good >> ((lane % 4) * 8)) & 0xff;
206 if (data[lane] != expected)
207 lane_error |= 1 << lane;
208 }
209 }
210 }
211 return lane_error;
212}
213
214#define CONSISTENCY 10
215
216/*
217 * This function finds either failing or succeeding writes by increasing DQ.
218 * When it has found a failing or succeeding setting it will increase DQ
219 * another 10 times to make sure the result is consistent.
220 * This is probably done because lanes cannot be trained independent from
221 * each other.
222 */
223static int find_dq_limit(const struct sysinfo *s, const u8 channel,
224 struct dll_setting dq_setting[TOTAL_BYTELANES],
225 u8 dq_lim[TOTAL_BYTELANES],
226 const enum training_modes expected_result)
227{
228 int status = CB_SUCCESS;
229 int lane;
230 u8 test_result;
231 u8 pass_count[TOTAL_BYTELANES];
232 u8 succes_mask = 0xff;
233
234 printk(RAM_DEBUG, "Looking for %s writes on channel %d\n",
235 expected_result == FAILING ? "failing" : "succeeding", channel);
236 memset(pass_count, 0, sizeof(pass_count));
237
238 while(succes_mask) {
239 test_result = test_dq_aligned(s, channel);
240 FOR_EACH_BYTELANE(lane) {
241 if (((test_result >> lane) & 1) != expected_result) {
242 status = increment_dq_dqs(s, &dq_setting[lane]);
243 dqset(channel, lane, &dq_setting[lane]);
244 dq_lim[lane]++;
245 } else if (pass_count[lane] < CONSISTENCY) {
246 status = increment_dq_dqs(s, &dq_setting[lane]);
247 dqset(channel, lane, &dq_setting[lane]);
248 dq_lim[lane]++;
249 pass_count[lane]++;
250 } else if (pass_count[lane] == CONSISTENCY) {
251 succes_mask &= ~(1 << lane);
252 }
253 if (status == CB_ERR) {
254 printk(BIOS_CRIT, "Could not find a case of %s "
255 "writes on CH%d, lane %d\n",
256 expected_result == FAILING ? "failing"
257 : "succeeding", channel, lane);
258 return CB_ERR;
259 }
260 }
261 }
262 return CB_SUCCESS;
263}
264
265/*
266 * This attempts to find the ideal delay for DQ to account for the skew between
267 * the DQ and the DQS signal.
268 * The training works this way:
269 * - start from the DQS delay values (DQ is always later than DQS)
270 * - increment the DQ delay until a succeeding write is found on all bytelayes,
271 * on all ranks on a channel and save these values
272 * - again increment the DQ delay until write start to fail on all bytelanes and
273 * save that value
274 * - use the mean between the saved succeeding and failing value
275 * - note: bytelanes cannot be trained independently, so the delays need to be
276 * adjusted and tested for all of them at the same time
277 */
278int do_write_training(struct sysinfo *s)
279{
280 int i;
281 u8 channel, lane;
282 u8 dq_lower[TOTAL_BYTELANES];
283 u8 dq_upper[TOTAL_BYTELANES];
284 struct dll_setting dq_setting[TOTAL_BYTELANES];
285 u8 dq_average;
286 u32 dq_absolute;
287
288 printk(BIOS_DEBUG, "Starting DQ write training\n");
289
290 FOR_EACH_POPULATED_CHANNEL(s->dimms, channel) {
291 printk(BIOS_DEBUG, "Doing DQ write training on CH%d\n", channel);
292
293 dq_average = 0;
294 dq_absolute = 0;
295 /* Start all lanes at DQS values */
296 FOR_EACH_BYTELANE(lane) {
297 dqset(channel, lane, &s->dqs_settings[channel][lane]);
298 s->dq_settings[channel][lane] = s->dqs_settings[channel][lane];
299 }
300 memset(dq_lower, 0, sizeof(dq_lower));
301 /* Start from DQS settings */
302 memcpy(dq_setting, s->dqs_settings[channel], sizeof(dq_setting));
303
304 if (find_dq_limit(s, channel, dq_setting, dq_lower,
305 SUCCEEDING)) {
306 printk(BIOS_CRIT,
307 "Could not find working lower limit DQ setting\n");
308 return CB_ERR;
309 }
310
311 memcpy(dq_upper, dq_lower, sizeof(dq_lower));
312
313 if (find_dq_limit(s, channel, dq_setting, dq_upper,
314 FAILING)) {
315 printk(BIOS_WARNING,
316 "Could not find failing upper limit DQ setting\n");
317 return CB_ERR;
318 }
319
320 FOR_EACH_BYTELANE(lane) {
321 dq_lower[lane] -= CONSISTENCY - 1;
322 dq_upper[lane] -= CONSISTENCY - 1;
323 u8 dq_center = (dq_upper[lane] + dq_lower[lane]) / 2;
324
325 printk(RAM_DEBUG, "Centered value for DQ DLL:"
326 " ch%d, lane %d, #steps = %d\n",
327 channel, lane, dq_center);
328 for (i = 0; i < dq_center; i++) {
329 /* Should never happen */
330 if (increment_dq_dqs(s, &s->dq_settings[channel][lane])
331 == CB_ERR)
332 printk(BIOS_ERR,
333 "Huh? write training overflowed!!\n");
334 }
335 }
336
337 /* Reset DQ DLL settings and increment with centered value*/
338 printk(BIOS_DEBUG, "Final DQ timings on CH%d\n", channel);
339 FOR_EACH_BYTELANE(lane) {
340 printk(BIOS_DEBUG, "\tlane%d: ", lane);
341 print_dll_setting(&s->dq_settings[channel][lane], 1);
342 dqset(channel, lane, &s->dq_settings[channel][lane]);
343 }
344 }
345 printk(BIOS_DEBUG, "Done DQ write training\n");
346 return CB_SUCCESS;
347}
348
349#define RT_PATTERN_SIZE 40
350
351static const u32 read_training_schedule[RT_PATTERN_SIZE] = {
352 0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
353 0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
354 0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
355 0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
356 0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
357 0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
358 0x03030303, 0x04040404, 0x09090909, 0x10101010,
359 0x21212121, 0x40404040, 0x81818181, 0x00000000,
360 0xfdfdfdfd, 0xfafafafa, 0xf7f7f7f7, 0xeeeeeeee,
361 0xdfdfdfdf, 0xbebebebe, 0x7f7f7f7f, 0xfefefefe
362};
363
364static int rt_increment_dqs(struct rt_dqs_setting *setting)
365{
366 if (setting->pi < 7) {
367 setting->pi++;
368 } else if (setting->tap < 14) {
369 setting->pi = 0;
370 setting->tap++;
371 } else {
372 return CB_ERR;
373 }
374 return CB_SUCCESS;
375}
376
377static u8 test_dqs_aligned(const struct sysinfo *s, const u8 channel)
378{
379 int i, rank, lane;
380 volatile u8 data[8];
381 u32 address;
382 u8 bytelane_error = 0;
383
384 FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, channel, rank) {
385 address = test_address(channel, rank);
386 for (i = 0; i < RT_PATTERN_SIZE; i++) {
387 const u32 good = read_training_schedule[i];
388 write32(&data[0], read32((u32 *)address + i * 8));
389 write32(&data[4], read32((u32 *)address + i * 8 + 4));
390
391 FOR_EACH_BYTELANE(lane) {
392 if (data[lane] != (good & 0xff))
393 bytelane_error |= 1 << lane;
394 }
395 }
396 }
397 return bytelane_error;
398}
399
400static int rt_find_dqs_limit(struct sysinfo *s, u8 channel,
401 struct rt_dqs_setting dqs_setting[TOTAL_BYTELANES],
402 u8 dqs_lim[TOTAL_BYTELANES],
403 const enum training_modes expected_result)
404{
405 int lane;
406 u8 test_result;
407 int status = CB_SUCCESS;
408
409 FOR_EACH_BYTELANE(lane)
410 rt_set_dqs(channel, lane, 0, &dqs_setting[lane]);
411
412 while(status == CB_SUCCESS) {
413 test_result = test_dqs_aligned(s, channel);
414 if (test_result == (expected_result == SUCCEEDING ? 0 : 0xff))
415 return CB_SUCCESS;
416 FOR_EACH_BYTELANE(lane) {
417 if (((test_result >> lane) & 1) != expected_result) {
418 status = rt_increment_dqs(&dqs_setting[lane]);
419 dqs_lim[lane]++;
420 rt_set_dqs(channel, lane, 0, &dqs_setting[lane]);
421 }
422 }
423 }
424
425 if (expected_result == SUCCEEDING) {
426 printk(BIOS_CRIT,
427 "Could not find RT DQS setting\n");
428 return CB_ERR;
429 } else {
430 printk(RAM_DEBUG,
431 "Read succeeded over all DQS"
432 " settings, continuing\n");
433 return CB_SUCCESS;
434 }
435}
436
437#define RT_LOOPS 3
438
439/*
440 * This attempts to find the ideal delay for DQS on reads (rx).
441 * The training works this way:
442 * - start from the lowest possible delay (0) on all bytelanes
443 * - increment the DQS rx delays until a succeeding write is found on all
444 * bytelayes, on all ranks on a channel and save these values
445 * - again increment the DQS rx delay until write start to fail on all bytelanes
446 * and save that value
447 * - use the mean between the saved succeeding and failing value
448 * - note0: bytelanes cannot be trained independently, so the delays need to be
449 * adjusted and tested for all of them at the same time
Arthur Heymans8ddd7d12018-09-11 22:26:13 +0200450 * - note1: At this stage all ranks effectively use the rank0's rt_dqs settings,
451 * but later on their respective settings are used (TODO where is the
452 * 'switch' register??). So programming the results for all ranks at the end
453 * of the training. Programming on all ranks instead of all populated ranks,
454 * seems to be required, most likely because the signals can't really be generated
455 * separately.
Arthur Heymans95c48cb2017-11-04 08:07:06 +0100456 */
457int do_read_training(struct sysinfo *s)
458{
459 int loop, channel, i, lane, rank;
460 u32 address, content;
461 u8 dqs_lower[TOTAL_BYTELANES];
462 u8 dqs_upper[TOTAL_BYTELANES];
463 struct rt_dqs_setting dqs_setting[TOTAL_BYTELANES];
464 u16 saved_dqs_center[TOTAL_CHANNELS][TOTAL_BYTELANES];
465
Elyes HAOUASa342f392018-10-17 10:56:26 +0200466 memset(saved_dqs_center, 0, sizeof(saved_dqs_center));
Arthur Heymans95c48cb2017-11-04 08:07:06 +0100467
468 printk(BIOS_DEBUG, "Starting DQS read training\n");
469
470 for (loop = 0; loop < RT_LOOPS; loop++) {
471 FOR_EACH_POPULATED_CHANNEL(s->dimms, channel) {
472 printk(RAM_DEBUG, "Doing DQS read training on CH%d\n",
473 channel);
474
475 /* Write pattern to strobe address */
476 FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, channel, rank) {
477 address = test_address(channel, rank);
478 for (i = 0; i < RT_PATTERN_SIZE; i++) {
479 content = read_training_schedule[i];
480 write32((u32 *)address + 8 * i, content);
481 write32((u32 *)address + 8 * i + 4, content);
482 }
483 }
484
485 memset(dqs_lower, 0, sizeof(dqs_lower));
486 memset(&dqs_setting, 0, sizeof(dqs_setting));
487 if (rt_find_dqs_limit(s, channel, dqs_setting, dqs_lower,
488 SUCCEEDING)) {
489 printk(BIOS_CRIT,
490 "Could not find working lower limit DQS setting\n");
491 return CB_ERR;
492 }
493
494 FOR_EACH_BYTELANE(lane)
495 dqs_upper[lane] = dqs_lower[lane];
496
497 if (rt_find_dqs_limit(s, channel, dqs_setting, dqs_upper,
498 FAILING)) {
499 printk(BIOS_CRIT,
500 "Could not find failing upper limit DQ setting\n");
501 return CB_ERR;
502 }
503
504 printk(RAM_DEBUG, "Centered values, loop %d:\n", loop);
505 FOR_EACH_BYTELANE(lane) {
506 u8 center = (dqs_lower[lane] + dqs_upper[lane]) / 2;
507 printk(RAM_DEBUG, "\t lane%d: #%d\n", lane, center);
508 saved_dqs_center[channel][lane] += center;
509 }
510 } /* END FOR_EACH_POPULATED_CHANNEL */
511 } /* end RT_LOOPS */
512
513 memset(s->rt_dqs, 0, sizeof(s->rt_dqs));
514
515 FOR_EACH_POPULATED_CHANNEL(s->dimms, channel) {
516 printk(BIOS_DEBUG, "Final timings on CH%d:\n", channel);
517 FOR_EACH_BYTELANE(lane) {
518 saved_dqs_center[channel][lane] /= RT_LOOPS;
519 while (saved_dqs_center[channel][lane]--) {
520 if(rt_increment_dqs(&s->rt_dqs[channel][lane])
521 == CB_ERR)
522 /* Should never happen */
523 printk(BIOS_ERR,
524 "Huh? read training overflowed!!\n");
525 }
Arthur Heymans8ddd7d12018-09-11 22:26:13 +0200526 /* Later on separate settings for each rank are used so program
527 all of them */
528 FOR_EACH_RANK_IN_CHANNEL(rank)
Arthur Heymans95c48cb2017-11-04 08:07:06 +0100529 rt_set_dqs(channel, lane, rank,
530 &s->rt_dqs[channel][lane]);
531 printk(BIOS_DEBUG, "\tlane%d: %d.%d\n",
532 lane, s->rt_dqs[channel][lane].tap,
533 s->rt_dqs[channel][lane].pi);
534 }
535 }
536 printk(BIOS_DEBUG, "Done DQS read training\n");
537 return CB_SUCCESS;
538}
Arthur Heymansb5170c32017-12-25 20:13:28 +0100539
540/* Enable write leveling on selected rank and disable output on other ranks */
541static void set_rank_write_level(struct sysinfo *s, u8 channel, u8 config,
542 u8 config_rank, u8 target_rank, int wl_enable)
543{
544 u32 emrs1;
545
546 /* Is shifted by bits 2 later so u8 can be used to reduce size */
547 const static u8 emrs1_lut[8][4][4]={ /* [Config][Leveling Rank][Rank] */
548 { /* Config 0: 2R2R */
549 {0x11, 0x00, 0x91, 0x00},
550 {0x00, 0x11, 0x91, 0x00},
551 {0x91, 0x00, 0x11, 0x00},
552 {0x91, 0x00, 0x00, 0x11}
553 },
554 { // Config 1: 2R1R
555 {0x11, 0x00, 0x91, 0x00},
556 {0x00, 0x11, 0x91, 0x00},
557 {0x91, 0x00, 0x11, 0x00},
558 {0x00, 0x00, 0x00, 0x00}
559 },
560 { // Config 2: 1R2R
561 {0x11, 0x00, 0x91, 0x00},
562 {0x00, 0x00, 0x00, 0x00},
563 {0x91, 0x00, 0x11, 0x00},
564 {0x91, 0x00, 0x00, 0x11}
565 },
566 { // Config 3: 1R1R
567 {0x11, 0x00, 0x91, 0x00},
568 {0x00, 0x00, 0x00, 0x00},
569 {0x91, 0x00, 0x11, 0x00},
570 {0x00, 0x00, 0x00, 0x00}
571 },
572 { // Config 4: 2R0R
573 {0x11, 0x00, 0x00, 0x00},
574 {0x00, 0x11, 0x00, 0x00},
575 {0x00, 0x00, 0x00, 0x00},
576 {0x00, 0x00, 0x00, 0x00}
577 },
578 { // Config 5: 0R2R
579 {0x00, 0x00, 0x00, 0x00},
580 {0x00, 0x00, 0x00, 0x00},
581 {0x00, 0x00, 0x11, 0x00},
582 {0x00, 0x00, 0x00, 0x11}
583 },
584 { // Config 6: 1R0R
585 {0x11, 0x00, 0x00, 0x00},
586 {0x00, 0x00, 0x00, 0x00},
587 {0x00, 0x00, 0x00, 0x00},
588 {0x00, 0x00, 0x00, 0x00}
589 },
590 { // Config 7: 0R1R
591 {0x00, 0x00, 0x00, 0x00},
592 {0x00, 0x00, 0x00, 0x00},
593 {0x00, 0x00, 0x11, 0x00},
594 {0x00, 0x00, 0x00, 0x00}
595 }
596 };
597
598 if (wl_enable) {
599 printk(RAM_DEBUG, "Entering WL mode\n");
600 printk(RAM_DEBUG, "Using WL ODT values\n");
601 emrs1 = emrs1_lut[config][target_rank][config_rank];
602 } else {
603 printk(RAM_DEBUG, "Exiting WL mode\n");
604 emrs1 = ddr3_emrs1_rtt_nom_config[s->dimm_config[channel]][config_rank];
605 }
606 printk(RAM_DEBUG, "Setting ODT for rank%d to ", config_rank);
607 switch (emrs1) {
608 case 0:
609 printk(RAM_DEBUG, "High-Z\n");
610 break;
611 case 0x11:
612 printk(RAM_DEBUG, "40 Ohm\n");
613 break;
614 case 0x81:
615 printk(RAM_DEBUG, "30 Ohm\n");
616 break;
617 case 0x80:
618 printk(RAM_DEBUG, "20 Ohm\n");
619 break;
620 case 0x10:
621 printk(RAM_DEBUG, "120 Ohm\n");
622 break;
623 case 0x01:
624 printk(RAM_DEBUG, "60 Ohm\n");
625 break;
626 default:
627 printk(BIOS_WARNING, "ODT value Undefined!\n");
628 break;
629 }
630
631 emrs1 <<= 2;
632 /* Set output drive strength to 34 Ohm during write levelling */
633 emrs1 |= (1 << 1);
634
635 if (wl_enable && (target_rank != config_rank)) {
636 printk(RAM_DEBUG, "Disabling output for rank%d\n", config_rank);
637 emrs1 |= (1 << 12);
638 }
639 if (wl_enable && (target_rank == config_rank)) {
640 printk(RAM_DEBUG, "Enabling WL for rank%d\n", config_rank);
641 emrs1 |= (1 << 7);
642 }
643 send_jedec_cmd(s, config_rank, channel, EMRS1_CMD, emrs1);
644}
645
646#define N_SAMPLES 5
647
648static void sample_dq(const struct sysinfo *s, u8 channel, u8 rank,
649 u8 high_found[8]) {
650 u32 address = test_address(channel, rank);
651 int samples, lane;
652
653 memset(high_found, 0, TOTAL_BYTELANES * sizeof(high_found[0]));
654 for (samples = 0; samples < N_SAMPLES; samples++) {
655 write32((u32 *)address, 0x12341234);
656 write32((u32 *)address + 4, 0x12341234);
657 udelay(5);
658 FOR_EACH_BYTELANE(lane) {
659 u8 dq_high = (MCHBAR8(0x561 + 0x400 * channel
660 + (lane * 4)) >> 7) & 1;
661 high_found[lane] += dq_high;
662 }
663 }
664}
665
666static enum cb_err increment_to_dqs_edge(struct sysinfo *s, u8 channel, u8 rank)
667{
668 int lane;
669 u8 saved_24d;
670 struct dll_setting dqs_setting[TOTAL_BYTELANES];
671 u8 bytelane_ok = 0;
672 u8 dq_sample[TOTAL_BYTELANES];
673
674 memcpy(dqs_setting, s->dqs_settings[channel], sizeof(dqs_setting));
675 FOR_EACH_BYTELANE(lane)
676 dqsset(channel, lane, &dqs_setting[lane]);
677
678 saved_24d = MCHBAR8(0x24d + 0x400 * channel);
679
680 /* Loop 0: Find DQ sample low, by decreasing */
681 while (bytelane_ok != 0xff) {
682 sample_dq(s, channel, rank, dq_sample);
683 FOR_EACH_BYTELANE(lane) {
684 if (bytelane_ok & (1 << lane))
685 continue;
686
687 printk(RAM_SPEW, "%d, %d, %02d, %d,"
688 " lane%d sample: %d\n",
689 dqs_setting[lane].coarse,
690 dqs_setting[lane].clk_delay,
691 dqs_setting[lane].tap,
692 dqs_setting[lane].pi,
693 lane,
694 dq_sample[lane]);
695
696 if (dq_sample[lane] > 0) {
697 if (decrement_dq_dqs(s, &dqs_setting[lane])) {
698 printk(BIOS_EMERG,
699 "DQS setting channel%d, "
700 "lane %d reached a minimum!\n",
701 channel, lane);
702 return CB_ERR;
703 }
704 } else {
705 bytelane_ok |= (1 << lane);
706 }
707 dqsset(channel, lane, &dqs_setting[lane]);
708 }
709 }
710
711 printk(RAM_DEBUG, "DQS settings on PASS #0:\n");
712 FOR_EACH_BYTELANE(lane) {
713 printk(RAM_DEBUG, "lane %d: ", lane);
714 print_dll_setting(&dqs_setting[lane], 0);
715 }
716
717 /* Loop 1: Find DQ sample high, by increasing */
718 bytelane_ok = 0;
719 while (bytelane_ok != 0xff) {
720 sample_dq(s, channel, rank, dq_sample);
721 FOR_EACH_BYTELANE(lane) {
722 if (bytelane_ok & (1 << lane))
723 continue;
724
725 printk(RAM_SPEW, "%d, %d, %02d, %d, lane%d sample: %d\n",
726 dqs_setting[lane].coarse,
727 dqs_setting[lane].clk_delay,
728 dqs_setting[lane].tap,
729 dqs_setting[lane].pi,
730 lane,
731 dq_sample[lane]);
732
733 if (dq_sample[lane] == N_SAMPLES) {
734 bytelane_ok |= (1 << lane);
735 } else {
736 if (increment_dq_dqs(s, &dqs_setting[lane])) {
737 printk(BIOS_EMERG,
738 "DQS setting channel%d, "
739 "lane %d reached a maximum!\n",
740 channel, lane);
741 return CB_ERR;
742 }
743 }
744 dqsset(channel, lane, &dqs_setting[lane]);
745 }
746 }
747
748 printk(RAM_DEBUG, "DQS settings on PASS #1:\n");
749 FOR_EACH_BYTELANE(lane) {
750 printk(RAM_DEBUG, "lane %d: ", lane);
751 print_dll_setting(&dqs_setting[lane], 0);
752 }
753
754 printk(BIOS_DEBUG, "final WL DQS settings on CH%d\n", channel);
755 FOR_EACH_BYTELANE(lane) {
756 printk(BIOS_DEBUG, "\tlane%d: ", lane);
757 print_dll_setting(&dqs_setting[lane], 1);
758 s->dqs_settings[channel][lane] = dqs_setting[lane];
759 }
760
761 MCHBAR8(0x24d + 0x400 * channel) = saved_24d;
762 return CB_SUCCESS;
763}
764
765/*
766 * DDR3 uses flyby topology where the clock signal takes a different path
767 * than the data signal, to allow for better signal intergrity.
768 * Therefore the delay on the data signals needs to account for this.
769 * This is done by by sampleling the the DQS write (tx) signal back over
770 * the DQ signal and looking for delay values where the sample transitions
771 * from high to low.
772 * Here the following is done:
773 * - enable write levelling on the first populated rank
774 * - disable output on other populated ranks
775 * - start from safe DQS (tx) delays (other transitions can be
776 * found at different starting values but are generally bad)
777 * - loop0: decrease DQS (tx) delays until low is sampled,
778 * loop1: increase DQS (tx) delays until high is sampled,
779 * That way we are sure to hit a low-high transition
780 * - put all ranks in normal mode of operation again
781 * - note: All ranks need to be leveled together
782 */
783void search_write_leveling(struct sysinfo *s)
784{
785 int i, ch, count;
786 u8 config, rank0, rank1, lane;
787 struct dll_setting dq_setting;
788
789 u8 chanconfig_lut[16]={0, 6, 4, 6, 7, 3, 1, 3, 5, 2, 0, 2, 7, 3, 1, 3};
790
791 u8 odt_force[8][4] = { /* [Config][leveling rank] */
792 {0x5, 0x6, 0x5, 0x9},
793 {0x5, 0x6, 0x5, 0x0},
794 {0x5, 0x0, 0x5, 0x9},
795 {0x5, 0x0, 0x5, 0x0},
796 {0x1, 0x2, 0x0, 0x0},
797 {0x0, 0x0, 0x4, 0x8},
798 {0x1, 0x0, 0x0, 0x0},
799 {0x0, 0x0, 0x4, 0x0}
800 };
801
802 printk(BIOS_DEBUG, "Starting write levelling.\n");
803
804 FOR_EACH_POPULATED_CHANNEL(s->dimms, ch) {
805 printk(BIOS_DEBUG, "\tCH%d\n", ch);
806 config = chanconfig_lut[s->dimm_config[ch]];
807
808 MCHBAR8(0x5d8 + 0x400 * ch) =
809 MCHBAR8(0x5d8 + 0x400 * ch) & ~0x0e;
810 MCHBAR16(0x5c4 + 0x400 * ch) = (MCHBAR16(0x5c4 + 0x400 * ch) &
811 ~0x3fff) | 0x3fff;
812 MCHBAR8(0x265 + 0x400 * ch) =
813 MCHBAR8(0x265 + 0x400 * ch) & ~0x1f;
814 /* find the first populated rank */
815 FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, ch, rank0)
816 break;
817
818 /* Enable WL for the first populated rank and disable output
819 for others */
820 FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, ch, rank1)
821 set_rank_write_level(s, ch, config, rank1, rank0, 1);
822
823 MCHBAR8(0x298 + 2 + 0x400 * ch) =
824 (MCHBAR8(0x298 + 2 + 0x400 * ch) & ~0x0f)
825 | odt_force[config][rank0];
826 MCHBAR8(0x271 + 0x400 * ch) = (MCHBAR8(0x271 + 0x400 * ch)
827 & ~0x7e) | 0x4e;
828 MCHBAR8(0x5d9 + 0x400 * ch) =
829 (MCHBAR8(0x5d9 + 0x400 * ch) & ~0x04) | 0x04;
830 MCHBAR32(0x1a0) = (MCHBAR32(0x1a0) & ~0x07ffffff)
831 | 0x00014000;
832
833 if (increment_to_dqs_edge(s, ch, rank0))
834 die("Write Leveling failed!");
835
836 MCHBAR8(0x298 + 2 + 0x400 * ch) =
837 MCHBAR8(0x298 + 2 + 0x400 * ch) & ~0x0f;
838 MCHBAR8(0x271 + 0x400 * ch) =
839 (MCHBAR8(0x271 + 0x400 * ch) & ~0x7e)
840 | 0x0e;
841 MCHBAR8(0x5d9 + 0x400 * ch) =
842 (MCHBAR8(0x5d9 + 0x400 * ch) & ~0x04);
843 MCHBAR32(0x1a0) = (MCHBAR32(0x1a0)
844 & ~0x07ffffff) | 0x00555801;
845
846 /* Disable WL on the trained rank */
847 set_rank_write_level(s, ch, config, rank0, rank0, 0);
848 send_jedec_cmd(s, rank0, ch, NORMALOP_CMD, 1 << 12);
849
850 MCHBAR8(0x5d8 + 0x400 * ch) = (MCHBAR8(0x5d8 + 0x400 * ch)
851 & ~0x0e) | 0x0e;
852 MCHBAR16(0x5c4 + 0x400 * ch) = (MCHBAR16(0x5c4 + 0x400 * ch)
853 & ~0x3fff) | 0x1807;
854 MCHBAR8(0x265 + 0x400 * ch) = MCHBAR8(0x265 + 0x400 * ch) & ~0x1f;
855
856 /* Disable write level mode for all ranks */
857 FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, ch, rank0)
858 set_rank_write_level(s, ch, config, rank0, rank0, 0);
859 }
860
861 MCHBAR8(0x5dc) = (MCHBAR8(0x5dc) & ~0x80) | 0x80;
862
863 /* Increment DQ (rx) dll setting by a standard amount past DQS,
864 This is further trained in write training. */
865 switch (s->selected_timings.mem_clk) {
866 default:
867 case MEM_CLOCK_800MHz:
868 count = 39;
869 break;
870 case MEM_CLOCK_1066MHz:
871 count = 32;
872 break;
873 case MEM_CLOCK_1333MHz:
874 count = 42;
875 break;
876 }
877
878 FOR_EACH_POPULATED_CHANNEL_AND_BYTELANE(s->dimms, ch, lane) {
879 dq_setting = s->dqs_settings[ch][lane];
880 for (i = 0; i < count; i++)
881 if (increment_dq_dqs(s, &dq_setting))
882 die("Can't further increase DQ past DQS delay");
883 dqset(ch, lane, &dq_setting);
884 }
885
886 printk(BIOS_DEBUG, "Done write levelling.\n");
887}