/* SPDX-License-Identifier: GPL-2.0-or-later */

#include <device/mmio.h>
#include <console/console.h>
#include <delay.h>
#include <string.h>
#include <types.h>
#include "x4x.h"
#include "iomap.h"

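/* Prints a DLL setting as coarse.clk_delay.tap.pi:db_en.db_sel */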
static void print_dll_setting(const struct dll_setting *dll_setting,
			u8 default_verbose)
{
	u8 debug_level = default_verbose ? BIOS_DEBUG : RAM_DEBUG;

	printk(debug_level, "%d.%d.%d.%d:%d.%d\n", dll_setting->coarse,
		dll_setting->clk_delay, dll_setting->tap,
		dll_setting->pi, dll_setting->db_en,
		dll_setting->db_sel);
}

struct db_limit {
	u8 tap0;
	u8 tap1;
	u8 pi0;
	u8 pi1;
};

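/*
 * Pick the delay buffer enable/select bits (db_en, db_sel) for a DQ/DQS
 * setting based on how far its tap/pi delay has advanced; the thresholds
 * depend on the selected memory clock.
 */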
static void set_db(const struct sysinfo *s, struct dll_setting *dq_dqs_setting)
{
	struct db_limit limit;

	switch (s->selected_timings.mem_clk) {
	default:
	case MEM_CLOCK_800MHz:
		limit.tap0 = 3;
		limit.tap1 = 10;
		limit.pi0 = 2;
		limit.pi1 = 3;
		break;
	case MEM_CLOCK_1066MHz:
		limit.tap0 = 2;
		limit.tap1 = 8;
		limit.pi0 = 6;
		limit.pi1 = 7;
		break;
	case MEM_CLOCK_1333MHz:
		limit.tap0 = 3;
		limit.tap1 = 11;
		/* TO CHECK: Might be reversed, since this makes little sense */
		limit.pi0 = 6;
		limit.pi1 = 4;
		break;
	}

	if (dq_dqs_setting->tap < limit.tap0) {
		dq_dqs_setting->db_en = 1;
		dq_dqs_setting->db_sel = 1;
	} else if ((dq_dqs_setting->tap == limit.tap0)
			&& (dq_dqs_setting->pi < limit.pi0)) {
		dq_dqs_setting->db_en = 1;
		dq_dqs_setting->db_sel = 1;
	} else if (dq_dqs_setting->tap < limit.tap1) {
		dq_dqs_setting->db_en = 0;
		dq_dqs_setting->db_sel = 0;
	} else if ((dq_dqs_setting->tap == limit.tap1)
			&& (dq_dqs_setting->pi < limit.pi1)) {
		dq_dqs_setting->db_en = 0;
		dq_dqs_setting->db_sel = 0;
	} else {
		dq_dqs_setting->db_en = 1;
		dq_dqs_setting->db_sel = 0;
	}
}

static const u8 max_tap[3] = {12, 10, 13};

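/*
 * Advance a DQ/DQS delay by the smallest possible step: pi first, then tap
 * (up to the per-clock maximum in max_tap[]), then clk_delay, then coarse.
 * Returns CB_ERR once the delay cannot be increased any further.
 */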
static int increment_dq_dqs(const struct sysinfo *s,
			struct dll_setting *dq_dqs_setting)
{
	u8 max_tap_val = max_tap[s->selected_timings.mem_clk
			- MEM_CLOCK_800MHz];

	if (dq_dqs_setting->pi < 6) {
		dq_dqs_setting->pi += 1;
	} else if (dq_dqs_setting->tap < max_tap_val) {
		dq_dqs_setting->pi = 0;
		dq_dqs_setting->tap += 1;
	} else if (dq_dqs_setting->clk_delay < 2) {
		dq_dqs_setting->pi = 0;
		dq_dqs_setting->tap = 0;
		dq_dqs_setting->clk_delay += 1;
	} else if (dq_dqs_setting->coarse < 1) {
		dq_dqs_setting->pi = 0;
		dq_dqs_setting->tap = 0;
		dq_dqs_setting->clk_delay -= 1;
		dq_dqs_setting->coarse += 1;
	} else {
		return CB_ERR;
	}
	set_db(s, dq_dqs_setting);
	return CB_SUCCESS;
}

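/* Counterpart of increment_dq_dqs: step the delay back by the smallest amount. */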
static int decrement_dq_dqs(const struct sysinfo *s,
			struct dll_setting *dq_dqs_setting)
{
	u8 max_tap_val = max_tap[s->selected_timings.mem_clk
			- MEM_CLOCK_800MHz];

	if (dq_dqs_setting->pi > 0) {
		dq_dqs_setting->pi -= 1;
	} else if (dq_dqs_setting->tap > 0) {
		dq_dqs_setting->pi = 6;
		dq_dqs_setting->tap -= 1;
	} else if (dq_dqs_setting->clk_delay > 0) {
		dq_dqs_setting->pi = 6;
		dq_dqs_setting->tap = max_tap_val;
		dq_dqs_setting->clk_delay -= 1;
	} else if (dq_dqs_setting->coarse > 0) {
		dq_dqs_setting->pi = 6;
		dq_dqs_setting->tap = max_tap_val;
		dq_dqs_setting->clk_delay += 1;
		dq_dqs_setting->coarse -= 1;
	} else {
		return CB_ERR;
	}
	set_db(s, dq_dqs_setting);
	return CB_SUCCESS;
}


#define WT_PATTERN_SIZE 80

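/* Alternating and walking-bit data patterns used by the write test below */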
static const u32 write_training_schedule[WT_PATTERN_SIZE] = {
	0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
	0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
	0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
	0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
	0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
	0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
	0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
	0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
	0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
	0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
	0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
	0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
	0x03030303, 0x04040404, 0x09090909, 0x10101010,
	0x21212121, 0x40404040, 0x81818181, 0x00000000,
	0x03030303, 0x04040404, 0x09090909, 0x10101010,
	0x21212121, 0x40404040, 0x81818181, 0x00000000,
	0xfdfdfdfd, 0xfafafafa, 0xf7f7f7f7, 0xeeeeeeee,
	0xdfdfdfdf, 0xbebebebe, 0x7f7f7f7f, 0xfefefefe,
	0xfdfdfdfd, 0xfafafafa, 0xf7f7f7f7, 0xeeeeeeee,
	0xdfdfdfdf, 0xbebebebe, 0x7f7f7f7f, 0xfefefefe,
};

enum training_modes {
	SUCCEEDING = 0,
	FAILING = 1
};

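/*
 * Write the pattern schedule to the test address of each populated rank and
 * read it back; returns a bitmask of the byte lanes that returned bad data.
 */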
static u8 test_dq_aligned(const struct sysinfo *s,
			const u8 channel)
{
	u32 address;
	int rank, lane;
	u8 count, count1;
	u8 data[8];
	u8 lane_error = 0;

	FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, channel, rank) {
		address = test_address(channel, rank);
		for (count = 0; count < WT_PATTERN_SIZE; count++) {
			for (count1 = 0; count1 < WT_PATTERN_SIZE; count1++) {
				if ((count1 % 16) == 0)
					MCHBAR32(0xf90) = 1;
				const u32 pattern =
					write_training_schedule[count1];
				write32((u32 *)address + 8 * count1, pattern);
				write32((u32 *)address + 8 * count1 + 4,
					pattern);
			}

			const u32 good = write_training_schedule[count];
			write32(&data[0], read32((u32 *)address + 8 * count));
			write32(&data[4],
				read32((u32 *)address + 8 * count + 4));
			FOR_EACH_BYTELANE(lane) {
				u8 expected = (good >> ((lane % 4) * 8)) & 0xff;
				if (data[lane] != expected)
					lane_error |= 1 << lane;
			}
		}
	}
	return lane_error;
}

#define CONSISTENCY 10

/*
 * This function finds either failing or succeeding writes by increasing DQ.
 * When it has found a failing or succeeding setting, it will increase DQ
 * another 10 times to make sure the result is consistent.
 * This is probably done because lanes cannot be trained independently of
 * each other.
 */
static int find_dq_limit(const struct sysinfo *s, const u8 channel,
			struct dll_setting dq_setting[TOTAL_BYTELANES],
			u8 dq_lim[TOTAL_BYTELANES],
			const enum training_modes expected_result)
{
	int status = CB_SUCCESS;
	int lane;
	u8 test_result;
	u8 pass_count[TOTAL_BYTELANES];
	u8 success_mask = 0xff;

	printk(RAM_DEBUG, "Looking for %s writes on channel %d\n",
		expected_result == FAILING ? "failing" : "succeeding", channel);
	memset(pass_count, 0, sizeof(pass_count));

	while (success_mask) {
		test_result = test_dq_aligned(s, channel);
		FOR_EACH_BYTELANE(lane) {
			if (((test_result >> lane) & 1) != expected_result) {
				status = increment_dq_dqs(s, &dq_setting[lane]);
				dqset(channel, lane, &dq_setting[lane]);
				dq_lim[lane]++;
			} else if (pass_count[lane] < CONSISTENCY) {
				status = increment_dq_dqs(s, &dq_setting[lane]);
				dqset(channel, lane, &dq_setting[lane]);
				dq_lim[lane]++;
				pass_count[lane]++;
			} else if (pass_count[lane] == CONSISTENCY) {
				success_mask &= ~(1 << lane);
			}
			if (status == CB_ERR) {
				printk(BIOS_CRIT, "Could not find a case of %s "
					"writes on CH%d, lane %d\n",
					expected_result == FAILING ? "failing"
					: "succeeding", channel, lane);
				return CB_ERR;
			}
		}
	}
	return CB_SUCCESS;
}

/*
 * This attempts to find the ideal delay for DQ to account for the skew between
 * the DQ and the DQS signal.
 * The training works this way:
 * - start from the DQS delay values (DQ is always later than DQS)
 * - increment the DQ delay until a succeeding write is found on all bytelanes,
 *   on all ranks on a channel, and save these values
 * - again increment the DQ delay until writes start to fail on all bytelanes and
 *   save that value
 * - use the mean between the saved succeeding and failing value
 * - note: bytelanes cannot be trained independently, so the delays need to be
 *   adjusted and tested for all of them at the same time
 */
int do_write_training(struct sysinfo *s)
{
	int i;
	u8 channel, lane;
	u8 dq_lower[TOTAL_BYTELANES];
	u8 dq_upper[TOTAL_BYTELANES];
	struct dll_setting dq_setting[TOTAL_BYTELANES];

	printk(BIOS_DEBUG, "Starting DQ write training\n");

	FOR_EACH_POPULATED_CHANNEL(s->dimms, channel) {
		printk(BIOS_DEBUG, "Doing DQ write training on CH%d\n", channel);

		/* Start all lanes at DQS values */
		FOR_EACH_BYTELANE(lane) {
			dqset(channel, lane, &s->dqs_settings[channel][lane]);
			s->dq_settings[channel][lane] = s->dqs_settings[channel][lane];
		}
		memset(dq_lower, 0, sizeof(dq_lower));
		/* Start from DQS settings */
		memcpy(dq_setting, s->dqs_settings[channel], sizeof(dq_setting));

		if (find_dq_limit(s, channel, dq_setting, dq_lower,
					SUCCEEDING)) {
			printk(BIOS_CRIT,
				"Could not find working lower limit DQ setting\n");
			return CB_ERR;
		}

		memcpy(dq_upper, dq_lower, sizeof(dq_lower));

		if (find_dq_limit(s, channel, dq_setting, dq_upper,
					FAILING)) {
			printk(BIOS_WARNING,
				"Could not find failing upper limit DQ setting\n");
			return CB_ERR;
		}

		FOR_EACH_BYTELANE(lane) {
			dq_lower[lane] -= CONSISTENCY - 1;
			dq_upper[lane] -= CONSISTENCY - 1;
			u8 dq_center = (dq_upper[lane] + dq_lower[lane]) / 2;

			printk(RAM_DEBUG, "Centered value for DQ DLL:"
				" ch%d, lane %d, #steps = %d\n",
				channel, lane, dq_center);
			for (i = 0; i < dq_center; i++) {
				/* Should never happen */
				if (increment_dq_dqs(s, &s->dq_settings[channel][lane])
						== CB_ERR)
					printk(BIOS_ERR,
						"Huh? write training overflowed!!\n");
			}
		}

		/* Reset DQ DLL settings and increment with centered value */
		printk(BIOS_DEBUG, "Final DQ timings on CH%d\n", channel);
		FOR_EACH_BYTELANE(lane) {
			printk(BIOS_DEBUG, "\tlane%d: ", lane);
			print_dll_setting(&s->dq_settings[channel][lane], 1);
			dqset(channel, lane, &s->dq_settings[channel][lane]);
		}
	}
	printk(BIOS_DEBUG, "Done DQ write training\n");
	return CB_SUCCESS;
}

#define RT_PATTERN_SIZE 40

static const u32 read_training_schedule[RT_PATTERN_SIZE] = {
	0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
	0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
	0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
	0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
	0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
	0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
	0x03030303, 0x04040404, 0x09090909, 0x10101010,
	0x21212121, 0x40404040, 0x81818181, 0x00000000,
	0xfdfdfdfd, 0xfafafafa, 0xf7f7f7f7, 0xeeeeeeee,
	0xdfdfdfdf, 0xbebebebe, 0x7f7f7f7f, 0xfefefefe
};

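/* Advance a read DQS delay: pi (0..7) first, then tap (0..14); CB_ERR at the limit */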
static int rt_increment_dqs(struct rt_dqs_setting *setting)
{
	if (setting->pi < 7) {
		setting->pi++;
	} else if (setting->tap < 14) {
		setting->pi = 0;
		setting->tap++;
	} else {
		return CB_ERR;
	}
	return CB_SUCCESS;
}

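/*
 * Read back the pattern previously written to each populated rank and return
 * a bitmask of the byte lanes whose data did not match.
 */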
static u8 test_dqs_aligned(const struct sysinfo *s, const u8 channel)
{
	int i, rank, lane;
	volatile u8 data[8];
	u32 address;
	u8 bytelane_error = 0;

	FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, channel, rank) {
		address = test_address(channel, rank);
		for (i = 0; i < RT_PATTERN_SIZE; i++) {
			const u32 good = read_training_schedule[i];
			write32(&data[0], read32((u32 *)address + i * 8));
			write32(&data[4], read32((u32 *)address + i * 8 + 4));

			FOR_EACH_BYTELANE(lane) {
				if (data[lane] != (good & 0xff))
					bytelane_error |= 1 << lane;
			}
		}
	}
	return bytelane_error;
}

static int rt_find_dqs_limit(struct sysinfo *s, u8 channel,
			struct rt_dqs_setting dqs_setting[TOTAL_BYTELANES],
			u8 dqs_lim[TOTAL_BYTELANES],
			const enum training_modes expected_result)
{
	int lane;
	u8 test_result;
	int status = CB_SUCCESS;

	FOR_EACH_BYTELANE(lane)
		rt_set_dqs(channel, lane, 0, &dqs_setting[lane]);

	while (status == CB_SUCCESS) {
		test_result = test_dqs_aligned(s, channel);
		if (test_result == (expected_result == SUCCEEDING ? 0 : 0xff))
			return CB_SUCCESS;
		FOR_EACH_BYTELANE(lane) {
			if (((test_result >> lane) & 1) != expected_result) {
				status = rt_increment_dqs(&dqs_setting[lane]);
				dqs_lim[lane]++;
				rt_set_dqs(channel, lane, 0, &dqs_setting[lane]);
			}
		}
	}

	if (expected_result == SUCCEEDING) {
		printk(BIOS_CRIT,
			"Could not find RT DQS setting\n");
		return CB_ERR;
	} else {
		printk(RAM_DEBUG,
			"Read succeeded over all DQS"
			" settings, continuing\n");
		return CB_SUCCESS;
	}
}

#define RT_LOOPS 3

/*
 * This attempts to find the ideal delay for DQS on reads (rx).
 * The training works this way:
 * - start from the lowest possible delay (0) on all bytelanes
 * - increment the DQS rx delays until a succeeding read is found on all
 *   bytelanes, on all ranks on a channel, and save these values
 * - again increment the DQS rx delay until reads start to fail on all bytelanes
 *   and save that value
 * - use the mean between the saved succeeding and failing value
 * - the whole procedure is repeated RT_LOOPS times and the per-lane results
 *   are averaged
 * - note0: bytelanes cannot be trained independently, so the delays need to be
 *   adjusted and tested for all of them at the same time
 * - note1: at this stage all ranks effectively use rank0's rt_dqs settings,
 *   but later on their respective settings are used (TODO where is the
 *   'switch' register??). So the results are programmed for all ranks at the
 *   end of the training. Programming all ranks instead of only the populated
 *   ranks seems to be required, most likely because the signals can't really
 *   be generated separately.
 */
int do_read_training(struct sysinfo *s)
{
	int loop, channel, i, lane, rank;
	u32 address, content;
	u8 dqs_lower[TOTAL_BYTELANES];
	u8 dqs_upper[TOTAL_BYTELANES];
	struct rt_dqs_setting dqs_setting[TOTAL_BYTELANES];
	u16 saved_dqs_center[TOTAL_CHANNELS][TOTAL_BYTELANES];

	memset(saved_dqs_center, 0, sizeof(saved_dqs_center));

	printk(BIOS_DEBUG, "Starting DQS read training\n");

	for (loop = 0; loop < RT_LOOPS; loop++) {
		FOR_EACH_POPULATED_CHANNEL(s->dimms, channel) {
			printk(RAM_DEBUG, "Doing DQS read training on CH%d\n",
				channel);

			/* Write pattern to strobe address */
			FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, channel, rank) {
				address = test_address(channel, rank);
				for (i = 0; i < RT_PATTERN_SIZE; i++) {
					content = read_training_schedule[i];
					write32((u32 *)address + 8 * i, content);
					write32((u32 *)address + 8 * i + 4, content);
				}
			}

			memset(dqs_lower, 0, sizeof(dqs_lower));
			memset(&dqs_setting, 0, sizeof(dqs_setting));
			if (rt_find_dqs_limit(s, channel, dqs_setting, dqs_lower,
						SUCCEEDING)) {
				printk(BIOS_CRIT,
					"Could not find working lower limit DQS setting\n");
				return CB_ERR;
			}

			FOR_EACH_BYTELANE(lane)
				dqs_upper[lane] = dqs_lower[lane];

			if (rt_find_dqs_limit(s, channel, dqs_setting, dqs_upper,
						FAILING)) {
				printk(BIOS_CRIT,
					"Could not find failing upper limit DQS setting\n");
				return CB_ERR;
			}

			printk(RAM_DEBUG, "Centered values, loop %d:\n", loop);
			FOR_EACH_BYTELANE(lane) {
				u8 center = (dqs_lower[lane] + dqs_upper[lane]) / 2;
				printk(RAM_DEBUG, "\t lane%d: #%d\n", lane, center);
				saved_dqs_center[channel][lane] += center;
			}
		} /* END FOR_EACH_POPULATED_CHANNEL */
	} /* end RT_LOOPS */

	memset(s->rt_dqs, 0, sizeof(s->rt_dqs));

	FOR_EACH_POPULATED_CHANNEL(s->dimms, channel) {
		printk(BIOS_DEBUG, "Final timings on CH%d:\n", channel);
		FOR_EACH_BYTELANE(lane) {
			saved_dqs_center[channel][lane] /= RT_LOOPS;
			while (saved_dqs_center[channel][lane]--) {
				if (rt_increment_dqs(&s->rt_dqs[channel][lane])
						== CB_ERR)
					/* Should never happen */
					printk(BIOS_ERR,
						"Huh? read training overflowed!!\n");
			}
			/* Later on separate settings for each rank are used,
			   so program all of them */
			FOR_EACH_RANK_IN_CHANNEL(rank)
				rt_set_dqs(channel, lane, rank,
					&s->rt_dqs[channel][lane]);
			printk(BIOS_DEBUG, "\tlane%d: %d.%d\n",
				lane, s->rt_dqs[channel][lane].tap,
				s->rt_dqs[channel][lane].pi);
		}
	}
	printk(BIOS_DEBUG, "Done DQS read training\n");
	return CB_SUCCESS;
}

/* Enable write leveling on selected rank and disable output on other ranks */
static void set_rank_write_level(struct sysinfo *s, u8 channel, u8 config,
			u8 config_rank, u8 target_rank, int wl_enable)
{
	u32 emrs1;

	/* Is shifted by 2 bits later so u8 can be used to reduce size */
	static const u8 emrs1_lut[8][4][4] = { /* [Config][Leveling Rank][Rank] */
		{ /* Config 0: 2R2R */
			{0x11, 0x00, 0x91, 0x00},
			{0x00, 0x11, 0x91, 0x00},
			{0x91, 0x00, 0x11, 0x00},
			{0x91, 0x00, 0x00, 0x11}
		},
		{ /* Config 1: 2R1R */
			{0x11, 0x00, 0x91, 0x00},
			{0x00, 0x11, 0x91, 0x00},
			{0x91, 0x00, 0x11, 0x00},
			{0x00, 0x00, 0x00, 0x00}
		},
		{ /* Config 2: 1R2R */
			{0x11, 0x00, 0x91, 0x00},
			{0x00, 0x00, 0x00, 0x00},
			{0x91, 0x00, 0x11, 0x00},
			{0x91, 0x00, 0x00, 0x11}
		},
		{ /* Config 3: 1R1R */
			{0x11, 0x00, 0x91, 0x00},
			{0x00, 0x00, 0x00, 0x00},
			{0x91, 0x00, 0x11, 0x00},
			{0x00, 0x00, 0x00, 0x00}
		},
		{ /* Config 4: 2R0R */
			{0x11, 0x00, 0x00, 0x00},
			{0x00, 0x11, 0x00, 0x00},
			{0x00, 0x00, 0x00, 0x00},
			{0x00, 0x00, 0x00, 0x00}
		},
		{ /* Config 5: 0R2R */
			{0x00, 0x00, 0x00, 0x00},
			{0x00, 0x00, 0x00, 0x00},
			{0x00, 0x00, 0x11, 0x00},
			{0x00, 0x00, 0x00, 0x11}
		},
		{ /* Config 6: 1R0R */
			{0x11, 0x00, 0x00, 0x00},
			{0x00, 0x00, 0x00, 0x00},
			{0x00, 0x00, 0x00, 0x00},
			{0x00, 0x00, 0x00, 0x00}
		},
		{ /* Config 7: 0R1R */
			{0x00, 0x00, 0x00, 0x00},
			{0x00, 0x00, 0x00, 0x00},
			{0x00, 0x00, 0x11, 0x00},
			{0x00, 0x00, 0x00, 0x00}
		}
	};

	if (wl_enable) {
		printk(RAM_DEBUG, "Entering WL mode\n");
		printk(RAM_DEBUG, "Using WL ODT values\n");
		emrs1 = emrs1_lut[config][target_rank][config_rank];
	} else {
		printk(RAM_DEBUG, "Exiting WL mode\n");
		emrs1 = ddr3_emrs1_rtt_nom_config[s->dimm_config[channel]][config_rank];
	}
	printk(RAM_DEBUG, "Setting ODT for rank%d to ", config_rank);
	switch (emrs1) {
	case 0:
		printk(RAM_DEBUG, "High-Z\n");
		break;
	case 0x11:
		printk(RAM_DEBUG, "40 Ohm\n");
		break;
	case 0x81:
		printk(RAM_DEBUG, "30 Ohm\n");
		break;
	case 0x80:
		printk(RAM_DEBUG, "20 Ohm\n");
		break;
	case 0x10:
		printk(RAM_DEBUG, "120 Ohm\n");
		break;
	case 0x01:
		printk(RAM_DEBUG, "60 Ohm\n");
		break;
	default:
		printk(BIOS_WARNING, "ODT value Undefined!\n");
		break;
	}

	emrs1 <<= 2;
	/* Set output drive strength to 34 Ohm during write levelling */
	emrs1 |= (1 << 1);

	if (wl_enable && (target_rank != config_rank)) {
		printk(RAM_DEBUG, "Disabling output for rank%d\n", config_rank);
		emrs1 |= (1 << 12);
	}
	if (wl_enable && (target_rank == config_rank)) {
		printk(RAM_DEBUG, "Enabling WL for rank%d\n", config_rank);
		emrs1 |= (1 << 7);
	}
	send_jedec_cmd(s, config_rank, channel, EMRS1_CMD, emrs1);
}

#define N_SAMPLES 5

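/*
 * Write to the test address and sample the DQ feedback bit N_SAMPLES times;
 * high_found[lane] counts how often each byte lane sampled high.
 */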
static void sample_dq(const struct sysinfo *s, u8 channel, u8 rank,
			u8 high_found[8]) {
	u32 address = test_address(channel, rank);
	int samples, lane;

	memset(high_found, 0, TOTAL_BYTELANES * sizeof(high_found[0]));
	for (samples = 0; samples < N_SAMPLES; samples++) {
		write32((u32 *)address, 0x12341234);
		write32((u32 *)address + 4, 0x12341234);
		udelay(5);
		FOR_EACH_BYTELANE(lane) {
			u8 dq_high = (MCHBAR8(0x561 + 0x400 * channel
					+ (lane * 4)) >> 7) & 1;
			high_found[lane] += dq_high;
		}
	}
}

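/*
 * Find, per byte lane, the DQS (tx) delay at which the sampled DQ feedback
 * transitions from low to high, starting from the current DQS settings.
 */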
static enum cb_err increment_to_dqs_edge(struct sysinfo *s, u8 channel, u8 rank)
{
	int lane;
	u8 saved_24d;
	struct dll_setting dqs_setting[TOTAL_BYTELANES];
	u8 bytelane_ok = 0;
	u8 dq_sample[TOTAL_BYTELANES];

	memcpy(dqs_setting, s->dqs_settings[channel], sizeof(dqs_setting));
	FOR_EACH_BYTELANE(lane)
		dqsset(channel, lane, &dqs_setting[lane]);

	saved_24d = MCHBAR8(0x24d + 0x400 * channel);

	/* Loop 0: Find DQ sample low, by decreasing */
	while (bytelane_ok != 0xff) {
		sample_dq(s, channel, rank, dq_sample);
		FOR_EACH_BYTELANE(lane) {
			if (bytelane_ok & (1 << lane))
				continue;

			printk(RAM_SPEW, "%d, %d, %02d, %d,"
				" lane%d sample: %d\n",
				dqs_setting[lane].coarse,
				dqs_setting[lane].clk_delay,
				dqs_setting[lane].tap,
				dqs_setting[lane].pi,
				lane,
				dq_sample[lane]);

			if (dq_sample[lane] > 0) {
				if (decrement_dq_dqs(s, &dqs_setting[lane])) {
					printk(BIOS_EMERG,
						"DQS setting channel%d, "
						"lane %d reached a minimum!\n",
						channel, lane);
					return CB_ERR;
				}
			} else {
				bytelane_ok |= (1 << lane);
			}
			dqsset(channel, lane, &dqs_setting[lane]);
		}
	}

	printk(RAM_DEBUG, "DQS settings on PASS #0:\n");
	FOR_EACH_BYTELANE(lane) {
		printk(RAM_DEBUG, "lane %d: ", lane);
		print_dll_setting(&dqs_setting[lane], 0);
	}

	/* Loop 1: Find DQ sample high, by increasing */
	bytelane_ok = 0;
	while (bytelane_ok != 0xff) {
		sample_dq(s, channel, rank, dq_sample);
		FOR_EACH_BYTELANE(lane) {
			if (bytelane_ok & (1 << lane))
				continue;

			printk(RAM_SPEW, "%d, %d, %02d, %d, lane%d sample: %d\n",
				dqs_setting[lane].coarse,
				dqs_setting[lane].clk_delay,
				dqs_setting[lane].tap,
				dqs_setting[lane].pi,
				lane,
				dq_sample[lane]);

			if (dq_sample[lane] == N_SAMPLES) {
				bytelane_ok |= (1 << lane);
			} else {
				if (increment_dq_dqs(s, &dqs_setting[lane])) {
					printk(BIOS_EMERG,
						"DQS setting channel%d, "
						"lane %d reached a maximum!\n",
						channel, lane);
					return CB_ERR;
				}
			}
			dqsset(channel, lane, &dqs_setting[lane]);
		}
	}

	printk(RAM_DEBUG, "DQS settings on PASS #1:\n");
	FOR_EACH_BYTELANE(lane) {
		printk(RAM_DEBUG, "lane %d: ", lane);
		print_dll_setting(&dqs_setting[lane], 0);
	}

	printk(BIOS_DEBUG, "final WL DQS settings on CH%d\n", channel);
	FOR_EACH_BYTELANE(lane) {
		printk(BIOS_DEBUG, "\tlane%d: ", lane);
		print_dll_setting(&dqs_setting[lane], 1);
		s->dqs_settings[channel][lane] = dqs_setting[lane];
	}

	MCHBAR8(0x24d + 0x400 * channel) = saved_24d;
	return CB_SUCCESS;
}

/*
 * DDR3 uses a flyby topology where the clock signal takes a different path
 * than the data signal, to allow for better signal integrity.
 * Therefore the delay on the data signals needs to account for this.
 * This is done by sampling the DQS write (tx) signal back over
 * the DQ signal and looking for delay values where the sample transitions
 * from high to low.
 * Here the following is done:
 * - enable write levelling on the first populated rank
 * - disable output on other populated ranks
 * - start from safe DQS (tx) delays (other transitions can be
 *   found at different starting values but are generally bad)
 * - loop0: decrease DQS (tx) delays until low is sampled,
 *   loop1: increase DQS (tx) delays until high is sampled,
 *   that way we are sure to hit a low-high transition
 * - put all ranks in normal mode of operation again
 * - note: all ranks need to be leveled together
 */
void search_write_leveling(struct sysinfo *s)
{
	int i, ch, count;
	u8 config, rank0, rank1, lane;
	struct dll_setting dq_setting;

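	/* Maps dimm_config to one of the 8 rank configs used by emrs1_lut/odt_force */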
	u8 chanconfig_lut[16] = {0, 6, 4, 6, 7, 3, 1, 3, 5, 2, 0, 2, 7, 3, 1, 3};

	u8 odt_force[8][4] = { /* [Config][leveling rank] */
		{0x5, 0x6, 0x5, 0x9},
		{0x5, 0x6, 0x5, 0x0},
		{0x5, 0x0, 0x5, 0x9},
		{0x5, 0x0, 0x5, 0x0},
		{0x1, 0x2, 0x0, 0x0},
		{0x0, 0x0, 0x4, 0x8},
		{0x1, 0x0, 0x0, 0x0},
		{0x0, 0x0, 0x4, 0x0}
	};

	printk(BIOS_DEBUG, "Starting write levelling.\n");

	FOR_EACH_POPULATED_CHANNEL(s->dimms, ch) {
		printk(BIOS_DEBUG, "\tCH%d\n", ch);
		config = chanconfig_lut[s->dimm_config[ch]];

		MCHBAR8(0x5d8 + 0x400 * ch) =
			MCHBAR8(0x5d8 + 0x400 * ch) & ~0x0e;
		MCHBAR16(0x5c4 + 0x400 * ch) = (MCHBAR16(0x5c4 + 0x400 * ch) &
			~0x3fff) | 0x3fff;
		MCHBAR8(0x265 + 0x400 * ch) =
			MCHBAR8(0x265 + 0x400 * ch) & ~0x1f;
		/* find the first populated rank */
		FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, ch, rank0)
			break;

		/* Enable WL for the first populated rank and disable output
		   for others */
		FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, ch, rank1)
			set_rank_write_level(s, ch, config, rank1, rank0, 1);

		MCHBAR8(0x298 + 2 + 0x400 * ch) =
			(MCHBAR8(0x298 + 2 + 0x400 * ch) & ~0x0f)
			| odt_force[config][rank0];
		MCHBAR8(0x271 + 0x400 * ch) = (MCHBAR8(0x271 + 0x400 * ch)
			& ~0x7e) | 0x4e;
		MCHBAR8(0x5d9 + 0x400 * ch) =
			(MCHBAR8(0x5d9 + 0x400 * ch) & ~0x04) | 0x04;
		MCHBAR32(0x1a0) = (MCHBAR32(0x1a0) & ~0x07ffffff)
			| 0x00014000;

		if (increment_to_dqs_edge(s, ch, rank0))
			die("Write Leveling failed!");

		MCHBAR8(0x298 + 2 + 0x400 * ch) =
			MCHBAR8(0x298 + 2 + 0x400 * ch) & ~0x0f;
		MCHBAR8(0x271 + 0x400 * ch) =
			(MCHBAR8(0x271 + 0x400 * ch) & ~0x7e)
			| 0x0e;
		MCHBAR8(0x5d9 + 0x400 * ch) =
			(MCHBAR8(0x5d9 + 0x400 * ch) & ~0x04);
		MCHBAR32(0x1a0) = (MCHBAR32(0x1a0)
			& ~0x07ffffff) | 0x00555801;

		/* Disable WL on the trained rank */
		set_rank_write_level(s, ch, config, rank0, rank0, 0);
		send_jedec_cmd(s, rank0, ch, NORMALOP_CMD, 1 << 12);

		MCHBAR8(0x5d8 + 0x400 * ch) = (MCHBAR8(0x5d8 + 0x400 * ch)
			& ~0x0e) | 0x0e;
		MCHBAR16(0x5c4 + 0x400 * ch) = (MCHBAR16(0x5c4 + 0x400 * ch)
			& ~0x3fff) | 0x1807;
		MCHBAR8(0x265 + 0x400 * ch) = MCHBAR8(0x265 + 0x400 * ch) & ~0x1f;

		/* Disable write level mode for all ranks */
		FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, ch, rank0)
			set_rank_write_level(s, ch, config, rank0, rank0, 0);
	}

	MCHBAR8(0x5dc) = (MCHBAR8(0x5dc) & ~0x80) | 0x80;

	/* Increment DQ (rx) dll setting by a standard amount past DQS.
	   This is further trained in write training. */
	switch (s->selected_timings.mem_clk) {
	default:
	case MEM_CLOCK_800MHz:
		count = 39;
		break;
	case MEM_CLOCK_1066MHz:
		count = 32;
		break;
	case MEM_CLOCK_1333MHz:
		count = 42;
		break;
	}

	FOR_EACH_POPULATED_CHANNEL_AND_BYTELANE(s->dimms, ch, lane) {
		dq_setting = s->dqs_settings[ch][lane];
		for (i = 0; i < count; i++)
			if (increment_dq_dqs(s, &dq_setting))
				die("Can't further increase DQ past DQS delay");
		dqset(ch, lane, &dq_setting);
	}

	printk(BIOS_DEBUG, "Done write levelling.\n");
}