/* SPDX-License-Identifier: GPL-2.0-or-later */

#include <device/mmio.h>
#include <console/console.h>
#include <delay.h>
#include <string.h>
#include <types.h>
#include "raminit.h"
#include "x4x.h"

static void print_dll_setting(const struct dll_setting *dll_setting,
			      u8 default_verbose)
{
	u8 debug_level = default_verbose ? BIOS_DEBUG : RAM_DEBUG;

	printk(debug_level, "%d.%d.%d.%d:%d.%d\n", dll_setting->coarse,
	       dll_setting->clk_delay, dll_setting->tap,
	       dll_setting->pi, dll_setting->db_en,
	       dll_setting->db_sel);
}

struct db_limit {
	u8 tap0;
	u8 tap1;
	u8 pi0;
	u8 pi1;
};

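/*
 * Select the "db" (presumably delay buffer) enable/select bits for a DQ/DQS
 * DLL setting. The tap/pi thresholds below appear to mark the points where
 * the total delay needs a different buffer range for the given memory clock;
 * this is inferred from the code, not from documentation.
 */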
static void set_db(const struct sysinfo *s, struct dll_setting *dq_dqs_setting)
{
	struct db_limit limit;

	switch (s->selected_timings.mem_clk) {
	default:
	case MEM_CLOCK_800MHz:
		limit.tap0 = 3;
		limit.tap1 = 10;
		limit.pi0 = 2;
		limit.pi1 = 3;
		break;
	case MEM_CLOCK_1066MHz:
		limit.tap0 = 2;
		limit.tap1 = 8;
		limit.pi0 = 6;
		limit.pi1 = 7;
		break;
	case MEM_CLOCK_1333MHz:
		limit.tap0 = 3;
		limit.tap1 = 11;
		/* TO CHECK: Might be reversed, since this makes little sense */
		limit.pi0 = 6;
		limit.pi1 = 4;
		break;
	}

	if (dq_dqs_setting->tap < limit.tap0) {
		dq_dqs_setting->db_en = 1;
		dq_dqs_setting->db_sel = 1;
	} else if ((dq_dqs_setting->tap == limit.tap0)
			&& (dq_dqs_setting->pi < limit.pi0)) {
		dq_dqs_setting->db_en = 1;
		dq_dqs_setting->db_sel = 1;
	} else if (dq_dqs_setting->tap < limit.tap1) {
		dq_dqs_setting->db_en = 0;
		dq_dqs_setting->db_sel = 0;
	} else if ((dq_dqs_setting->tap == limit.tap1)
			&& (dq_dqs_setting->pi < limit.pi1)) {
		dq_dqs_setting->db_en = 0;
		dq_dqs_setting->db_sel = 0;
	} else {
		dq_dqs_setting->db_en = 1;
		dq_dqs_setting->db_sel = 0;
	}
}

static const u8 max_tap[3] = {12, 10, 13};

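/*
 * Advance a DQ/DQS DLL delay by the smallest possible step. The fields act
 * roughly like digits of a counter: pi (0..6) rolls over into tap, tap rolls
 * over into clk_delay and, ultimately, coarse, with the per-frequency tap
 * limit taken from max_tap[] above. For example, at 800MHz a setting with
 * pi = 6 and tap = 12 rolls over to pi = 0, tap = 0 with clk_delay
 * incremented (as long as clk_delay is still below its limit).
 * Returns CB_ERR once the delay cannot be increased any further.
 */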
static int increment_dq_dqs(const struct sysinfo *s,
			    struct dll_setting *dq_dqs_setting)
{
	u8 max_tap_val = max_tap[s->selected_timings.mem_clk
			- MEM_CLOCK_800MHz];

	if (dq_dqs_setting->pi < 6) {
		dq_dqs_setting->pi += 1;
	} else if (dq_dqs_setting->tap < max_tap_val) {
		dq_dqs_setting->pi = 0;
		dq_dqs_setting->tap += 1;
	} else if (dq_dqs_setting->clk_delay < 2) {
		dq_dqs_setting->pi = 0;
		dq_dqs_setting->tap = 0;
		dq_dqs_setting->clk_delay += 1;
	} else if (dq_dqs_setting->coarse < 1) {
		dq_dqs_setting->pi = 0;
		dq_dqs_setting->tap = 0;
		dq_dqs_setting->clk_delay -= 1;
		dq_dqs_setting->coarse += 1;
	} else {
		return CB_ERR;
	}
	set_db(s, dq_dqs_setting);
	return CB_SUCCESS;
}

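/* The inverse of increment_dq_dqs(): step the delay down by one pi step,
   borrowing from tap, clk_delay and coarse as needed. Returns CB_ERR when
   the setting is already at its minimum. */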
static int decrement_dq_dqs(const struct sysinfo *s,
			    struct dll_setting *dq_dqs_setting)
{
	u8 max_tap_val = max_tap[s->selected_timings.mem_clk
			- MEM_CLOCK_800MHz];

	if (dq_dqs_setting->pi > 0) {
		dq_dqs_setting->pi -= 1;
	} else if (dq_dqs_setting->tap > 0) {
		dq_dqs_setting->pi = 6;
		dq_dqs_setting->tap -= 1;
	} else if (dq_dqs_setting->clk_delay > 0) {
		dq_dqs_setting->pi = 6;
		dq_dqs_setting->tap = max_tap_val;
		dq_dqs_setting->clk_delay -= 1;
	} else if (dq_dqs_setting->coarse > 0) {
		dq_dqs_setting->pi = 6;
		dq_dqs_setting->tap = max_tap_val;
		dq_dqs_setting->clk_delay += 1;
		dq_dqs_setting->coarse -= 1;
	} else {
		return CB_ERR;
	}
	set_db(s, dq_dqs_setting);
	return CB_SUCCESS;
}

#define WT_PATTERN_SIZE 80

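/* Data patterns written during write training. Presumably a mix of solid,
   alternating and walking-bit values so that inter-symbol interference and
   simultaneous switching on the byte lanes are exercised. */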
static const u32 write_training_schedule[WT_PATTERN_SIZE] = {
	0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
	0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
	0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
	0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
	0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
	0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
	0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
	0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
	0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
	0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
	0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
	0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
	0x03030303, 0x04040404, 0x09090909, 0x10101010,
	0x21212121, 0x40404040, 0x81818181, 0x00000000,
	0x03030303, 0x04040404, 0x09090909, 0x10101010,
	0x21212121, 0x40404040, 0x81818181, 0x00000000,
	0xfdfdfdfd, 0xfafafafa, 0xf7f7f7f7, 0xeeeeeeee,
	0xdfdfdfdf, 0xbebebebe, 0x7f7f7f7f, 0xfefefefe,
	0xfdfdfdfd, 0xfafafafa, 0xf7f7f7f7, 0xeeeeeeee,
	0xdfdfdfdf, 0xbebebebe, 0x7f7f7f7f, 0xfefefefe,
};

enum training_modes {
	SUCCEEDING = 0,
	FAILING = 1
};

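/*
 * For every populated rank on this channel: rewrite the whole training
 * schedule to the rank's test address, then read one 8-byte chunk back per
 * pass. Returns a bitmask with a bit set for each byte lane that returned
 * unexpected data.
 */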
static u8 test_dq_aligned(const struct sysinfo *s,
			  const u8 channel)
{
	u32 address;
	int rank, lane;
	u8 count, count1;
	u8 data[8];
	u8 lane_error = 0;

	FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, channel, rank) {
		address = test_address(channel, rank);
		for (count = 0; count < WT_PATTERN_SIZE; count++) {
			for (count1 = 0; count1 < WT_PATTERN_SIZE; count1++) {
				if ((count1 % 16) == 0)
					MCHBAR32(0xf90) = 1;
				const u32 pattern =
					write_training_schedule[count1];
				write32((u32 *)address + 8 * count1, pattern);
				write32((u32 *)address + 8 * count1 + 4,
					pattern);
			}

			const u32 good = write_training_schedule[count];
			write32(&data[0], read32((u32 *)address + 8 * count));
			write32(&data[4],
				read32((u32 *)address + 8 * count + 4));
			FOR_EACH_BYTELANE(lane) {
				u8 expected = (good >> ((lane % 4) * 8)) & 0xff;
				if (data[lane] != expected)
					lane_error |= 1 << lane;
			}
		}
	}
	return lane_error;
}

#define CONSISTENCY 10

/*
 * This function finds either failing or succeeding writes by increasing the
 * DQ delay. When it has found a failing or succeeding setting, it increases
 * DQ another CONSISTENCY (10) times to make sure the result is consistent.
 * This is probably done because lanes cannot be trained independently of
 * each other.
 */
static int find_dq_limit(const struct sysinfo *s, const u8 channel,
			 struct dll_setting dq_setting[TOTAL_BYTELANES],
			 u8 dq_lim[TOTAL_BYTELANES],
			 const enum training_modes expected_result)
{
	int status = CB_SUCCESS;
	int lane;
	u8 test_result;
	u8 pass_count[TOTAL_BYTELANES];
	u8 success_mask = 0xff;

	printk(RAM_DEBUG, "Looking for %s writes on channel %d\n",
	       expected_result == FAILING ? "failing" : "succeeding", channel);
	memset(pass_count, 0, sizeof(pass_count));

	while (success_mask) {
		test_result = test_dq_aligned(s, channel);
		FOR_EACH_BYTELANE(lane) {
			if (((test_result >> lane) & 1) != expected_result) {
				status = increment_dq_dqs(s, &dq_setting[lane]);
				dqset(channel, lane, &dq_setting[lane]);
				dq_lim[lane]++;
			} else if (pass_count[lane] < CONSISTENCY) {
				status = increment_dq_dqs(s, &dq_setting[lane]);
				dqset(channel, lane, &dq_setting[lane]);
				dq_lim[lane]++;
				pass_count[lane]++;
			} else if (pass_count[lane] == CONSISTENCY) {
				success_mask &= ~(1 << lane);
			}
			if (status == CB_ERR) {
				printk(BIOS_CRIT, "Could not find a case of %s "
				       "writes on CH%d, lane %d\n",
				       expected_result == FAILING ? "failing"
				       : "succeeding", channel, lane);
				return CB_ERR;
			}
		}
	}
	return CB_SUCCESS;
}

/*
 * This attempts to find the ideal delay for DQ, to account for the skew
 * between the DQ and the DQS signals.
 * The training works this way:
 * - start from the DQS delay values (DQ is always later than DQS)
 * - increment the DQ delay until a succeeding write is found on all bytelanes,
 *   on all ranks of a channel, and save these values
 * - increment the DQ delay further until writes start to fail on all bytelanes,
 *   and save that value
 * - use the mean between the saved succeeding and failing values
 * - note: bytelanes cannot be trained independently, so the delays need to be
 *   adjusted and tested for all of them at the same time
 */
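/*
 * Worked example (hypothetical numbers): if, after subtracting the extra
 * CONSISTENCY - 1 confirmation steps, a lane first succeeded 12 increments
 * past the DQS-derived starting point and first failed after 30 increments,
 * the DQ delay programmed below ends up (12 + 30) / 2 = 21 increments past
 * that starting point.
 */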
int do_write_training(struct sysinfo *s)
{
	int i;
	u8 channel, lane;
	u8 dq_lower[TOTAL_BYTELANES];
	u8 dq_upper[TOTAL_BYTELANES];
	struct dll_setting dq_setting[TOTAL_BYTELANES];

	printk(BIOS_DEBUG, "Starting DQ write training\n");

	FOR_EACH_POPULATED_CHANNEL(s->dimms, channel) {
		printk(BIOS_DEBUG, "Doing DQ write training on CH%d\n", channel);

		/* Start all lanes at DQS values */
		FOR_EACH_BYTELANE(lane) {
			dqset(channel, lane, &s->dqs_settings[channel][lane]);
			s->dq_settings[channel][lane] = s->dqs_settings[channel][lane];
		}
		memset(dq_lower, 0, sizeof(dq_lower));
		/* Start from DQS settings */
		memcpy(dq_setting, s->dqs_settings[channel], sizeof(dq_setting));

		if (find_dq_limit(s, channel, dq_setting, dq_lower,
					SUCCEEDING)) {
			printk(BIOS_CRIT,
			       "Could not find working lower limit DQ setting\n");
			return CB_ERR;
		}

		memcpy(dq_upper, dq_lower, sizeof(dq_lower));

		if (find_dq_limit(s, channel, dq_setting, dq_upper,
					FAILING)) {
			printk(BIOS_WARNING,
			       "Could not find failing upper limit DQ setting\n");
			return CB_ERR;
		}

		FOR_EACH_BYTELANE(lane) {
			dq_lower[lane] -= CONSISTENCY - 1;
			dq_upper[lane] -= CONSISTENCY - 1;
			u8 dq_center = (dq_upper[lane] + dq_lower[lane]) / 2;

			printk(RAM_DEBUG, "Centered value for DQ DLL:"
			       " ch%d, lane %d, #steps = %d\n",
			       channel, lane, dq_center);
			for (i = 0; i < dq_center; i++) {
				/* Should never happen */
				if (increment_dq_dqs(s, &s->dq_settings[channel][lane])
						== CB_ERR)
					printk(BIOS_ERR,
					       "Huh? write training overflowed!!\n");
			}
		}

		/* Reset DQ DLL settings and increment with centered value */
		printk(BIOS_DEBUG, "Final DQ timings on CH%d\n", channel);
		FOR_EACH_BYTELANE(lane) {
			printk(BIOS_DEBUG, "\tlane%d: ", lane);
			print_dll_setting(&s->dq_settings[channel][lane], 1);
			dqset(channel, lane, &s->dq_settings[channel][lane]);
		}
	}
	printk(BIOS_DEBUG, "Done DQ write training\n");
	return CB_SUCCESS;
}

#define RT_PATTERN_SIZE 40

static const u32 read_training_schedule[RT_PATTERN_SIZE] = {
	0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
	0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
	0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
	0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
	0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
	0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
	0x03030303, 0x04040404, 0x09090909, 0x10101010,
	0x21212121, 0x40404040, 0x81818181, 0x00000000,
	0xfdfdfdfd, 0xfafafafa, 0xf7f7f7f7, 0xeeeeeeee,
	0xdfdfdfdf, 0xbebebebe, 0x7f7f7f7f, 0xfefefefe
};

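/* Read DQS delays only have a pi (0..7) and a tap (0..14) field; step by one
   pi, rolling over into tap, and return CB_ERR at the maximum setting. */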
static int rt_increment_dqs(struct rt_dqs_setting *setting)
{
	if (setting->pi < 7) {
		setting->pi++;
	} else if (setting->tap < 14) {
		setting->pi = 0;
		setting->tap++;
	} else {
		return CB_ERR;
	}
	return CB_SUCCESS;
}

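/*
 * Read back the pattern previously written to every populated rank on this
 * channel and return a bitmask with a bit set for each byte lane whose data
 * does not match the read training schedule.
 */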
static u8 test_dqs_aligned(const struct sysinfo *s, const u8 channel)
{
	int i, rank, lane;
	volatile u8 data[8];
	u32 address;
	u8 bytelane_error = 0;

	FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, channel, rank) {
		address = test_address(channel, rank);
		for (i = 0; i < RT_PATTERN_SIZE; i++) {
			const u32 good = read_training_schedule[i];
			write32(&data[0], read32((u32 *)address + i * 8));
			write32(&data[4], read32((u32 *)address + i * 8 + 4));

			FOR_EACH_BYTELANE(lane) {
				if (data[lane] != (good & 0xff))
					bytelane_error |= 1 << lane;
			}
		}
	}
	return bytelane_error;
}

static int rt_find_dqs_limit(struct sysinfo *s, u8 channel,
			     struct rt_dqs_setting dqs_setting[TOTAL_BYTELANES],
			     u8 dqs_lim[TOTAL_BYTELANES],
			     const enum training_modes expected_result)
{
	int lane;
	u8 test_result;
	int status = CB_SUCCESS;

	FOR_EACH_BYTELANE(lane)
		rt_set_dqs(channel, lane, 0, &dqs_setting[lane]);

	while (status == CB_SUCCESS) {
		test_result = test_dqs_aligned(s, channel);
		if (test_result == (expected_result == SUCCEEDING ? 0 : 0xff))
			return CB_SUCCESS;
		FOR_EACH_BYTELANE(lane) {
			if (((test_result >> lane) & 1) != expected_result) {
				status = rt_increment_dqs(&dqs_setting[lane]);
				dqs_lim[lane]++;
				rt_set_dqs(channel, lane, 0, &dqs_setting[lane]);
			}
		}
	}

	if (expected_result == SUCCEEDING) {
		printk(BIOS_CRIT,
		       "Could not find RT DQS setting\n");
		return CB_ERR;
	} else {
		printk(RAM_DEBUG,
		       "Read succeeded over all DQS"
		       " settings, continuing\n");
		return CB_SUCCESS;
	}
}

#define RT_LOOPS 3

/*
 * This attempts to find the ideal delay for DQS on reads (rx).
 * The training works this way:
 * - start from the lowest possible delay (0) on all bytelanes
 * - increment the DQS rx delays until a succeeding read is found on all
 *   bytelanes, on all ranks of a channel, and save these values
 * - increment the DQS rx delays further until reads start to fail on all
 *   bytelanes, and save that value
 * - use the mean between the saved succeeding and failing values
 * - note0: bytelanes cannot be trained independently, so the delays need to be
 *   adjusted and tested for all of them at the same time
 * - note1: at this stage all ranks effectively use rank0's rt_dqs settings,
 *   but later on their respective settings are used (TODO where is the
 *   'switch' register??). Therefore the results are programmed for all ranks
 *   at the end of the training. Programming all ranks instead of only the
 *   populated ones seems to be required, most likely because the signals
 *   can't really be generated separately.
 */
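/*
 * The centering below is repeated RT_LOOPS times and the per-lane results are
 * averaged. Hypothetical example: centers of 20, 22 and 21 pi/tap steps over
 * the three loops give (20 + 22 + 21) / 3 = 21 steps programmed from the
 * zero-delay setting.
 */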
int do_read_training(struct sysinfo *s)
{
	int loop, channel, i, lane, rank;
	u32 address, content;
	u8 dqs_lower[TOTAL_BYTELANES];
	u8 dqs_upper[TOTAL_BYTELANES];
	struct rt_dqs_setting dqs_setting[TOTAL_BYTELANES];
	u16 saved_dqs_center[TOTAL_CHANNELS][TOTAL_BYTELANES];

	memset(saved_dqs_center, 0, sizeof(saved_dqs_center));

	printk(BIOS_DEBUG, "Starting DQS read training\n");

	for (loop = 0; loop < RT_LOOPS; loop++) {
		FOR_EACH_POPULATED_CHANNEL(s->dimms, channel) {
			printk(RAM_DEBUG, "Doing DQS read training on CH%d\n",
			       channel);

			/* Write pattern to strobe address */
			FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, channel, rank) {
				address = test_address(channel, rank);
				for (i = 0; i < RT_PATTERN_SIZE; i++) {
					content = read_training_schedule[i];
					write32((u32 *)address + 8 * i, content);
					write32((u32 *)address + 8 * i + 4, content);
				}
			}

			memset(dqs_lower, 0, sizeof(dqs_lower));
			memset(&dqs_setting, 0, sizeof(dqs_setting));
			if (rt_find_dqs_limit(s, channel, dqs_setting, dqs_lower,
						SUCCEEDING)) {
				printk(BIOS_CRIT,
				       "Could not find working lower limit DQS setting\n");
				return CB_ERR;
			}

			FOR_EACH_BYTELANE(lane)
				dqs_upper[lane] = dqs_lower[lane];

			if (rt_find_dqs_limit(s, channel, dqs_setting, dqs_upper,
						FAILING)) {
				printk(BIOS_CRIT,
				       "Could not find failing upper limit DQS setting\n");
				return CB_ERR;
			}

			printk(RAM_DEBUG, "Centered values, loop %d:\n", loop);
			FOR_EACH_BYTELANE(lane) {
				u8 center = (dqs_lower[lane] + dqs_upper[lane]) / 2;
				printk(RAM_DEBUG, "\t lane%d: #%d\n", lane, center);
				saved_dqs_center[channel][lane] += center;
			}
		} /* END FOR_EACH_POPULATED_CHANNEL */
	} /* end RT_LOOPS */

	memset(s->rt_dqs, 0, sizeof(s->rt_dqs));

	FOR_EACH_POPULATED_CHANNEL(s->dimms, channel) {
		printk(BIOS_DEBUG, "Final timings on CH%d:\n", channel);
		FOR_EACH_BYTELANE(lane) {
			saved_dqs_center[channel][lane] /= RT_LOOPS;
			while (saved_dqs_center[channel][lane]--) {
				if (rt_increment_dqs(&s->rt_dqs[channel][lane])
						== CB_ERR)
					/* Should never happen */
					printk(BIOS_ERR,
					       "Huh? read training overflowed!!\n");
			}
			/* Later on, separate settings for each rank are used,
			   so program all of them */
			FOR_EACH_RANK_IN_CHANNEL(rank)
				rt_set_dqs(channel, lane, rank,
					   &s->rt_dqs[channel][lane]);
			printk(BIOS_DEBUG, "\tlane%d: %d.%d\n",
			       lane, s->rt_dqs[channel][lane].tap,
			       s->rt_dqs[channel][lane].pi);
		}
	}
	printk(BIOS_DEBUG, "Done DQS read training\n");
	return CB_SUCCESS;
}

/* Enable write leveling on selected rank and disable output on other ranks */
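/*
 * During write leveling, MR1 (EMRS1) is reprogrammed per rank: the LUT below
 * holds per-rank termination (Rtt_Nom) bit patterns (before the << 2 shift
 * applied further down), bit 7 of the final value enables write leveling on
 * the rank being trained and bit 12 disables the outputs of the other ranks.
 * The ohm values printed in the switch statement reflect the DDR3 Rtt_Nom
 * encoding; treat this note as a summary of what the code does rather than
 * chipset documentation.
 */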
static void set_rank_write_level(struct sysinfo *s, u8 channel, u8 config,
				 u8 config_rank, u8 target_rank, int wl_enable)
{
	u32 emrs1;

	/* Shifted left by 2 bits later, so a u8 can be used to reduce size */
	static const u8 emrs1_lut[8][4][4] = { /* [Config][Leveling Rank][Rank] */
		{ /* Config 0: 2R2R */
			{0x11, 0x00, 0x91, 0x00},
			{0x00, 0x11, 0x91, 0x00},
			{0x91, 0x00, 0x11, 0x00},
			{0x91, 0x00, 0x00, 0x11}
		},
		{ /* Config 1: 2R1R */
			{0x11, 0x00, 0x91, 0x00},
			{0x00, 0x11, 0x91, 0x00},
			{0x91, 0x00, 0x11, 0x00},
			{0x00, 0x00, 0x00, 0x00}
		},
		{ /* Config 2: 1R2R */
			{0x11, 0x00, 0x91, 0x00},
			{0x00, 0x00, 0x00, 0x00},
			{0x91, 0x00, 0x11, 0x00},
			{0x91, 0x00, 0x00, 0x11}
		},
		{ /* Config 3: 1R1R */
			{0x11, 0x00, 0x91, 0x00},
			{0x00, 0x00, 0x00, 0x00},
			{0x91, 0x00, 0x11, 0x00},
			{0x00, 0x00, 0x00, 0x00}
		},
		{ /* Config 4: 2R0R */
			{0x11, 0x00, 0x00, 0x00},
			{0x00, 0x11, 0x00, 0x00},
			{0x00, 0x00, 0x00, 0x00},
			{0x00, 0x00, 0x00, 0x00}
		},
		{ /* Config 5: 0R2R */
			{0x00, 0x00, 0x00, 0x00},
			{0x00, 0x00, 0x00, 0x00},
			{0x00, 0x00, 0x11, 0x00},
			{0x00, 0x00, 0x00, 0x11}
		},
		{ /* Config 6: 1R0R */
			{0x11, 0x00, 0x00, 0x00},
			{0x00, 0x00, 0x00, 0x00},
			{0x00, 0x00, 0x00, 0x00},
			{0x00, 0x00, 0x00, 0x00}
		},
		{ /* Config 7: 0R1R */
			{0x00, 0x00, 0x00, 0x00},
			{0x00, 0x00, 0x00, 0x00},
			{0x00, 0x00, 0x11, 0x00},
			{0x00, 0x00, 0x00, 0x00}
		}
	};

	if (wl_enable) {
		printk(RAM_DEBUG, "Entering WL mode\n");
		printk(RAM_DEBUG, "Using WL ODT values\n");
		emrs1 = emrs1_lut[config][target_rank][config_rank];
	} else {
		printk(RAM_DEBUG, "Exiting WL mode\n");
		emrs1 = ddr3_emrs1_rtt_nom_config[s->dimm_config[channel]][config_rank];
	}
	printk(RAM_DEBUG, "Setting ODT for rank%d to ", config_rank);
	switch (emrs1) {
	case 0:
		printk(RAM_DEBUG, "High-Z\n");
		break;
	case 0x11:
		printk(RAM_DEBUG, "40 Ohm\n");
		break;
	case 0x81:
		printk(RAM_DEBUG, "30 Ohm\n");
		break;
	case 0x80:
		printk(RAM_DEBUG, "20 Ohm\n");
		break;
	case 0x10:
		printk(RAM_DEBUG, "120 Ohm\n");
		break;
	case 0x01:
		printk(RAM_DEBUG, "60 Ohm\n");
		break;
	default:
		printk(BIOS_WARNING, "ODT value Undefined!\n");
		break;
	}

	emrs1 <<= 2;
	/* Set output drive strength to 34 Ohm during write levelling */
	emrs1 |= (1 << 1);

	if (wl_enable && (target_rank != config_rank)) {
		printk(RAM_DEBUG, "Disabling output for rank%d\n", config_rank);
		emrs1 |= (1 << 12);
	}
	if (wl_enable && (target_rank == config_rank)) {
		printk(RAM_DEBUG, "Enabling WL for rank%d\n", config_rank);
		emrs1 |= (1 << 7);
	}
	send_jedec_cmd(s, config_rank, channel, EMRS1_CMD, emrs1);
}

#define N_SAMPLES 5

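/*
 * Repeatedly write a dummy pattern and sample the DQ feedback bit in the
 * per-channel MCHBAR register at 0x561 + 0x400 * channel + 4 * lane for each
 * byte lane, counting in high_found[] how many of the N_SAMPLES reads
 * returned a high level.
 */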
static void sample_dq(const struct sysinfo *s, u8 channel, u8 rank,
		      u8 high_found[8])
{
	u32 address = test_address(channel, rank);
	int samples, lane;

	memset(high_found, 0, TOTAL_BYTELANES * sizeof(high_found[0]));
	for (samples = 0; samples < N_SAMPLES; samples++) {
		write32((u32 *)address, 0x12341234);
		write32((u32 *)address + 4, 0x12341234);
		udelay(5);
		FOR_EACH_BYTELANE(lane) {
			u8 dq_high = (MCHBAR8(0x561 + 0x400 * channel
					      + (lane * 4)) >> 7) & 1;
			high_found[lane] += dq_high;
		}
	}
}

static enum cb_err increment_to_dqs_edge(struct sysinfo *s, u8 channel, u8 rank)
{
	int lane;
	u8 saved_24d;
	struct dll_setting dqs_setting[TOTAL_BYTELANES];
	u8 bytelane_ok = 0;
	u8 dq_sample[TOTAL_BYTELANES];

	memcpy(dqs_setting, s->dqs_settings[channel], sizeof(dqs_setting));
	FOR_EACH_BYTELANE(lane)
		dqsset(channel, lane, &dqs_setting[lane]);

	saved_24d = MCHBAR8(0x24d + 0x400 * channel);

	/* Loop 0: Find DQ sample low, by decreasing */
	while (bytelane_ok != 0xff) {
		sample_dq(s, channel, rank, dq_sample);
		FOR_EACH_BYTELANE(lane) {
			if (bytelane_ok & (1 << lane))
				continue;

			printk(RAM_SPEW, "%d, %d, %02d, %d,"
			       " lane%d sample: %d\n",
			       dqs_setting[lane].coarse,
			       dqs_setting[lane].clk_delay,
			       dqs_setting[lane].tap,
			       dqs_setting[lane].pi,
			       lane,
			       dq_sample[lane]);

			if (dq_sample[lane] > 0) {
				if (decrement_dq_dqs(s, &dqs_setting[lane])) {
					printk(BIOS_EMERG,
					       "DQS setting channel%d, "
					       "lane %d reached a minimum!\n",
					       channel, lane);
					return CB_ERR;
				}
			} else {
				bytelane_ok |= (1 << lane);
			}
			dqsset(channel, lane, &dqs_setting[lane]);
		}
	}

	printk(RAM_DEBUG, "DQS settings on PASS #0:\n");
	FOR_EACH_BYTELANE(lane) {
		printk(RAM_DEBUG, "lane %d: ", lane);
		print_dll_setting(&dqs_setting[lane], 0);
	}

	/* Loop 1: Find DQ sample high, by increasing */
	bytelane_ok = 0;
	while (bytelane_ok != 0xff) {
		sample_dq(s, channel, rank, dq_sample);
		FOR_EACH_BYTELANE(lane) {
			if (bytelane_ok & (1 << lane))
				continue;

			printk(RAM_SPEW, "%d, %d, %02d, %d, lane%d sample: %d\n",
			       dqs_setting[lane].coarse,
			       dqs_setting[lane].clk_delay,
			       dqs_setting[lane].tap,
			       dqs_setting[lane].pi,
			       lane,
			       dq_sample[lane]);

			if (dq_sample[lane] == N_SAMPLES) {
				bytelane_ok |= (1 << lane);
			} else {
				if (increment_dq_dqs(s, &dqs_setting[lane])) {
					printk(BIOS_EMERG,
					       "DQS setting channel%d, "
					       "lane %d reached a maximum!\n",
					       channel, lane);
					return CB_ERR;
				}
			}
			dqsset(channel, lane, &dqs_setting[lane]);
		}
	}

	printk(RAM_DEBUG, "DQS settings on PASS #1:\n");
	FOR_EACH_BYTELANE(lane) {
		printk(RAM_DEBUG, "lane %d: ", lane);
		print_dll_setting(&dqs_setting[lane], 0);
	}

	printk(BIOS_DEBUG, "final WL DQS settings on CH%d\n", channel);
	FOR_EACH_BYTELANE(lane) {
		printk(BIOS_DEBUG, "\tlane%d: ", lane);
		print_dll_setting(&dqs_setting[lane], 1);
		s->dqs_settings[channel][lane] = dqs_setting[lane];
	}

	MCHBAR8(0x24d + 0x400 * channel) = saved_24d;
	return CB_SUCCESS;
}

/*
 * DDR3 uses a fly-by topology, where the clock signal takes a different path
 * than the data signals, to allow for better signal integrity.
 * Therefore the delay on the data signals needs to account for this.
 * This is done by sampling the DQS write (tx) signal back over the DQ
 * signal and looking for the delay values where the sampled value
 * transitions from low to high.
 * Here the following is done:
 * - Enable write levelling on the first populated rank.
 * - Disable output on other populated ranks.
 * - Start from safe DQS (tx) delays. Other transitions can be
 *   found at different starting values but are generally bad.
 * - loop0: decrease DQS (tx) delays until low is sampled,
 *   loop1: increase DQS (tx) delays until high is sampled.
 *   This way, we are sure to have hit a low-to-high transition.
 * - Put all ranks in normal mode of operation again.
 * Note: All ranks need to be leveled together.
 */
void search_write_leveling(struct sysinfo *s)
{
	int i, ch, count;
	u8 config, rank0, rank1, lane;
	struct dll_setting dq_setting;

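	/* Map the channel's dimm_config to one of the rank-population configs
	   (2R2R, 2R1R, ...) used as the first index into emrs1_lut above and
	   odt_force below; this mapping is inferred from how 'config' is
	   used, not from documentation. */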
	const u8 chanconfig_lut[16] = {0, 6, 4, 6, 7, 3, 1, 3, 5, 2, 0, 2, 7, 3, 1, 3};

	const u8 odt_force[8][4] = { /* [Config][leveling rank] */
		{0x5, 0x6, 0x5, 0x9},
		{0x5, 0x6, 0x5, 0x0},
		{0x5, 0x0, 0x5, 0x9},
		{0x5, 0x0, 0x5, 0x0},
		{0x1, 0x2, 0x0, 0x0},
		{0x0, 0x0, 0x4, 0x8},
		{0x1, 0x0, 0x0, 0x0},
		{0x0, 0x0, 0x4, 0x0}
	};

	printk(BIOS_DEBUG, "Starting write levelling.\n");

	FOR_EACH_POPULATED_CHANNEL(s->dimms, ch) {
		printk(BIOS_DEBUG, "\tCH%d\n", ch);
		config = chanconfig_lut[s->dimm_config[ch]];

		MCHBAR8(0x5d8 + 0x400 * ch) =
			MCHBAR8(0x5d8 + 0x400 * ch) & ~0x0e;
		MCHBAR16(0x5c4 + 0x400 * ch) = (MCHBAR16(0x5c4 + 0x400 * ch) &
						~0x3fff) | 0x3fff;
		MCHBAR8(0x265 + 0x400 * ch) =
			MCHBAR8(0x265 + 0x400 * ch) & ~0x1f;
		/* find the first populated rank */
		FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, ch, rank0)
			break;

		/* Enable WL for the first populated rank and disable output
		   for others */
		FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, ch, rank1)
			set_rank_write_level(s, ch, config, rank1, rank0, 1);

		MCHBAR8(0x298 + 2 + 0x400 * ch) =
			(MCHBAR8(0x298 + 2 + 0x400 * ch) & ~0x0f)
			| odt_force[config][rank0];
		MCHBAR8(0x271 + 0x400 * ch) = (MCHBAR8(0x271 + 0x400 * ch)
					       & ~0x7e) | 0x4e;
		MCHBAR8(0x5d9 + 0x400 * ch) =
			(MCHBAR8(0x5d9 + 0x400 * ch) & ~0x04) | 0x04;
		MCHBAR32(0x1a0) = (MCHBAR32(0x1a0) & ~0x07ffffff)
			| 0x00014000;

		if (increment_to_dqs_edge(s, ch, rank0))
			die("Write Leveling failed!");

		MCHBAR8(0x298 + 2 + 0x400 * ch) =
			MCHBAR8(0x298 + 2 + 0x400 * ch) & ~0x0f;
		MCHBAR8(0x271 + 0x400 * ch) =
			(MCHBAR8(0x271 + 0x400 * ch) & ~0x7e)
			| 0x0e;
		MCHBAR8(0x5d9 + 0x400 * ch) =
			(MCHBAR8(0x5d9 + 0x400 * ch) & ~0x04);
		MCHBAR32(0x1a0) = (MCHBAR32(0x1a0)
				   & ~0x07ffffff) | 0x00555801;

		/* Disable WL on the trained rank */
		set_rank_write_level(s, ch, config, rank0, rank0, 0);
		send_jedec_cmd(s, rank0, ch, NORMALOP_CMD, 1 << 12);

		MCHBAR8(0x5d8 + 0x400 * ch) = (MCHBAR8(0x5d8 + 0x400 * ch)
					       & ~0x0e) | 0x0e;
		MCHBAR16(0x5c4 + 0x400 * ch) = (MCHBAR16(0x5c4 + 0x400 * ch)
						& ~0x3fff) | 0x1807;
		MCHBAR8(0x265 + 0x400 * ch) = MCHBAR8(0x265 + 0x400 * ch) & ~0x1f;

		/* Disable write level mode for all ranks */
		FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, ch, rank0)
			set_rank_write_level(s, ch, config, rank0, rank0, 0);
	}

	MCHBAR8(0x5dc) = (MCHBAR8(0x5dc) & ~0x80) | 0x80;

	/* Increment the DQ (tx) dll setting by a standard amount past DQS;
	   this is further trained in write training. */
	switch (s->selected_timings.mem_clk) {
	default:
	case MEM_CLOCK_800MHz:
		count = 39;
		break;
	case MEM_CLOCK_1066MHz:
		count = 32;
		break;
	case MEM_CLOCK_1333MHz:
		count = 42;
		break;
	}

	FOR_EACH_POPULATED_CHANNEL_AND_BYTELANE(s->dimms, ch, lane) {
		dq_setting = s->dqs_settings[ch][lane];
		for (i = 0; i < count; i++)
			if (increment_dq_dqs(s, &dq_setting))
				die("Can't further increase DQ past DQS delay");
		dqset(ch, lane, &dq_setting);
	}

	printk(BIOS_DEBUG, "Done write levelling.\n");
}