/* SPDX-License-Identifier: GPL-2.0-or-later */

#include <device/mmio.h>
#include <console/console.h>
#include <delay.h>
#include <string.h>
#include <types.h>
#include "raminit.h"
#include "x4x.h"

Angel Ponsdd7ce4e2021-03-26 23:21:02 +010011static void print_dll_setting(const struct dll_setting *dll_setting, u8 default_verbose)
Arthur Heymans95c48cb2017-11-04 08:07:06 +010012{
13 u8 debug_level = default_verbose ? BIOS_DEBUG : RAM_DEBUG;
14
15 printk(debug_level, "%d.%d.%d.%d:%d.%d\n", dll_setting->coarse,
16 dll_setting->clk_delay, dll_setting->tap,
17 dll_setting->pi, dll_setting->db_en,
18 dll_setting->db_sel);
19}
20
21struct db_limit {
22 u8 tap0;
23 u8 tap1;
24 u8 pi0;
25 u8 pi1;
26};
27
28static void set_db(const struct sysinfo *s, struct dll_setting *dq_dqs_setting)
29{
Elyes HAOUAS88607a42018-10-05 10:36:45 +020030 struct db_limit limit;
Arthur Heymans95c48cb2017-11-04 08:07:06 +010031
32 switch (s->selected_timings.mem_clk) {
33 default:
34 case MEM_CLOCK_800MHz:
35 limit.tap0 = 3;
36 limit.tap1 = 10;
37 limit.pi0 = 2;
38 limit.pi1 = 3;
39 break;
40 case MEM_CLOCK_1066MHz:
41 limit.tap0 = 2;
42 limit.tap1 = 8;
43 limit.pi0 = 6;
44 limit.pi1 = 7;
45 break;
46 case MEM_CLOCK_1333MHz:
47 limit.tap0 = 3;
48 limit.tap1 = 11;
49 /* TO CHECK: Might be reverse since this makes little sense */
50 limit.pi0 = 6;
51 limit.pi1 = 4;
52 break;
53 }
54
55 if (dq_dqs_setting->tap < limit.tap0) {
56 dq_dqs_setting->db_en = 1;
57 dq_dqs_setting->db_sel = 1;
58 } else if ((dq_dqs_setting->tap == limit.tap0)
59 && (dq_dqs_setting->pi < limit.pi0)) {
60 dq_dqs_setting->db_en = 1;
61 dq_dqs_setting->db_sel = 1;
62 } else if (dq_dqs_setting->tap < limit.tap1) {
63 dq_dqs_setting->db_en = 0;
64 dq_dqs_setting->db_sel = 0;
65 } else if ((dq_dqs_setting->tap == limit.tap1)
66 && (dq_dqs_setting->pi < limit.pi1)) {
67 dq_dqs_setting->db_en = 0;
68 dq_dqs_setting->db_sel = 0;
69 } else {
70 dq_dqs_setting->db_en = 1;
71 dq_dqs_setting->db_sel = 0;
72 }
73}
74
Elyes HAOUAS68ec3eb2019-06-22 09:21:18 +020075static const u8 max_tap[3] = {12, 10, 13};
Arthur Heymans95c48cb2017-11-04 08:07:06 +010076
Angel Ponsdd7ce4e2021-03-26 23:21:02 +010077static int increment_dq_dqs(const struct sysinfo *s, struct dll_setting *dq_dqs_setting)
Arthur Heymans95c48cb2017-11-04 08:07:06 +010078{
Angel Ponsdd7ce4e2021-03-26 23:21:02 +010079 u8 max_tap_val = max_tap[s->selected_timings.mem_clk - MEM_CLOCK_800MHz];
Arthur Heymans95c48cb2017-11-04 08:07:06 +010080
81 if (dq_dqs_setting->pi < 6) {
82 dq_dqs_setting->pi += 1;
83 } else if (dq_dqs_setting->tap < max_tap_val) {
84 dq_dqs_setting->pi = 0;
85 dq_dqs_setting->tap += 1;
86 } else if (dq_dqs_setting->clk_delay < 2) {
87 dq_dqs_setting->pi = 0;
88 dq_dqs_setting->tap = 0;
89 dq_dqs_setting->clk_delay += 1;
90 } else if (dq_dqs_setting->coarse < 1) {
91 dq_dqs_setting->pi = 0;
92 dq_dqs_setting->tap = 0;
93 dq_dqs_setting->clk_delay -= 1;
94 dq_dqs_setting->coarse += 1;
95 } else {
96 return CB_ERR;
97 }
98 set_db(s, dq_dqs_setting);
99 return CB_SUCCESS;
100}
101
Angel Ponsdd7ce4e2021-03-26 23:21:02 +0100102static int decrement_dq_dqs(const struct sysinfo *s, struct dll_setting *dq_dqs_setting)
Arthur Heymansb5170c32017-12-25 20:13:28 +0100103{
Angel Ponsdd7ce4e2021-03-26 23:21:02 +0100104 u8 max_tap_val = max_tap[s->selected_timings.mem_clk - MEM_CLOCK_800MHz];
Arthur Heymansb5170c32017-12-25 20:13:28 +0100105
106 if (dq_dqs_setting->pi > 0) {
107 dq_dqs_setting->pi -= 1;
108 } else if (dq_dqs_setting->tap > 0) {
109 dq_dqs_setting->pi = 6;
110 dq_dqs_setting->tap -= 1;
111 } else if (dq_dqs_setting->clk_delay > 0) {
112 dq_dqs_setting->pi = 6;
113 dq_dqs_setting->tap = max_tap_val;
114 dq_dqs_setting->clk_delay -= 1;
115 } else if (dq_dqs_setting->coarse > 0) {
116 dq_dqs_setting->pi = 6;
117 dq_dqs_setting->tap = max_tap_val;
118 dq_dqs_setting->clk_delay += 1;
119 dq_dqs_setting->coarse -= 1;
120 } else {
121 return CB_ERR;
122 }
123 set_db(s, dq_dqs_setting);
124 return CB_SUCCESS;
125}
126
Arthur Heymans95c48cb2017-11-04 08:07:06 +0100127#define WT_PATTERN_SIZE 80
128
129static const u32 write_training_schedule[WT_PATTERN_SIZE] = {
130 0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
131 0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
132 0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
133 0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
134 0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
135 0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
136 0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
137 0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
138 0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
139 0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
140 0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
141 0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
142 0x03030303, 0x04040404, 0x09090909, 0x10101010,
143 0x21212121, 0x40404040, 0x81818181, 0x00000000,
144 0x03030303, 0x04040404, 0x09090909, 0x10101010,
145 0x21212121, 0x40404040, 0x81818181, 0x00000000,
146 0xfdfdfdfd, 0xfafafafa, 0xf7f7f7f7, 0xeeeeeeee,
147 0xdfdfdfdf, 0xbebebebe, 0x7f7f7f7f, 0xfefefefe,
148 0xfdfdfdfd, 0xfafafafa, 0xf7f7f7f7, 0xeeeeeeee,
149 0xdfdfdfdf, 0xbebebebe, 0x7f7f7f7f, 0xfefefefe,
150};
151
/*
 * Result a limit search is looking for. The numeric values match the
 * per-lane error bits returned by the test functions (0 = lane passed,
 * 1 = lane failed), so they are compared against those bits directly.
 */
enum training_modes {
	SUCCEEDING = 0,
	FAILING = 1
};

Angel Ponsdd7ce4e2021-03-26 23:21:02 +0100157static u8 test_dq_aligned(const struct sysinfo *s, const u8 channel)
Arthur Heymans95c48cb2017-11-04 08:07:06 +0100158{
159 u32 address;
160 int rank, lane;
161 u8 count, count1;
162 u8 data[8];
163 u8 lane_error = 0;
164
165 FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, channel, rank) {
166 address = test_address(channel, rank);
167 for (count = 0; count < WT_PATTERN_SIZE; count++) {
168 for (count1 = 0; count1 < WT_PATTERN_SIZE; count1++) {
169 if ((count1 % 16) == 0)
170 MCHBAR32(0xf90) = 1;
Angel Ponsdd7ce4e2021-03-26 23:21:02 +0100171 const u32 pattern = write_training_schedule[count1];
Arthur Heymans95c48cb2017-11-04 08:07:06 +0100172 write32((u32 *)address + 8 * count1, pattern);
Angel Ponsdd7ce4e2021-03-26 23:21:02 +0100173 write32((u32 *)address + 8 * count1 + 4, pattern);
Arthur Heymans95c48cb2017-11-04 08:07:06 +0100174 }
175
176 const u32 good = write_training_schedule[count];
177 write32(&data[0], read32((u32 *)address + 8 * count));
Angel Ponsdd7ce4e2021-03-26 23:21:02 +0100178 write32(&data[4], read32((u32 *)address + 8 * count + 4));
Arthur Heymans95c48cb2017-11-04 08:07:06 +0100179 FOR_EACH_BYTELANE(lane) {
180 u8 expected = (good >> ((lane % 4) * 8)) & 0xff;
181 if (data[lane] != expected)
182 lane_error |= 1 << lane;
183 }
184 }
185 }
186 return lane_error;
187}
188
189#define CONSISTENCY 10
190
191/*
192 * This function finds either failing or succeeding writes by increasing DQ.
193 * When it has found a failing or succeeding setting it will increase DQ
194 * another 10 times to make sure the result is consistent.
195 * This is probably done because lanes cannot be trained independent from
196 * each other.
197 */
198static int find_dq_limit(const struct sysinfo *s, const u8 channel,
199 struct dll_setting dq_setting[TOTAL_BYTELANES],
200 u8 dq_lim[TOTAL_BYTELANES],
201 const enum training_modes expected_result)
202{
203 int status = CB_SUCCESS;
204 int lane;
205 u8 test_result;
206 u8 pass_count[TOTAL_BYTELANES];
Angel Pons5c3160e2021-03-26 22:48:42 +0100207 u8 success_mask = 0xff;
Arthur Heymans95c48cb2017-11-04 08:07:06 +0100208
209 printk(RAM_DEBUG, "Looking for %s writes on channel %d\n",
210 expected_result == FAILING ? "failing" : "succeeding", channel);
211 memset(pass_count, 0, sizeof(pass_count));
212
Angel Pons5c3160e2021-03-26 22:48:42 +0100213 while (success_mask) {
Arthur Heymans95c48cb2017-11-04 08:07:06 +0100214 test_result = test_dq_aligned(s, channel);
215 FOR_EACH_BYTELANE(lane) {
216 if (((test_result >> lane) & 1) != expected_result) {
217 status = increment_dq_dqs(s, &dq_setting[lane]);
218 dqset(channel, lane, &dq_setting[lane]);
219 dq_lim[lane]++;
220 } else if (pass_count[lane] < CONSISTENCY) {
221 status = increment_dq_dqs(s, &dq_setting[lane]);
222 dqset(channel, lane, &dq_setting[lane]);
223 dq_lim[lane]++;
224 pass_count[lane]++;
225 } else if (pass_count[lane] == CONSISTENCY) {
Angel Pons5c3160e2021-03-26 22:48:42 +0100226 success_mask &= ~(1 << lane);
Arthur Heymans95c48cb2017-11-04 08:07:06 +0100227 }
228 if (status == CB_ERR) {
Angel Ponsdd7ce4e2021-03-26 23:21:02 +0100229 printk(BIOS_CRIT,
230 "Could not find a case of %s writes on CH%d, lane %d\n",
Arthur Heymans95c48cb2017-11-04 08:07:06 +0100231 expected_result == FAILING ? "failing"
232 : "succeeding", channel, lane);
233 return CB_ERR;
234 }
235 }
236 }
237 return CB_SUCCESS;
238}
239
240/*
241 * This attempts to find the ideal delay for DQ to account for the skew between
242 * the DQ and the DQS signal.
243 * The training works this way:
244 * - start from the DQS delay values (DQ is always later than DQS)
245 * - increment the DQ delay until a succeeding write is found on all bytelayes,
246 * on all ranks on a channel and save these values
247 * - again increment the DQ delay until write start to fail on all bytelanes and
248 * save that value
249 * - use the mean between the saved succeeding and failing value
250 * - note: bytelanes cannot be trained independently, so the delays need to be
251 * adjusted and tested for all of them at the same time
252 */
253int do_write_training(struct sysinfo *s)
254{
255 int i;
256 u8 channel, lane;
257 u8 dq_lower[TOTAL_BYTELANES];
258 u8 dq_upper[TOTAL_BYTELANES];
259 struct dll_setting dq_setting[TOTAL_BYTELANES];
Arthur Heymans95c48cb2017-11-04 08:07:06 +0100260
261 printk(BIOS_DEBUG, "Starting DQ write training\n");
262
263 FOR_EACH_POPULATED_CHANNEL(s->dimms, channel) {
264 printk(BIOS_DEBUG, "Doing DQ write training on CH%d\n", channel);
265
Arthur Heymans95c48cb2017-11-04 08:07:06 +0100266 /* Start all lanes at DQS values */
267 FOR_EACH_BYTELANE(lane) {
268 dqset(channel, lane, &s->dqs_settings[channel][lane]);
269 s->dq_settings[channel][lane] = s->dqs_settings[channel][lane];
270 }
271 memset(dq_lower, 0, sizeof(dq_lower));
Angel Pons9d20c842021-01-13 12:39:37 +0100272 /* Start from DQS settings */
Arthur Heymans95c48cb2017-11-04 08:07:06 +0100273 memcpy(dq_setting, s->dqs_settings[channel], sizeof(dq_setting));
274
Angel Ponsdd7ce4e2021-03-26 23:21:02 +0100275 if (find_dq_limit(s, channel, dq_setting, dq_lower, SUCCEEDING)) {
276 printk(BIOS_CRIT, "Could not find working lower limit DQ setting\n");
Arthur Heymans95c48cb2017-11-04 08:07:06 +0100277 return CB_ERR;
278 }
279
280 memcpy(dq_upper, dq_lower, sizeof(dq_lower));
281
Angel Ponsdd7ce4e2021-03-26 23:21:02 +0100282 if (find_dq_limit(s, channel, dq_setting, dq_upper, FAILING)) {
283 printk(BIOS_WARNING, "Could not find failing upper limit DQ setting\n");
Arthur Heymans95c48cb2017-11-04 08:07:06 +0100284 return CB_ERR;
285 }
286
287 FOR_EACH_BYTELANE(lane) {
288 dq_lower[lane] -= CONSISTENCY - 1;
289 dq_upper[lane] -= CONSISTENCY - 1;
290 u8 dq_center = (dq_upper[lane] + dq_lower[lane]) / 2;
291
Angel Ponsdd7ce4e2021-03-26 23:21:02 +0100292 printk(RAM_DEBUG,
293 "Centered value for DQ DLL: ch%d, lane %d, #steps = %d\n",
Arthur Heymans95c48cb2017-11-04 08:07:06 +0100294 channel, lane, dq_center);
295 for (i = 0; i < dq_center; i++) {
296 /* Should never happen */
297 if (increment_dq_dqs(s, &s->dq_settings[channel][lane])
298 == CB_ERR)
299 printk(BIOS_ERR,
300 "Huh? write training overflowed!!\n");
301 }
302 }
303
304 /* Reset DQ DLL settings and increment with centered value*/
305 printk(BIOS_DEBUG, "Final DQ timings on CH%d\n", channel);
306 FOR_EACH_BYTELANE(lane) {
307 printk(BIOS_DEBUG, "\tlane%d: ", lane);
308 print_dll_setting(&s->dq_settings[channel][lane], 1);
309 dqset(channel, lane, &s->dq_settings[channel][lane]);
310 }
311 }
312 printk(BIOS_DEBUG, "Done DQ write training\n");
313 return CB_SUCCESS;
314}
315
316#define RT_PATTERN_SIZE 40
317
318static const u32 read_training_schedule[RT_PATTERN_SIZE] = {
319 0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
320 0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
321 0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
322 0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
323 0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
324 0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
325 0x03030303, 0x04040404, 0x09090909, 0x10101010,
326 0x21212121, 0x40404040, 0x81818181, 0x00000000,
327 0xfdfdfdfd, 0xfafafafa, 0xf7f7f7f7, 0xeeeeeeee,
328 0xdfdfdfdf, 0xbebebebe, 0x7f7f7f7f, 0xfefefefe
329};
330
331static int rt_increment_dqs(struct rt_dqs_setting *setting)
332{
333 if (setting->pi < 7) {
334 setting->pi++;
335 } else if (setting->tap < 14) {
336 setting->pi = 0;
337 setting->tap++;
338 } else {
339 return CB_ERR;
340 }
341 return CB_SUCCESS;
342}
343
344static u8 test_dqs_aligned(const struct sysinfo *s, const u8 channel)
345{
346 int i, rank, lane;
347 volatile u8 data[8];
348 u32 address;
349 u8 bytelane_error = 0;
350
351 FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, channel, rank) {
352 address = test_address(channel, rank);
353 for (i = 0; i < RT_PATTERN_SIZE; i++) {
354 const u32 good = read_training_schedule[i];
355 write32(&data[0], read32((u32 *)address + i * 8));
356 write32(&data[4], read32((u32 *)address + i * 8 + 4));
357
358 FOR_EACH_BYTELANE(lane) {
359 if (data[lane] != (good & 0xff))
360 bytelane_error |= 1 << lane;
361 }
362 }
363 }
364 return bytelane_error;
365}
366
367static int rt_find_dqs_limit(struct sysinfo *s, u8 channel,
368 struct rt_dqs_setting dqs_setting[TOTAL_BYTELANES],
369 u8 dqs_lim[TOTAL_BYTELANES],
370 const enum training_modes expected_result)
371{
372 int lane;
373 u8 test_result;
374 int status = CB_SUCCESS;
375
376 FOR_EACH_BYTELANE(lane)
377 rt_set_dqs(channel, lane, 0, &dqs_setting[lane]);
378
Elyes HAOUAS6aa9d662020-08-04 13:20:13 +0200379 while (status == CB_SUCCESS) {
Arthur Heymans95c48cb2017-11-04 08:07:06 +0100380 test_result = test_dqs_aligned(s, channel);
381 if (test_result == (expected_result == SUCCEEDING ? 0 : 0xff))
382 return CB_SUCCESS;
383 FOR_EACH_BYTELANE(lane) {
384 if (((test_result >> lane) & 1) != expected_result) {
385 status = rt_increment_dqs(&dqs_setting[lane]);
386 dqs_lim[lane]++;
387 rt_set_dqs(channel, lane, 0, &dqs_setting[lane]);
388 }
389 }
390 }
391
392 if (expected_result == SUCCEEDING) {
Angel Ponsdd7ce4e2021-03-26 23:21:02 +0100393 printk(BIOS_CRIT, "Could not find RT DQS setting\n");
Arthur Heymans95c48cb2017-11-04 08:07:06 +0100394 return CB_ERR;
395 } else {
Angel Ponsdd7ce4e2021-03-26 23:21:02 +0100396 printk(RAM_DEBUG, "Read succeeded over all DQS settings, continuing\n");
Arthur Heymans95c48cb2017-11-04 08:07:06 +0100397 return CB_SUCCESS;
398 }
399}
400
401#define RT_LOOPS 3
402
403/*
404 * This attempts to find the ideal delay for DQS on reads (rx).
405 * The training works this way:
406 * - start from the lowest possible delay (0) on all bytelanes
407 * - increment the DQS rx delays until a succeeding write is found on all
408 * bytelayes, on all ranks on a channel and save these values
409 * - again increment the DQS rx delay until write start to fail on all bytelanes
410 * and save that value
411 * - use the mean between the saved succeeding and failing value
412 * - note0: bytelanes cannot be trained independently, so the delays need to be
413 * adjusted and tested for all of them at the same time
Arthur Heymans8ddd7d12018-09-11 22:26:13 +0200414 * - note1: At this stage all ranks effectively use the rank0's rt_dqs settings,
415 * but later on their respective settings are used (TODO where is the
416 * 'switch' register??). So programming the results for all ranks at the end
417 * of the training. Programming on all ranks instead of all populated ranks,
418 * seems to be required, most likely because the signals can't really be generated
419 * separately.
Arthur Heymans95c48cb2017-11-04 08:07:06 +0100420 */
421int do_read_training(struct sysinfo *s)
422{
423 int loop, channel, i, lane, rank;
424 u32 address, content;
425 u8 dqs_lower[TOTAL_BYTELANES];
426 u8 dqs_upper[TOTAL_BYTELANES];
427 struct rt_dqs_setting dqs_setting[TOTAL_BYTELANES];
428 u16 saved_dqs_center[TOTAL_CHANNELS][TOTAL_BYTELANES];
429
Elyes HAOUASa342f392018-10-17 10:56:26 +0200430 memset(saved_dqs_center, 0, sizeof(saved_dqs_center));
Arthur Heymans95c48cb2017-11-04 08:07:06 +0100431
432 printk(BIOS_DEBUG, "Starting DQS read training\n");
433
434 for (loop = 0; loop < RT_LOOPS; loop++) {
435 FOR_EACH_POPULATED_CHANNEL(s->dimms, channel) {
436 printk(RAM_DEBUG, "Doing DQS read training on CH%d\n",
437 channel);
438
439 /* Write pattern to strobe address */
440 FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, channel, rank) {
441 address = test_address(channel, rank);
442 for (i = 0; i < RT_PATTERN_SIZE; i++) {
443 content = read_training_schedule[i];
444 write32((u32 *)address + 8 * i, content);
445 write32((u32 *)address + 8 * i + 4, content);
446 }
447 }
448
449 memset(dqs_lower, 0, sizeof(dqs_lower));
450 memset(&dqs_setting, 0, sizeof(dqs_setting));
451 if (rt_find_dqs_limit(s, channel, dqs_setting, dqs_lower,
452 SUCCEEDING)) {
453 printk(BIOS_CRIT,
454 "Could not find working lower limit DQS setting\n");
455 return CB_ERR;
456 }
457
458 FOR_EACH_BYTELANE(lane)
459 dqs_upper[lane] = dqs_lower[lane];
460
461 if (rt_find_dqs_limit(s, channel, dqs_setting, dqs_upper,
462 FAILING)) {
463 printk(BIOS_CRIT,
464 "Could not find failing upper limit DQ setting\n");
465 return CB_ERR;
466 }
467
468 printk(RAM_DEBUG, "Centered values, loop %d:\n", loop);
469 FOR_EACH_BYTELANE(lane) {
470 u8 center = (dqs_lower[lane] + dqs_upper[lane]) / 2;
471 printk(RAM_DEBUG, "\t lane%d: #%d\n", lane, center);
472 saved_dqs_center[channel][lane] += center;
473 }
474 } /* END FOR_EACH_POPULATED_CHANNEL */
475 } /* end RT_LOOPS */
476
477 memset(s->rt_dqs, 0, sizeof(s->rt_dqs));
478
479 FOR_EACH_POPULATED_CHANNEL(s->dimms, channel) {
480 printk(BIOS_DEBUG, "Final timings on CH%d:\n", channel);
481 FOR_EACH_BYTELANE(lane) {
482 saved_dqs_center[channel][lane] /= RT_LOOPS;
483 while (saved_dqs_center[channel][lane]--) {
Elyes HAOUAS5ba154a2020-08-04 13:27:52 +0200484 if (rt_increment_dqs(&s->rt_dqs[channel][lane])
Arthur Heymans95c48cb2017-11-04 08:07:06 +0100485 == CB_ERR)
486 /* Should never happen */
487 printk(BIOS_ERR,
488 "Huh? read training overflowed!!\n");
489 }
Arthur Heymans8ddd7d12018-09-11 22:26:13 +0200490 /* Later on separate settings for each rank are used so program
491 all of them */
492 FOR_EACH_RANK_IN_CHANNEL(rank)
Arthur Heymans95c48cb2017-11-04 08:07:06 +0100493 rt_set_dqs(channel, lane, rank,
494 &s->rt_dqs[channel][lane]);
495 printk(BIOS_DEBUG, "\tlane%d: %d.%d\n",
496 lane, s->rt_dqs[channel][lane].tap,
497 s->rt_dqs[channel][lane].pi);
498 }
499 }
500 printk(BIOS_DEBUG, "Done DQS read training\n");
501 return CB_SUCCESS;
502}
Arthur Heymansb5170c32017-12-25 20:13:28 +0100503
504/* Enable write leveling on selected rank and disable output on other ranks */
505static void set_rank_write_level(struct sysinfo *s, u8 channel, u8 config,
506 u8 config_rank, u8 target_rank, int wl_enable)
507{
508 u32 emrs1;
509
510 /* Is shifted by bits 2 later so u8 can be used to reduce size */
Elyes HAOUAS68ec3eb2019-06-22 09:21:18 +0200511 static const u8 emrs1_lut[8][4][4] = { /* [Config][Leveling Rank][Rank] */
Angel Pons9d20c842021-01-13 12:39:37 +0100512 { /* Config 0: 2R2R */
Arthur Heymansb5170c32017-12-25 20:13:28 +0100513 {0x11, 0x00, 0x91, 0x00},
514 {0x00, 0x11, 0x91, 0x00},
515 {0x91, 0x00, 0x11, 0x00},
516 {0x91, 0x00, 0x00, 0x11}
517 },
Angel Pons9d20c842021-01-13 12:39:37 +0100518 { /* Config 1: 2R1R */
Arthur Heymansb5170c32017-12-25 20:13:28 +0100519 {0x11, 0x00, 0x91, 0x00},
520 {0x00, 0x11, 0x91, 0x00},
521 {0x91, 0x00, 0x11, 0x00},
522 {0x00, 0x00, 0x00, 0x00}
523 },
Angel Pons9d20c842021-01-13 12:39:37 +0100524 { /* Config 2: 1R2R */
Arthur Heymansb5170c32017-12-25 20:13:28 +0100525 {0x11, 0x00, 0x91, 0x00},
526 {0x00, 0x00, 0x00, 0x00},
527 {0x91, 0x00, 0x11, 0x00},
528 {0x91, 0x00, 0x00, 0x11}
529 },
Angel Pons9d20c842021-01-13 12:39:37 +0100530 { /* Config 3: 1R1R */
Arthur Heymansb5170c32017-12-25 20:13:28 +0100531 {0x11, 0x00, 0x91, 0x00},
532 {0x00, 0x00, 0x00, 0x00},
533 {0x91, 0x00, 0x11, 0x00},
534 {0x00, 0x00, 0x00, 0x00}
535 },
Angel Pons9d20c842021-01-13 12:39:37 +0100536 { /* Config 4: 2R0R */
Arthur Heymansb5170c32017-12-25 20:13:28 +0100537 {0x11, 0x00, 0x00, 0x00},
538 {0x00, 0x11, 0x00, 0x00},
539 {0x00, 0x00, 0x00, 0x00},
540 {0x00, 0x00, 0x00, 0x00}
541 },
Angel Pons9d20c842021-01-13 12:39:37 +0100542 { /* Config 5: 0R2R */
Arthur Heymansb5170c32017-12-25 20:13:28 +0100543 {0x00, 0x00, 0x00, 0x00},
544 {0x00, 0x00, 0x00, 0x00},
545 {0x00, 0x00, 0x11, 0x00},
546 {0x00, 0x00, 0x00, 0x11}
547 },
Angel Pons9d20c842021-01-13 12:39:37 +0100548 { /* Config 6: 1R0R */
Arthur Heymansb5170c32017-12-25 20:13:28 +0100549 {0x11, 0x00, 0x00, 0x00},
550 {0x00, 0x00, 0x00, 0x00},
551 {0x00, 0x00, 0x00, 0x00},
552 {0x00, 0x00, 0x00, 0x00}
553 },
Angel Pons9d20c842021-01-13 12:39:37 +0100554 { /* Config 7: 0R1R */
Arthur Heymansb5170c32017-12-25 20:13:28 +0100555 {0x00, 0x00, 0x00, 0x00},
556 {0x00, 0x00, 0x00, 0x00},
557 {0x00, 0x00, 0x11, 0x00},
558 {0x00, 0x00, 0x00, 0x00}
559 }
560 };
561
562 if (wl_enable) {
563 printk(RAM_DEBUG, "Entering WL mode\n");
564 printk(RAM_DEBUG, "Using WL ODT values\n");
565 emrs1 = emrs1_lut[config][target_rank][config_rank];
566 } else {
567 printk(RAM_DEBUG, "Exiting WL mode\n");
568 emrs1 = ddr3_emrs1_rtt_nom_config[s->dimm_config[channel]][config_rank];
569 }
570 printk(RAM_DEBUG, "Setting ODT for rank%d to ", config_rank);
571 switch (emrs1) {
572 case 0:
573 printk(RAM_DEBUG, "High-Z\n");
574 break;
575 case 0x11:
576 printk(RAM_DEBUG, "40 Ohm\n");
577 break;
578 case 0x81:
579 printk(RAM_DEBUG, "30 Ohm\n");
580 break;
581 case 0x80:
582 printk(RAM_DEBUG, "20 Ohm\n");
583 break;
584 case 0x10:
585 printk(RAM_DEBUG, "120 Ohm\n");
586 break;
587 case 0x01:
588 printk(RAM_DEBUG, "60 Ohm\n");
589 break;
590 default:
591 printk(BIOS_WARNING, "ODT value Undefined!\n");
592 break;
593 }
594
595 emrs1 <<= 2;
596 /* Set output drive strength to 34 Ohm during write levelling */
597 emrs1 |= (1 << 1);
598
599 if (wl_enable && (target_rank != config_rank)) {
600 printk(RAM_DEBUG, "Disabling output for rank%d\n", config_rank);
601 emrs1 |= (1 << 12);
602 }
603 if (wl_enable && (target_rank == config_rank)) {
604 printk(RAM_DEBUG, "Enabling WL for rank%d\n", config_rank);
605 emrs1 |= (1 << 7);
606 }
607 send_jedec_cmd(s, config_rank, channel, EMRS1_CMD, emrs1);
608}
609
610#define N_SAMPLES 5
611
612static void sample_dq(const struct sysinfo *s, u8 channel, u8 rank,
613 u8 high_found[8]) {
614 u32 address = test_address(channel, rank);
615 int samples, lane;
616
617 memset(high_found, 0, TOTAL_BYTELANES * sizeof(high_found[0]));
618 for (samples = 0; samples < N_SAMPLES; samples++) {
619 write32((u32 *)address, 0x12341234);
620 write32((u32 *)address + 4, 0x12341234);
621 udelay(5);
622 FOR_EACH_BYTELANE(lane) {
623 u8 dq_high = (MCHBAR8(0x561 + 0x400 * channel
624 + (lane * 4)) >> 7) & 1;
625 high_found[lane] += dq_high;
626 }
627 }
628}
629
630static enum cb_err increment_to_dqs_edge(struct sysinfo *s, u8 channel, u8 rank)
631{
632 int lane;
633 u8 saved_24d;
634 struct dll_setting dqs_setting[TOTAL_BYTELANES];
635 u8 bytelane_ok = 0;
636 u8 dq_sample[TOTAL_BYTELANES];
637
638 memcpy(dqs_setting, s->dqs_settings[channel], sizeof(dqs_setting));
639 FOR_EACH_BYTELANE(lane)
640 dqsset(channel, lane, &dqs_setting[lane]);
641
642 saved_24d = MCHBAR8(0x24d + 0x400 * channel);
643
644 /* Loop 0: Find DQ sample low, by decreasing */
645 while (bytelane_ok != 0xff) {
646 sample_dq(s, channel, rank, dq_sample);
647 FOR_EACH_BYTELANE(lane) {
648 if (bytelane_ok & (1 << lane))
649 continue;
650
Angel Ponsdd7ce4e2021-03-26 23:21:02 +0100651 printk(RAM_SPEW, "%d, %d, %02d, %d, lane%d sample: %d\n",
Arthur Heymansb5170c32017-12-25 20:13:28 +0100652 dqs_setting[lane].coarse,
653 dqs_setting[lane].clk_delay,
654 dqs_setting[lane].tap,
655 dqs_setting[lane].pi,
656 lane,
657 dq_sample[lane]);
658
659 if (dq_sample[lane] > 0) {
660 if (decrement_dq_dqs(s, &dqs_setting[lane])) {
661 printk(BIOS_EMERG,
662 "DQS setting channel%d, "
663 "lane %d reached a minimum!\n",
664 channel, lane);
665 return CB_ERR;
666 }
667 } else {
668 bytelane_ok |= (1 << lane);
669 }
670 dqsset(channel, lane, &dqs_setting[lane]);
671 }
672 }
673
674 printk(RAM_DEBUG, "DQS settings on PASS #0:\n");
675 FOR_EACH_BYTELANE(lane) {
676 printk(RAM_DEBUG, "lane %d: ", lane);
677 print_dll_setting(&dqs_setting[lane], 0);
678 }
679
680 /* Loop 1: Find DQ sample high, by increasing */
681 bytelane_ok = 0;
682 while (bytelane_ok != 0xff) {
683 sample_dq(s, channel, rank, dq_sample);
684 FOR_EACH_BYTELANE(lane) {
685 if (bytelane_ok & (1 << lane))
686 continue;
687
688 printk(RAM_SPEW, "%d, %d, %02d, %d, lane%d sample: %d\n",
689 dqs_setting[lane].coarse,
690 dqs_setting[lane].clk_delay,
691 dqs_setting[lane].tap,
692 dqs_setting[lane].pi,
693 lane,
694 dq_sample[lane]);
695
696 if (dq_sample[lane] == N_SAMPLES) {
697 bytelane_ok |= (1 << lane);
698 } else {
699 if (increment_dq_dqs(s, &dqs_setting[lane])) {
700 printk(BIOS_EMERG,
701 "DQS setting channel%d, "
702 "lane %d reached a maximum!\n",
703 channel, lane);
704 return CB_ERR;
705 }
706 }
707 dqsset(channel, lane, &dqs_setting[lane]);
708 }
709 }
710
711 printk(RAM_DEBUG, "DQS settings on PASS #1:\n");
712 FOR_EACH_BYTELANE(lane) {
713 printk(RAM_DEBUG, "lane %d: ", lane);
714 print_dll_setting(&dqs_setting[lane], 0);
715 }
716
717 printk(BIOS_DEBUG, "final WL DQS settings on CH%d\n", channel);
718 FOR_EACH_BYTELANE(lane) {
719 printk(BIOS_DEBUG, "\tlane%d: ", lane);
720 print_dll_setting(&dqs_setting[lane], 1);
721 s->dqs_settings[channel][lane] = dqs_setting[lane];
722 }
723
724 MCHBAR8(0x24d + 0x400 * channel) = saved_24d;
725 return CB_SUCCESS;
726}
727
728/*
729 * DDR3 uses flyby topology where the clock signal takes a different path
730 * than the data signal, to allow for better signal intergrity.
731 * Therefore the delay on the data signals needs to account for this.
Angel Pons9e58afe2021-01-13 18:23:41 +0100732 * This is done by sampling the DQS write (tx) signal back over the DQ
733 * signal and looking for delay values where the sample transitions
Arthur Heymansb5170c32017-12-25 20:13:28 +0100734 * from high to low.
735 * Here the following is done:
Angel Pons9e58afe2021-01-13 18:23:41 +0100736 * - Enable write levelling on the first populated rank.
737 * - Disable output on other populated ranks.
738 * - Start from safe DQS (tx) delays. Other transitions can be
739 * found at different starting values but are generally bad.
Arthur Heymansb5170c32017-12-25 20:13:28 +0100740 * - loop0: decrease DQS (tx) delays until low is sampled,
741 * loop1: increase DQS (tx) delays until high is sampled,
Angel Pons9e58afe2021-01-13 18:23:41 +0100742 * This way, we are sure to have hit a low-high transition.
743 * - Put all ranks in normal mode of operation again.
744 * Note: All ranks need to be leveled together.
Arthur Heymansb5170c32017-12-25 20:13:28 +0100745 */
746void search_write_leveling(struct sysinfo *s)
747{
748 int i, ch, count;
749 u8 config, rank0, rank1, lane;
750 struct dll_setting dq_setting;
751
Angel Ponsa6daff12021-01-13 18:27:31 +0100752 const u8 chanconfig_lut[16] = {0, 6, 4, 6, 7, 3, 1, 3, 5, 2, 0, 2, 7, 3, 1, 3};
Arthur Heymansb5170c32017-12-25 20:13:28 +0100753
Angel Ponsa6daff12021-01-13 18:27:31 +0100754 const u8 odt_force[8][4] = { /* [Config][leveling rank] */
Arthur Heymansb5170c32017-12-25 20:13:28 +0100755 {0x5, 0x6, 0x5, 0x9},
756 {0x5, 0x6, 0x5, 0x0},
757 {0x5, 0x0, 0x5, 0x9},
758 {0x5, 0x0, 0x5, 0x0},
759 {0x1, 0x2, 0x0, 0x0},
760 {0x0, 0x0, 0x4, 0x8},
761 {0x1, 0x0, 0x0, 0x0},
762 {0x0, 0x0, 0x4, 0x0}
763 };
764
765 printk(BIOS_DEBUG, "Starting write levelling.\n");
766
767 FOR_EACH_POPULATED_CHANNEL(s->dimms, ch) {
768 printk(BIOS_DEBUG, "\tCH%d\n", ch);
769 config = chanconfig_lut[s->dimm_config[ch]];
770
Angel Ponsdd7ce4e2021-03-26 23:21:02 +0100771 MCHBAR8(0x5d8 + 0x400 * ch) = MCHBAR8(0x5d8 + 0x400 * ch) & ~0x0e;
Arthur Heymansb5170c32017-12-25 20:13:28 +0100772 MCHBAR16(0x5c4 + 0x400 * ch) = (MCHBAR16(0x5c4 + 0x400 * ch) &
773 ~0x3fff) | 0x3fff;
774 MCHBAR8(0x265 + 0x400 * ch) =
775 MCHBAR8(0x265 + 0x400 * ch) & ~0x1f;
776 /* find the first populated rank */
777 FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, ch, rank0)
778 break;
779
780 /* Enable WL for the first populated rank and disable output
781 for others */
782 FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, ch, rank1)
783 set_rank_write_level(s, ch, config, rank1, rank0, 1);
784
785 MCHBAR8(0x298 + 2 + 0x400 * ch) =
786 (MCHBAR8(0x298 + 2 + 0x400 * ch) & ~0x0f)
787 | odt_force[config][rank0];
Angel Ponsdd7ce4e2021-03-26 23:21:02 +0100788 MCHBAR8(0x271 + 0x400 * ch) = (MCHBAR8(0x271 + 0x400 * ch) & ~0x7e) | 0x4e;
789 MCHBAR8(0x5d9 + 0x400 * ch) = (MCHBAR8(0x5d9 + 0x400 * ch) & ~0x04) | 0x04;
790 MCHBAR32(0x1a0) = (MCHBAR32(0x1a0) & ~0x07ffffff) | 0x00014000;
Arthur Heymansb5170c32017-12-25 20:13:28 +0100791
792 if (increment_to_dqs_edge(s, ch, rank0))
793 die("Write Leveling failed!");
794
795 MCHBAR8(0x298 + 2 + 0x400 * ch) =
796 MCHBAR8(0x298 + 2 + 0x400 * ch) & ~0x0f;
797 MCHBAR8(0x271 + 0x400 * ch) =
798 (MCHBAR8(0x271 + 0x400 * ch) & ~0x7e)
799 | 0x0e;
800 MCHBAR8(0x5d9 + 0x400 * ch) =
801 (MCHBAR8(0x5d9 + 0x400 * ch) & ~0x04);
802 MCHBAR32(0x1a0) = (MCHBAR32(0x1a0)
803 & ~0x07ffffff) | 0x00555801;
804
805 /* Disable WL on the trained rank */
806 set_rank_write_level(s, ch, config, rank0, rank0, 0);
807 send_jedec_cmd(s, rank0, ch, NORMALOP_CMD, 1 << 12);
808
Angel Ponsdd7ce4e2021-03-26 23:21:02 +0100809 MCHBAR8(0x5d8 + 0x400 * ch) = (MCHBAR8(0x5d8 + 0x400 * ch) & ~0x0e) | 0x0e;
Arthur Heymansb5170c32017-12-25 20:13:28 +0100810 MCHBAR16(0x5c4 + 0x400 * ch) = (MCHBAR16(0x5c4 + 0x400 * ch)
811 & ~0x3fff) | 0x1807;
812 MCHBAR8(0x265 + 0x400 * ch) = MCHBAR8(0x265 + 0x400 * ch) & ~0x1f;
813
814 /* Disable write level mode for all ranks */
815 FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, ch, rank0)
816 set_rank_write_level(s, ch, config, rank0, rank0, 0);
817 }
818
819 MCHBAR8(0x5dc) = (MCHBAR8(0x5dc) & ~0x80) | 0x80;
820
821 /* Increment DQ (rx) dll setting by a standard amount past DQS,
822 This is further trained in write training. */
823 switch (s->selected_timings.mem_clk) {
824 default:
825 case MEM_CLOCK_800MHz:
826 count = 39;
827 break;
828 case MEM_CLOCK_1066MHz:
829 count = 32;
830 break;
831 case MEM_CLOCK_1333MHz:
832 count = 42;
833 break;
834 }
835
836 FOR_EACH_POPULATED_CHANNEL_AND_BYTELANE(s->dimms, ch, lane) {
837 dq_setting = s->dqs_settings[ch][lane];
838 for (i = 0; i < count; i++)
839 if (increment_dq_dqs(s, &dq_setting))
840 die("Can't further increase DQ past DQS delay");
841 dqset(ch, lane, &dq_setting);
842 }
843
844 printk(BIOS_DEBUG, "Done write levelling.\n");
845}