blob: e4cc888875d26c928f4156d5d3bd3aa9fb7e8c94 [file] [log] [blame]
Patrick Georgiac959032020-05-05 22:49:26 +02001/* SPDX-License-Identifier: GPL-2.0-or-later */
Arthur Heymans95c48cb2017-11-04 08:07:06 +01002
Kyösti Mälkki13f66502019-03-03 08:01:05 +02003#include <device/mmio.h>
Arthur Heymans95c48cb2017-11-04 08:07:06 +01004#include <console/console.h>
Arthur Heymansb5170c32017-12-25 20:13:28 +01005#include <delay.h>
Arthur Heymans2aeb2a12021-07-02 10:05:09 +02006#include <stdint.h>
Arthur Heymans95c48cb2017-11-04 08:07:06 +01007#include <string.h>
8#include <types.h>
Angel Pons41e66ac2020-09-15 13:17:23 +02009#include "raminit.h"
Arthur Heymans95c48cb2017-11-04 08:07:06 +010010#include "x4x.h"
Arthur Heymans95c48cb2017-11-04 08:07:06 +010011
Angel Ponsdd7ce4e2021-03-26 23:21:02 +010012static void print_dll_setting(const struct dll_setting *dll_setting, u8 default_verbose)
Arthur Heymans95c48cb2017-11-04 08:07:06 +010013{
14 u8 debug_level = default_verbose ? BIOS_DEBUG : RAM_DEBUG;
15
16 printk(debug_level, "%d.%d.%d.%d:%d.%d\n", dll_setting->coarse,
17 dll_setting->clk_delay, dll_setting->tap,
18 dll_setting->pi, dll_setting->db_en,
19 dll_setting->db_sel);
20}
21
22struct db_limit {
23 u8 tap0;
24 u8 tap1;
25 u8 pi0;
26 u8 pi1;
27};
28
29static void set_db(const struct sysinfo *s, struct dll_setting *dq_dqs_setting)
30{
Elyes HAOUAS88607a42018-10-05 10:36:45 +020031 struct db_limit limit;
Arthur Heymans95c48cb2017-11-04 08:07:06 +010032
33 switch (s->selected_timings.mem_clk) {
34 default:
35 case MEM_CLOCK_800MHz:
36 limit.tap0 = 3;
37 limit.tap1 = 10;
38 limit.pi0 = 2;
39 limit.pi1 = 3;
40 break;
41 case MEM_CLOCK_1066MHz:
42 limit.tap0 = 2;
43 limit.tap1 = 8;
44 limit.pi0 = 6;
45 limit.pi1 = 7;
46 break;
47 case MEM_CLOCK_1333MHz:
48 limit.tap0 = 3;
49 limit.tap1 = 11;
50 /* TO CHECK: Might be reverse since this makes little sense */
51 limit.pi0 = 6;
52 limit.pi1 = 4;
53 break;
54 }
55
56 if (dq_dqs_setting->tap < limit.tap0) {
57 dq_dqs_setting->db_en = 1;
58 dq_dqs_setting->db_sel = 1;
59 } else if ((dq_dqs_setting->tap == limit.tap0)
60 && (dq_dqs_setting->pi < limit.pi0)) {
61 dq_dqs_setting->db_en = 1;
62 dq_dqs_setting->db_sel = 1;
63 } else if (dq_dqs_setting->tap < limit.tap1) {
64 dq_dqs_setting->db_en = 0;
65 dq_dqs_setting->db_sel = 0;
66 } else if ((dq_dqs_setting->tap == limit.tap1)
67 && (dq_dqs_setting->pi < limit.pi1)) {
68 dq_dqs_setting->db_en = 0;
69 dq_dqs_setting->db_sel = 0;
70 } else {
71 dq_dqs_setting->db_en = 1;
72 dq_dqs_setting->db_sel = 0;
73 }
74}
75
Elyes HAOUAS68ec3eb2019-06-22 09:21:18 +020076static const u8 max_tap[3] = {12, 10, 13};
Arthur Heymans95c48cb2017-11-04 08:07:06 +010077
Angel Ponsdd7ce4e2021-03-26 23:21:02 +010078static int increment_dq_dqs(const struct sysinfo *s, struct dll_setting *dq_dqs_setting)
Arthur Heymans95c48cb2017-11-04 08:07:06 +010079{
Angel Ponsdd7ce4e2021-03-26 23:21:02 +010080 u8 max_tap_val = max_tap[s->selected_timings.mem_clk - MEM_CLOCK_800MHz];
Arthur Heymans95c48cb2017-11-04 08:07:06 +010081
82 if (dq_dqs_setting->pi < 6) {
83 dq_dqs_setting->pi += 1;
84 } else if (dq_dqs_setting->tap < max_tap_val) {
85 dq_dqs_setting->pi = 0;
86 dq_dqs_setting->tap += 1;
87 } else if (dq_dqs_setting->clk_delay < 2) {
88 dq_dqs_setting->pi = 0;
89 dq_dqs_setting->tap = 0;
90 dq_dqs_setting->clk_delay += 1;
91 } else if (dq_dqs_setting->coarse < 1) {
92 dq_dqs_setting->pi = 0;
93 dq_dqs_setting->tap = 0;
94 dq_dqs_setting->clk_delay -= 1;
95 dq_dqs_setting->coarse += 1;
96 } else {
97 return CB_ERR;
98 }
99 set_db(s, dq_dqs_setting);
100 return CB_SUCCESS;
101}
102
Angel Ponsdd7ce4e2021-03-26 23:21:02 +0100103static int decrement_dq_dqs(const struct sysinfo *s, struct dll_setting *dq_dqs_setting)
Arthur Heymansb5170c32017-12-25 20:13:28 +0100104{
Angel Ponsdd7ce4e2021-03-26 23:21:02 +0100105 u8 max_tap_val = max_tap[s->selected_timings.mem_clk - MEM_CLOCK_800MHz];
Arthur Heymansb5170c32017-12-25 20:13:28 +0100106
107 if (dq_dqs_setting->pi > 0) {
108 dq_dqs_setting->pi -= 1;
109 } else if (dq_dqs_setting->tap > 0) {
110 dq_dqs_setting->pi = 6;
111 dq_dqs_setting->tap -= 1;
112 } else if (dq_dqs_setting->clk_delay > 0) {
113 dq_dqs_setting->pi = 6;
114 dq_dqs_setting->tap = max_tap_val;
115 dq_dqs_setting->clk_delay -= 1;
116 } else if (dq_dqs_setting->coarse > 0) {
117 dq_dqs_setting->pi = 6;
118 dq_dqs_setting->tap = max_tap_val;
119 dq_dqs_setting->clk_delay += 1;
120 dq_dqs_setting->coarse -= 1;
121 } else {
122 return CB_ERR;
123 }
124 set_db(s, dq_dqs_setting);
125 return CB_SUCCESS;
126}
127
Arthur Heymans95c48cb2017-11-04 08:07:06 +0100128#define WT_PATTERN_SIZE 80
129
130static const u32 write_training_schedule[WT_PATTERN_SIZE] = {
131 0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
132 0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
133 0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
134 0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
135 0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
136 0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
137 0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
138 0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
139 0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
140 0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
141 0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
142 0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
143 0x03030303, 0x04040404, 0x09090909, 0x10101010,
144 0x21212121, 0x40404040, 0x81818181, 0x00000000,
145 0x03030303, 0x04040404, 0x09090909, 0x10101010,
146 0x21212121, 0x40404040, 0x81818181, 0x00000000,
147 0xfdfdfdfd, 0xfafafafa, 0xf7f7f7f7, 0xeeeeeeee,
148 0xdfdfdfdf, 0xbebebebe, 0x7f7f7f7f, 0xfefefefe,
149 0xfdfdfdfd, 0xfafafafa, 0xf7f7f7f7, 0xeeeeeeee,
150 0xdfdfdfdf, 0xbebebebe, 0x7f7f7f7f, 0xfefefefe,
151};
152
/*
 * Result a limit search is looking for. The values are compared directly
 * against a lane's error bit, so they must stay 0 and 1.
 */
enum training_modes {
	SUCCEEDING = 0,	/* lane error bit clear */
	FAILING = 1,	/* lane error bit set */
};
157
Angel Ponsdd7ce4e2021-03-26 23:21:02 +0100158static u8 test_dq_aligned(const struct sysinfo *s, const u8 channel)
Arthur Heymans95c48cb2017-11-04 08:07:06 +0100159{
160 u32 address;
161 int rank, lane;
162 u8 count, count1;
163 u8 data[8];
164 u8 lane_error = 0;
165
166 FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, channel, rank) {
167 address = test_address(channel, rank);
168 for (count = 0; count < WT_PATTERN_SIZE; count++) {
169 for (count1 = 0; count1 < WT_PATTERN_SIZE; count1++) {
170 if ((count1 % 16) == 0)
Angel Ponsa5146f32021-03-27 09:35:57 +0100171 mchbar_write32(0xf90, 1);
Angel Ponsdd7ce4e2021-03-26 23:21:02 +0100172 const u32 pattern = write_training_schedule[count1];
Arthur Heymans4d06ff02021-07-02 10:05:09 +0200173 write32p(address + 8 * count1, pattern);
174 write32p(address + 8 * count1 + 4, pattern);
Arthur Heymans95c48cb2017-11-04 08:07:06 +0100175 }
176
177 const u32 good = write_training_schedule[count];
Arthur Heymans4d06ff02021-07-02 10:05:09 +0200178 write32(&data[0], read32p(address + 8 * count));
179 write32(&data[4], read32p(address + 8 * count + 4));
Arthur Heymans95c48cb2017-11-04 08:07:06 +0100180 FOR_EACH_BYTELANE(lane) {
181 u8 expected = (good >> ((lane % 4) * 8)) & 0xff;
182 if (data[lane] != expected)
183 lane_error |= 1 << lane;
184 }
185 }
186 }
187 return lane_error;
188}
189
190#define CONSISTENCY 10
191
192/*
193 * This function finds either failing or succeeding writes by increasing DQ.
194 * When it has found a failing or succeeding setting it will increase DQ
195 * another 10 times to make sure the result is consistent.
196 * This is probably done because lanes cannot be trained independent from
197 * each other.
198 */
199static int find_dq_limit(const struct sysinfo *s, const u8 channel,
200 struct dll_setting dq_setting[TOTAL_BYTELANES],
201 u8 dq_lim[TOTAL_BYTELANES],
202 const enum training_modes expected_result)
203{
204 int status = CB_SUCCESS;
205 int lane;
206 u8 test_result;
207 u8 pass_count[TOTAL_BYTELANES];
Angel Pons5c3160e2021-03-26 22:48:42 +0100208 u8 success_mask = 0xff;
Arthur Heymans95c48cb2017-11-04 08:07:06 +0100209
210 printk(RAM_DEBUG, "Looking for %s writes on channel %d\n",
211 expected_result == FAILING ? "failing" : "succeeding", channel);
212 memset(pass_count, 0, sizeof(pass_count));
213
Angel Pons5c3160e2021-03-26 22:48:42 +0100214 while (success_mask) {
Arthur Heymans95c48cb2017-11-04 08:07:06 +0100215 test_result = test_dq_aligned(s, channel);
216 FOR_EACH_BYTELANE(lane) {
217 if (((test_result >> lane) & 1) != expected_result) {
218 status = increment_dq_dqs(s, &dq_setting[lane]);
219 dqset(channel, lane, &dq_setting[lane]);
220 dq_lim[lane]++;
221 } else if (pass_count[lane] < CONSISTENCY) {
222 status = increment_dq_dqs(s, &dq_setting[lane]);
223 dqset(channel, lane, &dq_setting[lane]);
224 dq_lim[lane]++;
225 pass_count[lane]++;
226 } else if (pass_count[lane] == CONSISTENCY) {
Angel Pons5c3160e2021-03-26 22:48:42 +0100227 success_mask &= ~(1 << lane);
Arthur Heymans95c48cb2017-11-04 08:07:06 +0100228 }
229 if (status == CB_ERR) {
Angel Ponsdd7ce4e2021-03-26 23:21:02 +0100230 printk(BIOS_CRIT,
231 "Could not find a case of %s writes on CH%d, lane %d\n",
Arthur Heymans95c48cb2017-11-04 08:07:06 +0100232 expected_result == FAILING ? "failing"
233 : "succeeding", channel, lane);
234 return CB_ERR;
235 }
236 }
237 }
238 return CB_SUCCESS;
239}
240
241/*
242 * This attempts to find the ideal delay for DQ to account for the skew between
243 * the DQ and the DQS signal.
244 * The training works this way:
245 * - start from the DQS delay values (DQ is always later than DQS)
246 * - increment the DQ delay until a succeeding write is found on all bytelayes,
247 * on all ranks on a channel and save these values
248 * - again increment the DQ delay until write start to fail on all bytelanes and
249 * save that value
250 * - use the mean between the saved succeeding and failing value
251 * - note: bytelanes cannot be trained independently, so the delays need to be
252 * adjusted and tested for all of them at the same time
253 */
254int do_write_training(struct sysinfo *s)
255{
256 int i;
257 u8 channel, lane;
258 u8 dq_lower[TOTAL_BYTELANES];
259 u8 dq_upper[TOTAL_BYTELANES];
260 struct dll_setting dq_setting[TOTAL_BYTELANES];
Arthur Heymans95c48cb2017-11-04 08:07:06 +0100261
262 printk(BIOS_DEBUG, "Starting DQ write training\n");
263
264 FOR_EACH_POPULATED_CHANNEL(s->dimms, channel) {
265 printk(BIOS_DEBUG, "Doing DQ write training on CH%d\n", channel);
266
Arthur Heymans95c48cb2017-11-04 08:07:06 +0100267 /* Start all lanes at DQS values */
268 FOR_EACH_BYTELANE(lane) {
269 dqset(channel, lane, &s->dqs_settings[channel][lane]);
270 s->dq_settings[channel][lane] = s->dqs_settings[channel][lane];
271 }
272 memset(dq_lower, 0, sizeof(dq_lower));
Angel Pons9d20c842021-01-13 12:39:37 +0100273 /* Start from DQS settings */
Arthur Heymans95c48cb2017-11-04 08:07:06 +0100274 memcpy(dq_setting, s->dqs_settings[channel], sizeof(dq_setting));
275
Angel Ponsdd7ce4e2021-03-26 23:21:02 +0100276 if (find_dq_limit(s, channel, dq_setting, dq_lower, SUCCEEDING)) {
277 printk(BIOS_CRIT, "Could not find working lower limit DQ setting\n");
Arthur Heymans95c48cb2017-11-04 08:07:06 +0100278 return CB_ERR;
279 }
280
281 memcpy(dq_upper, dq_lower, sizeof(dq_lower));
282
Angel Ponsdd7ce4e2021-03-26 23:21:02 +0100283 if (find_dq_limit(s, channel, dq_setting, dq_upper, FAILING)) {
284 printk(BIOS_WARNING, "Could not find failing upper limit DQ setting\n");
Arthur Heymans95c48cb2017-11-04 08:07:06 +0100285 return CB_ERR;
286 }
287
288 FOR_EACH_BYTELANE(lane) {
289 dq_lower[lane] -= CONSISTENCY - 1;
290 dq_upper[lane] -= CONSISTENCY - 1;
291 u8 dq_center = (dq_upper[lane] + dq_lower[lane]) / 2;
292
Angel Ponsdd7ce4e2021-03-26 23:21:02 +0100293 printk(RAM_DEBUG,
294 "Centered value for DQ DLL: ch%d, lane %d, #steps = %d\n",
Arthur Heymans95c48cb2017-11-04 08:07:06 +0100295 channel, lane, dq_center);
296 for (i = 0; i < dq_center; i++) {
297 /* Should never happen */
298 if (increment_dq_dqs(s, &s->dq_settings[channel][lane])
299 == CB_ERR)
300 printk(BIOS_ERR,
301 "Huh? write training overflowed!!\n");
302 }
303 }
304
305 /* Reset DQ DLL settings and increment with centered value*/
306 printk(BIOS_DEBUG, "Final DQ timings on CH%d\n", channel);
307 FOR_EACH_BYTELANE(lane) {
308 printk(BIOS_DEBUG, "\tlane%d: ", lane);
309 print_dll_setting(&s->dq_settings[channel][lane], 1);
310 dqset(channel, lane, &s->dq_settings[channel][lane]);
311 }
312 }
313 printk(BIOS_DEBUG, "Done DQ write training\n");
314 return CB_SUCCESS;
315}
316
317#define RT_PATTERN_SIZE 40
318
319static const u32 read_training_schedule[RT_PATTERN_SIZE] = {
320 0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
321 0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
322 0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
323 0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
324 0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
325 0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
326 0x03030303, 0x04040404, 0x09090909, 0x10101010,
327 0x21212121, 0x40404040, 0x81818181, 0x00000000,
328 0xfdfdfdfd, 0xfafafafa, 0xf7f7f7f7, 0xeeeeeeee,
329 0xdfdfdfdf, 0xbebebebe, 0x7f7f7f7f, 0xfefefefe
330};
331
332static int rt_increment_dqs(struct rt_dqs_setting *setting)
333{
334 if (setting->pi < 7) {
335 setting->pi++;
336 } else if (setting->tap < 14) {
337 setting->pi = 0;
338 setting->tap++;
339 } else {
340 return CB_ERR;
341 }
342 return CB_SUCCESS;
343}
344
345static u8 test_dqs_aligned(const struct sysinfo *s, const u8 channel)
346{
347 int i, rank, lane;
348 volatile u8 data[8];
349 u32 address;
350 u8 bytelane_error = 0;
351
352 FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, channel, rank) {
353 address = test_address(channel, rank);
354 for (i = 0; i < RT_PATTERN_SIZE; i++) {
355 const u32 good = read_training_schedule[i];
Arthur Heymans4d06ff02021-07-02 10:05:09 +0200356 write32(&data[0], read32p(address + i * 8));
357 write32(&data[4], read32p(address + i * 8 + 4));
Arthur Heymans95c48cb2017-11-04 08:07:06 +0100358
359 FOR_EACH_BYTELANE(lane) {
360 if (data[lane] != (good & 0xff))
361 bytelane_error |= 1 << lane;
362 }
363 }
364 }
365 return bytelane_error;
366}
367
368static int rt_find_dqs_limit(struct sysinfo *s, u8 channel,
369 struct rt_dqs_setting dqs_setting[TOTAL_BYTELANES],
370 u8 dqs_lim[TOTAL_BYTELANES],
371 const enum training_modes expected_result)
372{
373 int lane;
374 u8 test_result;
375 int status = CB_SUCCESS;
376
377 FOR_EACH_BYTELANE(lane)
378 rt_set_dqs(channel, lane, 0, &dqs_setting[lane]);
379
Elyes HAOUAS6aa9d662020-08-04 13:20:13 +0200380 while (status == CB_SUCCESS) {
Arthur Heymans95c48cb2017-11-04 08:07:06 +0100381 test_result = test_dqs_aligned(s, channel);
382 if (test_result == (expected_result == SUCCEEDING ? 0 : 0xff))
383 return CB_SUCCESS;
384 FOR_EACH_BYTELANE(lane) {
385 if (((test_result >> lane) & 1) != expected_result) {
386 status = rt_increment_dqs(&dqs_setting[lane]);
387 dqs_lim[lane]++;
388 rt_set_dqs(channel, lane, 0, &dqs_setting[lane]);
389 }
390 }
391 }
392
393 if (expected_result == SUCCEEDING) {
Angel Ponsdd7ce4e2021-03-26 23:21:02 +0100394 printk(BIOS_CRIT, "Could not find RT DQS setting\n");
Arthur Heymans95c48cb2017-11-04 08:07:06 +0100395 return CB_ERR;
396 } else {
Angel Ponsdd7ce4e2021-03-26 23:21:02 +0100397 printk(RAM_DEBUG, "Read succeeded over all DQS settings, continuing\n");
Arthur Heymans95c48cb2017-11-04 08:07:06 +0100398 return CB_SUCCESS;
399 }
400}
401
402#define RT_LOOPS 3
403
404/*
405 * This attempts to find the ideal delay for DQS on reads (rx).
406 * The training works this way:
407 * - start from the lowest possible delay (0) on all bytelanes
408 * - increment the DQS rx delays until a succeeding write is found on all
409 * bytelayes, on all ranks on a channel and save these values
410 * - again increment the DQS rx delay until write start to fail on all bytelanes
411 * and save that value
412 * - use the mean between the saved succeeding and failing value
413 * - note0: bytelanes cannot be trained independently, so the delays need to be
414 * adjusted and tested for all of them at the same time
Arthur Heymans8ddd7d12018-09-11 22:26:13 +0200415 * - note1: At this stage all ranks effectively use the rank0's rt_dqs settings,
416 * but later on their respective settings are used (TODO where is the
417 * 'switch' register??). So programming the results for all ranks at the end
418 * of the training. Programming on all ranks instead of all populated ranks,
419 * seems to be required, most likely because the signals can't really be generated
420 * separately.
Arthur Heymans95c48cb2017-11-04 08:07:06 +0100421 */
422int do_read_training(struct sysinfo *s)
423{
424 int loop, channel, i, lane, rank;
425 u32 address, content;
426 u8 dqs_lower[TOTAL_BYTELANES];
427 u8 dqs_upper[TOTAL_BYTELANES];
428 struct rt_dqs_setting dqs_setting[TOTAL_BYTELANES];
429 u16 saved_dqs_center[TOTAL_CHANNELS][TOTAL_BYTELANES];
430
Elyes HAOUASa342f392018-10-17 10:56:26 +0200431 memset(saved_dqs_center, 0, sizeof(saved_dqs_center));
Arthur Heymans95c48cb2017-11-04 08:07:06 +0100432
433 printk(BIOS_DEBUG, "Starting DQS read training\n");
434
435 for (loop = 0; loop < RT_LOOPS; loop++) {
436 FOR_EACH_POPULATED_CHANNEL(s->dimms, channel) {
437 printk(RAM_DEBUG, "Doing DQS read training on CH%d\n",
438 channel);
439
440 /* Write pattern to strobe address */
441 FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, channel, rank) {
442 address = test_address(channel, rank);
443 for (i = 0; i < RT_PATTERN_SIZE; i++) {
444 content = read_training_schedule[i];
Arthur Heymans4d06ff02021-07-02 10:05:09 +0200445 write32p(address + 8 * i, content);
446 write32p(address + 8 * i + 4, content);
Arthur Heymans95c48cb2017-11-04 08:07:06 +0100447 }
448 }
449
450 memset(dqs_lower, 0, sizeof(dqs_lower));
451 memset(&dqs_setting, 0, sizeof(dqs_setting));
452 if (rt_find_dqs_limit(s, channel, dqs_setting, dqs_lower,
453 SUCCEEDING)) {
454 printk(BIOS_CRIT,
455 "Could not find working lower limit DQS setting\n");
456 return CB_ERR;
457 }
458
459 FOR_EACH_BYTELANE(lane)
460 dqs_upper[lane] = dqs_lower[lane];
461
462 if (rt_find_dqs_limit(s, channel, dqs_setting, dqs_upper,
463 FAILING)) {
464 printk(BIOS_CRIT,
465 "Could not find failing upper limit DQ setting\n");
466 return CB_ERR;
467 }
468
469 printk(RAM_DEBUG, "Centered values, loop %d:\n", loop);
470 FOR_EACH_BYTELANE(lane) {
471 u8 center = (dqs_lower[lane] + dqs_upper[lane]) / 2;
472 printk(RAM_DEBUG, "\t lane%d: #%d\n", lane, center);
473 saved_dqs_center[channel][lane] += center;
474 }
475 } /* END FOR_EACH_POPULATED_CHANNEL */
476 } /* end RT_LOOPS */
477
478 memset(s->rt_dqs, 0, sizeof(s->rt_dqs));
479
480 FOR_EACH_POPULATED_CHANNEL(s->dimms, channel) {
481 printk(BIOS_DEBUG, "Final timings on CH%d:\n", channel);
482 FOR_EACH_BYTELANE(lane) {
483 saved_dqs_center[channel][lane] /= RT_LOOPS;
484 while (saved_dqs_center[channel][lane]--) {
Elyes HAOUAS5ba154a2020-08-04 13:27:52 +0200485 if (rt_increment_dqs(&s->rt_dqs[channel][lane])
Arthur Heymans95c48cb2017-11-04 08:07:06 +0100486 == CB_ERR)
487 /* Should never happen */
488 printk(BIOS_ERR,
489 "Huh? read training overflowed!!\n");
490 }
Arthur Heymans8ddd7d12018-09-11 22:26:13 +0200491 /* Later on separate settings for each rank are used so program
492 all of them */
493 FOR_EACH_RANK_IN_CHANNEL(rank)
Arthur Heymans95c48cb2017-11-04 08:07:06 +0100494 rt_set_dqs(channel, lane, rank,
495 &s->rt_dqs[channel][lane]);
496 printk(BIOS_DEBUG, "\tlane%d: %d.%d\n",
497 lane, s->rt_dqs[channel][lane].tap,
498 s->rt_dqs[channel][lane].pi);
499 }
500 }
501 printk(BIOS_DEBUG, "Done DQS read training\n");
502 return CB_SUCCESS;
503}
Arthur Heymansb5170c32017-12-25 20:13:28 +0100504
505/* Enable write leveling on selected rank and disable output on other ranks */
506static void set_rank_write_level(struct sysinfo *s, u8 channel, u8 config,
507 u8 config_rank, u8 target_rank, int wl_enable)
508{
509 u32 emrs1;
510
511 /* Is shifted by bits 2 later so u8 can be used to reduce size */
Elyes HAOUAS68ec3eb2019-06-22 09:21:18 +0200512 static const u8 emrs1_lut[8][4][4] = { /* [Config][Leveling Rank][Rank] */
Angel Pons9d20c842021-01-13 12:39:37 +0100513 { /* Config 0: 2R2R */
Arthur Heymansb5170c32017-12-25 20:13:28 +0100514 {0x11, 0x00, 0x91, 0x00},
515 {0x00, 0x11, 0x91, 0x00},
516 {0x91, 0x00, 0x11, 0x00},
517 {0x91, 0x00, 0x00, 0x11}
518 },
Angel Pons9d20c842021-01-13 12:39:37 +0100519 { /* Config 1: 2R1R */
Arthur Heymansb5170c32017-12-25 20:13:28 +0100520 {0x11, 0x00, 0x91, 0x00},
521 {0x00, 0x11, 0x91, 0x00},
522 {0x91, 0x00, 0x11, 0x00},
523 {0x00, 0x00, 0x00, 0x00}
524 },
Angel Pons9d20c842021-01-13 12:39:37 +0100525 { /* Config 2: 1R2R */
Arthur Heymansb5170c32017-12-25 20:13:28 +0100526 {0x11, 0x00, 0x91, 0x00},
527 {0x00, 0x00, 0x00, 0x00},
528 {0x91, 0x00, 0x11, 0x00},
529 {0x91, 0x00, 0x00, 0x11}
530 },
Angel Pons9d20c842021-01-13 12:39:37 +0100531 { /* Config 3: 1R1R */
Arthur Heymansb5170c32017-12-25 20:13:28 +0100532 {0x11, 0x00, 0x91, 0x00},
533 {0x00, 0x00, 0x00, 0x00},
534 {0x91, 0x00, 0x11, 0x00},
535 {0x00, 0x00, 0x00, 0x00}
536 },
Angel Pons9d20c842021-01-13 12:39:37 +0100537 { /* Config 4: 2R0R */
Arthur Heymansb5170c32017-12-25 20:13:28 +0100538 {0x11, 0x00, 0x00, 0x00},
539 {0x00, 0x11, 0x00, 0x00},
540 {0x00, 0x00, 0x00, 0x00},
541 {0x00, 0x00, 0x00, 0x00}
542 },
Angel Pons9d20c842021-01-13 12:39:37 +0100543 { /* Config 5: 0R2R */
Arthur Heymansb5170c32017-12-25 20:13:28 +0100544 {0x00, 0x00, 0x00, 0x00},
545 {0x00, 0x00, 0x00, 0x00},
546 {0x00, 0x00, 0x11, 0x00},
547 {0x00, 0x00, 0x00, 0x11}
548 },
Angel Pons9d20c842021-01-13 12:39:37 +0100549 { /* Config 6: 1R0R */
Arthur Heymansb5170c32017-12-25 20:13:28 +0100550 {0x11, 0x00, 0x00, 0x00},
551 {0x00, 0x00, 0x00, 0x00},
552 {0x00, 0x00, 0x00, 0x00},
553 {0x00, 0x00, 0x00, 0x00}
554 },
Angel Pons9d20c842021-01-13 12:39:37 +0100555 { /* Config 7: 0R1R */
Arthur Heymansb5170c32017-12-25 20:13:28 +0100556 {0x00, 0x00, 0x00, 0x00},
557 {0x00, 0x00, 0x00, 0x00},
558 {0x00, 0x00, 0x11, 0x00},
559 {0x00, 0x00, 0x00, 0x00}
560 }
561 };
562
563 if (wl_enable) {
564 printk(RAM_DEBUG, "Entering WL mode\n");
565 printk(RAM_DEBUG, "Using WL ODT values\n");
566 emrs1 = emrs1_lut[config][target_rank][config_rank];
567 } else {
568 printk(RAM_DEBUG, "Exiting WL mode\n");
569 emrs1 = ddr3_emrs1_rtt_nom_config[s->dimm_config[channel]][config_rank];
570 }
571 printk(RAM_DEBUG, "Setting ODT for rank%d to ", config_rank);
572 switch (emrs1) {
573 case 0:
574 printk(RAM_DEBUG, "High-Z\n");
575 break;
576 case 0x11:
577 printk(RAM_DEBUG, "40 Ohm\n");
578 break;
579 case 0x81:
580 printk(RAM_DEBUG, "30 Ohm\n");
581 break;
582 case 0x80:
583 printk(RAM_DEBUG, "20 Ohm\n");
584 break;
585 case 0x10:
586 printk(RAM_DEBUG, "120 Ohm\n");
587 break;
588 case 0x01:
589 printk(RAM_DEBUG, "60 Ohm\n");
590 break;
591 default:
592 printk(BIOS_WARNING, "ODT value Undefined!\n");
593 break;
594 }
595
596 emrs1 <<= 2;
597 /* Set output drive strength to 34 Ohm during write levelling */
598 emrs1 |= (1 << 1);
599
600 if (wl_enable && (target_rank != config_rank)) {
601 printk(RAM_DEBUG, "Disabling output for rank%d\n", config_rank);
602 emrs1 |= (1 << 12);
603 }
604 if (wl_enable && (target_rank == config_rank)) {
605 printk(RAM_DEBUG, "Enabling WL for rank%d\n", config_rank);
606 emrs1 |= (1 << 7);
607 }
608 send_jedec_cmd(s, config_rank, channel, EMRS1_CMD, emrs1);
609}
610
611#define N_SAMPLES 5
612
613static void sample_dq(const struct sysinfo *s, u8 channel, u8 rank,
614 u8 high_found[8]) {
615 u32 address = test_address(channel, rank);
616 int samples, lane;
617
618 memset(high_found, 0, TOTAL_BYTELANES * sizeof(high_found[0]));
619 for (samples = 0; samples < N_SAMPLES; samples++) {
Arthur Heymans4d06ff02021-07-02 10:05:09 +0200620 write32p(address, 0x12341234);
621 write32p(address + 4, 0x12341234);
Arthur Heymansb5170c32017-12-25 20:13:28 +0100622 udelay(5);
623 FOR_EACH_BYTELANE(lane) {
Angel Ponsa5146f32021-03-27 09:35:57 +0100624 u8 dq_high = (mchbar_read8(0x561 + 0x400 * channel
Arthur Heymansb5170c32017-12-25 20:13:28 +0100625 + (lane * 4)) >> 7) & 1;
626 high_found[lane] += dq_high;
627 }
628 }
629}
630
631static enum cb_err increment_to_dqs_edge(struct sysinfo *s, u8 channel, u8 rank)
632{
633 int lane;
634 u8 saved_24d;
635 struct dll_setting dqs_setting[TOTAL_BYTELANES];
636 u8 bytelane_ok = 0;
637 u8 dq_sample[TOTAL_BYTELANES];
638
639 memcpy(dqs_setting, s->dqs_settings[channel], sizeof(dqs_setting));
640 FOR_EACH_BYTELANE(lane)
641 dqsset(channel, lane, &dqs_setting[lane]);
642
Angel Ponsa5146f32021-03-27 09:35:57 +0100643 saved_24d = mchbar_read8(0x24d + 0x400 * channel);
Arthur Heymansb5170c32017-12-25 20:13:28 +0100644
645 /* Loop 0: Find DQ sample low, by decreasing */
646 while (bytelane_ok != 0xff) {
647 sample_dq(s, channel, rank, dq_sample);
648 FOR_EACH_BYTELANE(lane) {
649 if (bytelane_ok & (1 << lane))
650 continue;
651
Angel Ponsdd7ce4e2021-03-26 23:21:02 +0100652 printk(RAM_SPEW, "%d, %d, %02d, %d, lane%d sample: %d\n",
Arthur Heymansb5170c32017-12-25 20:13:28 +0100653 dqs_setting[lane].coarse,
654 dqs_setting[lane].clk_delay,
655 dqs_setting[lane].tap,
656 dqs_setting[lane].pi,
657 lane,
658 dq_sample[lane]);
659
Angel Pons70dc0a82021-03-26 23:23:22 +0100660 if (dq_sample[lane] == 0) {
Arthur Heymansb5170c32017-12-25 20:13:28 +0100661 bytelane_ok |= (1 << lane);
Angel Pons70dc0a82021-03-26 23:23:22 +0100662 } else if (decrement_dq_dqs(s, &dqs_setting[lane])) {
663 printk(BIOS_EMERG,
664 "DQS setting channel%d, lane %d reached a minimum!\n",
665 channel, lane);
666 return CB_ERR;
Arthur Heymansb5170c32017-12-25 20:13:28 +0100667 }
668 dqsset(channel, lane, &dqs_setting[lane]);
669 }
670 }
671
672 printk(RAM_DEBUG, "DQS settings on PASS #0:\n");
673 FOR_EACH_BYTELANE(lane) {
674 printk(RAM_DEBUG, "lane %d: ", lane);
675 print_dll_setting(&dqs_setting[lane], 0);
676 }
677
678 /* Loop 1: Find DQ sample high, by increasing */
679 bytelane_ok = 0;
680 while (bytelane_ok != 0xff) {
681 sample_dq(s, channel, rank, dq_sample);
682 FOR_EACH_BYTELANE(lane) {
683 if (bytelane_ok & (1 << lane))
684 continue;
685
686 printk(RAM_SPEW, "%d, %d, %02d, %d, lane%d sample: %d\n",
687 dqs_setting[lane].coarse,
688 dqs_setting[lane].clk_delay,
689 dqs_setting[lane].tap,
690 dqs_setting[lane].pi,
691 lane,
692 dq_sample[lane]);
693
694 if (dq_sample[lane] == N_SAMPLES) {
695 bytelane_ok |= (1 << lane);
Angel Pons70dc0a82021-03-26 23:23:22 +0100696 } else if (increment_dq_dqs(s, &dqs_setting[lane])) {
697 printk(BIOS_EMERG,
698 "DQS setting channel%d, lane %d reached a maximum!\n",
699 channel, lane);
700 return CB_ERR;
Arthur Heymansb5170c32017-12-25 20:13:28 +0100701 }
702 dqsset(channel, lane, &dqs_setting[lane]);
703 }
704 }
705
706 printk(RAM_DEBUG, "DQS settings on PASS #1:\n");
707 FOR_EACH_BYTELANE(lane) {
708 printk(RAM_DEBUG, "lane %d: ", lane);
709 print_dll_setting(&dqs_setting[lane], 0);
710 }
711
712 printk(BIOS_DEBUG, "final WL DQS settings on CH%d\n", channel);
713 FOR_EACH_BYTELANE(lane) {
714 printk(BIOS_DEBUG, "\tlane%d: ", lane);
715 print_dll_setting(&dqs_setting[lane], 1);
716 s->dqs_settings[channel][lane] = dqs_setting[lane];
717 }
718
Angel Ponsa5146f32021-03-27 09:35:57 +0100719 mchbar_write8(0x24d + 0x400 * channel, saved_24d);
Arthur Heymansb5170c32017-12-25 20:13:28 +0100720 return CB_SUCCESS;
721}
722
723/*
724 * DDR3 uses flyby topology where the clock signal takes a different path
725 * than the data signal, to allow for better signal intergrity.
726 * Therefore the delay on the data signals needs to account for this.
Angel Pons9e58afe2021-01-13 18:23:41 +0100727 * This is done by sampling the DQS write (tx) signal back over the DQ
728 * signal and looking for delay values where the sample transitions
Arthur Heymansb5170c32017-12-25 20:13:28 +0100729 * from high to low.
730 * Here the following is done:
Angel Pons9e58afe2021-01-13 18:23:41 +0100731 * - Enable write levelling on the first populated rank.
732 * - Disable output on other populated ranks.
733 * - Start from safe DQS (tx) delays. Other transitions can be
734 * found at different starting values but are generally bad.
Arthur Heymansb5170c32017-12-25 20:13:28 +0100735 * - loop0: decrease DQS (tx) delays until low is sampled,
736 * loop1: increase DQS (tx) delays until high is sampled,
Angel Pons9e58afe2021-01-13 18:23:41 +0100737 * This way, we are sure to have hit a low-high transition.
738 * - Put all ranks in normal mode of operation again.
739 * Note: All ranks need to be leveled together.
Arthur Heymansb5170c32017-12-25 20:13:28 +0100740 */
741void search_write_leveling(struct sysinfo *s)
742{
743 int i, ch, count;
744 u8 config, rank0, rank1, lane;
745 struct dll_setting dq_setting;
746
Angel Ponsa6daff12021-01-13 18:27:31 +0100747 const u8 chanconfig_lut[16] = {0, 6, 4, 6, 7, 3, 1, 3, 5, 2, 0, 2, 7, 3, 1, 3};
Arthur Heymansb5170c32017-12-25 20:13:28 +0100748
Angel Ponsa6daff12021-01-13 18:27:31 +0100749 const u8 odt_force[8][4] = { /* [Config][leveling rank] */
Arthur Heymansb5170c32017-12-25 20:13:28 +0100750 {0x5, 0x6, 0x5, 0x9},
751 {0x5, 0x6, 0x5, 0x0},
752 {0x5, 0x0, 0x5, 0x9},
753 {0x5, 0x0, 0x5, 0x0},
754 {0x1, 0x2, 0x0, 0x0},
755 {0x0, 0x0, 0x4, 0x8},
756 {0x1, 0x0, 0x0, 0x0},
757 {0x0, 0x0, 0x4, 0x0}
758 };
759
760 printk(BIOS_DEBUG, "Starting write levelling.\n");
761
762 FOR_EACH_POPULATED_CHANNEL(s->dimms, ch) {
763 printk(BIOS_DEBUG, "\tCH%d\n", ch);
764 config = chanconfig_lut[s->dimm_config[ch]];
765
Angel Ponsa5146f32021-03-27 09:35:57 +0100766 mchbar_clrbits8(0x5d8 + 0x400 * ch, 0x0e);
767 mchbar_clrsetbits16(0x5c4 + 0x400 * ch, 0x3fff, 0x3fff);
768 mchbar_clrbits8(0x265 + 0x400 * ch, 0x1f);
Arthur Heymansb5170c32017-12-25 20:13:28 +0100769 /* find the first populated rank */
770 FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, ch, rank0)
771 break;
772
773 /* Enable WL for the first populated rank and disable output
774 for others */
775 FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, ch, rank1)
776 set_rank_write_level(s, ch, config, rank1, rank0, 1);
777
Angel Ponsa5146f32021-03-27 09:35:57 +0100778 mchbar_clrsetbits8(0x298 + 2 + 0x400 * ch, 0x0f, odt_force[config][rank0]);
779 mchbar_clrsetbits8(0x271 + 0x400 * ch, 0x7e, 0x4e);
780 mchbar_setbits8(0x5d9 + 0x400 * ch, 1 << 2);
781 mchbar_clrsetbits32(0x1a0, 0x07ffffff, 0x00014000);
Arthur Heymansb5170c32017-12-25 20:13:28 +0100782
783 if (increment_to_dqs_edge(s, ch, rank0))
784 die("Write Leveling failed!");
785
Angel Ponsa5146f32021-03-27 09:35:57 +0100786 mchbar_clrbits8(0x298 + 2 + 0x400 * ch, 0x0f);
787 mchbar_clrsetbits8(0x271 + 0x400 * ch, 0x7e, 0x0e);
788 mchbar_clrbits8(0x5d9 + 0x400 * ch, 1 << 2);
789 mchbar_clrsetbits32(0x1a0, 0x07ffffff, 0x00555801);
Arthur Heymansb5170c32017-12-25 20:13:28 +0100790
791 /* Disable WL on the trained rank */
792 set_rank_write_level(s, ch, config, rank0, rank0, 0);
793 send_jedec_cmd(s, rank0, ch, NORMALOP_CMD, 1 << 12);
794
Angel Ponsa5146f32021-03-27 09:35:57 +0100795 mchbar_setbits8(0x5d8 + 0x400 * ch, 0x0e);
796 mchbar_clrsetbits16(0x5c4 + 0x400 * ch, 0x3fff, 0x1807);
797 mchbar_clrbits8(0x265 + 0x400 * ch, 0x1f);
Arthur Heymansb5170c32017-12-25 20:13:28 +0100798
799 /* Disable write level mode for all ranks */
800 FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, ch, rank0)
801 set_rank_write_level(s, ch, config, rank0, rank0, 0);
802 }
803
Angel Ponsa5146f32021-03-27 09:35:57 +0100804 mchbar_setbits8(0x5dc, 1 << 7);
Arthur Heymansb5170c32017-12-25 20:13:28 +0100805
806 /* Increment DQ (rx) dll setting by a standard amount past DQS,
807 This is further trained in write training. */
808 switch (s->selected_timings.mem_clk) {
809 default:
810 case MEM_CLOCK_800MHz:
811 count = 39;
812 break;
813 case MEM_CLOCK_1066MHz:
814 count = 32;
815 break;
816 case MEM_CLOCK_1333MHz:
817 count = 42;
818 break;
819 }
820
821 FOR_EACH_POPULATED_CHANNEL_AND_BYTELANE(s->dimms, ch, lane) {
822 dq_setting = s->dqs_settings[ch][lane];
823 for (i = 0; i < count; i++)
824 if (increment_dq_dqs(s, &dq_setting))
825 die("Can't further increase DQ past DQS delay");
826 dqset(ch, lane, &dq_setting);
827 }
828
829 printk(BIOS_DEBUG, "Done write levelling.\n");
830}