blob: 4397ebaccbced484c1d6b555416310413985dc4c [file] [log] [blame]
/*
 * This file is part of the coreboot project.
 *
 * Copyright (C) 2010 Advanced Micro Devices, Inc.
 * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 */
17
/******************************************************************************
 Description: Receiver En and DQS Timing Training feature for DDR 3 MCT
******************************************************************************/
21
Timothy Pearson730a0432015-10-16 13:51:51 -050022static int32_t abs(int32_t val);
23static void dqsTrainRcvrEn_SW_Fam10(struct MCTStatStruc *pMCTstat,
24 struct DCTStatStruc *pDCTstat, u8 Pass);
25static void dqsTrainRcvrEn_SW_Fam15(struct MCTStatStruc *pMCTstat,
Zheng Baoeb75f652010-04-23 17:32:48 +000026 struct DCTStatStruc *pDCTstat, u8 Pass);
Zheng Baoeb75f652010-04-23 17:32:48 +000027static void mct_InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
28 struct DCTStatStruc *pDCTstat);
29static void InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
30 struct DCTStatStruc *pDCTstat, u8 Channel);
31static void CalcEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat,
32 struct DCTStatStruc *pDCTstat, u8 Channel);
Timothy Pearsonb8a355d2015-09-05 17:55:58 -050033static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u16 DQSRcvEnDly);
Timothy Pearson730a0432015-10-16 13:51:51 -050034static uint32_t fenceDynTraining_D(struct MCTStatStruc *pMCTstat,
Zheng Baoeb75f652010-04-23 17:32:48 +000035 struct DCTStatStruc *pDCTstat, u8 dct);
36static void mct_DisableDQSRcvEn_D(struct DCTStatStruc *pDCTstat);
37
38/* Warning: These must be located so they do not cross a logical 16-bit
39 segment boundary! */
Paul Menzel42409e82013-05-04 18:07:13 +020040static const u32 TestPattern0_D[] = {
Timothy Pearsonb8a355d2015-09-05 17:55:58 -050041 0x55555555, 0x55555555, 0x55555555, 0x55555555,
42 0x55555555, 0x55555555, 0x55555555, 0x55555555,
43 0x55555555, 0x55555555, 0x55555555, 0x55555555,
44 0x55555555, 0x55555555, 0x55555555, 0x55555555,
Zheng Baoeb75f652010-04-23 17:32:48 +000045};
Paul Menzel42409e82013-05-04 18:07:13 +020046static const u32 TestPattern1_D[] = {
Timothy Pearsonb8a355d2015-09-05 17:55:58 -050047 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
48 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
49 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
50 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
Zheng Baoeb75f652010-04-23 17:32:48 +000051};
Paul Menzel42409e82013-05-04 18:07:13 +020052static const u32 TestPattern2_D[] = {
Zheng Baoeb75f652010-04-23 17:32:48 +000053 0x12345678, 0x87654321, 0x23456789, 0x98765432,
54 0x59385824, 0x30496724, 0x24490795, 0x99938733,
55 0x40385642, 0x38465245, 0x29432163, 0x05067894,
56 0x12349045, 0x98723467, 0x12387634, 0x34587623,
57};
58
59static void SetupRcvrPattern(struct MCTStatStruc *pMCTstat,
60 struct DCTStatStruc *pDCTstat, u32 *buffer, u8 pass)
61{
62 /*
63 * 1. Copy the alpha and Beta patterns from ROM to Cache,
64 * aligning on 16 byte boundary
65 * 2. Set the ptr to DCTStatstruc.PtrPatternBufA for Alpha
66 * 3. Set the ptr to DCTStatstruc.PtrPatternBufB for Beta
67 */
68 u32 *buf_a;
69 u32 *buf_b;
70 u32 *p_A;
71 u32 *p_B;
72 u8 i;
73
74 buf_a = (u32 *)(((u32)buffer + 0x10) & (0xfffffff0));
75 buf_b = buf_a + 32; /* ?? */
76 p_A = (u32 *)SetupDqsPattern_1PassB(pass);
77 p_B = (u32 *)SetupDqsPattern_1PassA(pass);
78
79 for(i=0;i<16;i++) {
80 buf_a[i] = p_A[i];
81 buf_b[i] = p_B[i];
82 }
83
84 pDCTstat->PtrPatternBufA = (u32)buf_a;
85 pDCTstat->PtrPatternBufB = (u32)buf_b;
86}
87
88void mct_TrainRcvrEn_D(struct MCTStatStruc *pMCTstat,
89 struct DCTStatStruc *pDCTstat, u8 Pass)
90{
Timothy Pearson730a0432015-10-16 13:51:51 -050091 if(mct_checkNumberOfDqsRcvEn_1Pass(Pass)) {
92 if (is_fam15h())
93 dqsTrainRcvrEn_SW_Fam15(pMCTstat, pDCTstat, Pass);
94 else
95 dqsTrainRcvrEn_SW_Fam10(pMCTstat, pDCTstat, Pass);
96 }
Zheng Baoeb75f652010-04-23 17:32:48 +000097}
98
Timothy Pearson730a0432015-10-16 13:51:51 -050099static uint16_t fam15_receiver_enable_training_seed(struct DCTStatStruc *pDCTstat, uint8_t dct, uint8_t dimm, uint8_t rank, uint8_t package_type)
100{
101 uint32_t dword;
102 uint16_t seed = 0;
103
104 /* FIXME
105 * Mainboards need to be able to specify the maximum number of DIMMs installable per channel
106 * For now assume a maximum of 2 DIMMs per channel can be installed
107 */
108 uint8_t MaxDimmsInstallable = 2;
109
110 uint8_t channel = dct;
111 if (package_type == PT_GR) {
112 /* Get the internal node number */
113 dword = Get_NB32(pDCTstat->dev_nbmisc, 0xe8);
114 dword = (dword >> 30) & 0x3;
115 if (dword == 1) {
116 channel += 2;
117 }
118 }
119
120 if (pDCTstat->Status & (1 << SB_Registered)) {
121 if (package_type == PT_GR) {
122 /* Socket G34: Fam15h BKDG v3.14 Table 99 */
123 if (MaxDimmsInstallable == 1) {
124 if (channel == 0)
125 seed = 0x43;
126 else if (channel == 1)
127 seed = 0x3f;
128 else if (channel == 2)
129 seed = 0x3a;
130 else if (channel == 3)
131 seed = 0x35;
132 } else if (MaxDimmsInstallable == 2) {
133 if (channel == 0)
134 seed = 0x54;
135 else if (channel == 1)
136 seed = 0x4d;
137 else if (channel == 2)
138 seed = 0x45;
139 else if (channel == 3)
140 seed = 0x40;
141 } else if (MaxDimmsInstallable == 3) {
142 if (channel == 0)
143 seed = 0x6b;
144 else if (channel == 1)
145 seed = 0x5e;
146 else if (channel == 2)
147 seed = 0x4b;
148 else if (channel == 3)
149 seed = 0x3d;
150 }
151 } else if (package_type == PT_C3) {
152 /* Socket C32: Fam15h BKDG v3.14 Table 100 */
153 if ((MaxDimmsInstallable == 1) || (MaxDimmsInstallable == 2)) {
154 if (channel == 0)
155 seed = 0x3f;
156 else if (channel == 1)
157 seed = 0x3e;
158 } else if (MaxDimmsInstallable == 3) {
159 if (channel == 0)
160 seed = 0x47;
161 else if (channel == 1)
162 seed = 0x38;
163 }
164 }
165 } else if (pDCTstat->Status & (1 << SB_LoadReduced)) {
166 if (package_type == PT_GR) {
167 /* Socket G34: Fam15h BKDG v3.14 Table 99 */
168 if (MaxDimmsInstallable == 1) {
169 if (channel == 0)
170 seed = 0x123;
171 else if (channel == 1)
172 seed = 0x122;
173 else if (channel == 2)
174 seed = 0x112;
175 else if (channel == 3)
176 seed = 0x102;
177 }
178 } else if (package_type == PT_C3) {
179 /* Socket C32: Fam15h BKDG v3.14 Table 100 */
180 if (channel == 0)
181 seed = 0x132;
182 else if (channel == 1)
183 seed = 0x122;
184 }
185 } else {
186 if (package_type == PT_GR) {
187 /* Socket G34: Fam15h BKDG v3.14 Table 99 */
188 if (MaxDimmsInstallable == 1) {
189 if (channel == 0)
190 seed = 0x3e;
191 else if (channel == 1)
192 seed = 0x38;
193 else if (channel == 2)
194 seed = 0x37;
195 else if (channel == 3)
196 seed = 0x31;
197 } else if (MaxDimmsInstallable == 2) {
198 if (channel == 0)
199 seed = 0x51;
200 else if (channel == 1)
201 seed = 0x4a;
202 else if (channel == 2)
203 seed = 0x46;
204 else if (channel == 3)
205 seed = 0x3f;
206 } else if (MaxDimmsInstallable == 3) {
207 if (channel == 0)
208 seed = 0x5e;
209 else if (channel == 1)
210 seed = 0x52;
211 else if (channel == 2)
212 seed = 0x48;
213 else if (channel == 3)
214 seed = 0x3c;
215 }
216 } else if (package_type == PT_C3) {
217 /* Socket C32: Fam15h BKDG v3.14 Table 100 */
218 if ((MaxDimmsInstallable == 1) || (MaxDimmsInstallable == 2)) {
219 if (channel == 0)
220 seed = 0x39;
221 else if (channel == 1)
222 seed = 0x32;
223 } else if (MaxDimmsInstallable == 3) {
224 if (channel == 0)
225 seed = 0x45;
226 else if (channel == 1)
227 seed = 0x37;
228 }
229 } else if (package_type == PT_M2) {
230 /* Socket AM3: Fam15h BKDG v3.14 Table 101 */
231 seed = 0x3a;
232 }
233 }
234
235 return seed;
236}
237
238static void read_dqs_write_timing_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg)
Timothy Pearsonb8a355d2015-09-05 17:55:58 -0500239{
240 uint8_t lane;
241 uint32_t dword;
242
243 for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
244 uint32_t wdt_reg;
245 if ((lane == 0) || (lane == 1))
246 wdt_reg = 0x30;
247 if ((lane == 2) || (lane == 3))
248 wdt_reg = 0x31;
249 if ((lane == 4) || (lane == 5))
250 wdt_reg = 0x40;
251 if ((lane == 6) || (lane == 7))
252 wdt_reg = 0x41;
253 if (lane == 8)
254 wdt_reg = 0x32;
255 wdt_reg += dimm * 3;
Timothy Pearson730a0432015-10-16 13:51:51 -0500256 dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, wdt_reg);
Timothy Pearsonb8a355d2015-09-05 17:55:58 -0500257 if ((lane == 7) || (lane == 5) || (lane == 3) || (lane == 1))
258 current_total_delay[lane] = (dword & 0x00ff0000) >> 16;
259 if ((lane == 8) || (lane == 6) || (lane == 4) || (lane == 2) || (lane == 0))
260 current_total_delay[lane] = dword & 0x000000ff;
261 }
262}
263
#ifdef UNUSED_CODE
/*
 * Write the DQS write timing control delay of every byte lane of one DIMM.
 *
 * current_total_delay: per-lane delays; only the low 8 bits are written
 * dev, dct, index_reg: forwarded to the indexed register accessors
 * dimm: DIMM number; each DIMM's registers are 3 indices apart
 *
 * Each lane is updated with its own read-modify-write cycle so the other
 * lane sharing the register is preserved.  Even lanes sit in bits 7:0 and
 * odd lanes in bits 23:16.  The lane mapping below covers lanes 0..8.
 */
static void write_dqs_write_timing_control_registers(uint16_t *current_total_delay, uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg)
{
	uint8_t lane;

	for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
		uint32_t reg_index;
		uint32_t reg_value;
		uint32_t field_shift;

		/* Select the register holding this lane */
		if (lane < 2)
			reg_index = 0x30;
		else if (lane < 4)
			reg_index = 0x31;
		else if (lane < 6)
			reg_index = 0x40;
		else if (lane < 8)
			reg_index = 0x41;
		else
			reg_index = 0x32;
		reg_index += dimm * 3;

		/* Odd lanes occupy the upper field, even lanes the lower one */
		field_shift = (lane & 1) ? 16 : 0;

		reg_value = Get_NB32_index_wait_DCT(dev, dct, index_reg, reg_index);
		reg_value &= ~((uint32_t)0xff << field_shift);
		reg_value |= (uint32_t)(current_total_delay[lane] & 0xff) << field_shift;
		Set_NB32_index_wait_DCT(dev, dct, index_reg, reg_index, reg_value);
	}
}
#endif
296
297static void write_write_data_timing_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg)
298{
299 uint8_t lane;
300 uint32_t dword;
301
302 for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
303 uint32_t wdt_reg;
304
305 /* Calculate Write Data Timing register location */
306 if ((lane == 0) || (lane == 1) || (lane == 2) || (lane == 3))
307 wdt_reg = 0x1;
308 if ((lane == 4) || (lane == 5) || (lane == 6) || (lane == 7))
309 wdt_reg = 0x2;
310 if (lane == 8)
311 wdt_reg = 0x3;
312 wdt_reg |= (dimm << 8);
313
314 /* Set Write Data Timing register values */
315 dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, wdt_reg);
316 if ((lane == 7) || (lane == 3)) {
317 dword &= ~(0x7f << 24);
318 dword |= (current_total_delay[lane] & 0x7f) << 24;
319 }
320 if ((lane == 6) || (lane == 2)) {
321 dword &= ~(0x7f << 16);
322 dword |= (current_total_delay[lane] & 0x7f) << 16;
323 }
324 if ((lane == 5) || (lane == 1)) {
325 dword &= ~(0x7f << 8);
326 dword |= (current_total_delay[lane] & 0x7f) << 8;
327 }
328 if ((lane == 8) || (lane == 4) || (lane == 0)) {
329 dword &= ~0x7f;
330 dword |= current_total_delay[lane] & 0x7f;
331 }
332 Set_NB32_index_wait_DCT(dev, dct, index_reg, wdt_reg, dword);
333 }
334}
335
336static void read_dqs_receiver_enable_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg)
337{
338 uint8_t lane;
339 uint32_t mask;
340 uint32_t dword;
341
342 if (is_fam15h())
343 mask = 0x3ff;
344 else
345 mask = 0x1ff;
346
347 for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
Timothy Pearsonb8a355d2015-09-05 17:55:58 -0500348 uint32_t ret_reg;
349 if ((lane == 0) || (lane == 1))
350 ret_reg = 0x10;
351 if ((lane == 2) || (lane == 3))
352 ret_reg = 0x11;
353 if ((lane == 4) || (lane == 5))
354 ret_reg = 0x20;
355 if ((lane == 6) || (lane == 7))
356 ret_reg = 0x21;
Timothy Pearson730a0432015-10-16 13:51:51 -0500357 if (lane == 8)
358 ret_reg = 0x12;
Timothy Pearsonb8a355d2015-09-05 17:55:58 -0500359 ret_reg += dimm * 3;
Timothy Pearson730a0432015-10-16 13:51:51 -0500360 dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, ret_reg);
Timothy Pearsonb8a355d2015-09-05 17:55:58 -0500361 if ((lane == 7) || (lane == 5) || (lane == 3) || (lane == 1)) {
Timothy Pearson730a0432015-10-16 13:51:51 -0500362 current_total_delay[lane] = (dword & (mask << 16)) >> 16;
Timothy Pearsonb8a355d2015-09-05 17:55:58 -0500363 }
Timothy Pearson730a0432015-10-16 13:51:51 -0500364 if ((lane == 8) || (lane == 6) || (lane == 4) || (lane == 2) || (lane == 0)) {
365 current_total_delay[lane] = dword & mask;
Timothy Pearsonb8a355d2015-09-05 17:55:58 -0500366 }
Timothy Pearson730a0432015-10-16 13:51:51 -0500367 }
368}
369
370static void write_dqs_receiver_enable_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg)
371{
372 uint8_t lane;
373 uint32_t mask;
374 uint32_t dword;
375
376 if (is_fam15h())
377 mask = 0x3ff;
378 else
379 mask = 0x1ff;
380
381 for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
382 uint32_t ret_reg;
383 if ((lane == 0) || (lane == 1))
384 ret_reg = 0x10;
385 if ((lane == 2) || (lane == 3))
386 ret_reg = 0x11;
387 if ((lane == 4) || (lane == 5))
388 ret_reg = 0x20;
389 if ((lane == 6) || (lane == 7))
390 ret_reg = 0x21;
391 if (lane == 8)
392 ret_reg = 0x12;
393 ret_reg += dimm * 3;
394 dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, ret_reg);
395 if ((lane == 7) || (lane == 5) || (lane == 3) || (lane == 1)) {
396 dword &= ~(mask << 16);
397 dword |= (current_total_delay[lane] & mask) << 16;
398 }
399 if ((lane == 8) || (lane == 6) || (lane == 4) || (lane == 2) || (lane == 0)) {
400 dword &= ~mask;
401 dword |= current_total_delay[lane] & mask;
402 }
403 Set_NB32_index_wait_DCT(dev, dct, index_reg, ret_reg, dword);
404 }
405}
406
407static void read_dram_phase_recovery_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg)
408{
409 uint8_t lane;
410 uint32_t dword;
411
412 for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
413 uint32_t prc_reg;
414
415 /* Calculate DRAM Phase Recovery Control register location */
416 if ((lane == 0) || (lane == 1) || (lane == 2) || (lane == 3))
417 prc_reg = 0x50;
418 if ((lane == 4) || (lane == 5) || (lane == 6) || (lane == 7))
419 prc_reg = 0x51;
420 if (lane == 8)
421 prc_reg = 0x52;
422
423 dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, prc_reg);
424 if ((lane == 7) || (lane == 3)) {
425 current_total_delay[lane] = (dword >> 24) & 0x7f;
426 }
427 if ((lane == 6) || (lane == 2)) {
428 current_total_delay[lane] = (dword >> 16) & 0x7f;
429 }
430 if ((lane == 5) || (lane == 1)) {
431 current_total_delay[lane] = (dword >> 8) & 0x7f;
432 }
433 if ((lane == 8) || (lane == 4) || (lane == 0)) {
434 current_total_delay[lane] = dword & 0x7f;
435 }
436 }
437}
438
439static void write_dram_phase_recovery_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg)
440{
441 uint8_t lane;
442 uint32_t dword;
443
444 for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
445 uint32_t prc_reg;
446
447 /* Calculate DRAM Phase Recovery Control register location */
448 if ((lane == 0) || (lane == 1) || (lane == 2) || (lane == 3))
449 prc_reg = 0x50;
450 if ((lane == 4) || (lane == 5) || (lane == 6) || (lane == 7))
451 prc_reg = 0x51;
452 if (lane == 8)
453 prc_reg = 0x52;
454
455 /* Set DRAM Phase Recovery Control register values */
456 dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, prc_reg);
457 if ((lane == 7) || (lane == 3)) {
458 dword &= ~(0x7f << 24);
459 dword |= (current_total_delay[lane] & 0x7f) << 24;
460 }
461 if ((lane == 6) || (lane == 2)) {
462 dword &= ~(0x7f << 16);
463 dword |= (current_total_delay[lane] & 0x7f) << 16;
464 }
465 if ((lane == 5) || (lane == 1)) {
466 dword &= ~(0x7f << 8);
467 dword |= (current_total_delay[lane] & 0x7f) << 8;
468 }
469 if ((lane == 8) || (lane == 4) || (lane == 0)) {
470 dword &= ~0x7f;
471 dword |= current_total_delay[lane] & 0x7f;
472 }
473 Set_NB32_index_wait_DCT(dev, dct, index_reg, prc_reg, dword);
474 }
475}
476
477static void read_read_dqs_timing_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg)
478{
479 uint8_t lane;
480 uint32_t dword;
481
482 for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
483 uint32_t rdt_reg;
484
485 /* Calculate DRAM Read DQS Timing register location */
486 if ((lane == 0) || (lane == 1) || (lane == 2) || (lane == 3))
487 rdt_reg = 0x5;
488 if ((lane == 4) || (lane == 5) || (lane == 6) || (lane == 7))
489 rdt_reg = 0x6;
490 if (lane == 8)
491 rdt_reg = 0x7;
492 rdt_reg |= (dimm << 8);
493
494 dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, rdt_reg);
495 if ((lane == 7) || (lane == 3)) {
496 current_total_delay[lane] = (dword >> 24) & 0x3f;
497 }
498 if ((lane == 6) || (lane == 2)) {
499 current_total_delay[lane] = (dword >> 16) & 0x3f;
500 }
501 if ((lane == 5) || (lane == 1)) {
502 current_total_delay[lane] = (dword >> 8) & 0x3f;
503 }
504 if ((lane == 8) || (lane == 4) || (lane == 0)) {
505 current_total_delay[lane] = dword & 0x3f;
506 }
507
508 if (is_fam15h())
509 current_total_delay[lane] >>= 1;
Timothy Pearsonb8a355d2015-09-05 17:55:58 -0500510 }
511}
512
513static uint32_t convert_testaddr_and_channel_to_address(struct DCTStatStruc *pDCTstat, uint32_t testaddr, uint8_t channel)
514{
515 SetUpperFSbase(testaddr);
516 testaddr <<= 8;
517
518 if((pDCTstat->Status & (1<<SB_128bitmode)) && channel ) {
519 testaddr += 8; /* second channel */
520 }
521
522 return testaddr;
523}
524
/* DQS Receiver Enable Training (Family 10h)
 * Algorithm detailed in:
 * The Fam10h BKDG Rev. 3.62 section 2.8.9.9.2
 */
Timothy Pearson730a0432015-10-16 13:51:51 -0500529static void dqsTrainRcvrEn_SW_Fam10(struct MCTStatStruc *pMCTstat,
Zheng Baoeb75f652010-04-23 17:32:48 +0000530 struct DCTStatStruc *pDCTstat, u8 Pass)
531{
Timothy Pearsonb8a355d2015-09-05 17:55:58 -0500532 u8 Channel;
533 u8 _2Ranks;
Zheng Baoeb75f652010-04-23 17:32:48 +0000534 u8 Addl_Index = 0;
535 u8 Receiver;
536 u8 _DisableDramECC = 0, _Wrap32Dis = 0, _SSE2 = 0;
Timothy Pearsonb8a355d2015-09-05 17:55:58 -0500537 u16 CTLRMaxDelay;
538 u16 MaxDelay_CH[2];
Zheng Baoeb75f652010-04-23 17:32:48 +0000539 u32 TestAddr0, TestAddr1, TestAddr0B, TestAddr1B;
540 u32 PatternBuffer[64+4]; /* FIXME: need increase 8? */
541 u32 Errors;
542
543 u32 val;
544 u32 reg;
545 u32 dev;
546 u32 index_reg;
547 u32 ch_start, ch_end, ch;
548 u32 msr;
549 u32 cr4;
550 u32 lo, hi;
551
Timothy Pearsonb8a355d2015-09-05 17:55:58 -0500552 uint32_t dword;
Timothy Pearson730a0432015-10-16 13:51:51 -0500553 uint8_t dimm;
Timothy Pearsonb8a355d2015-09-05 17:55:58 -0500554 uint8_t rank;
555 uint8_t lane;
556 uint16_t current_total_delay[MAX_BYTE_LANES];
557 uint16_t candidate_total_delay[8];
558 uint8_t data_test_pass_sr[2][8]; /* [rank][lane] */
559 uint8_t data_test_pass[8]; /* [lane] */
560 uint8_t data_test_pass_prev[8]; /* [lane] */
561 uint8_t window_det_toggle[8];
562 uint8_t trained[8];
563 uint64_t result_qword1;
564 uint64_t result_qword2;
565
Zheng Baoeb75f652010-04-23 17:32:48 +0000566 u8 valid;
Zheng Baoeb75f652010-04-23 17:32:48 +0000567
568 print_debug_dqs("\nTrainRcvEn: Node", pDCTstat->Node_ID, 0);
569 print_debug_dqs("TrainRcvEn: Pass", Pass, 0);
570
571 dev = pDCTstat->dev_dct;
572 ch_start = 0;
573 if(!pDCTstat->GangedMode) {
574 ch_end = 2;
575 } else {
576 ch_end = 1;
577 }
578
579 for (ch = ch_start; ch < ch_end; ch++) {
Timothy Pearson730a0432015-10-16 13:51:51 -0500580 reg = 0x78;
581 val = Get_NB32_DCT(dev, ch, reg);
Zheng Baoeb75f652010-04-23 17:32:48 +0000582 val &= ~(0x3ff << 22);
Timothy Pearson730a0432015-10-16 13:51:51 -0500583 val |= (0x0c8 << 22); /* MaxRdLatency = 0xc8 */
584 Set_NB32_DCT(dev, ch, reg, val);
Zheng Baoeb75f652010-04-23 17:32:48 +0000585 }
586
Zheng Baoeb75f652010-04-23 17:32:48 +0000587 if (Pass == FirstPass) {
588 mct_InitDQSPos4RcvrEn_D(pMCTstat, pDCTstat);
589 } else {
590 pDCTstat->DimmTrainFail = 0;
591 pDCTstat->CSTrainFail = ~pDCTstat->CSPresent;
592 }
593
594 cr4 = read_cr4();
595 if(cr4 & ( 1 << 9)) { /* save the old value */
596 _SSE2 = 1;
597 }
598 cr4 |= (1 << 9); /* OSFXSR enable SSE2 */
599 write_cr4(cr4);
600
601 msr = HWCR;
602 _RDMSR(msr, &lo, &hi);
603 /* FIXME: Why use SSEDIS */
604 if(lo & (1 << 17)) { /* save the old value */
605 _Wrap32Dis = 1;
606 }
607 lo |= (1 << 17); /* HWCR.wrap32dis */
608 lo &= ~(1 << 15); /* SSEDIS */
609 _WRMSR(msr, lo, hi); /* Setting wrap32dis allows 64-bit memory references in real mode */
610
611 _DisableDramECC = mct_DisableDimmEccEn_D(pMCTstat, pDCTstat);
612
613 SetupRcvrPattern(pMCTstat, pDCTstat, PatternBuffer, Pass);
614
615 Errors = 0;
616 dev = pDCTstat->dev_dct;
Zheng Baoeb75f652010-04-23 17:32:48 +0000617
618 for (Channel = 0; Channel < 2; Channel++) {
619 print_debug_dqs("\tTrainRcvEn51: Node ", pDCTstat->Node_ID, 1);
620 print_debug_dqs("\tTrainRcvEn51: Channel ", Channel, 1);
621 pDCTstat->Channel = Channel;
622
Timothy Pearsonb8a355d2015-09-05 17:55:58 -0500623 CTLRMaxDelay = 0;
Zheng Baoeb75f652010-04-23 17:32:48 +0000624 MaxDelay_CH[Channel] = 0;
Timothy Pearson730a0432015-10-16 13:51:51 -0500625 index_reg = 0x98;
Zheng Baoeb75f652010-04-23 17:32:48 +0000626
627 Receiver = mct_InitReceiver_D(pDCTstat, Channel);
Timothy Pearsonb8a355d2015-09-05 17:55:58 -0500628 /* There are four receiver pairs, loosely associated with chipselects.
629 * This is essentially looping over each DIMM.
630 */
Zheng Baoeb75f652010-04-23 17:32:48 +0000631 for (; Receiver < 8; Receiver += 2) {
632 Addl_Index = (Receiver >> 1) * 3 + 0x10;
Timothy Pearson730a0432015-10-16 13:51:51 -0500633 dimm = (Receiver >> 1);
Zheng Baoeb75f652010-04-23 17:32:48 +0000634
635 print_debug_dqs("\t\tTrainRcvEnd52: index ", Addl_Index, 2);
636
Timothy Pearsonb8a355d2015-09-05 17:55:58 -0500637 if (!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver)) {
Zheng Baoeb75f652010-04-23 17:32:48 +0000638 continue;
639 }
640
Timothy Pearsonb8a355d2015-09-05 17:55:58 -0500641 /* Clear data structures */
642 for (lane = 0; lane < 8; lane++) {
643 data_test_pass_prev[lane] = 0;
644 trained[lane] = 0;
645 }
646
647 /* 2.8.9.9.2 (1, 6)
648 * Retrieve gross and fine timing fields from write DQS registers
649 */
Timothy Pearson730a0432015-10-16 13:51:51 -0500650 read_dqs_write_timing_control_registers(current_total_delay, dev, Channel, dimm, index_reg);
Timothy Pearsonb8a355d2015-09-05 17:55:58 -0500651
652 /* 2.8.9.9.2 (1)
653 * Program the Write Data Timing and Write ECC Timing register to
654 * the values stored in the DQS Write Timing Control register
655 * for each lane
656 */
Timothy Pearson730a0432015-10-16 13:51:51 -0500657 write_write_data_timing_control_registers(current_total_delay, dev, Channel, dimm, index_reg);
Timothy Pearsonb8a355d2015-09-05 17:55:58 -0500658
659 /* 2.8.9.9.2 (2)
660 * Program the Read DQS Timing Control and the Read DQS ECC Timing Control registers
661 * to 1/2 MEMCLK for all lanes
662 */
663 for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
664 uint32_t rdt_reg;
665 if ((lane == 0) || (lane == 1) || (lane == 2) || (lane == 3))
666 rdt_reg = 0x5;
667 if ((lane == 4) || (lane == 5) || (lane == 6) || (lane == 7))
668 rdt_reg = 0x6;
669 if (lane == 8)
670 rdt_reg = 0x7;
Timothy Pearson730a0432015-10-16 13:51:51 -0500671 rdt_reg |= (dimm << 8);
Timothy Pearsonb8a355d2015-09-05 17:55:58 -0500672 if (lane == 8)
673 dword = 0x0000003f;
674 else
675 dword = 0x3f3f3f3f;
Timothy Pearson730a0432015-10-16 13:51:51 -0500676 Set_NB32_index_wait_DCT(dev, Channel, index_reg, rdt_reg, dword);
Timothy Pearsonb8a355d2015-09-05 17:55:58 -0500677 }
678
679 /* 2.8.9.9.2 (3)
680 * Select two test addresses for each rank present
681 */
Zheng Baoeb75f652010-04-23 17:32:48 +0000682 TestAddr0 = mct_GetRcvrSysAddr_D(pMCTstat, pDCTstat, Channel, Receiver, &valid);
Timothy Pearsonb8a355d2015-09-05 17:55:58 -0500683 if (!valid) { /* Address not supported on current CS */
Zheng Baoeb75f652010-04-23 17:32:48 +0000684 continue;
685 }
686
687 TestAddr0B = TestAddr0 + (BigPagex8_RJ8 << 3);
688
689 if(mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver+1)) {
690 TestAddr1 = mct_GetRcvrSysAddr_D(pMCTstat, pDCTstat, Channel, Receiver+1, &valid);
691 if(!valid) { /* Address not supported on current CS */
692 continue;
693 }
694 TestAddr1B = TestAddr1 + (BigPagex8_RJ8 << 3);
695 _2Ranks = 1;
696 } else {
697 _2Ranks = TestAddr1 = TestAddr1B = 0;
698 }
699
700 print_debug_dqs("\t\tTrainRcvEn53: TestAddr0 ", TestAddr0, 2);
701 print_debug_dqs("\t\tTrainRcvEn53: TestAddr0B ", TestAddr0B, 2);
702 print_debug_dqs("\t\tTrainRcvEn53: TestAddr1 ", TestAddr1, 2);
703 print_debug_dqs("\t\tTrainRcvEn53: TestAddr1B ", TestAddr1B, 2);
704
Timothy Pearsonb8a355d2015-09-05 17:55:58 -0500705 /* 2.8.9.9.2 (4, 5)
Timothy Pearson730a0432015-10-16 13:51:51 -0500706 * Write 1 cache line of the appropriate test pattern to each test address
Zheng Baoeb75f652010-04-23 17:32:48 +0000707 */
Timothy Pearsonb8a355d2015-09-05 17:55:58 -0500708 mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0, 0); /* rank 0 of DIMM, testpattern 0 */
709 mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0B, 1); /* rank 0 of DIMM, testpattern 1 */
710 if (_2Ranks) {
711 mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1, 0); /*rank 1 of DIMM, testpattern 0 */
712 mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1B, 1); /*rank 1 of DIMM, testpattern 1 */
Zheng Baoeb75f652010-04-23 17:32:48 +0000713 }
Zheng Baoeb75f652010-04-23 17:32:48 +0000714
Timothy Pearsonb8a355d2015-09-05 17:55:58 -0500715#if DQS_TRAIN_DEBUG > 0
716 for (lane = 0; lane < 8; lane++) {
717 print_debug_dqs("\t\tTrainRcvEn54: lane: ", lane, 2);
718 print_debug_dqs("\t\tTrainRcvEn54: current_total_delay ", current_total_delay[lane], 2);
719 }
720#endif
Zheng Baoeb75f652010-04-23 17:32:48 +0000721
Timothy Pearsonb8a355d2015-09-05 17:55:58 -0500722 /* 2.8.9.9.2 (6)
723 * Write gross and fine timing fields to read DQS registers
724 */
Timothy Pearson730a0432015-10-16 13:51:51 -0500725 write_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg);
Zheng Baoeb75f652010-04-23 17:32:48 +0000726
Timothy Pearsonb8a355d2015-09-05 17:55:58 -0500727 /* 2.8.9.9.2 (7)
728 * Loop over all delay values up to 1 MEMCLK (0x40 delay steps) from the initial delay values
729 *
730 * FIXME
731 * It is not clear if training should be discontinued if any test failures occur in the first
732 * 1 MEMCLK window, or if it should be discontinued if no successes occur in the first 1 MEMCLK
733 * window. Therefore, loop over up to 2 MEMCLK (0x80 delay steps) to be on the safe side.
734 */
735 uint16_t current_delay_step;
736
737 for (current_delay_step = 0; current_delay_step < 0x80; current_delay_step++) {
738 print_debug_dqs("\t\t\tTrainRcvEn541: current_delay_step ", current_delay_step, 3);
739
740 /* 2.8.9.9.2 (7 D)
741 * Terminate if all lanes are trained
Zheng Baoeb75f652010-04-23 17:32:48 +0000742 */
Timothy Pearsonb8a355d2015-09-05 17:55:58 -0500743 uint8_t all_lanes_trained = 1;
744 for (lane = 0; lane < 8; lane++)
745 if (!trained[lane])
746 all_lanes_trained = 0;
Zheng Baoeb75f652010-04-23 17:32:48 +0000747
Timothy Pearsonb8a355d2015-09-05 17:55:58 -0500748 if (all_lanes_trained)
749 break;
Zheng Baoeb75f652010-04-23 17:32:48 +0000750
Timothy Pearsonb8a355d2015-09-05 17:55:58 -0500751 /* 2.8.9.9.2 (7 A)
Timothy Pearson730a0432015-10-16 13:51:51 -0500752 * Loop over all ranks
753 */
Timothy Pearsonb8a355d2015-09-05 17:55:58 -0500754 for (rank = 0; rank < (_2Ranks + 1); rank++) {
755 /* 2.8.9.9.2 (7 A a-d)
756 * Read the first test address of the current rank
757 * Store the first data beat for analysis
758 * Reset read pointer in the DRAM controller FIFO
759 * Read the second test address of the current rank
760 * Store the first data beat for analysis
761 * Reset read pointer in the DRAM controller FIFO
762 */
763 if (rank & 1) {
764 /* 2.8.9.9.2 (7 D)
765 * Invert read instructions to alternate data read order on the bus
766 */
767 proc_IOCLFLUSH_D((rank == 0)?TestAddr0B:TestAddr1B);
768 result_qword2 = read64_fs(convert_testaddr_and_channel_to_address(pDCTstat, (rank == 0)?TestAddr0B:TestAddr1B, Channel));
Timothy Pearson730a0432015-10-16 13:51:51 -0500769 write_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg);
Timothy Pearsonb8a355d2015-09-05 17:55:58 -0500770 proc_IOCLFLUSH_D((rank == 0)?TestAddr0:TestAddr1);
771 result_qword1 = read64_fs(convert_testaddr_and_channel_to_address(pDCTstat, (rank == 0)?TestAddr0:TestAddr1, Channel));
Timothy Pearson730a0432015-10-16 13:51:51 -0500772 write_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg);
Timothy Pearsonb8a355d2015-09-05 17:55:58 -0500773 } else {
774 proc_IOCLFLUSH_D((rank == 0)?TestAddr0:TestAddr1);
775 result_qword1 = read64_fs(convert_testaddr_and_channel_to_address(pDCTstat, (rank == 0)?TestAddr0:TestAddr1, Channel));
Timothy Pearson730a0432015-10-16 13:51:51 -0500776 write_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg);
Timothy Pearsonb8a355d2015-09-05 17:55:58 -0500777 proc_IOCLFLUSH_D((rank == 0)?TestAddr0B:TestAddr1B);
778 result_qword2 = read64_fs(convert_testaddr_and_channel_to_address(pDCTstat, (rank == 0)?TestAddr0B:TestAddr1B, Channel));
Timothy Pearson730a0432015-10-16 13:51:51 -0500779 write_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg);
Timothy Pearsonb8a355d2015-09-05 17:55:58 -0500780 }
781 /* 2.8.9.9.2 (7 A e)
782 * Compare both read patterns and flag passing ranks/lanes
783 */
784 uint8_t result_lane_byte1;
785 uint8_t result_lane_byte2;
786 for (lane = 0; lane < 8; lane++) {
787 if (trained[lane] == 1) {
788#if DQS_TRAIN_DEBUG > 0
789 print_debug_dqs("\t\t\t\t\t\t\t\t lane already trained: ", lane, 4);
790#endif
791 continue;
792 }
Zheng Baoeb75f652010-04-23 17:32:48 +0000793
Timothy Pearsonb8a355d2015-09-05 17:55:58 -0500794 result_lane_byte1 = (result_qword1 >> (lane * 8)) & 0xff;
795 result_lane_byte2 = (result_qword2 >> (lane * 8)) & 0xff;
796 if ((result_lane_byte1 == 0x55) && (result_lane_byte2 == 0xaa))
797 data_test_pass_sr[rank][lane] = 1;
798 else
799 data_test_pass_sr[rank][lane] = 0;
800#if DQS_TRAIN_DEBUG > 0
801 print_debug_dqs_pair("\t\t\t\t\t\t\t\t ", 0x55, " | ", result_lane_byte1, 4);
802 print_debug_dqs_pair("\t\t\t\t\t\t\t\t ", 0xaa, " | ", result_lane_byte2, 4);
803#endif
Zheng Baoeb75f652010-04-23 17:32:48 +0000804 }
805 }
Zheng Baoeb75f652010-04-23 17:32:48 +0000806
Timothy Pearsonb8a355d2015-09-05 17:55:58 -0500807 /* 2.8.9.9.2 (7 B)
808 * If DIMM is dual rank, only use delays that pass testing for both ranks
809 */
810 for (lane = 0; lane < 8; lane++) {
811 if (_2Ranks) {
812 if ((data_test_pass_sr[0][lane]) && (data_test_pass_sr[1][lane]))
813 data_test_pass[lane] = 1;
814 else
815 data_test_pass[lane] = 0;
816 } else {
817 data_test_pass[lane] = data_test_pass_sr[0][lane];
818 }
819 }
Zheng Baoeb75f652010-04-23 17:32:48 +0000820
Timothy Pearsonb8a355d2015-09-05 17:55:58 -0500821 /* 2.8.9.9.2 (7 E)
822 * For each lane, update the DQS receiver delay setting in support of next iteration
823 */
824 for (lane = 0; lane < 8; lane++) {
825 if (trained[lane] == 1)
826 continue;
Zheng Baoeb75f652010-04-23 17:32:48 +0000827
Timothy Pearsonb8a355d2015-09-05 17:55:58 -0500828 /* 2.8.9.9.2 (7 C a)
829 * Save the total delay of the first success after a failure for later use
830 */
831 if ((data_test_pass[lane] == 1) && (data_test_pass_prev[lane] == 0)) {
832 candidate_total_delay[lane] = current_total_delay[lane];
833 window_det_toggle[lane] = 0;
834 }
835
836 /* 2.8.9.9.2 (7 C b)
837 * If the current delay failed testing add 1/8 UI to the current delay
838 */
839 if (data_test_pass[lane] == 0)
840 current_total_delay[lane] += 0x4;
841
842 /* 2.8.9.9.2 (7 C c)
843 * If the current delay passed testing alternately add either 1/32 UI or 1/4 UI to the current delay
844 * If 1.25 UI of delay have been added with no failures the lane is considered trained
845 */
846 if (data_test_pass[lane] == 1) {
847 /* See if lane is trained */
848 if ((current_total_delay[lane] - candidate_total_delay[lane]) >= 0x28) {
849 trained[lane] = 1;
850
851 /* Calculate and set final lane delay value
852 * The final delay is the candidate delay + 7/8 UI
853 */
854 current_total_delay[lane] = candidate_total_delay[lane] + 0x1c;
855 } else {
856 if (window_det_toggle[lane] == 0) {
857 current_total_delay[lane] += 0x1;
858 window_det_toggle[lane] = 1;
859 } else {
860 current_total_delay[lane] += 0x8;
861 window_det_toggle[lane] = 0;
862 }
Zheng Baoeb75f652010-04-23 17:32:48 +0000863 }
864 }
865 }
866
Timothy Pearsonb8a355d2015-09-05 17:55:58 -0500867 /* Update delays in hardware */
Timothy Pearson730a0432015-10-16 13:51:51 -0500868 write_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg);
Timothy Pearsonb8a355d2015-09-05 17:55:58 -0500869
870 /* Save previous results for comparison in the next iteration */
871 for (lane = 0; lane < 8; lane++)
872 data_test_pass_prev[lane] = data_test_pass[lane];
873 }
874
875#if DQS_TRAIN_DEBUG > 0
876 for (lane = 0; lane < 8; lane++)
877 print_debug_dqs_pair("\t\tTrainRcvEn55: Lane ", lane, " current_total_delay ", current_total_delay[lane], 2);
878#endif
879
880 /* Find highest delay value and save for later use */
881 for (lane = 0; lane < 8; lane++)
882 if (current_total_delay[lane] > CTLRMaxDelay)
883 CTLRMaxDelay = current_total_delay[lane];
884
885 /* See if any lanes failed training, and set error flags appropriately
886 * For all trained lanes, save delay values for later use
887 */
888 for (lane = 0; lane < 8; lane++) {
889 if (trained[lane]) {
890 pDCTstat->CH_D_B_RCVRDLY[Channel][Receiver >> 1][lane] = current_total_delay[lane];
891 } else {
892 printk(BIOS_WARNING, "TrainRcvrEn: WARNING: Lane %d of receiver %d on channel %d failed training!\n", lane, Receiver, Channel);
893
894 /* Set error flags */
895 pDCTstat->ErrStatus |= 1 << SB_NORCVREN;
896 Errors |= 1 << SB_NORCVREN;
897 pDCTstat->ErrCode = SC_FatalErr;
898 pDCTstat->CSTrainFail |= 1 << Receiver;
899 pDCTstat->DimmTrainFail |= 1 << (Receiver + Channel);
Zheng Baoeb75f652010-04-23 17:32:48 +0000900 }
Zheng Baoeb75f652010-04-23 17:32:48 +0000901 }
902
Timothy Pearsonb8a355d2015-09-05 17:55:58 -0500903 /* 2.8.9.9.2 (8)
904 * Flush the receiver FIFO
905 * Write one full cache line of non-0x55/0xaa data to one of the test addresses, then read it back to flush the FIFO
906 */
Zheng Baoeb75f652010-04-23 17:32:48 +0000907
Timothy Pearsonb8a355d2015-09-05 17:55:58 -0500908 WriteLNTestPattern(TestAddr0 << 8, (uint8_t *)TestPattern2_D, 1);
909 mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0);
910 }
Zheng Baoeb75f652010-04-23 17:32:48 +0000911 MaxDelay_CH[Channel] = CTLRMaxDelay;
Timothy Pearsonb8a355d2015-09-05 17:55:58 -0500912 }
Zheng Baoeb75f652010-04-23 17:32:48 +0000913
914 CTLRMaxDelay = MaxDelay_CH[0];
915 if (MaxDelay_CH[1] > CTLRMaxDelay)
916 CTLRMaxDelay = MaxDelay_CH[1];
917
918 for (Channel = 0; Channel < 2; Channel++) {
919 mct_SetMaxLatency_D(pDCTstat, Channel, CTLRMaxDelay); /* program Ch A/B MaxAsyncLat to correspond with max delay */
920 }
921
Timothy Pearson730a0432015-10-16 13:51:51 -0500922 for (Channel = 0; Channel < 2; Channel++) {
923 ResetDCTWrPtr_D(dev, Channel, index_reg, Addl_Index);
924 }
925
926 if(_DisableDramECC) {
927 mct_EnableDimmEccEn_D(pMCTstat, pDCTstat, _DisableDramECC);
928 }
929
930 if (Pass == FirstPass) {
931 /*Disable DQSRcvrEn training mode */
932 mct_DisableDQSRcvEn_D(pDCTstat);
933 }
934
935 if(!_Wrap32Dis) {
936 msr = HWCR;
937 _RDMSR(msr, &lo, &hi);
938 lo &= ~(1<<17); /* restore HWCR.wrap32dis */
939 _WRMSR(msr, lo, hi);
940 }
941 if(!_SSE2){
942 cr4 = read_cr4();
943 cr4 &= ~(1<<9); /* restore cr4.OSFXSR */
944 write_cr4(cr4);
945 }
946
947#if DQS_TRAIN_DEBUG > 0
948 {
949 u8 ChannelDTD;
950 printk(BIOS_DEBUG, "TrainRcvrEn: CH_MaxRdLat:\n");
951 for(ChannelDTD = 0; ChannelDTD<2; ChannelDTD++) {
952 printk(BIOS_DEBUG, "Channel:%x: %x\n",
953 ChannelDTD, pDCTstat->CH_MaxRdLat[ChannelDTD]);
954 }
955 }
956#endif
957
958#if DQS_TRAIN_DEBUG > 0
959 {
960 u16 valDTD;
961 u8 ChannelDTD, ReceiverDTD;
962 u8 i;
963 u16 *p;
964
965 printk(BIOS_DEBUG, "TrainRcvrEn: CH_D_B_RCVRDLY:\n");
966 for(ChannelDTD = 0; ChannelDTD < 2; ChannelDTD++) {
967 printk(BIOS_DEBUG, "Channel:%x\n", ChannelDTD);
968 for(ReceiverDTD = 0; ReceiverDTD<8; ReceiverDTD+=2) {
969 printk(BIOS_DEBUG, "\t\tReceiver:%x:", ReceiverDTD);
970 p = pDCTstat->CH_D_B_RCVRDLY[ChannelDTD][ReceiverDTD>>1];
971 for (i=0;i<8; i++) {
972 valDTD = p[i];
973 printk(BIOS_DEBUG, " %03x", valDTD);
974 }
975 printk(BIOS_DEBUG, "\n");
976 }
977 }
978 }
979#endif
980
981 printk(BIOS_DEBUG, "TrainRcvrEn: Status %x\n", pDCTstat->Status);
982 printk(BIOS_DEBUG, "TrainRcvrEn: ErrStatus %x\n", pDCTstat->ErrStatus);
983 printk(BIOS_DEBUG, "TrainRcvrEn: ErrCode %x\n", pDCTstat->ErrCode);
984 printk(BIOS_DEBUG, "TrainRcvrEn: Done\n\n");
985}
986
987/* DQS Receiver Enable Training Pattern Generation (Family 15h)
988 * Algorithm detailed in:
989 * The Fam15h BKDG Rev. 3.14 section 2.10.5.8.2 (4)
990 */
991static void generate_dram_receiver_enable_training_pattern_fam15(struct MCTStatStruc *pMCTstat,
992 struct DCTStatStruc *pDCTstat, uint8_t dct, uint8_t Receiver)
993{
994 uint32_t dword;
995 uint32_t dev = pDCTstat->dev_dct;
996
997 /* 2.10.5.7.1.1
998 * It appears that the DCT only supports 8-beat burst length mode,
999 * so do nothing here...
1000 */
1001
1002 /* Wait for CmdSendInProg == 0 */
1003 do {
1004 dword = Get_NB32_DCT(dev, dct, 0x250);
1005 } while (dword & (0x1 << 12));
1006
1007 /* Set CmdTestEnable = 1 */
1008 dword = Get_NB32_DCT(dev, dct, 0x250);
1009 dword |= (0x1 << 2);
1010 Set_NB32_DCT(dev, dct, 0x250, dword);
1011
1012 /* 2.10.5.8.6.1.1 Send Activate Command */
1013 dword = Get_NB32_DCT(dev, dct, 0x28c);
1014 dword &= ~(0xff << 22); /* CmdChipSelect = Receiver */
1015 dword |= ((0x1 << Receiver) << 22);
1016 dword &= ~(0x7 << 19); /* CmdBank = 0 */
1017 dword &= ~(0x3ffff); /* CmdAddress = 0 */
1018 dword |= (0x1 << 31); /* SendActCmd = 1 */
1019 Set_NB32_DCT(dev, dct, 0x28c, dword);
1020
1021 /* Wait for SendActCmd == 0 */
1022 do {
1023 dword = Get_NB32_DCT(dev, dct, 0x28c);
1024 } while (dword & (0x1 << 31));
1025
1026 /* Wait 75 MEMCLKs. */
1027 precise_memclk_delay_fam15(pMCTstat, pDCTstat, dct, 75);
1028
1029 /* 2.10.5.8.6.1.2 */
1030 Set_NB32_DCT(dev, dct, 0x274, 0x0); /* DQMask = 0 */
1031 Set_NB32_DCT(dev, dct, 0x278, 0x0);
1032
1033 dword = Get_NB32_DCT(dev, dct, 0x27c);
1034 dword &= ~(0xff); /* EccMask = 0 */
1035 if (pDCTstat->DimmECCPresent == 0)
1036 dword |= 0xff; /* EccMask = 0xff */
1037 Set_NB32_DCT(dev, dct, 0x27c, dword);
1038
1039 /* 2.10.5.8.6.1.2 */
1040 dword = Get_NB32_DCT(dev, dct, 0x270);
1041 dword &= ~(0x7ffff); /* DataPrbsSeed = 55555 */
1042// dword |= (0x55555);
1043 dword |= (0x44443); /* Use AGESA seed */
1044 Set_NB32_DCT(dev, dct, 0x270, dword);
1045
1046 /* 2.10.5.8.2 (4) */
1047 dword = Get_NB32_DCT(dev, dct, 0x260);
1048 dword &= ~(0x1fffff); /* CmdCount = 192 */
1049 dword |= 192;
1050 Set_NB32_DCT(dev, dct, 0x260, dword);
1051
1052#if 0
1053 /* TODO: This applies to Fam15h model 10h and above only */
1054 /* Program Bubble Count and CmdStreamLen */
1055 dword = Get_NB32_DCT(dev, dct, 0x25c);
1056 dword &= ~(0x3ff << 12); /* BubbleCnt = 0 */
1057 dword &= ~(0x3ff << 22); /* BubbleCnt2 = 0 */
1058 dword &= ~(0xff); /* CmdStreamLen = 1 */
1059 dword |= 0x1;
1060 Set_NB32_DCT(dev, dct, 0x25c, dword);
1061#endif
1062
1063 /* Configure Target A */
1064 dword = Get_NB32_DCT(dev, dct, 0x254);
1065 dword &= ~(0x7 << 24); /* TgtChipSelect = Receiver */
1066 dword |= (Receiver & 0x7) << 24;
1067 dword &= ~(0x7 << 21); /* TgtBank = 0 */
1068 dword &= ~(0x3ff); /* TgtAddress = 0 */
1069 Set_NB32_DCT(dev, dct, 0x254, dword);
1070
1071 dword = Get_NB32_DCT(dev, dct, 0x250);
1072 dword |= (0x1 << 3); /* ResetAllErr = 1 */
1073 dword &= ~(0x1 << 4); /* StopOnErr = 0 */
1074 dword &= ~(0x3 << 8); /* CmdTgt = 0 (Target A) */
1075 dword &= ~(0x7 << 5); /* CmdType = 0 (Read) */
1076 dword |= (0x1 << 11); /* SendCmd = 1 */
1077 Set_NB32_DCT(dev, dct, 0x250, dword);
1078
1079 /* 2.10.5.8.6.1.2 Wait for TestStatus == 1 and CmdSendInProg == 0 */
1080 do {
1081 dword = Get_NB32_DCT(dev, dct, 0x250);
1082 } while ((dword & (0x1 << 12)) || (!(dword & (0x1 << 10))));
1083
1084 dword = Get_NB32_DCT(dev, dct, 0x250);
1085 dword &= ~(0x1 << 11); /* SendCmd = 0 */
1086 Set_NB32_DCT(dev, dct, 0x250, dword);
1087
1088 /* 2.10.5.8.6.1.1 Send Precharge Command */
1089 /* Wait 25 MEMCLKs. */
1090 precise_memclk_delay_fam15(pMCTstat, pDCTstat, dct, 25);
1091
1092 dword = Get_NB32_DCT(dev, dct, 0x28c);
1093 dword &= ~(0xff << 22); /* CmdChipSelect = Receiver */
1094 dword |= ((0x1 << Receiver) << 22);
1095 dword &= ~(0x7 << 19); /* CmdBank = 0 */
1096 dword &= ~(0x3ffff); /* CmdAddress = 0x400 */
1097 dword |= 0x400;
1098 dword |= (0x1 << 30); /* SendPchgCmd = 1 */
1099 Set_NB32_DCT(dev, dct, 0x28c, dword);
1100
1101 /* Wait for SendPchgCmd == 0 */
1102 do {
1103 dword = Get_NB32_DCT(dev, dct, 0x28c);
1104 } while (dword & (0x1 << 30));
1105
1106 /* Wait 25 MEMCLKs. */
1107 precise_memclk_delay_fam15(pMCTstat, pDCTstat, dct, 25);
1108
1109 /* Set CmdTestEnable = 0 */
1110 dword = Get_NB32_DCT(dev, dct, 0x250);
1111 dword &= ~(0x1 << 2);
1112 Set_NB32_DCT(dev, dct, 0x250, dword);
1113}
1114
1115/* DQS Receiver Enable Training (Family 15h)
1116 * Algorithm detailed in:
1117 * The Fam15h BKDG Rev. 3.14 section 2.10.5.8.2
1118 * This algorithm runs once at the lowest supported MEMCLK,
1119 * then once again at the highest supported MEMCLK.
1120 */
1121static void dqsTrainRcvrEn_SW_Fam15(struct MCTStatStruc *pMCTstat,
1122 struct DCTStatStruc *pDCTstat, u8 Pass)
1123{
1124 u8 Channel;
1125 u8 _2Ranks;
1126 u8 Addl_Index = 0;
1127 u8 Receiver;
1128 u8 _DisableDramECC = 0, _Wrap32Dis = 0, _SSE2 = 0;
1129 u32 Errors;
1130
1131 u32 val;
1132 u32 dev;
1133 u32 index_reg;
1134 u32 ch_start, ch_end, ch;
1135 u32 msr;
1136 u32 cr4;
1137 u32 lo, hi;
1138
1139 uint32_t dword;
1140 uint8_t dimm;
1141 uint8_t rank;
1142 uint8_t lane;
1143 uint8_t mem_clk;
1144 uint16_t initial_seed;
1145 uint16_t current_total_delay[MAX_BYTE_LANES];
1146 uint16_t dqs_ret_pass1_total_delay[MAX_BYTE_LANES];
1147 uint16_t rank0_current_total_delay[MAX_BYTE_LANES];
1148 uint16_t phase_recovery_delays[MAX_BYTE_LANES];
1149 uint16_t seed[MAX_BYTE_LANES];
1150 uint16_t seed_gross[MAX_BYTE_LANES];
1151 uint16_t seed_fine[MAX_BYTE_LANES];
1152 uint16_t seed_pre_gross[MAX_BYTE_LANES];
1153
1154 uint8_t package_type = mctGet_NVbits(NV_PACK_TYPE);
1155 uint16_t fam15h_freq_tab[] = {0, 0, 0, 0, 333, 0, 400, 0, 0, 0, 533, 0, 0, 0, 667, 0, 0, 0, 800, 0, 0, 0, 933};
1156
1157 print_debug_dqs("\nTrainRcvEn: Node", pDCTstat->Node_ID, 0);
1158 print_debug_dqs("TrainRcvEn: Pass", Pass, 0);
1159
1160 dev = pDCTstat->dev_dct;
1161 index_reg = 0x98;
1162 ch_start = 0;
1163 ch_end = 2;
1164
1165 for (ch = ch_start; ch < ch_end; ch++) {
1166 uint8_t max_rd_latency = 0x55;
1167 uint8_t p_state;
1168
1169 /* 2.10.5.6 */
1170 fam15EnableTrainingMode(pMCTstat, pDCTstat, ch, 1);
1171
1172 /* 2.10.5.2 */
1173 for (p_state = 0; p_state < 3; p_state++) {
1174 val = Get_NB32_DCT_NBPstate(dev, ch, p_state, 0x210);
1175 val &= ~(0x3ff << 22); /* MaxRdLatency = max_rd_latency */
1176 val |= (max_rd_latency & 0x3ff) << 22;
1177 Set_NB32_DCT_NBPstate(dev, ch, p_state, 0x210, val);
1178 }
1179 }
1180
1181 if (Pass != FirstPass) {
1182 pDCTstat->DimmTrainFail = 0;
1183 pDCTstat->CSTrainFail = ~pDCTstat->CSPresent;
1184 }
1185
1186 cr4 = read_cr4();
1187 if(cr4 & ( 1 << 9)) { /* save the old value */
1188 _SSE2 = 1;
1189 }
1190 cr4 |= (1 << 9); /* OSFXSR enable SSE2 */
1191 write_cr4(cr4);
1192
1193 msr = HWCR;
1194 _RDMSR(msr, &lo, &hi);
1195 /* FIXME: Why use SSEDIS */
1196 if(lo & (1 << 17)) { /* save the old value */
1197 _Wrap32Dis = 1;
1198 }
1199 lo |= (1 << 17); /* HWCR.wrap32dis */
1200 lo &= ~(1 << 15); /* SSEDIS */
1201 _WRMSR(msr, lo, hi); /* Setting wrap32dis allows 64-bit memory references in real mode */
1202
1203 _DisableDramECC = mct_DisableDimmEccEn_D(pMCTstat, pDCTstat);
1204
1205 Errors = 0;
1206 dev = pDCTstat->dev_dct;
1207
1208 for (Channel = 0; Channel < 2; Channel++) {
1209 print_debug_dqs("\tTrainRcvEn51: Node ", pDCTstat->Node_ID, 1);
1210 print_debug_dqs("\tTrainRcvEn51: Channel ", Channel, 1);
1211 pDCTstat->Channel = Channel;
1212
1213 mem_clk = Get_NB32_DCT(dev, Channel, 0x94) & 0x1f;
1214
1215 Receiver = mct_InitReceiver_D(pDCTstat, Channel);
1216 /* There are four receiver pairs, loosely associated with chipselects.
1217 * This is essentially looping over each DIMM.
1218 */
1219 for (; Receiver < 8; Receiver += 2) {
1220 Addl_Index = (Receiver >> 1) * 3 + 0x10;
1221 dimm = (Receiver >> 1);
1222
1223 print_debug_dqs("\t\tTrainRcvEnd52: index ", Addl_Index, 2);
1224
1225 if (!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver)) {
1226 continue;
1227 }
1228
1229 /* Retrieve the total delay values from pass 1 of DQS receiver enable training */
1230 if (Pass != FirstPass) {
1231 read_dqs_receiver_enable_control_registers(dqs_ret_pass1_total_delay, dev, Channel, dimm, index_reg);
1232 }
1233
1234 /* 2.10.5.8.2
1235 * Loop over all ranks
1236 */
1237 if (mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver+1))
1238 _2Ranks = 1;
1239 else
1240 _2Ranks = 0;
1241 for (rank = 0; rank < (_2Ranks + 1); rank++) {
1242 /* 2.10.5.8.2 (1)
1243 * Specify the target DIMM to be trained
1244 * Set TrNibbleSel = 0
1245 *
1246 * TODO: Add support for x4 DIMMs
1247 */
1248 dword = Get_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000008);
1249 dword &= ~(0x3 << 4); /* TrDimmSel */
1250 dword |= ((dimm & 0x3) << 4);
1251 dword &= ~(0x1 << 2); /* TrNibbleSel */
1252 Set_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000008, dword);
1253
1254 /* 2.10.5.8.2 (2)
1255 * Retrieve gross and fine timing fields from write DQS registers
1256 */
1257 read_dqs_write_timing_control_registers(current_total_delay, dev, Channel, dimm, index_reg);
1258
1259 /* 2.10.5.8.2.1
1260 * Generate the DQS Receiver Enable Training Seed Values
1261 */
1262 if (Pass == FirstPass) {
1263 initial_seed = fam15_receiver_enable_training_seed(pDCTstat, Channel, dimm, rank, package_type);
1264
1265 /* Adjust seed for the minimum platform supported frequency */
1266 initial_seed = (uint16_t) (((((uint64_t) initial_seed) *
1267 fam15h_freq_tab[mem_clk] * 100) / (mctGet_NVbits(NV_MIN_MEMCLK) * 100)));
1268
1269 for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
1270 uint16_t wl_pass1_delay;
1271 wl_pass1_delay = current_total_delay[lane];
1272
1273 seed[lane] = initial_seed + wl_pass1_delay;
1274 }
1275 } else {
1276 uint8_t addr_prelaunch = 0; /* TODO: Fetch the correct value from RC2[0] */
1277 uint16_t register_delay;
1278 int16_t seed_prescaling;
1279
1280 memcpy(current_total_delay, dqs_ret_pass1_total_delay, sizeof(current_total_delay));
1281 if ((pDCTstat->Status & (1 << SB_Registered))) {
1282 if (addr_prelaunch)
1283 register_delay = 0x30;
1284 else
1285 register_delay = 0x20;
1286 } else if ((pDCTstat->Status & (1 << SB_LoadReduced))) {
1287 /* TODO
1288 * Load reduced DIMM support unimplemented
1289 */
1290 register_delay = 0x0;
1291 } else {
1292 register_delay = 0x0;
1293 }
1294
1295 for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
1296 seed_prescaling = current_total_delay[lane] - register_delay - 0x20;
1297 seed[lane] = (uint16_t) (register_delay + ((((uint64_t) seed_prescaling) * fam15h_freq_tab[mem_clk] * 100) / (mctGet_NVbits(NV_MIN_MEMCLK) * 100)));
1298 }
1299 }
1300
1301 for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
1302 seed_gross[lane] = (seed[lane] >> 5) & 0x1f;
1303 seed_fine[lane] = seed[lane] & 0x1f;
1304
1305 /*if (seed_gross[lane] == 0)
1306 seed_pre_gross[lane] = 0;
1307 else */if (seed_gross[lane] & 0x1)
1308 seed_pre_gross[lane] = 1;
1309 else
1310 seed_pre_gross[lane] = 2;
1311
1312 /* Calculate phase recovery delays */
1313 phase_recovery_delays[lane] = ((seed_pre_gross[lane] & 0x1f) << 5) | (seed_fine[lane] & 0x1f);
1314
1315 /* Set the gross delay.
1316 * NOTE: While the BKDG states to only program DqsRcvEnGrossDelay, this appears
1317 * to have been a misprint as DqsRcvEnFineDelay should be set to zero as well.
1318 */
1319 current_total_delay[lane] = ((seed_gross[lane] & 0x1f) << 5);
1320 }
1321
1322 /* 2.10.5.8.2 (2) / 2.10.5.8.2.1 (5 6)
1323 * Program PhRecFineDly and PhRecGrossDly
1324 */
1325 write_dram_phase_recovery_control_registers(phase_recovery_delays, dev, Channel, dimm, index_reg);
1326
1327 /* 2.10.5.8.2 (2) / 2.10.5.8.2.1 (7)
1328 * Program the DQS Receiver Enable delay values for each lane
1329 */
1330 write_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg);
1331
1332 /* 2.10.5.8.2 (3)
1333 * Program DqsRcvTrEn = 1
1334 */
1335 dword = Get_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000008);
1336 dword |= (0x1 << 13);
1337 Set_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000008, dword);
1338
1339 /* 2.10.5.8.2 (4)
1340 * Issue 192 read requests to the target rank
1341 */
1342 generate_dram_receiver_enable_training_pattern_fam15(pMCTstat, pDCTstat, Channel, Receiver + (rank & 0x1));
1343
1344 /* 2.10.5.8.2 (5)
1345 * Program DqsRcvTrEn = 0
1346 */
1347 dword = Get_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000008);
1348 dword &= ~(0x1 << 13);
1349 Set_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000008, dword);
1350
1351 /* 2.10.5.8.2 (6)
1352 * Read PhRecGrossDly, PhRecFineDly
1353 */
1354 read_dram_phase_recovery_control_registers(phase_recovery_delays, dev, Channel, dimm, index_reg);
1355
1356 /* 2.10.5.8.2 (7)
1357 * Calculate and program the DQS Receiver Enable delay values
1358 */
1359 for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
1360 current_total_delay[lane] = (phase_recovery_delays[lane] & 0x1f);
1361 current_total_delay[lane] |= ((seed_gross[lane] + ((phase_recovery_delays[lane] >> 5) & 0x1f) - seed_pre_gross[lane] + 1) << 5);
1362 if (lane == 8)
1363 pDCTstat->CH_D_BC_RCVRDLY[Channel][dimm] = current_total_delay[lane];
1364 else
1365 pDCTstat->CH_D_B_RCVRDLY[Channel][dimm][lane] = current_total_delay[lane];
1366 }
1367 write_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg);
1368
1369 if (rank == 0) {
1370 /* Back up the Rank 0 delays for later use */
1371 memcpy(rank0_current_total_delay, current_total_delay, sizeof(current_total_delay));
1372 }
1373
1374 if (rank == 1) {
1375 /* 2.10.5.8.2 (8)
1376 * Compute the average delay across both ranks and program the result into
1377 * the DQS Receiver Enable delay registers
1378 */
1379 for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
1380 current_total_delay[lane] = (rank0_current_total_delay[lane] + current_total_delay[lane]) / 2;
1381 if (lane == 8)
1382 pDCTstat->CH_D_BC_RCVRDLY[Channel][dimm] = current_total_delay[lane];
1383 else
1384 pDCTstat->CH_D_B_RCVRDLY[Channel][dimm][lane] = current_total_delay[lane];
1385 }
1386 write_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg);
1387 }
1388 }
1389
1390#if DQS_TRAIN_DEBUG > 0
1391 for (lane = 0; lane < 8; lane++)
1392 print_debug_dqs_pair("\t\tTrainRcvEn55: Lane ", lane, " current_total_delay ", current_total_delay[lane], 2);
1393#endif
1394 }
1395 }
1396
1397 /* Calculate and program MaxRdLatency */
1398 Calc_SetMaxRdLatency_D_Fam15(pMCTstat, pDCTstat, Channel);
Zheng Baoeb75f652010-04-23 17:32:48 +00001399
1400 if(_DisableDramECC) {
1401 mct_EnableDimmEccEn_D(pMCTstat, pDCTstat, _DisableDramECC);
1402 }
1403
1404 if (Pass == FirstPass) {
1405 /*Disable DQSRcvrEn training mode */
1406 mct_DisableDQSRcvEn_D(pDCTstat);
1407 }
1408
1409 if(!_Wrap32Dis) {
1410 msr = HWCR;
1411 _RDMSR(msr, &lo, &hi);
1412 lo &= ~(1<<17); /* restore HWCR.wrap32dis */
1413 _WRMSR(msr, lo, hi);
1414 }
1415 if(!_SSE2){
1416 cr4 = read_cr4();
1417 cr4 &= ~(1<<9); /* restore cr4.OSFXSR */
1418 write_cr4(cr4);
1419 }
1420
1421#if DQS_TRAIN_DEBUG > 0
1422 {
Timothy Pearsonb8a355d2015-09-05 17:55:58 -05001423 u8 ChannelDTD;
Zheng Baoeb75f652010-04-23 17:32:48 +00001424 printk(BIOS_DEBUG, "TrainRcvrEn: CH_MaxRdLat:\n");
Timothy Pearsonb8a355d2015-09-05 17:55:58 -05001425 for(ChannelDTD = 0; ChannelDTD<2; ChannelDTD++) {
Zheng Baoeb75f652010-04-23 17:32:48 +00001426 printk(BIOS_DEBUG, "Channel:%x: %x\n",
Timothy Pearsonb8a355d2015-09-05 17:55:58 -05001427 ChannelDTD, pDCTstat->CH_MaxRdLat[ChannelDTD]);
Zheng Baoeb75f652010-04-23 17:32:48 +00001428 }
1429 }
1430#endif
1431
1432#if DQS_TRAIN_DEBUG > 0
1433 {
Timothy Pearsonb8a355d2015-09-05 17:55:58 -05001434 u16 valDTD;
1435 u8 ChannelDTD, ReceiverDTD;
Zheng Baoeb75f652010-04-23 17:32:48 +00001436 u8 i;
Timothy Pearsonb8a355d2015-09-05 17:55:58 -05001437 u16 *p;
Zheng Baoeb75f652010-04-23 17:32:48 +00001438
1439 printk(BIOS_DEBUG, "TrainRcvrEn: CH_D_B_RCVRDLY:\n");
Timothy Pearsonb8a355d2015-09-05 17:55:58 -05001440 for(ChannelDTD = 0; ChannelDTD < 2; ChannelDTD++) {
1441 printk(BIOS_DEBUG, "Channel:%x\n", ChannelDTD);
1442 for(ReceiverDTD = 0; ReceiverDTD<8; ReceiverDTD+=2) {
1443 printk(BIOS_DEBUG, "\t\tReceiver:%x:", ReceiverDTD);
1444 p = pDCTstat->CH_D_B_RCVRDLY[ChannelDTD][ReceiverDTD>>1];
Zheng Baoeb75f652010-04-23 17:32:48 +00001445 for (i=0;i<8; i++) {
Timothy Pearsonb8a355d2015-09-05 17:55:58 -05001446 valDTD = p[i];
1447 printk(BIOS_DEBUG, " %03x", valDTD);
Zheng Baoeb75f652010-04-23 17:32:48 +00001448 }
1449 printk(BIOS_DEBUG, "\n");
1450 }
1451 }
1452 }
1453#endif
1454
1455 printk(BIOS_DEBUG, "TrainRcvrEn: Status %x\n", pDCTstat->Status);
1456 printk(BIOS_DEBUG, "TrainRcvrEn: ErrStatus %x\n", pDCTstat->ErrStatus);
1457 printk(BIOS_DEBUG, "TrainRcvrEn: ErrCode %x\n", pDCTstat->ErrCode);
1458 printk(BIOS_DEBUG, "TrainRcvrEn: Done\n\n");
1459}
1460
1461u8 mct_InitReceiver_D(struct DCTStatStruc *pDCTstat, u8 dct)
1462{
1463 if (pDCTstat->DIMMValidDCT[dct] == 0 ) {
1464 return 8;
1465 } else {
1466 return 0;
1467 }
1468}
1469
Zheng Baoeb75f652010-04-23 17:32:48 +00001470static void mct_DisableDQSRcvEn_D(struct DCTStatStruc *pDCTstat)
1471{
1472 u8 ch_end, ch;
1473 u32 reg;
1474 u32 dev;
1475 u32 val;
1476
1477 dev = pDCTstat->dev_dct;
1478 if (pDCTstat->GangedMode) {
1479 ch_end = 1;
1480 } else {
1481 ch_end = 2;
1482 }
1483
1484 for (ch=0; ch<ch_end; ch++) {
Timothy Pearson730a0432015-10-16 13:51:51 -05001485 reg = 0x78;
1486 val = Get_NB32_DCT(dev, ch, reg);
Zheng Baoeb75f652010-04-23 17:32:48 +00001487 val &= ~(1 << DqsRcvEnTrain);
Timothy Pearson730a0432015-10-16 13:51:51 -05001488 Set_NB32_DCT(dev, ch, reg, val);
Zheng Baoeb75f652010-04-23 17:32:48 +00001489 }
1490}
1491
1492/* mct_ModifyIndex_D
1493 * Function only used once so it was inlined.
1494 */
1495
1496/* mct_GetInitFlag_D
1497 * Function only used once so it was inlined.
1498 */
1499
Timothy Pearsonb8a355d2015-09-05 17:55:58 -05001500/* Set F2x[1, 0]9C_x[2B:10] DRAM DQS Receiver Enable Timing Control Registers
1501 * See BKDG Rev. 3.62 page 268 for more information
1502 */
1503void mct_SetRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u16 RcvrEnDly,
Zheng Baoeb75f652010-04-23 17:32:48 +00001504 u8 FinalValue, u8 Channel, u8 Receiver, u32 dev,
1505 u32 index_reg, u8 Addl_Index, u8 Pass)
1506{
1507 u32 index;
1508 u8 i;
Timothy Pearsonb8a355d2015-09-05 17:55:58 -05001509 u16 *p;
Zheng Baoeb75f652010-04-23 17:32:48 +00001510 u32 val;
1511
Timothy Pearsonb8a355d2015-09-05 17:55:58 -05001512 if(RcvrEnDly == 0x1fe) {
1513 /*set the boundary flag */
Zheng Baoeb75f652010-04-23 17:32:48 +00001514 pDCTstat->Status |= 1 << SB_DQSRcvLimit;
1515 }
1516
1517 /* DimmOffset not needed for CH_D_B_RCVRDLY array */
1518 for(i=0; i < 8; i++) {
1519 if(FinalValue) {
1520 /*calculate dimm offset */
1521 p = pDCTstat->CH_D_B_RCVRDLY[Channel][Receiver >> 1];
1522 RcvrEnDly = p[i];
1523 }
1524
1525 /* if flag=0, set DqsRcvEn value to reg. */
1526 /* get the register index from table */
1527 index = Table_DQSRcvEn_Offset[i >> 1];
1528 index += Addl_Index; /* DIMMx DqsRcvEn byte0 */
Timothy Pearson730a0432015-10-16 13:51:51 -05001529 val = Get_NB32_index_wait_DCT(dev, Channel, index_reg, index);
Zheng Baoeb75f652010-04-23 17:32:48 +00001530 if(i & 1) {
1531 /* odd byte lane */
Timothy Pearsonb8a355d2015-09-05 17:55:58 -05001532 val &= ~(0x1ff << 16);
1533 val |= ((RcvrEnDly & 0x1ff) << 16);
Zheng Baoeb75f652010-04-23 17:32:48 +00001534 } else {
1535 /* even byte lane */
Timothy Pearsonb8a355d2015-09-05 17:55:58 -05001536 val &= ~0x1ff;
1537 val |= (RcvrEnDly & 0x1ff);
Zheng Baoeb75f652010-04-23 17:32:48 +00001538 }
Timothy Pearson730a0432015-10-16 13:51:51 -05001539 Set_NB32_index_wait_DCT(dev, Channel, index_reg, index, val);
Zheng Baoeb75f652010-04-23 17:32:48 +00001540 }
1541
1542}
1543
Timothy Pearsonb8a355d2015-09-05 17:55:58 -05001544/* Calculate MaxRdLatency
1545 * Algorithm detailed in the Fam10h BKDG Rev. 3.62 section 2.8.9.9.5
1546 */
1547static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u16 DQSRcvEnDly)
Zheng Baoeb75f652010-04-23 17:32:48 +00001548{
1549 u32 dev;
1550 u32 reg;
Timothy Pearsonb8a355d2015-09-05 17:55:58 -05001551 u32 SubTotal;
Zheng Baoeb75f652010-04-23 17:32:48 +00001552 u32 index_reg;
Zheng Baoeb75f652010-04-23 17:32:48 +00001553 u32 val;
Timothy Pearsonb8a355d2015-09-05 17:55:58 -05001554
1555 uint8_t cpu_val_n;
1556 uint8_t cpu_val_p;
1557
1558 u16 freq_tab[] = {400, 533, 667, 800};
1559
1560 /* Set up processor-dependent values */
1561 if (pDCTstat->LogicalCPUID & AMD_DR_Dx) {
1562 /* Revision D and above */
1563 cpu_val_n = 4;
1564 cpu_val_p = 29;
1565 } else if (pDCTstat->LogicalCPUID & AMD_DR_Cx) {
1566 /* Revision C */
1567 uint8_t package_type = mctGet_NVbits(NV_PACK_TYPE);
1568 if ((package_type == PT_L1) /* Socket F (1207) */
1569 || (package_type == PT_M2) /* Socket AM3 */
1570 || (package_type == PT_S1)) { /* Socket S1g<x> */
1571 cpu_val_n = 10;
1572 cpu_val_p = 11;
1573 } else {
1574 cpu_val_n = 4;
1575 cpu_val_p = 29;
1576 }
1577 } else {
1578 /* Revision B and below */
1579 cpu_val_n = 10;
1580 cpu_val_p = 11;
1581 }
Zheng Baoeb75f652010-04-23 17:32:48 +00001582
1583 if(pDCTstat->GangedMode)
1584 Channel = 0;
1585
1586 dev = pDCTstat->dev_dct;
Timothy Pearson730a0432015-10-16 13:51:51 -05001587 index_reg = 0x98;
Zheng Baoeb75f652010-04-23 17:32:48 +00001588
1589 /* Multiply the CAS Latency by two to get a number of 1/2 MEMCLKs units.*/
Timothy Pearson730a0432015-10-16 13:51:51 -05001590 val = Get_NB32_DCT(dev, Channel, 0x88);
Zheng Baoeb75f652010-04-23 17:32:48 +00001591 SubTotal = ((val & 0x0f) + 4) << 1; /* SubTotal is 1/2 Memclk unit */
1592
1593 /* If registered DIMMs are being used then
1594 * add 1 MEMCLK to the sub-total.
1595 */
Timothy Pearson730a0432015-10-16 13:51:51 -05001596 val = Get_NB32_DCT(dev, Channel, 0x90);
Zheng Baoeb75f652010-04-23 17:32:48 +00001597 if(!(val & (1 << UnBuffDimm)))
1598 SubTotal += 2;
1599
1600 /* If the address prelaunch is setup for 1/2 MEMCLKs then
1601 * add 1, else add 2 to the sub-total.
1602 * if (AddrCmdSetup || CsOdtSetup || CkeSetup) then K := K + 2;
1603 */
Timothy Pearson730a0432015-10-16 13:51:51 -05001604 val = Get_NB32_index_wait_DCT(dev, Channel, index_reg, 0x04);
Zheng Baoeb75f652010-04-23 17:32:48 +00001605 if(!(val & 0x00202020))
1606 SubTotal += 1;
1607 else
1608 SubTotal += 2;
1609
1610 /* If the F2x[1, 0]78[RdPtrInit] field is 4, 5, 6 or 7 MEMCLKs,
1611 * then add 4, 3, 2, or 1 MEMCLKs, respectively to the sub-total. */
Timothy Pearson730a0432015-10-16 13:51:51 -05001612 val = Get_NB32_DCT(dev, Channel, 0x78);
Zheng Baoeb75f652010-04-23 17:32:48 +00001613 SubTotal += 8 - (val & 0x0f);
1614
Timothy Pearsonb8a355d2015-09-05 17:55:58 -05001615 /* Convert bits 7-5 (also referred to as the coarse delay) of
Zheng Baoeb75f652010-04-23 17:32:48 +00001616 * the current (or worst case) DQS receiver enable delay to
1617 * 1/2 MEMCLKs units, rounding up, and add this to the sub-total.
1618 */
Timothy Pearsonb8a355d2015-09-05 17:55:58 -05001619 SubTotal += DQSRcvEnDly >> 5; /* Retrieve gross delay portion of value */
Zheng Baoeb75f652010-04-23 17:32:48 +00001620
Timothy Pearsonb8a355d2015-09-05 17:55:58 -05001621 /* Add "P" to the sub-total. "P" represents part of the
Zheng Baoeb75f652010-04-23 17:32:48 +00001622 * processor specific constant delay value in the DRAM
1623 * clock domain.
1624 */
1625 SubTotal <<= 1; /*scale 1/2 MemClk to 1/4 MemClk */
Timothy Pearsonb8a355d2015-09-05 17:55:58 -05001626 SubTotal += cpu_val_p; /*add "P" 1/2MemClk */
1627 SubTotal >>= 1; /*scale 1/4 MemClk back to 1/2 MemClk */
Zheng Baoeb75f652010-04-23 17:32:48 +00001628
1629 /* Convert the sub-total (in 1/2 MEMCLKs) to northbridge
Timothy Pearsonb8a355d2015-09-05 17:55:58 -05001630 * clocks (NCLKs)
Zheng Baoeb75f652010-04-23 17:32:48 +00001631 */
Timothy Pearsonb8a355d2015-09-05 17:55:58 -05001632 SubTotal *= 200 * ((Get_NB32(pDCTstat->dev_nbmisc, 0xd4) & 0x1f) + 4);
Timothy Pearson730a0432015-10-16 13:51:51 -05001633 SubTotal /= freq_tab[((Get_NB32_DCT(pDCTstat->dev_dct, Channel, 0x94) & 0x7) - 3)];
Timothy Pearsonb8a355d2015-09-05 17:55:58 -05001634 SubTotal = (SubTotal + (2 - 1)) / 2; /* Round up */
Zheng Baoeb75f652010-04-23 17:32:48 +00001635
Timothy Pearsonb8a355d2015-09-05 17:55:58 -05001636 /* Add "N" NCLKs to the sub-total. "N" represents part of the
Zheng Baoeb75f652010-04-23 17:32:48 +00001637 * processor specific constant value in the northbridge
1638 * clock domain.
1639 */
Timothy Pearsonb8a355d2015-09-05 17:55:58 -05001640 SubTotal += (cpu_val_n) / 2;
Zheng Baoeb75f652010-04-23 17:32:48 +00001641
1642 pDCTstat->CH_MaxRdLat[Channel] = SubTotal;
1643 if(pDCTstat->GangedMode) {
1644 pDCTstat->CH_MaxRdLat[1] = SubTotal;
1645 }
1646
1647 /* Program the F2x[1, 0]78[MaxRdLatency] register with
1648 * the total delay value (in NCLKs).
1649 */
Timothy Pearson730a0432015-10-16 13:51:51 -05001650 reg = 0x78;
1651 val = Get_NB32_DCT(dev, Channel, reg);
Zheng Baoeb75f652010-04-23 17:32:48 +00001652 val &= ~(0x3ff << 22);
1653 val |= (SubTotal & 0x3ff) << 22;
1654
1655 /* program MaxRdLatency to correspond with current delay */
Timothy Pearson730a0432015-10-16 13:51:51 -05001656 Set_NB32_DCT(dev, Channel, reg, val);
Zheng Baoeb75f652010-04-23 17:32:48 +00001657}
1658
/* Initialize the DQS positions on both channels (0, then 1) in
 * preparation for Receiver Enable Training by delegating to
 * InitDQSPos4RcvrEn_D() for each channel.
 */
static void mct_InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
					struct DCTStatStruc *pDCTstat)
{
	InitDQSPos4RcvrEn_D(pMCTstat, pDCTstat, 0);
	InitDQSPos4RcvrEn_D(pMCTstat, pDCTstat, 1);
}
1672
1673static void InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
1674 struct DCTStatStruc *pDCTstat, u8 Channel)
1675{
1676 /* Initialize the DQS Positions in preparation for
Zheng Bao3d682fe2010-10-08 03:35:12 +00001677 * Receiver Enable Training.
Zheng Baoeb75f652010-04-23 17:32:48 +00001678 * Write Position is no Delay
1679 * Read Position is 1/2 Memclock Delay
1680 */
1681
1682 u8 i, j;
1683 u32 dword;
1684 u8 dn = 4; /* TODO: Rev C could be 4 */
1685 u32 dev = pDCTstat->dev_dct;
Timothy Pearson730a0432015-10-16 13:51:51 -05001686 u32 index_reg = 0x98;
Zheng Baoeb75f652010-04-23 17:32:48 +00001687
1688 /* FIXME: add Cx support */
1689 dword = 0x00000000;
1690 for(i=1; i<=3; i++) {
1691 for(j=0; j<dn; j++)
1692 /* DIMM0 Write Data Timing Low */
1693 /* DIMM0 Write ECC Timing */
Timothy Pearson730a0432015-10-16 13:51:51 -05001694 Set_NB32_index_wait_DCT(dev, Channel, index_reg, i + 0x100 * j, dword);
Zheng Baoeb75f652010-04-23 17:32:48 +00001695 }
1696
1697 /* errata #180 */
1698 dword = 0x2f2f2f2f;
1699 for(i=5; i<=6; i++) {
1700 for(j=0; j<dn; j++)
1701 /* DIMM0 Read DQS Timing Control Low */
Timothy Pearson730a0432015-10-16 13:51:51 -05001702 Set_NB32_index_wait_DCT(dev, Channel, index_reg, i + 0x100 * j, dword);
Zheng Baoeb75f652010-04-23 17:32:48 +00001703 }
1704
1705 dword = 0x0000002f;
1706 for(j=0; j<dn; j++)
1707 /* DIMM0 Read DQS ECC Timing Control */
Timothy Pearson730a0432015-10-16 13:51:51 -05001708 Set_NB32_index_wait_DCT(dev, Channel, index_reg, 7 + 0x100 * j, dword);
Zheng Baoeb75f652010-04-23 17:32:48 +00001709}
1710
1711void SetEccDQSRcvrEn_D(struct DCTStatStruc *pDCTstat, u8 Channel)
1712{
1713 u32 dev;
1714 u32 index_reg;
1715 u32 index;
1716 u8 ChipSel;
Timothy Pearsonb8a355d2015-09-05 17:55:58 -05001717 u16 *p;
Zheng Baoeb75f652010-04-23 17:32:48 +00001718 u32 val;
1719
1720 dev = pDCTstat->dev_dct;
Timothy Pearson730a0432015-10-16 13:51:51 -05001721 index_reg = 0x98;
Zheng Baoeb75f652010-04-23 17:32:48 +00001722 index = 0x12;
1723 p = pDCTstat->CH_D_BC_RCVRDLY[Channel];
1724 print_debug_dqs("\t\tSetEccDQSRcvrPos: Channel ", Channel, 2);
1725 for(ChipSel = 0; ChipSel < MAX_CS_SUPPORTED; ChipSel += 2) {
1726 val = p[ChipSel>>1];
Timothy Pearson730a0432015-10-16 13:51:51 -05001727 Set_NB32_index_wait_DCT(dev, Channel, index_reg, index, val);
Zheng Baoeb75f652010-04-23 17:32:48 +00001728 print_debug_dqs_pair("\t\tSetEccDQSRcvrPos: ChipSel ",
1729 ChipSel, " rcvr_delay ", val, 2);
1730 index += 3;
1731 }
1732}
1733
1734static void CalcEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat,
1735 struct DCTStatStruc *pDCTstat, u8 Channel)
1736{
1737 u8 ChipSel;
1738 u16 EccDQSLike;
1739 u8 EccDQSScale;
1740 u32 val, val0, val1;
1741
1742 EccDQSLike = pDCTstat->CH_EccDQSLike[Channel];
1743 EccDQSScale = pDCTstat->CH_EccDQSScale[Channel];
1744
1745 for (ChipSel = 0; ChipSel < MAX_CS_SUPPORTED; ChipSel += 2) {
1746 if(mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, ChipSel)) {
Timothy Pearsonb8a355d2015-09-05 17:55:58 -05001747 u16 *p;
Zheng Baoeb75f652010-04-23 17:32:48 +00001748 p = pDCTstat->CH_D_B_RCVRDLY[Channel][ChipSel>>1];
1749
1750 /* DQS Delay Value of Data Bytelane
1751 * most like ECC byte lane */
1752 val0 = p[EccDQSLike & 0x07];
1753 /* DQS Delay Value of Data Bytelane
1754 * 2nd most like ECC byte lane */
1755 val1 = p[(EccDQSLike>>8) & 0x07];
1756
1757 if (!(pDCTstat->Status & (1 << SB_Registered))) {
1758 if(val0 > val1) {
1759 val = val0 - val1;
1760 } else {
1761 val = val1 - val0;
1762 }
1763
1764 val *= ~EccDQSScale;
1765 val >>= 8; /* /256 */
1766
1767 if(val0 > val1) {
1768 val -= val1;
1769 } else {
1770 val += val0;
1771 }
1772 } else {
1773 val = val1 - val0;
1774 val += val1;
1775 }
1776
Zheng Bao52000e12010-10-01 06:27:35 +00001777 pDCTstat->CH_D_BC_RCVRDLY[Channel][ChipSel>>1] = val;
Zheng Baoeb75f652010-04-23 17:32:48 +00001778 }
1779 }
1780 SetEccDQSRcvrEn_D(pDCTstat, Channel);
1781}
1782
Timothy Pearsonb8a355d2015-09-05 17:55:58 -05001783/* 2.8.9.9.4
1784 * ECC Byte Lane Training
1785 * DQS Receiver Enable Delay
1786 */
Zheng Baoeb75f652010-04-23 17:32:48 +00001787void mctSetEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat,
1788 struct DCTStatStruc *pDCTstatA)
1789{
1790 u8 Node;
1791 u8 i;
1792
1793 for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
1794 struct DCTStatStruc *pDCTstat;
1795 pDCTstat = pDCTstatA + Node;
1796 if (!pDCTstat->NodePresent)
1797 break;
1798 if (pDCTstat->DCTSysLimit) {
Zheng Bao52000e12010-10-01 06:27:35 +00001799 for(i=0; i<2; i++)
1800 CalcEccDQSRcvrEn_D(pMCTstat, pDCTstat, i);
Zheng Baoeb75f652010-04-23 17:32:48 +00001801 }
1802 }
1803}
1804
1805void phyAssistedMemFnceTraining(struct MCTStatStruc *pMCTstat,
1806 struct DCTStatStruc *pDCTstatA)
1807{
1808 u8 Node = 0;
1809 struct DCTStatStruc *pDCTstat;
1810
Timothy Pearson730a0432015-10-16 13:51:51 -05001811 printk(BIOS_DEBUG, "%s: Start\n", __func__);
Zheng Baoeb75f652010-04-23 17:32:48 +00001812
Timothy Pearson730a0432015-10-16 13:51:51 -05001813 /* FIXME: skip for Ax */
1814 for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
1815 pDCTstat = pDCTstatA + Node;
1816 if (!pDCTstat->NodePresent)
1817 continue;
1818
1819 if (pDCTstat->DCTSysLimit) {
1820 if (is_fam15h()) {
1821 /* Fam15h BKDG v3.14 section 2.10.5.3.3
1822 * This picks up where InitDDRPhy left off
1823 */
1824 uint8_t dct;
1825 uint8_t index;
1826 uint32_t dword;
1827 uint32_t datc_backup;
1828 uint32_t training_dword;
1829 uint32_t fence2_config_dword;
1830 uint32_t fence_tx_pad_config_dword;
1831 uint32_t index_reg = 0x98;
1832 uint32_t dev = pDCTstat->dev_dct;
1833
1834 for (dct = 0; dct < 2; dct++) {
1835 if (!pDCTstat->DIMMValidDCT[dct])
1836 continue;
1837
1838 /* Back up D18F2x9C_x0000_0004_dct[1:0] */
1839 datc_backup = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000004);
1840
1841 /* FenceTrSel = 0x2 */
1842 dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000008);
1843 dword &= ~(0x3 << 6);
1844 dword |= (0x2 << 6);
1845 Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000008, dword);
1846
1847 /* Set phase recovery seed values */
1848 Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000050, 0x13131313);
1849 Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000051, 0x13131313);
1850 Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000052, 0x00000013);
1851
1852 training_dword = fenceDynTraining_D(pMCTstat, pDCTstat, dct);
1853
1854 /* Save calculated fence value to the TX DLL */
1855 dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0000000c);
1856 dword &= ~(0x1f << 26);
1857 dword |= ((training_dword & 0x1f) << 26);
1858 Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0000000c, dword);
1859
1860 /* D18F2x9C_x0D0F_0[F,8:0]0F_dct[1:0][AlwaysEnDllClks]=0x1 */
1861 for (index = 0; index < 0x9; index++) {
1862 dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f000f | (index << 8));
1863 dword &= ~(0x7 << 12);
1864 dword |= (0x1 << 12);
1865 Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f000f | (index << 8), dword);
1866 }
1867
1868 /* FenceTrSel = 0x1 */
1869 dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000008);
1870 dword &= ~(0x3 << 6);
1871 dword |= (0x1 << 6);
1872 Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000008, dword);
1873
1874 /* Set phase recovery seed values */
1875 Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000050, 0x13131313);
1876 Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000051, 0x13131313);
1877 Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000052, 0x00000013);
1878
1879 training_dword = fenceDynTraining_D(pMCTstat, pDCTstat, dct);
1880
1881 /* Save calculated fence value to the RX DLL */
1882 dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0000000c);
1883 dword &= ~(0x1f << 21);
1884 dword |= ((training_dword & 0x1f) << 21);
1885 Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0000000c, dword);
1886
1887 /* D18F2x9C_x0D0F_0[F,8:0]0F_dct[1:0][AlwaysEnDllClks]=0x0 */
1888 for (index = 0; index < 0x9; index++) {
1889 dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f000f | (index << 8));
1890 dword &= ~(0x7 << 12);
1891 Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f000f | (index << 8), dword);
1892 }
1893
1894 /* FenceTrSel = 0x3 */
1895 dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000008);
1896 dword &= ~(0x3 << 6);
1897 dword |= (0x3 << 6);
1898 Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000008, dword);
1899
1900 /* Set phase recovery seed values */
1901 Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000050, 0x13131313);
1902 Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000051, 0x13131313);
1903 Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000052, 0x00000013);
1904
1905 fence_tx_pad_config_dword = fenceDynTraining_D(pMCTstat, pDCTstat, dct);
1906
1907 /* Save calculated fence value to the TX Pad */
1908 dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0000000c);
1909 dword &= ~(0x1f << 16);
1910 dword |= ((fence_tx_pad_config_dword & 0x1f) << 16);
1911 Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0000000c, dword);
1912
1913 /* Program D18F2x9C_x0D0F_[C,8,2][2:0]31_dct[1:0] */
1914 training_dword = fence_tx_pad_config_dword;
1915 if (fence_tx_pad_config_dword < 16)
1916 training_dword |= (0x1 << 4);
1917 else
1918 training_dword = 0;
1919 for (index = 0; index < 0x3; index++) {
1920 dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f2031 | (index << 8));
1921 dword &= ~(0x1f);
1922 dword |= (training_dword & 0x1f);
1923 Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f2031 | (index << 8), dword);
1924 }
1925 for (index = 0; index < 0x3; index++) {
1926 dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f8031 | (index << 8));
1927 dword &= ~(0x1f);
1928 dword |= (training_dword & 0x1f);
1929 Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f8031 | (index << 8), dword);
1930 }
1931 for (index = 0; index < 0x3; index++) {
1932 dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fc031 | (index << 8));
1933 dword &= ~(0x1f);
1934 dword |= (training_dword & 0x1f);
1935 Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fc031 | (index << 8), dword);
1936 }
1937
1938 /* Assemble Fence2 configuration word (Fam15h BKDG v3.14 page 331) */
1939 dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0000000c);
1940 fence2_config_dword = 0;
1941
1942 /* TxPad */
1943 training_dword = (dword >> 16) & 0x1f;
1944 if (training_dword < 16)
1945 training_dword |= 0x10;
1946 else
1947 training_dword = 0;
1948 fence2_config_dword |= training_dword;
1949
1950 /* RxDll */
1951 training_dword = (dword >> 21) & 0x1f;
1952 if (training_dword < 16)
1953 training_dword |= 0x10;
1954 else
1955 training_dword = 0;
1956 fence2_config_dword |= (training_dword << 10);
1957
1958 /* TxDll */
1959 training_dword = (dword >> 26) & 0x1f;
1960 if (training_dword < 16)
1961 training_dword |= 0x10;
1962 else
1963 training_dword = 0;
1964 fence2_config_dword |= (training_dword << 5);
1965
1966 /* Program D18F2x9C_x0D0F_0[F,8:0]31_dct[1:0] */
1967 for (index = 0; index < 0x9; index++) {
1968 dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f0031 | (index << 8));
1969 dword &= ~(0x7fff);
1970 dword |= fence2_config_dword;
1971 Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f0031 | (index << 8), dword);
1972 }
1973
1974 /* Restore D18F2x9C_x0000_0004_dct[1:0] */
1975 Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000004, datc_backup);
1976 }
1977 } else {
1978 fenceDynTraining_D(pMCTstat, pDCTstat, 0);
1979 fenceDynTraining_D(pMCTstat, pDCTstat, 1);
1980 }
Zheng Baoeb75f652010-04-23 17:32:48 +00001981 }
Zheng Baoeb75f652010-04-23 17:32:48 +00001982 }
Timothy Pearson730a0432015-10-16 13:51:51 -05001983
1984 printk(BIOS_DEBUG, "%s: Done\n", __func__);
Zheng Baoeb75f652010-04-23 17:32:48 +00001985}
1986
Timothy Pearson730a0432015-10-16 13:51:51 -05001987static uint32_t fenceDynTraining_D(struct MCTStatStruc *pMCTstat,
Zheng Baoeb75f652010-04-23 17:32:48 +00001988 struct DCTStatStruc *pDCTstat, u8 dct)
1989{
1990 u16 avRecValue;
1991 u32 val;
1992 u32 dev;
Timothy Pearson730a0432015-10-16 13:51:51 -05001993 u32 index_reg = 0x98;
Zheng Baoeb75f652010-04-23 17:32:48 +00001994 u32 index;
1995
Zheng Baoeb75f652010-04-23 17:32:48 +00001996 dev = pDCTstat->dev_dct;
Timothy Pearson730a0432015-10-16 13:51:51 -05001997
1998 if (is_fam15h()) {
1999 /* Set F2x[1,0]9C_x08[PhyFenceTrEn] */
2000 val = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x08);
2001 val |= 1 << PhyFenceTrEn;
2002 Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x08, val);
2003
2004 /* Wait 2000 MEMCLKs */
2005 precise_memclk_delay_fam15(pMCTstat, pDCTstat, dct, 2000);
2006
2007 /* Clear F2x[1,0]9C_x08[PhyFenceTrEn] */
2008 val = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x08);
2009 val &= ~(1 << PhyFenceTrEn);
2010 Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x08, val);
2011
2012 /* BIOS reads the phase recovery engine registers
2013 * F2x[1,0]9C_x[51:50] and F2x[1,0]9C_x52.
2014 * Average the fine delay components only.
2015 */
2016 avRecValue = 0;
2017 for (index = 0x50; index <= 0x52; index++) {
2018 val = Get_NB32_index_wait_DCT(dev, dct, index_reg, index);
2019 avRecValue += val & 0x1f;
2020 if (index != 0x52) {
2021 avRecValue += (val >> 8) & 0x1f;
2022 avRecValue += (val >> 16) & 0x1f;
2023 avRecValue += (val >> 24) & 0x1f;
2024 }
Zheng Baoeb75f652010-04-23 17:32:48 +00002025 }
Zheng Baoeb75f652010-04-23 17:32:48 +00002026
Timothy Pearson730a0432015-10-16 13:51:51 -05002027 val = avRecValue / 9;
2028 if (avRecValue % 9)
2029 val++;
2030 avRecValue = val;
Zheng Baoeb75f652010-04-23 17:32:48 +00002031
Timothy Pearson730a0432015-10-16 13:51:51 -05002032 if (avRecValue < 6)
2033 avRecValue = 0;
2034 else
2035 avRecValue -= 6;
Zheng Baoeb75f652010-04-23 17:32:48 +00002036
Timothy Pearson730a0432015-10-16 13:51:51 -05002037 return avRecValue;
2038 } else {
2039 /* BIOS first programs a seed value to the phase recovery engine
2040 * (recommended 19) registers.
2041 * Dram Phase Recovery Control Register (F2x[1,0]9C_x[51:50] and
2042 * F2x[1,0]9C_x52.) .
2043 */
2044 for (index = 0x50; index <= 0x52; index ++) {
2045 val = (FenceTrnFinDlySeed & 0x1F);
2046 if (index != 0x52) {
2047 val |= val << 8 | val << 16 | val << 24;
2048 }
2049 Set_NB32_index_wait_DCT(dev, dct, index_reg, index, val);
Zheng Baoeb75f652010-04-23 17:32:48 +00002050 }
Timothy Pearson730a0432015-10-16 13:51:51 -05002051
2052 /* Set F2x[1,0]9C_x08[PhyFenceTrEn]=1. */
2053 val = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x08);
2054 val |= 1 << PhyFenceTrEn;
2055 Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x08, val);
2056
2057 /* Wait 200 MEMCLKs. */
2058 mct_Wait(50000); /* wait 200us */
2059
2060 /* Clear F2x[1,0]9C_x08[PhyFenceTrEn]=0. */
2061 val = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x08);
2062 val &= ~(1 << PhyFenceTrEn);
2063 Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x08, val);
2064
2065 /* BIOS reads the phase recovery engine registers
2066 * F2x[1,0]9C_x[51:50] and F2x[1,0]9C_x52. */
2067 avRecValue = 0;
2068 for (index = 0x50; index <= 0x52; index ++) {
2069 val = Get_NB32_index_wait_DCT(dev, dct, index_reg, index);
2070 avRecValue += val & 0x7F;
2071 if (index != 0x52) {
2072 avRecValue += (val >> 8) & 0x7F;
2073 avRecValue += (val >> 16) & 0x7F;
2074 avRecValue += (val >> 24) & 0x7F;
2075 }
2076 }
2077
2078 val = avRecValue / 9;
2079 if (avRecValue % 9)
2080 val++;
2081 avRecValue = val;
2082
2083 /* Write the (averaged value -8) to F2x[1,0]9C_x0C[PhyFence]. */
2084 /* inlined mct_AdjustFenceValue() */
2085 /* TODO: The RBC0 is not supported. */
2086 /* if (pDCTstat->LogicalCPUID & AMD_RB_C0)
2087 avRecValue -= 3;
2088 else
2089 */
2090 if (pDCTstat->LogicalCPUID & AMD_DR_Dx)
2091 avRecValue -= 8;
2092 else if (pDCTstat->LogicalCPUID & AMD_DR_Cx)
2093 avRecValue -= 8;
2094 else if (pDCTstat->LogicalCPUID & AMD_DR_Bx)
2095 avRecValue -= 8;
2096
2097 val = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0C);
2098 val &= ~(0x1F << 16);
2099 val |= (avRecValue & 0x1F) << 16;
2100 Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0C, val);
2101
2102 /* Rewrite F2x[1,0]9C_x04-DRAM Address/Command Timing Control Register
2103 * delays (both channels).
2104 */
2105 val = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x04);
2106 Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x04, val);
2107
2108 return avRecValue;
Zheng Baoeb75f652010-04-23 17:32:48 +00002109 }
Zheng Baoeb75f652010-04-23 17:32:48 +00002110}
2111
2112void mct_Wait(u32 cycles)
2113{
2114 u32 saved;
2115 u32 hi, lo, msr;
2116
2117 /* Wait # of 50ns cycles
2118 This seems like a hack to me... */
2119
2120 cycles <<= 3; /* x8 (number of 1.25ns ticks) */
2121
2122 msr = 0x10; /* TSC */
2123 _RDMSR(msr, &lo, &hi);
2124 saved = lo;
2125 do {
2126 _RDMSR(msr, &lo, &hi);
2127 } while (lo - saved < cycles );
2128}