nb/intel/x4x: Implement both read and write training

This training finds the optimal write DQ delay and read DQS delay
settings. It does so on all lanes at the same time, like the vendor
does (training each lane individually gives poor results).

The results are stored in the sysinfo struct and restored on
subsequent boots and on S3 resume.

This potentially increases stability as optimal settings are chosen
and is more necessary for DDR3 raminit where the write DQS delays are
leveled/variable due to the flyby topology.

TESTED on Intel DG43GT with (2G + 1G) on each channel: the results
are quite close to the safe original ones (which previously worked
fine). Also tested with memtest86+.

Change-Id: Iacdc63b91b4705d1a80437314bfe55385ea5b6c1
Signed-off-by: Arthur Heymans <arthur@aheymans.xyz>
Reviewed-on: https://review.coreboot.org/22329
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
Reviewed-by: Felix Held <felix-coreboot@felixheld.de>
diff --git a/src/northbridge/intel/x4x/raminit_ddr2.c b/src/northbridge/intel/x4x/raminit_ddr2.c
index b967583..a36242b 100644
--- a/src/northbridge/intel/x4x/raminit_ddr2.c
+++ b/src/northbridge/intel/x4x/raminit_ddr2.c
@@ -293,7 +293,7 @@
  * All finer DQ and DQS DLL settings are set to the same value
  * for each rank in a channel, while coarse is common.
  */
-static void dqsset(u8 ch, u8 lane, const struct dll_setting *setting)
+void dqsset(u8 ch, u8 lane, const struct dll_setting *setting)
 {
 	int rank;
 
@@ -320,7 +320,7 @@
 	}
 }
 
-static void dqset(u8 ch, u8 lane, const struct dll_setting *setting)
+void dqset(u8 ch, u8 lane, const struct dll_setting *setting)
 {
 	int rank;
 	MCHBAR32(0x400 * ch + 0x5fc) = (MCHBAR32(0x400 * ch + 0x5fc)
@@ -346,12 +346,12 @@
 	}
 }
 
-static void rt_set_dqs(u8 channel, u8 lane, u8 rank,
+void rt_set_dqs(u8 channel, u8 lane, u8 rank,
 		struct rt_dqs_setting *dqs_setting)
 {
 	u16 saved_tap = MCHBAR16(0x540 + 0x400 * channel + lane * 4);
 	u16 saved_pi = MCHBAR16(0x542 + 0x400 * channel + lane * 4);
-	printk(RAM_SPEW, "RT DQS: ch%d, L%d, %d.%d\n", channel, lane,
+	printk(RAM_SPEW, "RT DQS: ch%d, r%d, L%d: %d.%d\n", channel, rank, lane,
 		dqs_setting->tap,
 		dqs_setting->pi);
 
@@ -1680,9 +1680,14 @@
 
 	// XXX tRD
 
-	// XXX Write training
-
-	// XXX Read training
+	if (!fast_boot) {
+		if (s->selected_timings.mem_clk > MEM_CLOCK_667MHz) {
+			if(do_write_training(s))
+				die("DQ write training failed!");
+		}
+		if (do_read_training(s))
+			die("DQS read training failed!");
+	}
 
 	// DRADRB
 	dradrb_ddr2(s);