Merge branch 'master' of http://git.denx.de/u-boot-sunxi
diff --git a/arch/arm/cpu/armv7/sunxi/dram.c b/arch/arm/cpu/armv7/sunxi/dram.c
index 0f1ceec..584f742 100644
--- a/arch/arm/cpu/armv7/sunxi/dram.c
+++ b/arch/arm/cpu/armv7/sunxi/dram.c
@@ -36,18 +36,39 @@
 #define CPU_CFG_CHIP_REV_B 0x3
 
 /*
- * Wait up to 1s for mask to be clear in given reg.
+ * Wait up to 1s for value to be set in given part of reg.
  */
-static void await_completion(u32 *reg, u32 mask)
+static void await_completion(u32 *reg, u32 mask, u32 val)
 {
 	unsigned long tmo = timer_get_us() + 1000000;
 
-	while (readl(reg) & mask) {
+	while ((readl(reg) & mask) != val) {
 		if (timer_get_us() > tmo)
 			panic("Timeout initialising DRAM\n");
 	}
 }
 
+/*
+ * Wait up to 1s for mask to be clear in given reg.
+ */
+static inline void await_bits_clear(u32 *reg, u32 mask)
+{
+	await_completion(reg, mask, 0);
+}
+
+/*
+ * Wait up to 1s for mask to be set in given reg.
+ */
+static inline void await_bits_set(u32 *reg, u32 mask)
+{
+	await_completion(reg, mask, mask);
+}
+
+/*
+ * This performs the external DRAM reset by driving the RESET pin low and
+ * then high again. According to the DDR3 spec, the RESET pin needs to be
+ * kept low for at least 200 us.
+ */
 static void mctl_ddr3_reset(void)
 {
 	struct sunxi_dram_reg *dram =
@@ -64,15 +85,28 @@
 	if ((reg_val & CPU_CFG_CHIP_VER_MASK) !=
 	    CPU_CFG_CHIP_VER(CPU_CFG_CHIP_REV_A)) {
 		setbits_le32(&dram->mcr, DRAM_MCR_RESET);
-		udelay(2);
+		udelay(200);
 		clrbits_le32(&dram->mcr, DRAM_MCR_RESET);
 	} else
 #endif
 	{
 		clrbits_le32(&dram->mcr, DRAM_MCR_RESET);
-		udelay(2);
+		udelay(200);
 		setbits_le32(&dram->mcr, DRAM_MCR_RESET);
 	}
+	/* After the RESET pin is de-asserted, the DDR3 spec requires to wait
+	 * for additional 500 us before driving the CKE pin (Clock Enable)
+	 * high. The duration of this delay can be configured in the SDR_IDCR
+	 * (Initialization Delay Configuration Register) and applied
+	 * automatically by the DRAM controller during the DDR3 initialization
+	 * step. But SDR_IDCR has limited range on sun4i/sun5i hardware and
+	 * can't provide sufficient delay at DRAM clock frequencies higher than
+	 * 524 MHz (while Allwinner A13 supports DRAM clock frequency up to
+	 * 533 MHz according to the datasheet). Additionally, there is no
+	 * official documentation for the SDR_IDCR register anywhere, and
+	 * there is always a chance that we are interpreting it wrong.
+	 * Better be safe than sorry, so add an explicit delay here. */
+	udelay(500);
 }
 
 static void mctl_set_drive(void)
@@ -102,6 +136,14 @@
 	clrbits_le32(&dram->ccr, DRAM_CCR_ITM_OFF);
 }
 
+static void mctl_itm_reset(void)
+{
+	mctl_itm_disable();
+	udelay(1); /* ITM reset needs a bit of delay */
+	mctl_itm_enable();
+	udelay(1);
+}
+
 static void mctl_enable_dll0(u32 phase)
 {
 	struct sunxi_dram_reg *dram = (struct sunxi_dram_reg *)SUNXI_DRAMC_BASE;
@@ -118,23 +160,28 @@
 	udelay(22);
 }
 
+/* Get the number of DDR byte lanes */
+static u32 mctl_get_number_of_lanes(void)
+{
+	struct sunxi_dram_reg *dram = (struct sunxi_dram_reg *)SUNXI_DRAMC_BASE;
+	if ((readl(&dram->dcr) & DRAM_DCR_BUS_WIDTH_MASK) ==
+				DRAM_DCR_BUS_WIDTH(DRAM_DCR_BUS_WIDTH_32BIT))
+		return 4;
+	else
+		return 2;
+}
+
 /*
  * Note: This differs from pm/standby in that it checks the bus width
  */
 static void mctl_enable_dllx(u32 phase)
 {
 	struct sunxi_dram_reg *dram = (struct sunxi_dram_reg *)SUNXI_DRAMC_BASE;
-	u32 i, n, bus_width;
+	u32 i, number_of_lanes;
 
-	bus_width = readl(&dram->dcr);
+	number_of_lanes = mctl_get_number_of_lanes();
 
-	if ((bus_width & DRAM_DCR_BUS_WIDTH_MASK) ==
-	    DRAM_DCR_BUS_WIDTH(DRAM_DCR_BUS_WIDTH_32BIT))
-		n = DRAM_DCR_NR_DLLCR_32BIT;
-	else
-		n = DRAM_DCR_NR_DLLCR_16BIT;
-
-	for (i = 1; i < n; i++) {
+	for (i = 1; i <= number_of_lanes; i++) {
 		clrsetbits_le32(&dram->dllcr[i], 0xf << 14,
 				(phase & 0xf) << 14);
 		clrsetbits_le32(&dram->dllcr[i], DRAM_DLLCR_NRESET,
@@ -143,12 +190,12 @@
 	}
 	udelay(2);
 
-	for (i = 1; i < n; i++)
+	for (i = 1; i <= number_of_lanes; i++)
 		clrbits_le32(&dram->dllcr[i], DRAM_DLLCR_NRESET |
 			     DRAM_DLLCR_DISABLE);
 	udelay(22);
 
-	for (i = 1; i < n; i++)
+	for (i = 1; i <= number_of_lanes; i++)
 		clrsetbits_le32(&dram->dllcr[i], DRAM_DLLCR_DISABLE,
 				DRAM_DLLCR_NRESET);
 	udelay(22);
@@ -201,11 +248,20 @@
 		writel(hpcr_value[i], &dram->hpcr[i]);
 }
 
-static void mctl_setup_dram_clock(u32 clk)
+static void mctl_setup_dram_clock(u32 clk, u32 mbus_clk)
 {
 	u32 reg_val;
 	struct sunxi_ccm_reg *ccm = (struct sunxi_ccm_reg *)SUNXI_CCM_BASE;
 
+	/* PLL5P and PLL6 are the potential clock sources for MBUS */
+	u32 pll6x_div, pll5p_div;
+	u32 pll6x_clk = clock_get_pll6() / 1000000;
+	u32 pll5p_clk = clk / 24 * 48;
+	u32 pll5p_rate, pll6x_rate;
+#ifdef CONFIG_SUN7I
+	pll6x_clk *= 2; /* sun7i uses PLL6*2, sun5i uses just PLL6 */
+#endif
+
 	/* setup DRAM PLL */
 	reg_val = readl(&ccm->pll5_cfg);
 	reg_val &= ~CCM_PLL5_CTRL_M_MASK;		/* set M to 0 (x1) */
@@ -213,41 +269,40 @@
 	reg_val &= ~CCM_PLL5_CTRL_N_MASK;		/* set N to 0 (x0) */
 	reg_val &= ~CCM_PLL5_CTRL_P_MASK;		/* set P to 0 (x1) */
 	if (clk >= 540 && clk < 552) {
-		/* dram = 540MHz, pll5p = 540MHz */
+		/* dram = 540MHz, pll5p = 1080MHz */
+		pll5p_clk = 1080;
 		reg_val |= CCM_PLL5_CTRL_M(CCM_PLL5_CTRL_M_X(2));
 		reg_val |= CCM_PLL5_CTRL_K(CCM_PLL5_CTRL_K_X(3));
 		reg_val |= CCM_PLL5_CTRL_N(CCM_PLL5_CTRL_N_X(15));
-		reg_val |= CCM_PLL5_CTRL_P(1);
 	} else if (clk >= 512 && clk < 528) {
-		/* dram = 512MHz, pll5p = 384MHz */
+		/* dram = 512MHz, pll5p = 1536MHz */
+		pll5p_clk = 1536;
 		reg_val |= CCM_PLL5_CTRL_M(CCM_PLL5_CTRL_M_X(3));
 		reg_val |= CCM_PLL5_CTRL_K(CCM_PLL5_CTRL_K_X(4));
 		reg_val |= CCM_PLL5_CTRL_N(CCM_PLL5_CTRL_N_X(16));
-		reg_val |= CCM_PLL5_CTRL_P(2);
 	} else if (clk >= 496 && clk < 504) {
-		/* dram = 496MHz, pll5p = 372MHz */
+		/* dram = 496MHz, pll5p = 1488MHz */
+		pll5p_clk = 1488;
 		reg_val |= CCM_PLL5_CTRL_M(CCM_PLL5_CTRL_M_X(3));
 		reg_val |= CCM_PLL5_CTRL_K(CCM_PLL5_CTRL_K_X(2));
 		reg_val |= CCM_PLL5_CTRL_N(CCM_PLL5_CTRL_N_X(31));
-		reg_val |= CCM_PLL5_CTRL_P(2);
 	} else if (clk >= 468 && clk < 480) {
-		/* dram = 468MHz, pll5p = 468MHz */
+		/* dram = 468MHz, pll5p = 936MHz */
+		pll5p_clk = 936;
 		reg_val |= CCM_PLL5_CTRL_M(CCM_PLL5_CTRL_M_X(2));
 		reg_val |= CCM_PLL5_CTRL_K(CCM_PLL5_CTRL_K_X(3));
 		reg_val |= CCM_PLL5_CTRL_N(CCM_PLL5_CTRL_N_X(13));
-		reg_val |= CCM_PLL5_CTRL_P(1);
 	} else if (clk >= 396 && clk < 408) {
-		/* dram = 396MHz, pll5p = 396MHz */
+		/* dram = 396MHz, pll5p = 792MHz */
+		pll5p_clk = 792;
 		reg_val |= CCM_PLL5_CTRL_M(CCM_PLL5_CTRL_M_X(2));
 		reg_val |= CCM_PLL5_CTRL_K(CCM_PLL5_CTRL_K_X(3));
 		reg_val |= CCM_PLL5_CTRL_N(CCM_PLL5_CTRL_N_X(11));
-		reg_val |= CCM_PLL5_CTRL_P(1);
 	} else 	{
 		/* any other frequency that is a multiple of 24 */
 		reg_val |= CCM_PLL5_CTRL_M(CCM_PLL5_CTRL_M_X(2));
 		reg_val |= CCM_PLL5_CTRL_K(CCM_PLL5_CTRL_K_X(2));
 		reg_val |= CCM_PLL5_CTRL_N(CCM_PLL5_CTRL_N_X(clk / 24));
-		reg_val |= CCM_PLL5_CTRL_P(CCM_PLL5_CTRL_P_X(2));
 	}
 	reg_val &= ~CCM_PLL5_CTRL_VCO_GAIN;		/* PLL VCO Gain off */
 	reg_val |= CCM_PLL5_CTRL_EN;			/* PLL On */
@@ -264,20 +319,30 @@
 	clrbits_le32(&ccm->ahb_gate0, CCM_AHB_GATE_GPS);
 #endif
 
-#if defined(CONFIG_SUN5I) || defined(CONFIG_SUN7I)
 	/* setup MBUS clock */
-	reg_val = CCM_MBUS_CTRL_GATE |
-#ifdef CONFIG_SUN7I
-		  CCM_MBUS_CTRL_CLK_SRC(CCM_MBUS_CTRL_CLK_SRC_PLL6) |
-		  CCM_MBUS_CTRL_N(CCM_MBUS_CTRL_N_X(2)) |
-		  CCM_MBUS_CTRL_M(CCM_MBUS_CTRL_M_X(2));
-#else /* defined(CONFIG_SUN5I) */
-		  CCM_MBUS_CTRL_CLK_SRC(CCM_MBUS_CTRL_CLK_SRC_PLL5) |
-		  CCM_MBUS_CTRL_N(CCM_MBUS_CTRL_N_X(1)) |
-		  CCM_MBUS_CTRL_M(CCM_MBUS_CTRL_M_X(2));
-#endif
+	if (!mbus_clk)
+		mbus_clk = 300;
+	pll6x_div = DIV_ROUND_UP(pll6x_clk, mbus_clk);
+	pll5p_div = DIV_ROUND_UP(pll5p_clk, mbus_clk);
+	pll6x_rate = pll6x_clk / pll6x_div;
+	pll5p_rate = pll5p_clk / pll5p_div;
+
+	if (pll6x_div <= 16 && pll6x_rate > pll5p_rate) {
+		/* use PLL6 as the MBUS clock source */
+		reg_val = CCM_MBUS_CTRL_GATE |
+			  CCM_MBUS_CTRL_CLK_SRC(CCM_MBUS_CTRL_CLK_SRC_PLL6) |
+			  CCM_MBUS_CTRL_N(CCM_MBUS_CTRL_N_X(1)) |
+			  CCM_MBUS_CTRL_M(CCM_MBUS_CTRL_M_X(pll6x_div));
+	} else if (pll5p_div <= 16) {
+		/* use PLL5P as the MBUS clock source */
+		reg_val = CCM_MBUS_CTRL_GATE |
+			  CCM_MBUS_CTRL_CLK_SRC(CCM_MBUS_CTRL_CLK_SRC_PLL5) |
+			  CCM_MBUS_CTRL_N(CCM_MBUS_CTRL_N_X(1)) |
+			  CCM_MBUS_CTRL_M(CCM_MBUS_CTRL_M_X(pll5p_div));
+	} else {
+		panic("Bad mbus_clk\n");
+	}
 	writel(reg_val, &ccm->mbus_clk_cfg);
-#endif
 
 	/*
 	 * open DRAMC AHB & DLL register clock
@@ -299,19 +364,48 @@
 	udelay(22);
 }
 
+/*
+ * The data from rslrX and rdgrX registers (X=rank) is stored
+ * in a single 32-bit value using the following format:
+ *   bits [31:26] - DQS gating system latency for byte lane 3
+ *   bits [25:24] - DQS gating phase select for byte lane 3
+ *   bits [23:18] - DQS gating system latency for byte lane 2
+ *   bits [17:16] - DQS gating phase select for byte lane 2
+ *   bits [15:10] - DQS gating system latency for byte lane 1
+ *   bits [ 9:8 ] - DQS gating phase select for byte lane 1
+ *   bits [ 7:2 ] - DQS gating system latency for byte lane 0
+ *   bits [ 1:0 ] - DQS gating phase select for byte lane 0
+ */
+static void mctl_set_dqs_gating_delay(int rank, u32 dqs_gating_delay)
+{
+	struct sunxi_dram_reg *dram = (struct sunxi_dram_reg *)SUNXI_DRAMC_BASE;
+	u32 lane, number_of_lanes = mctl_get_number_of_lanes();
+	/* rank0 gating system latency (3 bits per lane: cycles) */
+	u32 slr = readl(rank == 0 ? &dram->rslr0 : &dram->rslr1);
+	/* rank0 gating phase select (2 bits per lane: 90, 180, 270, 360) */
+	u32 dgr = readl(rank == 0 ? &dram->rdgr0 : &dram->rdgr1);
+	for (lane = 0; lane < number_of_lanes; lane++) {
+		u32 tmp = dqs_gating_delay >> (lane * 8);
+		slr &= ~(7 << (lane * 3));
+		slr |= ((tmp >> 2) & 7) << (lane * 3);
+		dgr &= ~(3 << (lane * 2));
+		dgr |= (tmp & 3) << (lane * 2);
+	}
+	writel(slr, rank == 0 ? &dram->rslr0 : &dram->rslr1);
+	writel(dgr, rank == 0 ? &dram->rdgr0 : &dram->rdgr1);
+}
+
 static int dramc_scan_readpipe(void)
 {
 	struct sunxi_dram_reg *dram = (struct sunxi_dram_reg *)SUNXI_DRAMC_BASE;
 	u32 reg_val;
 
 	/* data training trigger */
-#ifdef CONFIG_SUN7I
 	clrbits_le32(&dram->csr, DRAM_CSR_FAILED);
-#endif
 	setbits_le32(&dram->ccr, DRAM_CCR_DATA_TRAINING);
 
 	/* check whether data training process has completed */
-	await_completion(&dram->ccr, DRAM_CCR_DATA_TRAINING);
+	await_bits_clear(&dram->ccr, DRAM_CCR_DATA_TRAINING);
 
 	/* check data training result */
 	reg_val = readl(&dram->csr);
@@ -321,117 +415,6 @@
 	return 0;
 }
 
-static int dramc_scan_dll_para(void)
-{
-	struct sunxi_dram_reg *dram = (struct sunxi_dram_reg *)SUNXI_DRAMC_BASE;
-	const u32 dqs_dly[7] = {0x3, 0x2, 0x1, 0x0, 0xe, 0xd, 0xc};
-	const u32 clk_dly[15] = {0x07, 0x06, 0x05, 0x04, 0x03,
-				 0x02, 0x01, 0x00, 0x08, 0x10,
-				 0x18, 0x20, 0x28, 0x30, 0x38};
-	u32 clk_dqs_count[15];
-	u32 dqs_i, clk_i, cr_i;
-	u32 max_val, min_val;
-	u32 dqs_index, clk_index;
-
-	/* Find DQS_DLY Pass Count for every CLK_DLY */
-	for (clk_i = 0; clk_i < 15; clk_i++) {
-		clk_dqs_count[clk_i] = 0;
-		clrsetbits_le32(&dram->dllcr[0], 0x3f << 6,
-				(clk_dly[clk_i] & 0x3f) << 6);
-		for (dqs_i = 0; dqs_i < 7; dqs_i++) {
-			for (cr_i = 1; cr_i < 5; cr_i++) {
-				clrsetbits_le32(&dram->dllcr[cr_i],
-						0x4f << 14,
-						(dqs_dly[dqs_i] & 0x4f) << 14);
-			}
-			udelay(2);
-			if (dramc_scan_readpipe() == 0)
-				clk_dqs_count[clk_i]++;
-		}
-	}
-	/* Test DQS_DLY Pass Count for every CLK_DLY from up to down */
-	for (dqs_i = 15; dqs_i > 0; dqs_i--) {
-		max_val = 15;
-		min_val = 15;
-		for (clk_i = 0; clk_i < 15; clk_i++) {
-			if (clk_dqs_count[clk_i] == dqs_i) {
-				max_val = clk_i;
-				if (min_val == 15)
-					min_val = clk_i;
-			}
-		}
-		if (max_val < 15)
-			break;
-	}
-
-	/* Check if Find a CLK_DLY failed */
-	if (!dqs_i)
-		goto fail;
-
-	/* Find the middle index of CLK_DLY */
-	clk_index = (max_val + min_val) >> 1;
-	if ((max_val == (15 - 1)) && (min_val > 0))
-		/* if CLK_DLY[MCTL_CLK_DLY_COUNT] is very good, then the middle
-		 * value can be more close to the max_val
-		 */
-		clk_index = (15 + clk_index) >> 1;
-	else if ((max_val < (15 - 1)) && (min_val == 0))
-		/* if CLK_DLY[0] is very good, then the middle value can be more
-		 * close to the min_val
-		 */
-		clk_index >>= 1;
-	if (clk_dqs_count[clk_index] < dqs_i)
-		clk_index = min_val;
-
-	/* Find the middle index of DQS_DLY for the CLK_DLY got above, and Scan
-	 * read pipe again
-	 */
-	clrsetbits_le32(&dram->dllcr[0], 0x3f << 6,
-			(clk_dly[clk_index] & 0x3f) << 6);
-	max_val = 7;
-	min_val = 7;
-	for (dqs_i = 0; dqs_i < 7; dqs_i++) {
-		clk_dqs_count[dqs_i] = 0;
-		for (cr_i = 1; cr_i < 5; cr_i++) {
-			clrsetbits_le32(&dram->dllcr[cr_i],
-					0x4f << 14,
-					(dqs_dly[dqs_i] & 0x4f) << 14);
-		}
-		udelay(2);
-		if (dramc_scan_readpipe() == 0) {
-			clk_dqs_count[dqs_i] = 1;
-			max_val = dqs_i;
-			if (min_val == 7)
-				min_val = dqs_i;
-		}
-	}
-
-	if (max_val < 7) {
-		dqs_index = (max_val + min_val) >> 1;
-		if ((max_val == (7-1)) && (min_val > 0))
-			dqs_index = (7 + dqs_index) >> 1;
-		else if ((max_val < (7-1)) && (min_val == 0))
-			dqs_index >>= 1;
-		if (!clk_dqs_count[dqs_index])
-			dqs_index = min_val;
-		for (cr_i = 1; cr_i < 5; cr_i++) {
-			clrsetbits_le32(&dram->dllcr[cr_i],
-					0x4f << 14,
-					(dqs_dly[dqs_index] & 0x4f) << 14);
-		}
-		udelay(2);
-		return dramc_scan_readpipe();
-	}
-
-fail:
-	clrbits_le32(&dram->dllcr[0], 0x3f << 6);
-	for (cr_i = 1; cr_i < 5; cr_i++)
-		clrbits_le32(&dram->dllcr[cr_i], 0x4f << 14);
-	udelay(2);
-
-	return dramc_scan_readpipe();
-}
-
 static void dramc_clock_output_en(u32 on)
 {
 #if defined(CONFIG_SUN5I) || defined(CONFIG_SUN7I)
@@ -451,48 +434,164 @@
 #endif
 }
 
-static const u16 tRFC_table[2][6] = {
-	/*       256Mb    512Mb    1Gb      2Gb      4Gb      8Gb      */
-	/* DDR2  75ns     105ns    127.5ns  195ns    327.5ns  invalid  */
-	{        77,      108,     131,     200,     336,     336 },
-	/* DDR3  invalid  90ns     110ns    160ns    300ns    350ns    */
-	{        93,      93,      113,     164,     308,     359 }
+/* tRFC in nanoseconds for different densities (from the DDR3 spec) */
+static const u16 tRFC_DDR3_table[6] = {
+	/* 256Mb    512Mb    1Gb      2Gb      4Gb      8Gb */
+	   90,      90,      110,     160,     300,     350
 };
 
-static void dramc_set_autorefresh_cycle(u32 clk, u32 type, u32 density)
+static void dramc_set_autorefresh_cycle(u32 clk, u32 density)
 {
 	struct sunxi_dram_reg *dram = (struct sunxi_dram_reg *)SUNXI_DRAMC_BASE;
 	u32 tRFC, tREFI;
 
-	tRFC = (tRFC_table[type][density] * clk + 1023) >> 10;
+	tRFC = (tRFC_DDR3_table[density] * clk + 999) / 1000;
 	tREFI = (7987 * clk) >> 10;	/* <= 7.8us */
 
 	writel(DRAM_DRR_TREFI(tREFI) | DRAM_DRR_TRFC(tRFC), &dram->drr);
 }
 
-unsigned long dramc_init(struct dram_para *para)
+/* Calculate the value for A11, A10, A9 bits in MR0 (write recovery) */
+static u32 ddr3_write_recovery(u32 clk)
+{
+	u32 twr_ns = 15; /* DDR3 spec says that it is 15ns for all speed bins */
+	u32 twr_ck = (twr_ns * clk + 999) / 1000;
+	if (twr_ck < 5)
+		return 1;
+	else if (twr_ck <= 8)
+		return twr_ck - 4;
+	else if (twr_ck <= 10)
+		return 5;
+	else
+		return 6;
+}
+
+/*
+ * If the dram->ppwrsctl (SDR_DPCR) register has the lowest bit set to 1, this
+ * means that DRAM is currently in self-refresh mode and retaining the old
+ * data. Since we have no idea what to do in this situation yet, just set this
+ * register to 0 and initialize DRAM in the same way as on any normal reboot
+ * (discarding whatever was stored there).
+ *
+ * Note: on sun7i hardware, the highest 16 bits need to be set to 0x1651 magic
+ * value for this write operation to have any effect. On sun5i hadware this
+ * magic value is not necessary. And on sun4i hardware the writes to this
+ * register seem to have no effect at all.
+ */
+static void mctl_disable_power_save(void)
+{
+	struct sunxi_dram_reg *dram = (struct sunxi_dram_reg *)SUNXI_DRAMC_BASE;
+	writel(0x16510000, &dram->ppwrsctl);
+}
+
+/*
+ * After the DRAM is powered up or reset, the DDR3 spec requires to wait at
+ * least 500 us before driving the CKE pin (Clock Enable) high. The dram->idct
+ * (SDR_IDCR) register appears to configure this delay, which gets applied
+ * right at the time when the DRAM initialization is activated in the
+ * 'mctl_ddr3_initialize' function.
+ */
+static void mctl_set_cke_delay(void)
+{
+	struct sunxi_dram_reg *dram = (struct sunxi_dram_reg *)SUNXI_DRAMC_BASE;
+
+	/* The CKE delay is represented in DRAM clock cycles, multiplied by N
+	 * (where N=2 for sun4i/sun5i and N=3 for sun7i). Here it is set to
+	 * the maximum possible value 0x1ffff, just like in the Allwinner's
+	 * boot0 bootloader. The resulting delay value is somewhere between
+	 * ~0.4 ms (sun5i with 648 MHz DRAM clock speed) and ~1.1 ms (sun7i
+	 * with 360 MHz DRAM clock speed). */
+	setbits_le32(&dram->idcr, 0x1ffff);
+}
+
+/*
+ * This triggers the DRAM initialization. It performs sending the mode registers
+ * to the DRAM among other things. Very likely the ZQCL command is also getting
+ * executed (to do the initial impedance calibration on the DRAM side of the
+ * wire). The memory controller and the PHY must be already configured before
+ * calling this function.
+ */
+static void mctl_ddr3_initialize(void)
+{
+	struct sunxi_dram_reg *dram = (struct sunxi_dram_reg *)SUNXI_DRAMC_BASE;
+	setbits_le32(&dram->ccr, DRAM_CCR_INIT);
+	await_bits_clear(&dram->ccr, DRAM_CCR_INIT);
+}
+
+/*
+ * Perform impedance calibration on the DRAM controller side of the wire.
+ */
+static void mctl_set_impedance(u32 zq, u32 odt_en)
+{
+	struct sunxi_dram_reg *dram = (struct sunxi_dram_reg *)SUNXI_DRAMC_BASE;
+	u32 reg_val;
+	u32 zprog = zq & 0xFF, zdata = (zq >> 8) & 0xFFFFF;
+
+#ifndef CONFIG_SUN7I
+	/* Appears that some kind of automatically initiated default
+	 * ZQ calibration is already in progress at this point on sun4i/sun5i
+	 * hardware, but not on sun7i. So it is reasonable to wait for its
+	 * completion before doing anything else. */
+	await_bits_set(&dram->zqsr, DRAM_ZQSR_ZDONE);
+#endif
+
+	/* ZQ calibration is not really useful unless ODT is enabled */
+	if (!odt_en)
+		return;
+
+#ifdef CONFIG_SUN7I
+	/* Enabling ODT in SDR_IOCR on sun7i hardware results in a deadlock
+	 * unless bit 24 is set in SDR_ZQCR1. Not much is known about the
+	 * SDR_ZQCR1 register, but there are hints indicating that it might
+	 * be related to periodic impedance re-calibration. This particular
+	 * magic value is borrowed from the Allwinner boot0 bootloader, and
+	 * using it helps to avoid troubles */
+	writel((1 << 24) | (1 << 1), &dram->zqcr1);
+#endif
+
+	/* Needed at least for sun5i, because it does not self clear there */
+	clrbits_le32(&dram->zqcr0, DRAM_ZQCR0_ZCAL);
+
+	if (zdata) {
+		/* Set the user supplied impedance data */
+		reg_val = DRAM_ZQCR0_ZDEN | zdata;
+		writel(reg_val, &dram->zqcr0);
+		/* no need to wait, this takes effect immediately */
+	} else {
+		/* Do the calibration using the external resistor */
+		reg_val = DRAM_ZQCR0_ZCAL | DRAM_ZQCR0_IMP_DIV(zprog);
+		writel(reg_val, &dram->zqcr0);
+		/* Wait for the new impedance configuration to settle */
+		await_bits_set(&dram->zqsr, DRAM_ZQSR_ZDONE);
+	}
+
+	/* Needed at least for sun5i, because it does not self clear there */
+	clrbits_le32(&dram->zqcr0, DRAM_ZQCR0_ZCAL);
+
+	/* Set I/O configure register */
+	writel(DRAM_IOCR_ODT_EN(odt_en), &dram->iocr);
+}
+
+static unsigned long dramc_init_helper(struct dram_para *para)
 {
 	struct sunxi_dram_reg *dram = (struct sunxi_dram_reg *)SUNXI_DRAMC_BASE;
 	u32 reg_val;
 	u32 density;
 	int ret_val;
 
-	/* check input dram parameter structure */
-	if (!para)
+	/*
+	 * only single rank DDR3 is supported by this code even though the
+	 * hardware can theoretically support DDR2 and up to two ranks
+	 */
+	if (para->type != DRAM_MEMORY_TYPE_DDR3 || para->rank_num != 1)
 		return 0;
 
 	/* setup DRAM relative clock */
-	mctl_setup_dram_clock(para->clock);
+	mctl_setup_dram_clock(para->clock, para->mbus_clock);
 
-#ifdef CONFIG_SUN5I
 	/* Disable any pad power save control */
-	writel(0, &dram->ppwrsctl);
-#endif
+	mctl_disable_power_save();
 
-	/* reset external DRAM */
-#ifndef CONFIG_SUN7I
-	mctl_ddr3_reset();
-#endif
 	mctl_set_drive();
 
 	/* dram clock off */
@@ -507,9 +606,7 @@
 	mctl_enable_dll0(para->tpr3);
 
 	/* configure external DRAM */
-	reg_val = 0x0;
-	if (para->type == DRAM_MEMORY_TYPE_DDR3)
-		reg_val |= DRAM_DCR_TYPE_DDR3;
+	reg_val = DRAM_DCR_TYPE_DDR3;
 	reg_val |= DRAM_DCR_IO_WIDTH(para->io_width >> 3);
 
 	if (para->density == 256)
@@ -534,85 +631,41 @@
 	reg_val |= DRAM_DCR_MODE(DRAM_DCR_MODE_INTERLEAVE);
 	writel(reg_val, &dram->dcr);
 
-#ifdef CONFIG_SUN7I
-	setbits_le32(&dram->zqcr1, (0x1 << 24) | (0x1 << 1));
-	if (para->tpr4 & 0x2)
-		clrsetbits_le32(&dram->zqcr1, (0x1 << 24), (0x1 << 1));
 	dramc_clock_output_en(1);
-#endif
 
-#if (defined(CONFIG_SUN5I) || defined(CONFIG_SUN7I))
-	/* set odt impendance divide ratio */
-	reg_val = ((para->zq) >> 8) & 0xfffff;
-	reg_val |= ((para->zq) & 0xff) << 20;
-	reg_val |= (para->zq) & 0xf0000000;
-	writel(reg_val, &dram->zqcr0);
-#endif
+	mctl_set_impedance(para->zq, para->odt_en);
 
-#ifdef CONFIG_SUN7I
-	/* Set CKE Delay to about 1ms */
-	setbits_le32(&dram->idcr, 0x1ffff);
-#endif
+	mctl_set_cke_delay();
 
-#ifdef CONFIG_SUN7I
-	if ((readl(&dram->ppwrsctl) & 0x1) != 0x1)
-		mctl_ddr3_reset();
-	else
-		setbits_le32(&dram->mcr, DRAM_MCR_RESET);
-#else
-	/* dram clock on */
-	dramc_clock_output_en(1);
-#endif
+	mctl_ddr3_reset();
 
 	udelay(1);
 
-	await_completion(&dram->ccr, DRAM_CCR_INIT);
+	await_bits_clear(&dram->ccr, DRAM_CCR_INIT);
 
 	mctl_enable_dllx(para->tpr3);
 
-#ifdef CONFIG_SUN4I
-	/* set odt impedance divide ratio */
-	reg_val = ((para->zq) >> 8) & 0xfffff;
-	reg_val |= ((para->zq) & 0xff) << 20;
-	reg_val |= (para->zq) & 0xf0000000;
-	writel(reg_val, &dram->zqcr0);
-#endif
-
-#ifdef CONFIG_SUN4I
-	/* set I/O configure register */
-	reg_val = 0x00cc0000;
-	reg_val |= (para->odt_en) & 0x3;
-	reg_val |= ((para->odt_en) & 0x3) << 30;
-	writel(reg_val, &dram->iocr);
-#endif
-
 	/* set refresh period */
-	dramc_set_autorefresh_cycle(para->clock, para->type - 2, density);
+	dramc_set_autorefresh_cycle(para->clock, density);
 
 	/* set timing parameters */
 	writel(para->tpr0, &dram->tpr0);
 	writel(para->tpr1, &dram->tpr1);
 	writel(para->tpr2, &dram->tpr2);
 
-	if (para->type == DRAM_MEMORY_TYPE_DDR3) {
-		reg_val = DRAM_MR_BURST_LENGTH(0x0);
+	reg_val = DRAM_MR_BURST_LENGTH(0x0);
 #if (defined(CONFIG_SUN5I) || defined(CONFIG_SUN7I))
-		reg_val |= DRAM_MR_POWER_DOWN;
+	reg_val |= DRAM_MR_POWER_DOWN;
 #endif
-		reg_val |= DRAM_MR_CAS_LAT(para->cas - 4);
-		reg_val |= DRAM_MR_WRITE_RECOVERY(0x5);
-	} else if (para->type == DRAM_MEMORY_TYPE_DDR2) {
-		reg_val = DRAM_MR_BURST_LENGTH(0x2);
-		reg_val |= DRAM_MR_CAS_LAT(para->cas);
-		reg_val |= DRAM_MR_WRITE_RECOVERY(0x5);
-	}
+	reg_val |= DRAM_MR_CAS_LAT(para->cas - 4);
+	reg_val |= DRAM_MR_WRITE_RECOVERY(ddr3_write_recovery(para->clock));
 	writel(reg_val, &dram->mr);
 
 	writel(para->emr1, &dram->emr);
 	writel(para->emr2, &dram->emr2);
 	writel(para->emr3, &dram->emr3);
 
-	/* set DQS window mode */
+	/* disable drift compensation and set passive DQS window mode */
 	clrsetbits_le32(&dram->ccr, DRAM_CCR_DQS_DRIFT_COMP, DRAM_CCR_DQS_GATE);
 
 #ifdef CONFIG_SUN7I
@@ -620,70 +673,78 @@
 	if (para->tpr4 & 0x1)
 		setbits_le32(&dram->ccr, DRAM_CCR_COMMAND_RATE_1T);
 #endif
-	/* reset external DRAM */
-	setbits_le32(&dram->ccr, DRAM_CCR_INIT);
-	await_completion(&dram->ccr, DRAM_CCR_INIT);
-
-#ifdef CONFIG_SUN7I
-	/* setup zq calibration manual */
-	reg_val = readl(&dram->ppwrsctl);
-	if ((reg_val & 0x1) == 1) {
-		/* super_standby_flag = 1 */
-
-		reg_val = readl(0x01c20c00 + 0x120); /* rtc */
-		reg_val &= 0x000fffff;
-		reg_val |= 0x17b00000;
-		writel(reg_val, &dram->zqcr0);
-
-		/* exit self-refresh state */
-		clrsetbits_le32(&dram->dcr, 0x1f << 27, 0x12 << 27);
-		/* check whether command has been executed */
-		await_completion(&dram->dcr, 0x1 << 31);
-
-		udelay(2);
-
-		/* dram pad hold off */
-		setbits_le32(&dram->ppwrsctl, 0x16510000);
-
-		await_completion(&dram->ppwrsctl, 0x1);
-
-		/* exit self-refresh state */
-		clrsetbits_le32(&dram->dcr, 0x1f << 27, 0x12 << 27);
-
-		/* check whether command has been executed */
-		await_completion(&dram->dcr, 0x1 << 31);
-
-		udelay(2);
-
-		/* issue a refresh command */
-		clrsetbits_le32(&dram->dcr, 0x1f << 27, 0x13 << 27);
-		await_completion(&dram->dcr, 0x1 << 31);
-
-		udelay(2);
-	}
-#endif
+	/* initialize external DRAM */
+	mctl_ddr3_initialize();
 
 	/* scan read pipe value */
 	mctl_itm_enable();
-	if (para->tpr3 & (0x1 << 31)) {
-		ret_val = dramc_scan_dll_para();
-		if (ret_val == 0)
-			para->tpr3 =
-				(((readl(&dram->dllcr[0]) >> 6) & 0x3f) << 16) |
-				(((readl(&dram->dllcr[1]) >> 14) & 0xf) << 0) |
-				(((readl(&dram->dllcr[2]) >> 14) & 0xf) << 4) |
-				(((readl(&dram->dllcr[3]) >> 14) & 0xf) << 8) |
-				(((readl(&dram->dllcr[4]) >> 14) & 0xf) << 12
-				);
-	} else {
-		ret_val = dramc_scan_readpipe();
-	}
+
+	/* Hardware DQS gate training */
+	ret_val = dramc_scan_readpipe();
 
 	if (ret_val < 0)
 		return 0;
 
+	/* allow to override the DQS training results with a custom delay */
+	if (para->dqs_gating_delay)
+		mctl_set_dqs_gating_delay(0, para->dqs_gating_delay);
+
+	/* set the DQS gating window type */
+	if (para->active_windowing)
+		clrbits_le32(&dram->ccr, DRAM_CCR_DQS_GATE);
+	else
+		setbits_le32(&dram->ccr, DRAM_CCR_DQS_GATE);
+
+	mctl_itm_reset();
+
 	/* configure all host port */
 	mctl_configure_hostport();
 
 	return get_ram_size((long *)PHYS_SDRAM_0, PHYS_SDRAM_0_SIZE);
 }
+
+unsigned long dramc_init(struct dram_para *para)
+{
+	unsigned long dram_size, actual_density;
+
+	/* If the dram configuration is not provided, use a default */
+	if (!para)
+		return 0;
+
+	/* if everything is known, then autodetection is not necessary */
+	if (para->io_width && para->bus_width && para->density)
+		return dramc_init_helper(para);
+
+	/* try to autodetect the DRAM bus width and density */
+	para->io_width  = 16;
+	para->bus_width = 32;
+#if defined(CONFIG_SUN4I) || defined(CONFIG_SUN5I)
+	/* only A0-A14 address lines on A10/A13, limiting max density to 4096 */
+	para->density = 4096;
+#else
+	/* all A0-A15 address lines on A20, which allow density 8192 */
+	para->density = 8192;
+#endif
+
+	dram_size = dramc_init_helper(para);
+	if (!dram_size) {
+		/* if 32-bit bus width failed, try 16-bit bus width instead */
+		para->bus_width = 16;
+		dram_size = dramc_init_helper(para);
+		if (!dram_size) {
+			/* if 16-bit bus width also failed, then bail out */
+			return dram_size;
+		}
+	}
+
+	/* check if we need to adjust the density */
+	actual_density = (dram_size >> 17) * para->io_width / para->bus_width;
+
+	if (actual_density != para->density) {
+		/* update the density and re-initialize DRAM again */
+		para->density = actual_density;
+		dram_size = dramc_init_helper(para);
+	}
+
+	return dram_size;
+}
diff --git a/arch/arm/include/asm/arch-sunxi/dram.h b/arch/arm/include/asm/arch-sunxi/dram.h
index 67fbfad..1945f75 100644
--- a/arch/arm/include/asm/arch-sunxi/dram.h
+++ b/arch/arm/include/asm/arch-sunxi/dram.h
@@ -69,6 +69,7 @@
 
 struct dram_para {
 	u32 clock;
+	u32 mbus_clock;
 	u32 type;
 	u32 rank_num;
 	u32 density;
@@ -87,6 +88,8 @@
 	u32 emr1;
 	u32 emr2;
 	u32 emr3;
+	u32 dqs_gating_delay;
+	u32 active_windowing;
 };
 
 #define DRAM_CCR_COMMAND_RATE_1T (0x1 << 5)
@@ -121,9 +124,6 @@
 #define DRAM_DCR_BUS_WIDTH_32BIT 0x3
 #define DRAM_DCR_BUS_WIDTH_16BIT 0x1
 #define DRAM_DCR_BUS_WIDTH_8BIT 0x0
-#define DRAM_DCR_NR_DLLCR_32BIT 5
-#define DRAM_DCR_NR_DLLCR_16BIT 3
-#define DRAM_DCR_NR_DLLCR_8BIT 2
 #define DRAM_DCR_RANK_SEL(n) (((n) & 0x3) << 10)
 #define DRAM_DCR_RANK_SEL_MASK DRAM_DCR_CMD_RANK(0x3)
 #define DRAM_DCR_CMD_RANK_ALL (0x1 << 12)
@@ -132,7 +132,9 @@
 #define DRAM_DCR_MODE_SEQ 0x0
 #define DRAM_DCR_MODE_INTERLEAVE 0x1
 
-#define DRAM_CSR_FAILED (0x1 << 20)
+#define DRAM_CSR_DTERR  (0x1 << 20)
+#define DRAM_CSR_DTIERR (0x1 << 21)
+#define DRAM_CSR_FAILED (DRAM_CSR_DTERR | DRAM_CSR_DTIERR)
 
 #define DRAM_DRR_TRFC(n) ((n) & 0xff)
 #define DRAM_DRR_TREFI(n) (((n) & 0xffff) << 8)
@@ -159,6 +161,10 @@
 
 #define DRAM_ZQCR0_IMP_DIV(n) (((n) & 0xff) << 20)
 #define DRAM_ZQCR0_IMP_DIV_MASK DRAM_ZQCR0_IMP_DIV(0xff)
+#define DRAM_ZQCR0_ZCAL (1 << 31) /* Starts ZQ calibration when set to 1 */
+#define DRAM_ZQCR0_ZDEN (1 << 28) /* Uses ZDATA instead of doing calibration */
+
+#define DRAM_ZQSR_ZDONE (1 << 31) /* ZQ calibration completion flag */
 
 #define DRAM_IOCR_ODT_EN(n) ((((n) & 0x3) << 30) | ((n) & 0x3) << 0)
 #define DRAM_IOCR_ODT_EN_MASK DRAM_IOCR_ODT_EN(0x3)
diff --git a/board/sunxi/Kconfig b/board/sunxi/Kconfig
index b06b5e0..c61c650 100644
--- a/board/sunxi/Kconfig
+++ b/board/sunxi/Kconfig
@@ -1,17 +1,5 @@
 if TARGET_SUN4I
 
-config SYS_CPU
-	string
-	default "armv7"
-
-config SYS_BOARD
-	string
-	default "sunxi"
-
-config SYS_SOC
-	string
-	default "sunxi"
-
 config SYS_CONFIG_NAME
 	string
 	default "sun4i"
@@ -20,18 +8,6 @@
 
 if TARGET_SUN5I
 
-config SYS_CPU
-	string
-	default "armv7"
-
-config SYS_BOARD
-	string
-	default "sunxi"
-
-config SYS_SOC
-	string
-	default "sunxi"
-
 config SYS_CONFIG_NAME
 	string
 	default "sun5i"
@@ -40,6 +16,14 @@
 
 if TARGET_SUN7I
 
+config SYS_CONFIG_NAME
+	string
+	default "sun7i"
+
+endif
+
+if TARGET_SUN4I || TARGET_SUN5I || TARGET_SUN7I
+
 config SYS_CPU
 	string
 	default "armv7"
@@ -52,8 +36,7 @@
 	string
 	default "sunxi"
 
-config SYS_CONFIG_NAME
-	string
-	default "sun7i"
+config FTDFILE
+	string "Default ftdfile env setting for this board"
 
 endif
diff --git a/configs/A10-OLinuXino-Lime_defconfig b/configs/A10-OLinuXino-Lime_defconfig
index a414953..b93ae7d 100644
--- a/configs/A10-OLinuXino-Lime_defconfig
+++ b/configs/A10-OLinuXino-Lime_defconfig
@@ -1,4 +1,5 @@
 CONFIG_SPL=y
 CONFIG_SYS_EXTRA_OPTIONS="A10_OLINUXINO_L,SPL,AXP209_POWER,SUNXI_EMAC,AHCI,SATAPWR=SUNXI_GPC(3),USB_EHCI"
+CONFIG_FTDFILE="sun4i-a10-olinuxino-lime.dtb"
 +S:CONFIG_ARM=y
 +S:CONFIG_TARGET_SUN4I=y
diff --git a/configs/A10s-OLinuXino-M_defconfig b/configs/A10s-OLinuXino-M_defconfig
index 1560ab1..f206970 100644
--- a/configs/A10s-OLinuXino-M_defconfig
+++ b/configs/A10s-OLinuXino-M_defconfig
@@ -1,4 +1,5 @@
 CONFIG_SPL=y
 CONFIG_SYS_EXTRA_OPTIONS="A10S_OLINUXINO_M,SPL,AXP152_POWER,SUNXI_EMAC,USB_EHCI,SUNXI_USB_VBUS0_GPIO=SUNXI_GPB(10)"
+CONFIG_FTDFILE="sun5i-a10s-olinuxino-micro.dtb"
 +S:CONFIG_ARM=y
 +S:CONFIG_TARGET_SUN5I=y
diff --git a/configs/A13-OLinuXinoM_defconfig b/configs/A13-OLinuXinoM_defconfig
index fb510d5..8529dbe 100644
--- a/configs/A13-OLinuXinoM_defconfig
+++ b/configs/A13-OLinuXinoM_defconfig
@@ -1,4 +1,5 @@
 CONFIG_SPL=y
 CONFIG_SYS_EXTRA_OPTIONS="A13_OLINUXINOM,SPL,CONS_INDEX=2,USB_EHCI,SUNXI_USB_VBUS0_GPIO=SUNXI_GPG(11)"
+CONFIG_FTDFILE="sun5i-a13-olinuxino-micro.dtb"
 +S:CONFIG_ARM=y
 +S:CONFIG_TARGET_SUN5I=y
diff --git a/configs/A13-OLinuXino_defconfig b/configs/A13-OLinuXino_defconfig
index ba21136..c3a12cc 100644
--- a/configs/A13-OLinuXino_defconfig
+++ b/configs/A13-OLinuXino_defconfig
@@ -1,4 +1,5 @@
 CONFIG_SPL=y
 CONFIG_SYS_EXTRA_OPTIONS="A13_OLINUXINO,SPL,CONS_INDEX=2,AXP209_POWER,USB_EHCI,SUNXI_USB_VBUS0_GPIO=SUNXI_GPG(11)"
+CONFIG_FTDFILE="sun5i-a13-olinuxino.dtb"
 +S:CONFIG_ARM=y
 +S:CONFIG_TARGET_SUN5I=y
diff --git a/configs/A20-OLinuXino_MICRO_defconfig b/configs/A20-OLinuXino_MICRO_defconfig
index 52a857a..c91319d 100644
--- a/configs/A20-OLinuXino_MICRO_defconfig
+++ b/configs/A20-OLinuXino_MICRO_defconfig
@@ -1,4 +1,5 @@
 CONFIG_SPL=y
 CONFIG_SYS_EXTRA_OPTIONS="A20_OLINUXINO_M,SPL,AXP209_POWER,SUNXI_GMAC,AHCI,SATAPWR=SUNXI_GPB(8),USB_EHCI"
+CONFIG_FTDFILE="sun7i-a20-olinuxino-micro.dtb"
 +S:CONFIG_ARM=y
 +S:CONFIG_TARGET_SUN7I=y
diff --git a/configs/Auxtek-T004_defconfig b/configs/Auxtek-T004_defconfig
index 643e628..193019c 100644
--- a/configs/Auxtek-T004_defconfig
+++ b/configs/Auxtek-T004_defconfig
@@ -1,4 +1,5 @@
 CONFIG_SPL=y
 CONFIG_SYS_EXTRA_OPTIONS="AUXTEK_T004,SPL,AXP152_POWER,USB_EHCI,SUNXI_USB_VBUS0_GPIO=SUNXI_GPG(13)"
+CONFIG_FTDFILE="sun5i-a10s-auxtek-t004.dtb"
 +S:CONFIG_ARM=y
 +S:CONFIG_TARGET_SUN5I=y
diff --git a/configs/Bananapi_defconfig b/configs/Bananapi_defconfig
index be3d815..dc68469 100644
--- a/configs/Bananapi_defconfig
+++ b/configs/Bananapi_defconfig
@@ -1,4 +1,5 @@
 CONFIG_SPL=y
 CONFIG_SYS_EXTRA_OPTIONS="BANANAPI,SPL,AXP209_POWER,SUNXI_GMAC,RGMII,MACPWR=SUNXI_GPH(23),AHCI,USB_EHCI"
+CONFIG_FTDFILE="sun7i-a20-bananapi.dtb"
 +S:CONFIG_ARM=y
 +S:CONFIG_TARGET_SUN7I=y
diff --git a/configs/Cubieboard2_FEL_defconfig b/configs/Cubieboard2_FEL_defconfig
index 63636b0..ae5e25a 100644
--- a/configs/Cubieboard2_FEL_defconfig
+++ b/configs/Cubieboard2_FEL_defconfig
@@ -1,4 +1,5 @@
 CONFIG_SPL=y
 CONFIG_SYS_EXTRA_OPTIONS="CUBIEBOARD2,SPL_FEL,AXP209_POWER,SUNXI_GMAC,AHCI,SATAPWR=SUNXI_GPB(8),USB_EHCI"
+CONFIG_FTDFILE="sun7i-a20-cubieboard2.dtb"
 +S:CONFIG_ARM=y
 +S:CONFIG_TARGET_SUN7I=y
diff --git a/configs/Cubieboard2_defconfig b/configs/Cubieboard2_defconfig
index fa34c51..df87edc 100644
--- a/configs/Cubieboard2_defconfig
+++ b/configs/Cubieboard2_defconfig
@@ -1,4 +1,5 @@
 CONFIG_SPL=y
 CONFIG_SYS_EXTRA_OPTIONS="CUBIEBOARD2,SPL,AXP209_POWER,SUNXI_GMAC,AHCI,SATAPWR=SUNXI_GPB(8),USB_EHCI"
+CONFIG_FTDFILE="sun7i-a20-cubieboard2.dtb"
 +S:CONFIG_ARM=y
 +S:CONFIG_TARGET_SUN7I=y
diff --git a/configs/Cubieboard_defconfig b/configs/Cubieboard_defconfig
index 774029c..57bf045 100644
--- a/configs/Cubieboard_defconfig
+++ b/configs/Cubieboard_defconfig
@@ -1,4 +1,5 @@
 CONFIG_SPL=y
 CONFIG_SYS_EXTRA_OPTIONS="CUBIEBOARD,SPL,AXP209_POWER,SUNXI_EMAC,AHCI,SATAPWR=SUNXI_GPB(8),USB_EHCI"
+CONFIG_FTDFILE="sun4i-a10-cubieboard.dtb"
 +S:CONFIG_ARM=y
 +S:CONFIG_TARGET_SUN4I=y
diff --git a/configs/Cubietruck_FEL_defconfig b/configs/Cubietruck_FEL_defconfig
index 7ebf635..790125a 100644
--- a/configs/Cubietruck_FEL_defconfig
+++ b/configs/Cubietruck_FEL_defconfig
@@ -1,4 +1,5 @@
 CONFIG_SPL=y
 CONFIG_SYS_EXTRA_OPTIONS="CUBIETRUCK,SPL_FEL,AXP209_POWER,SUNXI_GMAC,RGMII,AHCI,SATAPWR=SUNXI_GPH(12),USB_EHCI"
+CONFIG_FTDFILE="sun7i-a20-cubietruck.dtb"
 +S:CONFIG_ARM=y
 +S:CONFIG_TARGET_SUN7I=y
diff --git a/configs/Cubietruck_defconfig b/configs/Cubietruck_defconfig
index fbc9127..4ad82e3 100644
--- a/configs/Cubietruck_defconfig
+++ b/configs/Cubietruck_defconfig
@@ -1,4 +1,5 @@
 CONFIG_SPL=y
 CONFIG_SYS_EXTRA_OPTIONS="CUBIETRUCK,SPL,AXP209_POWER,SUNXI_GMAC,RGMII,AHCI,SATAPWR=SUNXI_GPH(12),USB_EHCI"
+CONFIG_FTDFILE="sun7i-a20-cubietruck.dtb"
 +S:CONFIG_ARM=y
 +S:CONFIG_TARGET_SUN7I=y
diff --git a/configs/Linksprite_pcDuino3_defconfig b/configs/Linksprite_pcDuino3_defconfig
index 10ebcef..22d0446 100644
--- a/configs/Linksprite_pcDuino3_defconfig
+++ b/configs/Linksprite_pcDuino3_defconfig
@@ -1,4 +1,5 @@
 CONFIG_SPL=y
 CONFIG_SYS_EXTRA_OPTIONS="PCDUINO3,SPL,AXP209_POWER,SUNXI_GMAC,AHCI,SATAPWR=SUNXI_GPH(2),USB_EHCI"
+CONFIG_FTDFILE="sun7i-a20-pcduino3.dtb"
 +S:CONFIG_ARM=y
 +S:CONFIG_TARGET_SUN7I=y
diff --git a/configs/Mele_A1000G_defconfig b/configs/Mele_A1000G_defconfig
index 2079279..fa47fae 100644
--- a/configs/Mele_A1000G_defconfig
+++ b/configs/Mele_A1000G_defconfig
@@ -1,4 +1,5 @@
 CONFIG_SPL=y
 CONFIG_SYS_EXTRA_OPTIONS="MELE_A1000G,SPL,AXP209_POWER,SUNXI_EMAC,MACPWR=SUNXI_GPH(15),AHCI,USB_EHCI"
+CONFIG_FTDFILE="sun4i-a10-a1000.dtb"
 +S:CONFIG_ARM=y
 +S:CONFIG_TARGET_SUN4I=y
diff --git a/configs/Mele_A1000_defconfig b/configs/Mele_A1000_defconfig
index 13528ab..d7c156d 100644
--- a/configs/Mele_A1000_defconfig
+++ b/configs/Mele_A1000_defconfig
@@ -1,4 +1,5 @@
 CONFIG_SPL=y
 CONFIG_SYS_EXTRA_OPTIONS="MELE_A1000,SPL,AXP209_POWER,SUNXI_EMAC,MACPWR=SUNXI_GPH(15),AHCI,USB_EHCI"
+CONFIG_FTDFILE="sun4i-a10-a1000.dtb"
 +S:CONFIG_ARM=y
 +S:CONFIG_TARGET_SUN4I=y
diff --git a/configs/Mini-X-1Gb_defconfig b/configs/Mini-X-1Gb_defconfig
index 61fa1de..af0b800 100644
--- a/configs/Mini-X-1Gb_defconfig
+++ b/configs/Mini-X-1Gb_defconfig
@@ -1,4 +1,5 @@
 CONFIG_SPL=y
 CONFIG_SYS_EXTRA_OPTIONS="MINI_X_1GB,SPL,AXP209_POWER,USB_EHCI"
+CONFIG_FTDFILE="sun4i-a10-mini-xplus.dtb"
 +S:CONFIG_ARM=y
 +S:CONFIG_TARGET_SUN4I=y
diff --git a/configs/Mini-X_defconfig b/configs/Mini-X_defconfig
index ceb02c5..ea0c786 100644
--- a/configs/Mini-X_defconfig
+++ b/configs/Mini-X_defconfig
@@ -1,4 +1,5 @@
 CONFIG_SPL=y
 CONFIG_SYS_EXTRA_OPTIONS="MINI_X,SPL,AXP209_POWER,USB_EHCI"
+CONFIG_FTDFILE="sun4i-a10-mini-xplus.dtb"
 +S:CONFIG_ARM=y
 +S:CONFIG_TARGET_SUN4I=y
diff --git a/configs/ba10_tv_box_defconfig b/configs/ba10_tv_box_defconfig
index 02a2538..c9961bd 100644
--- a/configs/ba10_tv_box_defconfig
+++ b/configs/ba10_tv_box_defconfig
@@ -1,4 +1,5 @@
 CONFIG_SPL=y
 CONFIG_SYS_EXTRA_OPTIONS="BA10_TV_BOX,SPL,AXP209_POWER,SUNXI_EMAC,USB_EHCI,SUNXI_USB_VBUS1_GPIO=SUNXI_GPH(12)"
+CONFIG_FTDFILE="sun4i-a10-ba10-tvbox.dtb"
 +S:CONFIG_ARM=y
 +S:CONFIG_TARGET_SUN4I=y
diff --git a/configs/i12-tvbox_defconfig b/configs/i12-tvbox_defconfig
index ff86841..b267312 100644
--- a/configs/i12-tvbox_defconfig
+++ b/configs/i12-tvbox_defconfig
@@ -1,4 +1,5 @@
 CONFIG_SPL=y
 CONFIG_SYS_EXTRA_OPTIONS="I12_TVBOX,SPL,AXP209_POWER,SUNXI_GMAC,MACPWR=SUNXI_GPH(21),USB_EHCI"
+CONFIG_FTDFILE="sun7i-a20-i12-tvbox.dtb"
 +S:CONFIG_ARM=y
 +S:CONFIG_TARGET_SUN7I=y
diff --git a/configs/qt840a_defconfig b/configs/qt840a_defconfig
index acb100c..7360212 100644
--- a/configs/qt840a_defconfig
+++ b/configs/qt840a_defconfig
@@ -1,4 +1,5 @@
 CONFIG_SPL=y
 CONFIG_SYS_EXTRA_OPTIONS="QT840A,SPL,AXP209_POWER,SUNXI_GMAC,MACPWR=SUNXI_GPH(21),USB_EHCI"
+CONFIG_FTDFILE="sun7i-a20-i12-tvbox.dtb"
 +S:CONFIG_ARM=y
 +S:CONFIG_TARGET_SUN7I=y
diff --git a/configs/r7-tv-dongle_defconfig b/configs/r7-tv-dongle_defconfig
index f0f97b0..35b66f0 100644
--- a/configs/r7-tv-dongle_defconfig
+++ b/configs/r7-tv-dongle_defconfig
@@ -1,4 +1,5 @@
 CONFIG_SPL=y
 CONFIG_SYS_EXTRA_OPTIONS="R7DONGLE,SPL,AXP152_POWER,USB_EHCI,SUNXI_USB_VBUS0_GPIO=SUNXI_GPG(13)"
+CONFIG_FTDFILE="sun5i-a10s-r7-tv-dongle.dtb"
 +S:CONFIG_ARM=y
 +S:CONFIG_TARGET_SUN5I=y
diff --git a/include/configs/sunxi-common.h b/include/configs/sunxi-common.h
index 6a3044f..e768921 100644
--- a/include/configs/sunxi-common.h
+++ b/include/configs/sunxi-common.h
@@ -100,10 +100,10 @@
 /* Boot Argument Buffer Size */
 #define CONFIG_SYS_BARGSIZE		CONFIG_SYS_CBSIZE
 
-#define CONFIG_SYS_LOAD_ADDR		0x48000000 /* default load address */
+#define CONFIG_SYS_LOAD_ADDR		0x42000000 /* default load address */
 
 /* standalone support */
-#define CONFIG_STANDALONE_LOAD_ADDR	0x48000000
+#define CONFIG_STANDALONE_LOAD_ADDR	0x42000000
 
 #define CONFIG_SYS_HZ			1000
 
@@ -123,12 +123,8 @@
 #define CONFIG_ENV_OFFSET		(544 << 10) /* (8 + 24 + 512) KiB */
 #define CONFIG_ENV_SIZE			(128 << 10)	/* 128 KiB */
 
-#define CONFIG_EXTRA_ENV_SETTINGS \
-	"bootm_size=0x10000000\0"
-
-#define CONFIG_SYS_BOOT_GET_CMDLINE
-
 #include <config_cmd_default.h>
+#undef CONFIG_CMD_FPGA
 
 #define CONFIG_FAT_WRITE	/* enable write access */
 
@@ -168,10 +164,6 @@
 #define CONFIG_SYS_SPL_MALLOC_START	0x4ff00000
 #define CONFIG_SYS_SPL_MALLOC_SIZE	0x00080000	/* 512 KiB */
 
-#undef CONFIG_CMD_FPGA
-#undef CONFIG_CMD_NET
-#undef CONFIG_CMD_NFS
-
 /* I2C */
 #define CONFIG_SPL_I2C_SUPPORT
 #define CONFIG_SYS_I2C
@@ -207,14 +199,6 @@
 #define CONFIG_PHYLIB
 #endif
 
-#ifdef CONFIG_CMD_NET
-#define CONFIG_CMD_NFS
-#define CONFIG_CMD_DNS
-#define CONFIG_NETCONSOLE
-#define CONFIG_BOOTP_DNS2
-#define CONFIG_BOOTP_SEND_HOSTNAME
-#endif
-
 #ifdef CONFIG_USB_EHCI
 #define CONFIG_CMD_USB
 #define CONFIG_SYS_USB_EHCI_MAX_ROOT_PORTS 1
@@ -232,6 +216,40 @@
 
 #ifndef CONFIG_SPL_BUILD
 #include <config_distro_defaults.h>
+
+/* 256M RAM (minimum), 32M uncompressed kernel, 16M compressed kernel, 1M fdt,
+ * 1M script, 1M pxe and the ramdisk at the end */
+#define MEM_LAYOUT_ENV_SETTINGS \
+	"bootm_size=0x10000000\0" \
+	"kernel_addr_r=0x42000000\0" \
+	"fdt_addr_r=0x43000000\0" \
+	"scriptaddr=0x43100000\0" \
+	"pxefile_addr_r=0x43200000\0" \
+	"ramdisk_addr_r=0x43300000\0"
+
+#ifdef CONFIG_AHCI
+#define BOOT_TARGET_DEVICES_SCSI(func) func(SCSI, scsi, 0)
+#else
+#define BOOT_TARGET_DEVICES_SCSI(func)
+#endif
+
+#define BOOT_TARGET_DEVICES(func) \
+	func(MMC, mmc, 0) \
+	BOOT_TARGET_DEVICES_SCSI(func) \
+	func(USB, usb, 0) \
+	func(PXE, pxe, na) \
+	func(DHCP, dhcp, na)
+
+#include <config_distro_bootcmd.h>
+
+#define CONFIG_EXTRA_ENV_SETTINGS \
+	MEM_LAYOUT_ENV_SETTINGS \
+	"fdtfile=" CONFIG_FTDFILE "\0" \
+	"console=ttyS0,115200\0" \
+	BOOTENV
+
+#else /* ifndef CONFIG_SPL_BUILD */
+#define CONFIG_EXTRA_ENV_SETTINGS
 #endif
 
 #endif /* _SUNXI_COMMON_CONFIG_H */