ehci-hcd.c, musb_core, usb.h: Add USB_DMA_MINALIGN define for cache alignment

The USB spec says that 32 bytes is the minimum required alignment.
However on some platforms we have a larger minimum requirement for cache
coherency.  In those cases, use that value rather than the USB spec
minimum.  We add a cpp check to <usb.h> to define USB_DMA_MINALIGN and
make use of it in ehci-hcd.c and musb_core.h.  We cannot use MAX() here
as we are not allowed to have tests inside of align(...).

Signed-off-by: Tom Rini <trini@ti.com>
[marek.vasut]: introduce some crazy macro voodoo
Signed-off-by: Marek Vasut <marex@denx.de>
[ilya.yanok]: moved external buffer fixes to separate patch,
we use {ALLOC,DEFINE}_ALIGN_BUFFER macros with alignment of USB_DMA_MINALIGN
for qh_list, qh and qtd structures to make sure they are proper aligned
for both controller and cache operations.
Signed-off-by: Ilya Yanok <ilya.yanok@cogentembedded.com>
diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c
index 3be2a77..2b46662 100644
--- a/drivers/usb/host/ehci-hcd.c
+++ b/drivers/usb/host/ehci-hcd.c
@@ -34,7 +34,10 @@
 volatile struct ehci_hcor *hcor;
 
 static uint16_t portreset;
-static struct QH qh_list __attribute__((aligned(32)));
+DEFINE_ALIGN_BUFFER(struct QH, qh_list, 1, USB_DMA_MINALIGN);
+
+#define ALIGN_END_ADDR(type, ptr, size)			\
+	((uint32_t)(ptr) + roundup((size) * sizeof(type), USB_DMA_MINALIGN))
 
 static struct descriptor {
 	struct usb_hub_descriptor hub;
@@ -207,8 +210,8 @@
 ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer,
 		   int length, struct devrequest *req)
 {
-	static struct QH qh __attribute__((aligned(32)));
-	static struct qTD qtd[3] __attribute__((aligned (32)));
+	ALLOC_ALIGN_BUFFER(struct QH, qh, 1, USB_DMA_MINALIGN);
+	ALLOC_ALIGN_BUFFER(struct qTD, qtd, 3, USB_DMA_MINALIGN);
 	int qtd_counter = 0;
 
 	volatile struct qTD *vtd;
@@ -229,8 +232,8 @@
 		      le16_to_cpu(req->value), le16_to_cpu(req->value),
 		      le16_to_cpu(req->index));
 
-	memset(&qh, 0, sizeof(struct QH));
-	memset(qtd, 0, sizeof(qtd));
+	memset(qh, 0, sizeof(struct QH));
+	memset(qtd, 0, 3 * sizeof(*qtd));
 
 	toggle = usb_gettoggle(dev, usb_pipeendpoint(pipe), usb_pipeout(pipe));
 
@@ -244,7 +247,7 @@
 	 *   qh_overlay.qt_next ...... 13-10 H
 	 * - qh_overlay.qt_altnext
 	 */
-	qh.qh_link = cpu_to_hc32((uint32_t)&qh_list | QH_LINK_TYPE_QH);
+	qh->qh_link = cpu_to_hc32((uint32_t)qh_list | QH_LINK_TYPE_QH);
 	c = (usb_pipespeed(pipe) != USB_SPEED_HIGH &&
 	     usb_pipeendpoint(pipe) == 0) ? 1 : 0;
 	endpt = (8 << 28) |
@@ -255,14 +258,14 @@
 	    (usb_pipespeed(pipe) << 12) |
 	    (usb_pipeendpoint(pipe) << 8) |
 	    (0 << 7) | (usb_pipedevice(pipe) << 0);
-	qh.qh_endpt1 = cpu_to_hc32(endpt);
+	qh->qh_endpt1 = cpu_to_hc32(endpt);
 	endpt = (1 << 30) |
 	    (dev->portnr << 23) |
 	    (dev->parent->devnum << 16) | (0 << 8) | (0 << 0);
-	qh.qh_endpt2 = cpu_to_hc32(endpt);
-	qh.qh_overlay.qt_next = cpu_to_hc32(QT_NEXT_TERMINATE);
+	qh->qh_endpt2 = cpu_to_hc32(endpt);
+	qh->qh_overlay.qt_next = cpu_to_hc32(QT_NEXT_TERMINATE);
 
-	tdp = &qh.qh_overlay.qt_next;
+	tdp = &qh->qh_overlay.qt_next;
 
 	if (req != NULL) {
 		/*
@@ -340,16 +343,16 @@
 		tdp = &qtd[qtd_counter++].qt_next;
 	}
 
-	qh_list.qh_link = cpu_to_hc32((uint32_t)&qh | QH_LINK_TYPE_QH);
+	qh_list->qh_link = cpu_to_hc32((uint32_t)qh | QH_LINK_TYPE_QH);
 
 	/* Flush dcache */
-	flush_dcache_range((uint32_t)&qh_list,
-		(uint32_t)&qh_list + sizeof(struct QH));
-	flush_dcache_range((uint32_t)&qh, (uint32_t)&qh + sizeof(struct QH));
-	flush_dcache_range((uint32_t)qtd, (uint32_t)qtd + sizeof(qtd));
+	flush_dcache_range((uint32_t)qh_list,
+		ALIGN_END_ADDR(struct QH, qh_list, 1));
+	flush_dcache_range((uint32_t)qh, ALIGN_END_ADDR(struct QH, qh, 1));
+	flush_dcache_range((uint32_t)qtd, ALIGN_END_ADDR(struct qTD, qtd, 3));
 
 	/* Set async. queue head pointer. */
-	ehci_writel(&hcor->or_asynclistaddr, (uint32_t)&qh_list);
+	ehci_writel(&hcor->or_asynclistaddr, (uint32_t)qh_list);
 
 	usbsts = ehci_readl(&hcor->or_usbsts);
 	ehci_writel(&hcor->or_usbsts, (usbsts & 0x3f));
@@ -372,12 +375,12 @@
 	timeout = USB_TIMEOUT_MS(pipe);
 	do {
 		/* Invalidate dcache */
-		invalidate_dcache_range((uint32_t)&qh_list,
-			(uint32_t)&qh_list + sizeof(struct QH));
-		invalidate_dcache_range((uint32_t)&qh,
-			(uint32_t)&qh + sizeof(struct QH));
+		invalidate_dcache_range((uint32_t)qh_list,
+			ALIGN_END_ADDR(struct QH, qh_list, 1));
+		invalidate_dcache_range((uint32_t)qh,
+			ALIGN_END_ADDR(struct QH, qh, 1));
 		invalidate_dcache_range((uint32_t)qtd,
-			(uint32_t)qtd + sizeof(qtd));
+			ALIGN_END_ADDR(struct qTD, qtd, 3));
 
 		token = hc32_to_cpu(vtd->qt_token);
 		if (!(token & 0x80))
@@ -406,7 +409,7 @@
 		goto fail;
 	}
 
-	token = hc32_to_cpu(qh.qh_overlay.qt_token);
+	token = hc32_to_cpu(qh->qh_overlay.qt_token);
 	if (!(token & 0x80)) {
 		debug("TOKEN=%#x\n", token);
 		switch (token & 0xfc) {
@@ -734,13 +737,13 @@
 #endif
 
 	/* Set head of reclaim list */
-	memset(&qh_list, 0, sizeof(qh_list));
-	qh_list.qh_link = cpu_to_hc32((uint32_t)&qh_list | QH_LINK_TYPE_QH);
-	qh_list.qh_endpt1 = cpu_to_hc32((1 << 15) | (USB_SPEED_HIGH << 12));
-	qh_list.qh_curtd = cpu_to_hc32(QT_NEXT_TERMINATE);
-	qh_list.qh_overlay.qt_next = cpu_to_hc32(QT_NEXT_TERMINATE);
-	qh_list.qh_overlay.qt_altnext = cpu_to_hc32(QT_NEXT_TERMINATE);
-	qh_list.qh_overlay.qt_token = cpu_to_hc32(0x40);
+	memset(qh_list, 0, sizeof(*qh_list));
+	qh_list->qh_link = cpu_to_hc32((uint32_t)qh_list | QH_LINK_TYPE_QH);
+	qh_list->qh_endpt1 = cpu_to_hc32((1 << 15) | (USB_SPEED_HIGH << 12));
+	qh_list->qh_curtd = cpu_to_hc32(QT_NEXT_TERMINATE);
+	qh_list->qh_overlay.qt_next = cpu_to_hc32(QT_NEXT_TERMINATE);
+	qh_list->qh_overlay.qt_altnext = cpu_to_hc32(QT_NEXT_TERMINATE);
+	qh_list->qh_overlay.qt_token = cpu_to_hc32(0x40);
 
 	reg = ehci_readl(&hccr->cr_hcsparams);
 	descriptor.hub.bNbrPorts = HCS_N_PORTS(reg);
diff --git a/drivers/usb/musb/musb_core.h b/drivers/usb/musb/musb_core.h
index a8adcce..e914369 100644
--- a/drivers/usb/musb/musb_core.h
+++ b/drivers/usb/musb/musb_core.h
@@ -145,7 +145,7 @@
 		struct musb_epN_regs epN;
 	} ep[16];
 
-} __attribute__((packed, aligned(32)));
+} __attribute__((packed, aligned(USB_DMA_MINALIGN)));
 #endif
 
 /*
diff --git a/include/usb.h b/include/usb.h
index 6da91e7..ba3d169 100644
--- a/include/usb.h
+++ b/include/usb.h
@@ -29,6 +29,16 @@
 #include <usb_defs.h>
 #include <usbdescriptors.h>
 
+/*
+ * The EHCI spec says that we must align to at least 32 bytes.  However,
+ * some platforms require larger alignment.
+ */
+#if ARCH_DMA_MINALIGN > 32
+#define USB_DMA_MINALIGN	ARCH_DMA_MINALIGN
+#else
+#define USB_DMA_MINALIGN	32
+#endif
+
 /* Everything is aribtrary */
 #define USB_ALTSETTINGALLOC		4
 #define USB_MAXALTSETTING		128	/* Hard limit */