Move ALLOC_CACHE_ALIGN_BUFFER() to the new memalign.h header

Now that we have a new header file for cache-aligned allocation, we should
move the stack-based allocation macro there also.

Signed-off-by: Simon Glass <sjg@chromium.org>
diff --git a/include/memalign.h b/include/memalign.h
index f78b9dd..a960039 100644
--- a/include/memalign.h
+++ b/include/memalign.h
@@ -13,9 +13,100 @@
  */
 #ifndef __ASSEMBLY__
 #include <asm/cache.h>
-
 #include <malloc.h>
 
+/*
+ * The ALLOC_CACHE_ALIGN_BUFFER macro is used to allocate a buffer on the
+ * stack that meets the minimum architecture alignment requirements for DMA.
+ * Such a buffer is useful for DMA operations where flushing and invalidating
+ * the cache before and after a read and/or write operation is required for
+ * correct operations.
+ *
+ * When called the macro creates an array on the stack that is sized such
+ * that:
+ *
+ * 1) The beginning of the array can be advanced enough to be aligned.
+ *
+ * 2) The size of the aligned portion of the array is a multiple of the minimum
+ *    architecture alignment required for DMA.
+ *
+ * 3) The aligned portion contains enough space for the original number of
+ *    elements requested.
+ *
+ * The macro then creates a pointer to the aligned portion of this array and
+ * assigns to the pointer the address of the first element in the aligned
+ * portion of the array.
+ *
+ * Calling the macro as:
+ *
+ *     ALLOC_CACHE_ALIGN_BUFFER(uint32_t, buffer, 1024);
+ *
+ * Will result in something similar to saying:
+ *
+ *     uint32_t    buffer[1024];
+ *
+ * The following differences exist:
+ *
+ * 1) The resulting buffer is guaranteed to be aligned to the value of
+ *    ARCH_DMA_MINALIGN.
+ *
+ * 2) The buffer variable created by the macro is a pointer to the specified
+ *    type, and NOT an array of the specified type.  This can be very important
+ *    if you want the address of the buffer, which you probably do, to pass it
+ *    to the DMA hardware.  The value of &buffer is different in the two cases.
+ *    In the macro case it will be the address of the pointer, not the address
+ *    of the space reserved for the buffer.  However, in the second case it
+ *    would be the address of the buffer.  So if you are replacing hard coded
+ *    stack buffers with this macro you need to make sure you remove the & from
+ *    the locations where you are taking the address of the buffer.
+ *
+ * Note that the size parameter is the number of array elements to allocate,
+ * not the number of bytes.
+ *
+ * This macro can not be used outside of function scope, or for the creation
+ * of a function scoped static buffer.  It can not be used to create a cache
+ * line aligned global buffer.
+ */
+#define PAD_COUNT(s, pad) (((s) - 1) / (pad) + 1)
+#define PAD_SIZE(s, pad) (PAD_COUNT(s, pad) * pad)
+#define ALLOC_ALIGN_BUFFER_PAD(type, name, size, align, pad)		\
+	char __##name[ROUND(PAD_SIZE((size) * sizeof(type), pad), align)  \
+		      + (align - 1)];					\
+									\
+	type *name = (type *)ALIGN((uintptr_t)__##name, align)
+#define ALLOC_ALIGN_BUFFER(type, name, size, align)		\
+	ALLOC_ALIGN_BUFFER_PAD(type, name, size, align, 1)
+#define ALLOC_CACHE_ALIGN_BUFFER_PAD(type, name, size, pad)		\
+	ALLOC_ALIGN_BUFFER_PAD(type, name, size, ARCH_DMA_MINALIGN, pad)
+#define ALLOC_CACHE_ALIGN_BUFFER(type, name, size)			\
+	ALLOC_ALIGN_BUFFER(type, name, size, ARCH_DMA_MINALIGN)
+
+/*
+ * DEFINE_CACHE_ALIGN_BUFFER() is similar to ALLOC_CACHE_ALIGN_BUFFER, but it's
+ * purpose is to allow allocating aligned buffers outside of function scope.
+ * Usage of this macro shall be avoided or used with extreme care!
+ */
+#define DEFINE_ALIGN_BUFFER(type, name, size, align)			\
+	static char __##name[ALIGN(size * sizeof(type), align)]	\
+			__aligned(align);				\
+									\
+	static type *name = (type *)__##name
+#define DEFINE_CACHE_ALIGN_BUFFER(type, name, size)			\
+	DEFINE_ALIGN_BUFFER(type, name, size, ARCH_DMA_MINALIGN)
+
+/**
+ * malloc_cache_aligned() - allocate a memory region aligned to cache line size
+ *
+ * This allocates memory at a cache-line boundary. The amount allocated may
+ * be larger than requested as it is rounded up to the nearest multiple of the
+ * cache-line size. This ensured that subsequent cache operations on this
+ * memory (flush, invalidate) will not affect subsequently allocated regions.
+ *
+ * @size:	Minimum number of bytes to allocate
+ *
+ * @return pointer to new memory region, or NULL if there is no more memory
+ * available.
+ */
 static inline void *malloc_cache_aligned(size_t size)
 {
 	return memalign(ARCH_DMA_MINALIGN, ALIGN(size, ARCH_DMA_MINALIGN));