[qca-nss-drv] Convert vmap to cacheable

Map the descriptor rings through a cacheable mapping and add the cache
maintenance and barriers that a cached mapping requires: descriptors and
ring indices are invalidated before the host reads them and cleaned or
flushed after the host writes them. The behaviour is selected at build
time with NSS_CACHED_RING and defaults to the existing uncached mapping.
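
With a cached mapping the host and the NSS no longer observe each
other's writes implicitly, so every access to shared ring state follows
one pattern: invalidate before reading data the NSS wrote, clean after
writing data the NSS will read, and flush (clean + invalidate) the
host-owned index so the update reaches memory without a stale copy
lingering in the cache. A dsb() orders each maintenance operation
against the access that depends on it. A minimal sketch of that
protocol (the ring/index names below are illustrative, not the driver's
actual structures):

	/* Consumer side: refresh our view of the index the NSS wrote. */
	dmac_inv_range(&ring->nss_index, &ring->nss_index + 1);
	dsb();
	nss_index = ring->nss_index;

	/* Producer side: publish the descriptor, then the new index. */
	desc->buffer = buffer;
	dmac_clean_range(desc, desc + 1);
	dsb();
	ring->hlos_index = hlos_index;
	dmac_flush_range(&ring->hlos_index, &ring->hlos_index + 1);
	dsb();

Cached rings stay disabled by default (NSS_CACHED_RING=0 compiles the
maintenance macros to no-ops and keeps ioremap_nocache()); building
with NSS_CACHED_RING=1 enables the maintenance and maps the rings with
ioremap_cache().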

Change-Id: I369cbda1f3a342009c223194a8c852423d45951d
Signed-off-by: Selin Dag <sdag@codeaurora.org>
diff --git a/Makefile b/Makefile
index 4cab556..c6f8ec0 100644
--- a/Makefile
+++ b/Makefile
@@ -86,6 +86,7 @@
 ccflags-y += -I$(obj)/nss_hal/include -I$(obj)/nss_data_plane/include -I$(obj)/exports -DNSS_DEBUG_LEVEL=0 -DNSS_PKT_STATS_ENABLED=1
 
 ccflags-y += -DNSS_PM_DEBUG_LEVEL=0 -DNSS_SKB_RECYCLE_SUPPORT=1
+ccflags-y += -DNSS_CACHED_RING=0
 ccflags-y += -Werror
 
 ifneq ($(findstring 3.4, $(KERNELVERSION)),)
diff --git a/nss_core.c b/nss_core.c
index cab289d..f3c149e 100644
--- a/nss_core.c
+++ b/nss_core.c
@@ -74,6 +74,38 @@
 static atomic_t paged_mode;
 
 /*
+ * Cache behavior configuration.
+ */
+#if (NSS_CACHED_RING == 0)
+#define NSS_CORE_DSB()
+#define NSS_CORE_DMA_CACHE_MAINT(start, size, dir)
+#else
+#define NSS_CORE_DSB() dsb()
+#define NSS_CORE_DMA_CACHE_MAINT(start, size, dir) nss_core_dma_cache_maint(start, size, dir)
+
+/*
+ * nss_core_dma_cache_maint()
+ *	Perform the appropriate cache op based on direction
+ */
+static inline void nss_core_dma_cache_maint(void *start, uint32_t size, int direction)
+{
+	switch (direction) {
+	case DMA_FROM_DEVICE: /* invalidate only */
+		dmac_inv_range(start, start + size);
+		break;
+	case DMA_TO_DEVICE: /* writeback only */
+		dmac_clean_range(start, start + size);
+		break;
+	case DMA_BIDIRECTIONAL: /* writeback and invalidate */
+		dmac_flush_range(start, start + size);
+		break;
+	default:
+		BUG();
+	}
+}
+#endif
+
+/*
  * nss_core_update_max_ipv4_conn()
  *	Update the maximum number of configured IPv4 connections
  */
@@ -1212,7 +1244,10 @@
 	n2h_desc_ring = &nss_ctx->n2h_desc_ring[qid];
 	desc_if = &n2h_desc_ring->desc_ring;
 	desc_ring = desc_if->desc;
+	NSS_CORE_DMA_CACHE_MAINT((void *)&if_map->n2h_nss_index[qid], sizeof(uint32_t), DMA_FROM_DEVICE);
+	NSS_CORE_DSB();
 	nss_index = if_map->n2h_nss_index[qid];
+
 	hlos_index = n2h_desc_ring->hlos_index;
 	size = desc_if->size;
 	mask = size - 1;
@@ -1345,12 +1380,27 @@
 		nss_core_rx_pbuf(nss_ctx, desc, &(int_ctx->napi), buffer_type, nbuf);
 
 next:
+		/*
+		 * We are done with the descriptor. Invalidate it
+		 * so we don't read a stale copy next time around.
+		 */
+		NSS_CORE_DMA_CACHE_MAINT((void *)desc, sizeof(*desc), DMA_FROM_DEVICE);
+
 		hlos_index = (hlos_index + 1) & (mask);
 		count_temp--;
 	}
 
+	/*
+	 * Wait for the invalidations to complete before writing the index
+	 */
+	NSS_CORE_DSB();
+
 	n2h_desc_ring->hlos_index = hlos_index;
 	if_map->n2h_hlos_index[qid] = hlos_index;
+
+	NSS_CORE_DMA_CACHE_MAINT((void *)&if_map->n2h_hlos_index[qid], sizeof(uint32_t), DMA_BIDIRECTIONAL);
+	NSS_CORE_DSB();
+
 	return count;
 }
 
@@ -1499,12 +1549,27 @@
 		desc->opaque = (nss_ptr_t)nbuf;
 		desc->buffer = buffer;
 		desc->buffer_type = buffer_type;
+
+		/*
+		 * Flush the descriptor
+		 */
+		NSS_CORE_DMA_CACHE_MAINT((void *)desc, sizeof(*desc), DMA_TO_DEVICE);
+
 		hlos_index = (hlos_index + 1) & (mask);
 		count--;
 	}
 
+	/*
+	 * Wait for the flushes to complete before writing the index
+	 */
+	NSS_CORE_DSB();
+
 	h2n_desc_ring->hlos_index = hlos_index;
 	if_map->h2n_hlos_index[buffer_queue] = hlos_index;
+
+	NSS_CORE_DMA_CACHE_MAINT(&if_map->h2n_hlos_index[buffer_queue], sizeof(uint32_t), DMA_BIDIRECTIONAL);
+	NSS_CORE_DSB();
+
 	NSS_PKT_STATS_INCREMENT(nss_ctx, &nss_top->stats_drv[stats_index]);
 }
 
@@ -1558,12 +1623,27 @@
 		desc->opaque = (nss_ptr_t)nbuf;
 		desc->buffer = buffer;
 		desc->buffer_type = H2N_BUFFER_EMPTY;
+
+		/*
+		 * Flush the descriptor
+		 */
+		NSS_CORE_DMA_CACHE_MAINT((void *)desc, sizeof(*desc), DMA_TO_DEVICE);
+
 		hlos_index = (hlos_index + 1) & (mask);
 		count--;
 	}
 
+	/*
+	 * Wait for the flushes to complete before writing the index
+	 */
+	NSS_CORE_DSB();
+
 	h2n_desc_ring->hlos_index = hlos_index;
 	if_map->h2n_hlos_index[NSS_IF_EMPTY_BUFFER_QUEUE] = hlos_index;
+
+	NSS_CORE_DMA_CACHE_MAINT(&if_map->h2n_hlos_index[NSS_IF_EMPTY_BUFFER_QUEUE], sizeof(uint32_t), DMA_BIDIRECTIONAL);
+	NSS_CORE_DSB();
+
 	NSS_PKT_STATS_INCREMENT(nss_ctx, &nss_top->stats_drv[NSS_STATS_DRV_TX_EMPTY]);
 }
 
@@ -1620,12 +1700,27 @@
 		desc->opaque = (nss_ptr_t)nbuf;
 		desc->buffer = buffer;
 		desc->buffer_type = H2N_BUFFER_EMPTY;
+
+		/*
+		 * Flush the descriptor
+		 */
+		NSS_CORE_DMA_CACHE_MAINT((void *)desc, sizeof(*desc), DMA_TO_DEVICE);
+
 		hlos_index = (hlos_index + 1) & (mask);
 		count--;
 	}
 
+	/*
+	 * Wait for the flushes to complete before writing the index
+	 */
+	NSS_CORE_DSB();
+
 	h2n_desc_ring->hlos_index = hlos_index;
 	if_map->h2n_hlos_index[NSS_IF_EMPTY_BUFFER_QUEUE] = hlos_index;
+
+	NSS_CORE_DMA_CACHE_MAINT(&if_map->h2n_hlos_index[NSS_IF_EMPTY_BUFFER_QUEUE], sizeof(uint32_t), DMA_BIDIRECTIONAL);
+	NSS_CORE_DSB();
+
 	NSS_PKT_STATS_INCREMENT(nss_ctx, &nss_top->stats_drv[NSS_STATS_DRV_TX_EMPTY]);
 }
 
@@ -1646,7 +1741,10 @@
 	/*
 	 * Check how many empty buffers could be filled in queue
 	 */
+	NSS_CORE_DMA_CACHE_MAINT(&if_map->h2n_nss_index[NSS_IF_EMPTY_BUFFER_QUEUE], sizeof(uint32_t), DMA_FROM_DEVICE);
+	NSS_CORE_DSB();
 	nss_index = if_map->h2n_nss_index[NSS_IF_EMPTY_BUFFER_QUEUE];
+
 	hlos_index = h2n_desc_ring->hlos_index;
 	size = h2n_desc_ring->desc_ring.size;
 
@@ -1695,7 +1793,10 @@
 	/*
 	 * Check how many empty buffers could be filled in queue
 	 */
+	NSS_CORE_DMA_CACHE_MAINT((void *)&if_map->h2n_nss_index[NSS_IF_EMPTY_PAGED_BUFFER_QUEUE], sizeof(uint32_t), DMA_FROM_DEVICE);
+	NSS_CORE_DSB();
 	nss_index = if_map->h2n_nss_index[NSS_IF_EMPTY_PAGED_BUFFER_QUEUE];
+
 	hlos_index = h2n_desc_ring->hlos_index;
 	size = h2n_desc_ring->desc_ring.size;
 
@@ -2268,6 +2369,8 @@
 		(nss_ptr_t)nbuf, (uint16_t)(nbuf->data - nbuf->head), nbuf->len,
 		sz, (uint32_t)nbuf->priority, mss, bit_flags);
 
+	NSS_CORE_DMA_CACHE_MAINT((void *)desc, sizeof(*desc), DMA_TO_DEVICE);
+
 	/*
 	 * We are done using the skb fields and can recycle it now
 	 */
@@ -2289,6 +2392,8 @@
 		(nss_ptr_t)nbuf, (uint16_t)(nbuf->data - nbuf->head), nbuf->len,
 		(uint16_t)skb_end_offset(nbuf), (uint32_t)nbuf->priority, mss, bit_flags);
 
+	NSS_CORE_DMA_CACHE_MAINT((void *)desc, sizeof(*desc), DMA_TO_DEVICE);
+
 	NSS_PKT_STATS_INCREMENT(nss_ctx, &nss_ctx->nss_top->stats_drv[NSS_STATS_DRV_TX_SIMPLE]);
 	return 1;
 }
@@ -2337,6 +2442,8 @@
 		(nss_ptr_t)NULL, nbuf->data - nbuf->head, nbuf->len - nbuf->data_len,
 		skb_end_offset(nbuf), (uint32_t)nbuf->priority, mss, bit_flags | H2N_BIT_FLAG_FIRST_SEGMENT);
 
+	NSS_CORE_DMA_CACHE_MAINT((void *)desc, sizeof(*desc), DMA_TO_DEVICE);
+
 	/*
 	 * Now handle rest of the fragments.
 	 */
@@ -2358,6 +2465,8 @@
 		nss_core_write_one_descriptor(desc, buffer_type, buffer, if_num,
 			(nss_ptr_t)NULL, 0, skb_frag_size(frag), skb_frag_size(frag),
 			nbuf->priority, mss, bit_flags);
+
+		NSS_CORE_DMA_CACHE_MAINT((void *)desc, sizeof(*desc), DMA_TO_DEVICE);
 	}
 
 	/*
@@ -2372,6 +2481,9 @@
 	desc->bit_flags |= H2N_BIT_FLAG_LAST_SEGMENT;
 	desc->bit_flags &= ~(H2N_BIT_FLAG_DISCARD);
 	desc->opaque = (nss_ptr_t)nbuf;
+
+	NSS_CORE_DMA_CACHE_MAINT((void *)desc, sizeof(*desc), DMA_TO_DEVICE);
+
 	NSS_PKT_STATS_INCREMENT(nss_ctx, &nss_ctx->nss_top->stats_drv[NSS_STATS_DRV_TX_NR_FRAGS]);
 	return i+1;
 }
@@ -2419,6 +2531,8 @@
 		(nss_ptr_t)nbuf, nbuf->data - nbuf->head, nbuf->len - nbuf->data_len,
 		skb_end_offset(nbuf), (uint32_t)nbuf->priority, mss, bit_flags | H2N_BIT_FLAG_FIRST_SEGMENT);
 
+	NSS_CORE_DMA_CACHE_MAINT((void *)desc, sizeof(*desc), DMA_TO_DEVICE);
+
 	/*
 	 * Set everyone but first fragment/descriptor as discard
 	 */
@@ -2459,6 +2573,8 @@
 			(nss_ptr_t)NULL, iter->data - iter->head, iter->len - iter->data_len,
 			skb_end_offset(iter), iter->priority, mss, bit_flags);
 
+		NSS_CORE_DMA_CACHE_MAINT((void *)desc, sizeof(*desc), DMA_TO_DEVICE);
+
 		i++;
 	}
 
@@ -2466,6 +2582,8 @@
 	 * Update bit flag for last descriptor.
 	 */
 	desc->bit_flags |= H2N_BIT_FLAG_LAST_SEGMENT;
+	NSS_CORE_DMA_CACHE_MAINT((void *)desc, sizeof(*desc), DMA_TO_DEVICE);
+
 	NSS_PKT_STATS_INCREMENT(nss_ctx, &nss_ctx->nss_top->stats_drv[NSS_STATS_DRV_TX_FRAGLIST]);
 	return i+1;
 }
@@ -2524,7 +2642,10 @@
 	 * We need to work out if there's sufficient space in our transmit descriptor
 	 * ring to place all the segments of a nbuf.
 	 */
+	NSS_CORE_DMA_CACHE_MAINT((void *)&if_map->h2n_nss_index[qid], sizeof(uint32_t), DMA_FROM_DEVICE);
+	NSS_CORE_DSB();
 	nss_index = if_map->h2n_nss_index[qid];
+
 	hlos_index = h2n_desc_ring->hlos_index;
 
 	count = ((nss_index - hlos_index - 1) + size) & (mask);
@@ -2610,12 +2731,20 @@
 	}
 
 	/*
+	 * Sync to ensure all flushes of the descriptors are complete
+	 */
+	NSS_CORE_DSB();
+
+	/*
 	 * Update our host index so the NSS sees we've written a new descriptor.
 	 */
 	hlos_index = (hlos_index + count) & mask;
 	h2n_desc_ring->hlos_index = hlos_index;
 	if_map->h2n_hlos_index[qid] = hlos_index;
 
+	NSS_CORE_DMA_CACHE_MAINT(&if_map->h2n_hlos_index[qid], sizeof(uint32_t), DMA_BIDIRECTIONAL);
+	NSS_CORE_DSB();
+
 #ifdef CONFIG_DEBUG_KMEMLEAK
 	/*
 	 * We are holding this skb in NSS FW, let kmemleak know about it.
diff --git a/nss_hal/fsm9010/nss_hal_pvt.c b/nss_hal/fsm9010/nss_hal_pvt.c
index d5f2b33..0f04ea0 100644
--- a/nss_hal/fsm9010/nss_hal_pvt.c
+++ b/nss_hal/fsm9010/nss_hal_pvt.c
@@ -151,7 +151,11 @@
 		goto out;
 	}
 
+#if (NSS_CACHED_RING == 1)
+	npd->vmap = ioremap_cache(npd->vphys, resource_size(&res_vphys));
+#else
 	npd->vmap = ioremap_nocache(npd->vphys, resource_size(&res_vphys));
+#endif
 	if (!npd->vmap) {
 		nss_info_always("%p: nss%d: ioremap() fail for vphys\n", nss_ctx, nss_ctx->id);
 		goto out;
diff --git a/nss_hal/ipq806x/nss_hal_pvt.c b/nss_hal/ipq806x/nss_hal_pvt.c
index e49364d..8c7f52c 100644
--- a/nss_hal/ipq806x/nss_hal_pvt.c
+++ b/nss_hal/ipq806x/nss_hal_pvt.c
@@ -464,7 +464,11 @@
 		goto out;
 	}
 
+#if (NSS_CACHED_RING == 1)
+	npd->vmap = ioremap_cache(npd->vphys, resource_size(&res_vphys));
+#else
 	npd->vmap = ioremap_nocache(npd->vphys, resource_size(&res_vphys));
+#endif
 	if (!npd->vmap) {
 		nss_info_always("%p: nss%d: ioremap() fail for vphys\n", nss_ctx, nss_ctx->id);
 		goto out;
diff --git a/nss_hal/ipq807x/nss_hal_pvt.c b/nss_hal/ipq807x/nss_hal_pvt.c
index b81ef1b..154492f 100644
--- a/nss_hal/ipq807x/nss_hal_pvt.c
+++ b/nss_hal/ipq807x/nss_hal_pvt.c
@@ -224,7 +224,11 @@
 		goto out;
 	}
 
+#if (NSS_CACHED_RING == 1)
+	npd->vmap = ioremap_cache(npd->vphys, resource_size(&res_vphys));
+#else
 	npd->vmap = ioremap_nocache(npd->vphys, resource_size(&res_vphys));
+#endif
 	if (!npd->vmap) {
 		nss_info_always("%p: nss%d: ioremap() fail for vphys\n", nss_ctx, nss_ctx->id);
 		goto out;