vlib: don't use vector for keeping buffer indices in the pool

Type: refactor

Change-Id: I72221b97d7e0bf5c93e20bbda4473ca67bfcdeb4
Signed-off-by: Damjan Marion <damarion@cisco.com>
diff --git a/src/plugins/dpdk/buffer.c b/src/plugins/dpdk/buffer.c
index a1c1ea1..d7a7916 100644
--- a/src/plugins/dpdk/buffer.c
+++ b/src/plugins/dpdk/buffer.c
@@ -40,7 +40,7 @@
   struct rte_mempool *mp, *nmp;
   struct rte_pktmbuf_pool_private priv;
   enum rte_iova_mode iova_mode;
-  u32 *bi;
+  u32 i;
   u8 *name = 0;
 
   u32 elt_size =
@@ -54,7 +54,7 @@
 
   /* normal mempool */
   name = format (name, "vpp pool %u%c", bp->index, 0);
-  mp = rte_mempool_create_empty ((char *) name, vec_len (bp->buffers),
+  mp = rte_mempool_create_empty ((char *) name, bp->n_buffers,
 				 elt_size, 512, sizeof (priv),
 				 bp->numa_node, 0);
   if (!mp)
@@ -68,7 +68,7 @@
 
   /* non-cached mempool */
   name = format (name, "vpp pool %u (no cache)%c", bp->index, 0);
-  nmp = rte_mempool_create_empty ((char *) name, vec_len (bp->buffers),
+  nmp = rte_mempool_create_empty ((char *) name, bp->n_buffers,
 				  elt_size, 0, sizeof (priv),
 				  bp->numa_node, 0);
   if (!nmp)
@@ -99,11 +99,10 @@
   iova_mode = rte_eal_iova_mode ();
 
   /* populate mempool object buffer header */
-  /* *INDENT-OFF* */
-  vec_foreach (bi, bp->buffers)
+  for (i = 0; i < bp->n_buffers; i++)
     {
       struct rte_mempool_objhdr *hdr;
-      vlib_buffer_t *b = vlib_get_buffer (vm, *bi);
+      vlib_buffer_t *b = vlib_get_buffer (vm, bp->buffers[i]);
       struct rte_mbuf *mb = rte_mbuf_from_vlib_buffer (b);
       hdr = (struct rte_mempool_objhdr *) RTE_PTR_SUB (mb, sizeof (*hdr));
       hdr->mp = mp;
@@ -114,7 +113,6 @@
       mp->populated_size++;
       nmp->populated_size++;
     }
-  /* *INDENT-ON* */
 
   /* call the object initializers */
   rte_mempool_obj_iter (mp, rte_pktmbuf_init, 0);
@@ -127,14 +125,12 @@
 					  (buffer_mem_start, *bp->buffers,
 					   0)), sizeof (struct rte_mbuf));
 
-  /* *INDENT-OFF* */
-  vec_foreach (bi, bp->buffers)
+  for (i = 0; i < bp->n_buffers; i++)
     {
       vlib_buffer_t *b;
-      b = vlib_buffer_ptr_from_index (buffer_mem_start, *bi, 0);
+      b = vlib_buffer_ptr_from_index (buffer_mem_start, bp->buffers[i], 0);
       vlib_buffer_copy_template (b, &bp->buffer_template);
     }
-  /* *INDENT-ON* */
 
   /* map DMA pages if at least one physical device exists */
   if (rte_eth_dev_count_avail ())
diff --git a/src/vlib/buffer.c b/src/vlib/buffer.c
index c4c05bb..9838e23 100644
--- a/src/vlib/buffer.c
+++ b/src/vlib/buffer.c
@@ -552,9 +552,9 @@
   n_alloc_per_page = (1ULL << m->log2_page_size) / alloc_size;
 
   /* preallocate buffer indices memory */
-  vec_validate_aligned (bp->buffers, m->n_pages * n_alloc_per_page,
-			CLIB_CACHE_LINE_BYTES);
-  vec_reset_length (bp->buffers);
+  bp->n_buffers = m->n_pages * n_alloc_per_page;
+  bp->buffers = clib_mem_alloc_aligned (bp->n_buffers * sizeof (u32),
+					CLIB_CACHE_LINE_BYTES);
 
   clib_spinlock_init (&bp->lock);
 
@@ -571,11 +571,11 @@
 
 	bi = vlib_get_buffer_index (vm, (vlib_buffer_t *) p);
 
-	vec_add1_aligned (bp->buffers, bi, CLIB_CACHE_LINE_BYTES);
+	bp->buffers[bp->n_avail++] = bi;
+
 	vlib_get_buffer (vm, bi);
       }
 
-  bp->n_buffers = vec_len (bp->buffers);
   return bp->index;
 }
 
@@ -594,14 +594,14 @@
 
   /* *INDENT-OFF* */
   vec_foreach (bpt, bp->threads)
-    cached += vec_len (bpt->cached_buffers);
+    cached += bpt->n_cached;
   /* *INDENT-ON* */
 
   s = format (s, "%-20s%=6d%=6d%=6u%=11u%=6u%=8u%=8u%=8u",
 	      bp->name, bp->index, bp->numa_node, bp->data_size +
 	      sizeof (vlib_buffer_t) + vm->buffer_main->ext_hdr_size,
-	      bp->data_size, bp->n_buffers, vec_len (bp->buffers), cached,
-	      bp->n_buffers - vec_len (bp->buffers) - cached);
+	      bp->data_size, bp->n_buffers, bp->n_avail, cached,
+	      bp->n_buffers - bp->n_avail - cached);
 
   return s;
 }
@@ -736,7 +736,7 @@
 
   /* *INDENT-OFF* */
   vec_foreach (bpt, bp->threads)
-    cached += vec_len (bpt->cached_buffers);
+    cached += bpt->n_cached;
   /* *INDENT-ON* */
 
   clib_spinlock_unlock (&bp->lock);
@@ -763,7 +763,7 @@
   if (!bp)
     return;
 
-  e->value = bp->n_buffers - vec_len (bp->buffers) - buffer_get_cached (bp);
+  e->value = bp->n_buffers - bp->n_avail - buffer_get_cached (bp);
 }
 
 static void
@@ -775,7 +775,7 @@
   if (!bp)
     return;
 
-  e->value = vec_len (bp->buffers);
+  e->value = bp->n_avail;
 }
 
 static void
diff --git a/src/vlib/buffer.h b/src/vlib/buffer.h
index c8761af..c2ca821 100644
--- a/src/vlib/buffer.h
+++ b/src/vlib/buffer.h
@@ -411,12 +411,15 @@
 /* Forward declaration. */
 struct vlib_main_t;
 
+#define VLIB_BUFFER_POOL_PER_THREAD_CACHE_SZ 512
+
 typedef struct
 {
   CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
-  u32 *cached_buffers;
-  u32 n_alloc;
+  u32 cached_buffers[VLIB_BUFFER_POOL_PER_THREAD_CACHE_SZ];
+  u32 n_cached;
 } vlib_buffer_pool_thread_t;
+
 typedef struct
 {
   CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
@@ -428,6 +431,7 @@
   u32 physmem_map_index;
   u32 data_size;
   u32 n_buffers;
+  u32 n_avail;
   u32 *buffers;
   u8 *name;
   clib_spinlock_t lock;
diff --git a/src/vlib/buffer_funcs.h b/src/vlib/buffer_funcs.h
index 7480326..2ba9f1c 100644
--- a/src/vlib/buffer_funcs.h
+++ b/src/vlib/buffer_funcs.h
@@ -491,19 +491,19 @@
   ASSERT (bp->buffers);
 
   clib_spinlock_lock (&bp->lock);
-  len = vec_len (bp->buffers);
+  len = bp->n_avail;
   if (PREDICT_TRUE (n_buffers < len))
     {
       len -= n_buffers;
       vlib_buffer_copy_indices (buffers, bp->buffers + len, n_buffers);
-      _vec_len (bp->buffers) = len;
+      bp->n_avail = len;
       clib_spinlock_unlock (&bp->lock);
       return n_buffers;
     }
   else
     {
       vlib_buffer_copy_indices (buffers, bp->buffers, len);
-      _vec_len (bp->buffers) = 0;
+      bp->n_avail = 0;
       clib_spinlock_unlock (&bp->lock);
       return len;
     }
@@ -533,14 +533,26 @@
 
   dst = buffers;
   n_left = n_buffers;
-  len = vec_len (bpt->cached_buffers);
+  len = bpt->n_cached;
 
   /* per-thread cache contains enough buffers */
   if (len >= n_buffers)
     {
       src = bpt->cached_buffers + len - n_buffers;
       vlib_buffer_copy_indices (dst, src, n_buffers);
-      _vec_len (bpt->cached_buffers) -= n_buffers;
+      bpt->n_cached -= n_buffers;
+
+      if (CLIB_DEBUG > 0)
+	vlib_buffer_validate_alloc_free (vm, buffers, n_buffers,
+					 VLIB_BUFFER_KNOWN_FREE);
+      return n_buffers;
+    }
+
+  /* alloc not smaller than cache - take buffers directly from main pool */
+  if (n_buffers >= VLIB_BUFFER_POOL_PER_THREAD_CACHE_SZ)
+    {
+      n_buffers = vlib_buffer_pool_get (vm, buffer_pool_index, buffers,
+					n_buffers);
 
       if (CLIB_DEBUG > 0)
 	vlib_buffer_validate_alloc_free (vm, buffers, n_buffers,
@@ -552,23 +564,22 @@
   if (len)
     {
       vlib_buffer_copy_indices (dst, bpt->cached_buffers, len);
-      _vec_len (bpt->cached_buffers) = 0;
+      bpt->n_cached = 0;
       dst += len;
       n_left -= len;
     }
 
   len = round_pow2 (n_left, 32);
-  vec_validate_aligned (bpt->cached_buffers, len - 1, CLIB_CACHE_LINE_BYTES);
   len = vlib_buffer_pool_get (vm, buffer_pool_index, bpt->cached_buffers,
 			      len);
-  _vec_len (bpt->cached_buffers) = len;
+  bpt->n_cached = len;
 
   if (len)
     {
       u32 n_copy = clib_min (len, n_left);
       src = bpt->cached_buffers + len - n_copy;
       vlib_buffer_copy_indices (dst, src, n_copy);
-      _vec_len (bpt->cached_buffers) -= n_copy;
+      bpt->n_cached -= n_copy;
       n_left -= n_copy;
     }
 
@@ -681,26 +692,33 @@
 		      u32 * buffers, u32 n_buffers)
 {
   vlib_buffer_pool_t *bp = vlib_get_buffer_pool (vm, buffer_pool_index);
-  vlib_buffer_pool_thread_t *bpt =
-    vec_elt_at_index (bp->threads, vm->thread_index);
+  vlib_buffer_pool_thread_t *bpt = vec_elt_at_index (bp->threads,
+						     vm->thread_index);
+  u32 n_cached, n_empty;
 
   if (CLIB_DEBUG > 0)
     vlib_buffer_validate_alloc_free (vm, buffers, n_buffers,
 				     VLIB_BUFFER_KNOWN_ALLOCATED);
 
-  vec_add_aligned (bpt->cached_buffers, buffers, n_buffers,
-		   CLIB_CACHE_LINE_BYTES);
-
-  if (vec_len (bpt->cached_buffers) > 4 * VLIB_FRAME_SIZE)
+  n_cached = bpt->n_cached;
+  n_empty = VLIB_BUFFER_POOL_PER_THREAD_CACHE_SZ - n_cached;
+  if (n_buffers <= n_empty)
     {
-      clib_spinlock_lock (&bp->lock);
-      /* keep last stored buffers, as they are more likely hot in the cache */
-      vec_add_aligned (bp->buffers, bpt->cached_buffers, VLIB_FRAME_SIZE,
-		       CLIB_CACHE_LINE_BYTES);
-      vec_delete (bpt->cached_buffers, VLIB_FRAME_SIZE, 0);
-      bpt->n_alloc -= VLIB_FRAME_SIZE;
-      clib_spinlock_unlock (&bp->lock);
+      vlib_buffer_copy_indices (bpt->cached_buffers + n_cached,
+				buffers, n_buffers);
+      bpt->n_cached = n_cached + n_buffers;
+      return;
     }
+
+  vlib_buffer_copy_indices (bpt->cached_buffers + n_cached,
+			    buffers + n_buffers - n_empty, n_empty);
+  bpt->n_cached = VLIB_BUFFER_POOL_PER_THREAD_CACHE_SZ;
+
+  clib_spinlock_lock (&bp->lock);
+  vlib_buffer_copy_indices (bp->buffers + bp->n_avail, buffers,
+			    n_buffers - n_empty);
+  bp->n_avail += n_buffers - n_empty;
+  clib_spinlock_unlock (&bp->lock);
 }
 
 static_always_inline void