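vlib: don't use vector for keeping buffer indices in the pool

The pool's free-index array (bp->buffers) is now allocated once with
clib_mem_alloc_aligned, sized for n_buffers entries, and the number of
free indices is tracked in the new bp->n_avail field instead of vec_len.

The per-thread cache becomes a fixed array of
VLIB_BUFFER_POOL_PER_THREAD_CACHE_SZ (512) indices with an explicit
n_cached counter. Allocations of 512 or more buffers bypass the
per-thread cache and are taken directly from the main pool. On free,
indices that do not fit into the per-thread cache are returned to the
main pool under the pool lock.

The dpdk plugin walks bp->buffers using bp->n_buffers instead of
vec_foreach.
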
Type: refactor
Change-Id: I72221b97d7e0bf5c93e20bbda4473ca67bfcdeb4
Signed-off-by: Damjan Marion <damarion@cisco.com>
diff --git a/src/plugins/dpdk/buffer.c b/src/plugins/dpdk/buffer.c
index a1c1ea1..d7a7916 100644
--- a/src/plugins/dpdk/buffer.c
+++ b/src/plugins/dpdk/buffer.c
@@ -40,7 +40,7 @@
struct rte_mempool *mp, *nmp;
struct rte_pktmbuf_pool_private priv;
enum rte_iova_mode iova_mode;
- u32 *bi;
+ u32 i;
u8 *name = 0;
u32 elt_size =
@@ -54,7 +54,7 @@
/* normal mempool */
name = format (name, "vpp pool %u%c", bp->index, 0);
- mp = rte_mempool_create_empty ((char *) name, vec_len (bp->buffers),
+ mp = rte_mempool_create_empty ((char *) name, bp->n_buffers,
elt_size, 512, sizeof (priv),
bp->numa_node, 0);
if (!mp)
@@ -68,7 +68,7 @@
/* non-cached mempool */
name = format (name, "vpp pool %u (no cache)%c", bp->index, 0);
- nmp = rte_mempool_create_empty ((char *) name, vec_len (bp->buffers),
+ nmp = rte_mempool_create_empty ((char *) name, bp->n_buffers,
elt_size, 0, sizeof (priv),
bp->numa_node, 0);
if (!nmp)
@@ -99,11 +99,10 @@
iova_mode = rte_eal_iova_mode ();
/* populate mempool object buffer header */
- /* *INDENT-OFF* */
- vec_foreach (bi, bp->buffers)
+ for (i = 0; i < bp->n_buffers; i++)
{
struct rte_mempool_objhdr *hdr;
- vlib_buffer_t *b = vlib_get_buffer (vm, *bi);
+ vlib_buffer_t *b = vlib_get_buffer (vm, bp->buffers[i]);
struct rte_mbuf *mb = rte_mbuf_from_vlib_buffer (b);
hdr = (struct rte_mempool_objhdr *) RTE_PTR_SUB (mb, sizeof (*hdr));
hdr->mp = mp;
@@ -114,7 +113,6 @@
mp->populated_size++;
nmp->populated_size++;
}
- /* *INDENT-ON* */
/* call the object initializers */
rte_mempool_obj_iter (mp, rte_pktmbuf_init, 0);
@@ -127,14 +125,12 @@
(buffer_mem_start, *bp->buffers,
0)), sizeof (struct rte_mbuf));
- /* *INDENT-OFF* */
- vec_foreach (bi, bp->buffers)
+ for (i = 0; i < bp->n_buffers; i++)
{
vlib_buffer_t *b;
- b = vlib_buffer_ptr_from_index (buffer_mem_start, *bi, 0);
+ b = vlib_buffer_ptr_from_index (buffer_mem_start, bp->buffers[i], 0);
vlib_buffer_copy_template (b, &bp->buffer_template);
}
- /* *INDENT-ON* */
/* map DMA pages if at least one physical device exists */
if (rte_eth_dev_count_avail ())
diff --git a/src/vlib/buffer.c b/src/vlib/buffer.c
index c4c05bb..9838e23 100644
--- a/src/vlib/buffer.c
+++ b/src/vlib/buffer.c
@@ -552,9 +552,9 @@
n_alloc_per_page = (1ULL << m->log2_page_size) / alloc_size;
/* preallocate buffer indices memory */
- vec_validate_aligned (bp->buffers, m->n_pages * n_alloc_per_page,
- CLIB_CACHE_LINE_BYTES);
- vec_reset_length (bp->buffers);
+ bp->n_buffers = m->n_pages * n_alloc_per_page;
+ bp->buffers = clib_mem_alloc_aligned (bp->n_buffers * sizeof (u32),
+ CLIB_CACHE_LINE_BYTES);
clib_spinlock_init (&bp->lock);
@@ -571,11 +571,11 @@
bi = vlib_get_buffer_index (vm, (vlib_buffer_t *) p);
- vec_add1_aligned (bp->buffers, bi, CLIB_CACHE_LINE_BYTES);
+ bp->buffers[bp->n_avail++] = bi;
+
vlib_get_buffer (vm, bi);
}
- bp->n_buffers = vec_len (bp->buffers);
return bp->index;
}
@@ -594,14 +594,14 @@
/* *INDENT-OFF* */
vec_foreach (bpt, bp->threads)
- cached += vec_len (bpt->cached_buffers);
+ cached += bpt->n_cached;
/* *INDENT-ON* */
s = format (s, "%-20s%=6d%=6d%=6u%=11u%=6u%=8u%=8u%=8u",
bp->name, bp->index, bp->numa_node, bp->data_size +
sizeof (vlib_buffer_t) + vm->buffer_main->ext_hdr_size,
- bp->data_size, bp->n_buffers, vec_len (bp->buffers), cached,
- bp->n_buffers - vec_len (bp->buffers) - cached);
+ bp->data_size, bp->n_buffers, bp->n_avail, cached,
+ bp->n_buffers - bp->n_avail - cached);
return s;
}
@@ -736,7 +736,7 @@
/* *INDENT-OFF* */
vec_foreach (bpt, bp->threads)
- cached += vec_len (bpt->cached_buffers);
+ cached += bpt->n_cached;
/* *INDENT-ON* */
clib_spinlock_unlock (&bp->lock);
@@ -763,7 +763,7 @@
if (!bp)
return;
- e->value = bp->n_buffers - vec_len (bp->buffers) - buffer_get_cached (bp);
+ e->value = bp->n_buffers - bp->n_avail - buffer_get_cached (bp);
}
static void
@@ -775,7 +775,7 @@
if (!bp)
return;
- e->value = vec_len (bp->buffers);
+ e->value = bp->n_avail;
}
static void
diff --git a/src/vlib/buffer.h b/src/vlib/buffer.h
index c8761af..c2ca821 100644
--- a/src/vlib/buffer.h
+++ b/src/vlib/buffer.h
@@ -411,12 +411,15 @@
/* Forward declaration. */
struct vlib_main_t;
+#define VLIB_BUFFER_POOL_PER_THREAD_CACHE_SZ 512
+
typedef struct
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
- u32 *cached_buffers;
- u32 n_alloc;
+ u32 cached_buffers[VLIB_BUFFER_POOL_PER_THREAD_CACHE_SZ];
+ u32 n_cached;
} vlib_buffer_pool_thread_t;
+
typedef struct
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
@@ -428,6 +431,7 @@
u32 physmem_map_index;
u32 data_size;
u32 n_buffers;
+ u32 n_avail;
u32 *buffers;
u8 *name;
clib_spinlock_t lock;
diff --git a/src/vlib/buffer_funcs.h b/src/vlib/buffer_funcs.h
index 7480326..2ba9f1c 100644
--- a/src/vlib/buffer_funcs.h
+++ b/src/vlib/buffer_funcs.h
@@ -491,19 +491,19 @@
ASSERT (bp->buffers);
clib_spinlock_lock (&bp->lock);
- len = vec_len (bp->buffers);
+ len = bp->n_avail;
if (PREDICT_TRUE (n_buffers < len))
{
len -= n_buffers;
vlib_buffer_copy_indices (buffers, bp->buffers + len, n_buffers);
- _vec_len (bp->buffers) = len;
+ bp->n_avail = len;
clib_spinlock_unlock (&bp->lock);
return n_buffers;
}
else
{
vlib_buffer_copy_indices (buffers, bp->buffers, len);
- _vec_len (bp->buffers) = 0;
+ bp->n_avail = 0;
clib_spinlock_unlock (&bp->lock);
return len;
}
@@ -533,14 +533,26 @@
dst = buffers;
n_left = n_buffers;
- len = vec_len (bpt->cached_buffers);
+ len = bpt->n_cached;
/* per-thread cache contains enough buffers */
if (len >= n_buffers)
{
src = bpt->cached_buffers + len - n_buffers;
vlib_buffer_copy_indices (dst, src, n_buffers);
- _vec_len (bpt->cached_buffers) -= n_buffers;
+ bpt->n_cached -= n_buffers;
+
+ if (CLIB_DEBUG > 0)
+ vlib_buffer_validate_alloc_free (vm, buffers, n_buffers,
+ VLIB_BUFFER_KNOWN_FREE);
+ return n_buffers;
+ }
+
+ /* alloc bigger than cache - take buffers directly from main pool */
+ if (n_buffers >= VLIB_BUFFER_POOL_PER_THREAD_CACHE_SZ)
+ {
+ n_buffers = vlib_buffer_pool_get (vm, buffer_pool_index, buffers,
+ n_buffers);
if (CLIB_DEBUG > 0)
vlib_buffer_validate_alloc_free (vm, buffers, n_buffers,
@@ -552,23 +564,22 @@
if (len)
{
vlib_buffer_copy_indices (dst, bpt->cached_buffers, len);
- _vec_len (bpt->cached_buffers) = 0;
+ bpt->n_cached = 0;
dst += len;
n_left -= len;
}
len = round_pow2 (n_left, 32);
- vec_validate_aligned (bpt->cached_buffers, len - 1, CLIB_CACHE_LINE_BYTES);
len = vlib_buffer_pool_get (vm, buffer_pool_index, bpt->cached_buffers,
len);
- _vec_len (bpt->cached_buffers) = len;
+ bpt->n_cached = len;
if (len)
{
u32 n_copy = clib_min (len, n_left);
src = bpt->cached_buffers + len - n_copy;
vlib_buffer_copy_indices (dst, src, n_copy);
- _vec_len (bpt->cached_buffers) -= n_copy;
+ bpt->n_cached -= n_copy;
n_left -= n_copy;
}
@@ -681,26 +692,33 @@
u32 * buffers, u32 n_buffers)
{
vlib_buffer_pool_t *bp = vlib_get_buffer_pool (vm, buffer_pool_index);
- vlib_buffer_pool_thread_t *bpt =
- vec_elt_at_index (bp->threads, vm->thread_index);
+ vlib_buffer_pool_thread_t *bpt = vec_elt_at_index (bp->threads,
+ vm->thread_index);
+ u32 n_cached, n_empty;
if (CLIB_DEBUG > 0)
vlib_buffer_validate_alloc_free (vm, buffers, n_buffers,
VLIB_BUFFER_KNOWN_ALLOCATED);
- vec_add_aligned (bpt->cached_buffers, buffers, n_buffers,
- CLIB_CACHE_LINE_BYTES);
-
- if (vec_len (bpt->cached_buffers) > 4 * VLIB_FRAME_SIZE)
+ n_cached = bpt->n_cached;
+ n_empty = VLIB_BUFFER_POOL_PER_THREAD_CACHE_SZ - n_cached;
+ if (n_buffers <= n_empty)
{
- clib_spinlock_lock (&bp->lock);
- /* keep last stored buffers, as they are more likely hot in the cache */
- vec_add_aligned (bp->buffers, bpt->cached_buffers, VLIB_FRAME_SIZE,
- CLIB_CACHE_LINE_BYTES);
- vec_delete (bpt->cached_buffers, VLIB_FRAME_SIZE, 0);
- bpt->n_alloc -= VLIB_FRAME_SIZE;
- clib_spinlock_unlock (&bp->lock);
+ vlib_buffer_copy_indices (bpt->cached_buffers + n_cached,
+ buffers, n_buffers);
+ bpt->n_cached = n_cached + n_buffers;
+ return;
}
+
+ vlib_buffer_copy_indices (bpt->cached_buffers + n_cached,
+ buffers + n_buffers - n_empty, n_empty);
+ bpt->n_cached = VLIB_BUFFER_POOL_PER_THREAD_CACHE_SZ;
+
+ clib_spinlock_lock (&bp->lock);
+ vlib_buffer_copy_indices (bp->buffers + bp->n_avail, buffers,
+ n_buffers - n_empty);
+ bp->n_avail += n_buffers - n_empty;
+ clib_spinlock_unlock (&bp->lock);
}
static_always_inline void