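vppinfra: vector perf improvements

Replace _vec_realloc () and _vec_realloc_inline () with
_vec_alloc_internal (), _vec_realloc_internal () and
_vec_resize_internal (), which receive element size, header size,
alignment and heap through a vec_attr_t. Add a grow_elts field to
vec_header_t holding the number of elements (capped at 255) the vector
can grow by without reallocation, so the inline add/resize/validate
helpers can skip the out-of-line call when the recorded room suffices.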

Type: improvement
Change-Id: I37c187af80c21b8fb1ab15af112527a837e0df9e
Signed-off-by: Damjan Marion <damarion@cisco.com>
diff --git a/src/vlib/node_funcs.h b/src/vlib/node_funcs.h
index df8ec7c..5b4d2a4 100644
--- a/src/vlib/node_funcs.h
+++ b/src/vlib/node_funcs.h
@@ -836,6 +836,7 @@
 {
   uword p_flags, add_to_pending, delete_from_wheel;
   u8 *data_to_be_written_by_caller;
+  vec_attr_t va = { .elt_sz = n_data_elt_bytes };
 
   ASSERT (n->type == VLIB_NODE_TYPE_PROCESS);
 
@@ -856,9 +857,7 @@
 
     l = vec_len (data_vec);
 
-    data_vec =
-      _vec_realloc (data_vec, l + n_data_elts, n_data_elt_bytes,
-		    /* header_bytes */ 0, /* data_align */ 0, /* heap */ 0);
+    data_vec = _vec_realloc_internal (data_vec, l + n_data_elts, &va);
 
     p->pending_event_data_by_type_index[t] = data_vec;
     data_to_be_written_by_caller = data_vec + l * n_data_elt_bytes;
diff --git a/src/vppinfra/elf.h b/src/vppinfra/elf.h
index 8d98931..56869f1 100644
--- a/src/vppinfra/elf.h
+++ b/src/vppinfra/elf.h
@@ -966,10 +966,10 @@
   result = 0;
   if (vec_len (s->contents) > 0)
     {
+      vec_attr_t va = { .elt_sz = elt_size };
       /* Make vector copy of contents with given element size. */
       result =
-	_vec_realloc (result, vec_len (s->contents) / elt_size, elt_size,
-		      /* header_bytes */ 0, /* align */ 0, 0);
+	_vec_realloc_internal (result, vec_len (s->contents) / elt_size, &va);
       clib_memcpy (result, s->contents, vec_len (s->contents));
     }
 
diff --git a/src/vppinfra/fifo.c b/src/vppinfra/fifo.c
index af68765..2b1cfea 100644
--- a/src/vppinfra/fifo.c
+++ b/src/vppinfra/fifo.c
@@ -84,6 +84,9 @@
   uword n_old_elts;
   uword n_copy_bytes, n_zero_bytes;
   clib_fifo_header_t *f_new, *f_old;
+  vec_attr_t va = { .elt_sz = elt_bytes,
+		    .hdr_sz = sizeof (clib_fifo_header_t),
+		    .align = align };
 
   n_old_elts = clib_fifo_elts (v_old);
   n_new_elts += n_old_elts;
@@ -92,9 +95,7 @@
   else
     n_new_elts = max_pow2 (n_new_elts);
 
-  v_new = _vec_realloc (0, n_new_elts, elt_bytes, sizeof (clib_fifo_header_t),
-			align, 0);
-
+  v_new = _vec_alloc_internal (n_new_elts, &va);
   f_new = clib_fifo_header (v_new);
   f_new->head_index = 0;
   f_new->tail_index = n_old_elts;
diff --git a/src/vppinfra/hash.c b/src/vppinfra/hash.c
index 7deff4a..76f71d3 100644
--- a/src/vppinfra/hash.c
+++ b/src/vppinfra/hash.c
@@ -548,6 +548,7 @@
   hash_t *h;
   uword log2_pair_size;
   void *v;
+  vec_attr_t va = { .hdr_sz = sizeof (h[0]), .align = sizeof (hash_pair_t) };
 
   /* Size of hash is power of 2 >= ELTS and larger than
      number of bits in is_user bitmap elements. */
@@ -558,8 +559,8 @@
   if (h_user)
     log2_pair_size = h_user->log2_pair_size;
 
-  v = _vec_realloc (0, elts, (1 << log2_pair_size) * sizeof (hash_pair_t),
-		    sizeof (h[0]), sizeof (hash_pair_t), 0);
+  va.elt_sz = (1 << log2_pair_size) * sizeof (hash_pair_t);
+  v = _vec_alloc_internal (elts, &va);
   h = hash_header (v);
 
   if (h_user)
diff --git a/src/vppinfra/heap.c b/src/vppinfra/heap.c
index 066756b..7db8142 100644
--- a/src/vppinfra/heap.c
+++ b/src/vppinfra/heap.c
@@ -413,6 +413,9 @@
   if (!e)
     {
       uword max_len;
+      vec_attr_t va = { .elt_sz = elt_bytes,
+			.hdr_sz = sizeof (h[0]),
+			.align = HEAP_DATA_ALIGN };
 
       offset = vec_len (v);
       max_len = heap_get_max_len (v);
@@ -422,8 +425,7 @@
 
       h = heap_header (v);
       if (!v || !(h->flags & HEAP_IS_STATIC))
-	v = _vec_realloc (v, offset + align_size, elt_bytes, sizeof (h[0]),
-			  HEAP_DATA_ALIGN, 0);
+	v = _vec_realloc_internal (v, offset + align_size, &va);
       else
 	vec_inc_len (v, align_size);
 
diff --git a/src/vppinfra/heap.h b/src/vppinfra/heap.h
index f496fe0..45f3131 100644
--- a/src/vppinfra/heap.h
+++ b/src/vppinfra/heap.h
@@ -185,6 +185,9 @@
 _heap_dup (void *v_old, uword v_bytes)
 {
   heap_header_t *h_old, *h_new;
+  vec_attr_t va = { .align = HEAP_DATA_ALIGN,
+		    .hdr_sz = sizeof (heap_header_t),
+		    .elt_sz = 1 };
   void *v_new;
 
   h_old = heap_header (v_old);
@@ -192,8 +195,7 @@
   if (!v_old)
     return v_old;
 
-  v_new = _vec_realloc (0, _vec_len (v_old), 1, sizeof (heap_header_t),
-			HEAP_DATA_ALIGN, 0);
+  v_new = _vec_alloc_internal (_vec_len (v_old), &va);
   h_new = heap_header (v_new);
   heap_dup_header (h_old, h_new);
   clib_memcpy_fast (v_new, v_old, v_bytes);
@@ -212,8 +214,10 @@
 always_inline void *
 _heap_new (u32 len, u32 n_elt_bytes)
 {
-  void *v = _vec_realloc ((void *) 0, len, n_elt_bytes, sizeof (heap_header_t),
-			  HEAP_DATA_ALIGN, 0);
+  vec_attr_t va = { .align = HEAP_DATA_ALIGN,
+		    .hdr_sz = sizeof (heap_header_t),
+		    .elt_sz = n_elt_bytes };
+  void *v = _vec_alloc_internal (len, &va);
   heap_header (v)->elt_bytes = n_elt_bytes;
   return v;
 }
diff --git a/src/vppinfra/pool.c b/src/vppinfra/pool.c
index 2bbfe60..1f3b96f 100644
--- a/src/vppinfra/pool.c
+++ b/src/vppinfra/pool.c
@@ -44,11 +44,14 @@
   pool_header_t *ph;
   u8 *v;
   u32 i;
+  vec_attr_t va = { .elt_sz = elt_size,
+		    .align = align,
+		    .hdr_sz = sizeof (pool_header_t) };
 
   ASSERT (elt_size);
   ASSERT (max_elts);
 
-  v = _vec_realloc (0, max_elts, elt_size, sizeof (pool_header_t), align, 0);
+  v = _vec_alloc_internal (max_elts, &va);
 
   ph = pool_header (v);
   ph->max_elts = max_elts;
diff --git a/src/vppinfra/pool.h b/src/vppinfra/pool.h
index 8330d64..2a35629 100644
--- a/src/vppinfra/pool.h
+++ b/src/vppinfra/pool.h
@@ -172,6 +172,9 @@
   uword len = 0;
   void *p = pp[0];
   void *e;
+  vec_attr_t va = { .hdr_sz = sizeof (pool_header_t),
+		    .elt_sz = elt_sz,
+		    .align = align };
 
   if (p)
     {
@@ -199,8 +202,7 @@
   len = vec_len (p);
 
   /* Nothing on free list, make a new element and return it. */
-  p =
-    _vec_realloc_inline (p, len + 1, elt_sz, sizeof (pool_header_t), align, 0);
+  p = _vec_realloc_internal (p, len + 1, &va);
   e = p + len * elt_sz;
 
   _vec_update_pointer (pp, p);
@@ -312,6 +314,10 @@
 {
   pool_header_t *ph = pool_header (pp[0]);
   uword len = vec_len (pp[0]);
+  const vec_attr_t va = { .hdr_sz = sizeof (pool_header_t),
+			  .elt_sz = elt_sz,
+			  .align = align,
+			  .heap = heap };
 
   if (ph && ph->max_elts)
     {
@@ -319,8 +325,7 @@
       os_out_of_memory ();
     }
 
-  pp[0] = _vec_realloc_inline (pp[0], len + n_elts, elt_sz,
-			       sizeof (pool_header_t), align, heap);
+  pp[0] = _vec_resize_internal (pp[0], len + n_elts, &va);
   _vec_set_len (pp[0], len, elt_sz);
   clib_mem_poison (pp[0] + len * elt_sz, n_elts * elt_sz);
 
@@ -342,6 +347,9 @@
 {
   pool_header_t *nph, *ph = pool_header (p);
   uword len = vec_len (p);
+  const vec_attr_t va = { .hdr_sz = sizeof (pool_header_t),
+			  .elt_sz = elt_sz,
+			  .align = align };
   void *n;
 
   if (ph && ph->max_elts)
@@ -350,7 +358,7 @@
       os_out_of_memory ();
     }
 
-  n = _vec_realloc_inline (0, len, elt_sz, sizeof (pool_header_t), align, 0);
+  n = _vec_alloc_internal (len, &va);
   nph = pool_header (n);
   clib_memset_u8 (nph, 0, sizeof (vec_header_t));
 
diff --git a/src/vppinfra/ring.h b/src/vppinfra/ring.h
index d7e1915..8527fdb 100644
--- a/src/vppinfra/ring.h
+++ b/src/vppinfra/ring.h
@@ -37,12 +37,11 @@
 {
   void *v;
   clib_ring_header_t *h;
+  vec_attr_t va = { .elt_sz = elt_bytes,
+		    .hdr_sz = sizeof (clib_ring_header_t),
+		    .align = align };
 
-  v = _vec_realloc (0,
-		    /* length increment */ size,
-		    /* data bytes */ elt_bytes,
-		    /* header bytes */ sizeof (h[0]),
-		    /* data align */ align, 0);
+  v = _vec_alloc_internal (size, &va);
 
   h = clib_ring_header (v);
   h->next = 0;
diff --git a/src/vppinfra/serialize.c b/src/vppinfra/serialize.c
index d84d7ca..f5c0064 100644
--- a/src/vppinfra/serialize.c
+++ b/src/vppinfra/serialize.c
@@ -308,13 +308,16 @@
 {
   void *v, *p;
   u32 l;
+  vec_attr_t va = { .align = align,
+		    .elt_sz = elt_bytes,
+		    .hdr_sz = header_bytes };
 
   unserialize_integer (m, &l, sizeof (l));
   if (l > max_length)
     serialize_error (&m->header,
 		     clib_error_create ("bad vector length %d", l));
-  p = v = _vec_realloc ((void *) 0, l, elt_bytes, header_bytes,
-			/* align */ align, 0);
+
+  p = v = _vec_alloc_internal (l, &va);
 
   while (l != 0)
     {
@@ -437,6 +440,9 @@
   void *v;
   u32 i, l, lo, hi;
   pool_header_t *p;
+  vec_attr_t va = { .align = align,
+		    .elt_sz = elt_bytes,
+		    .hdr_sz = sizeof (pool_header_t) };
 
   unserialize_integer (m, &l, sizeof (l));
   if (l == 0)
@@ -444,7 +450,7 @@
       return 0;
     }
 
-  v = _vec_realloc ((void *) 0, l, elt_bytes, sizeof (p[0]), align, 0);
+  v = _vec_alloc_internal (l, &va);
   p = pool_header (v);
 
   vec_unserialize (m, &p->free_indices, unserialize_vec_32);
diff --git a/src/vppinfra/sparse_vec.h b/src/vppinfra/sparse_vec.h
index dc9cb00..1f57d30 100644
--- a/src/vppinfra/sparse_vec.h
+++ b/src/vppinfra/sparse_vec.h
@@ -73,15 +73,14 @@
   void *v;
   sparse_vec_header_t *h;
   word n;
+  vec_attr_t va = { .elt_sz = elt_bytes, .hdr_sz = sizeof (h[0]) };
 
   ASSERT (sparse_index_bits <= 16);
 
-  v = _vec_realloc (0, /* data bytes */ 8, elt_bytes,
-		    /* header bytes */ sizeof (h[0]), /* data align */ 0,
-		    /* heap */ 0);
+  v = _vec_alloc_internal (/* data bytes */ 8, &va);
 
   /* Make space for invalid entry (entry 0). */
-  _vec_find (v)->len = 1;
+  _vec_set_len (v, 1, elt_bytes);
 
   h = sparse_vec_header (v);
 
diff --git a/src/vppinfra/test_vec.c b/src/vppinfra/test_vec.c
index f32cd7f..9f336a0 100644
--- a/src/vppinfra/test_vec.c
+++ b/src/vppinfra/test_vec.c
@@ -211,6 +211,8 @@
   ({                                                                          \
     elt_type *_v (v) = NULL;                                                  \
     uword _v (l) = (len);                                                     \
+    vec_attr_t _v (attr) = { .hdr_sz = (hdr_bytes),                           \
+			     .elt_sz = sizeof (elt_type) };                   \
     uword _v (h) = (hdr_bytes);                                               \
     u8 *_v (hdr);                                                             \
                                                                               \
@@ -221,7 +223,7 @@
     if (_v (l) == ~0)                                                         \
       _v (l) = bounded_random_u32 (&(seed), 0, MAX_VEC_LEN);                  \
                                                                               \
-    _v (v) = _vec_realloc (NULL, _v (l), sizeof (elt_type), _v (h), 0, 0);    \
+    _v (v) = _vec_alloc_internal (_v (l), &_v (attr));                        \
     fill_with_random_data (_v (v), vec_bytes (_v (v)), (seed));               \
                                                                               \
     /* Fill header with random data as well. */                               \
diff --git a/src/vppinfra/vec.c b/src/vppinfra/vec.c
index 4dc8f18..dbaadad 100644
--- a/src/vppinfra/vec.c
+++ b/src/vppinfra/vec.c
@@ -16,77 +16,120 @@
 }
 
 __clib_export void *
-_vec_realloc (void *v, uword n_elts, uword elt_sz, uword hdr_sz, uword align,
-	      void *heap)
+_vec_alloc_internal (uword n_elts, const vec_attr_t *const attr)
 {
-  uword n_data_bytes, alloc_size, new_data_size;
-  void *p;
+  uword req_size, alloc_size, data_offset, align;
+  uword elt_sz = attr->elt_sz;
+  void *p, *v, *heap = attr->heap;
 
   /* alignment must be power of 2 */
-  align = clib_max (align, VEC_MIN_ALIGN);
+  align = clib_max (attr->align, VEC_MIN_ALIGN);
   ASSERT (count_set_bits (align) == 1);
 
-  /* mumber of bytes needed to store vector data */
-  n_data_bytes = n_elts * elt_sz;
+  /* calc offset where vector data starts */
+  data_offset = attr->hdr_sz + sizeof (vec_header_t);
+  data_offset += heap ? sizeof (void *) : 0;
+  data_offset = round_pow2 (data_offset, align);
 
-  if (v)
+  req_size = data_offset + n_elts * elt_sz;
+  p = clib_mem_heap_alloc_aligned (heap, req_size, align);
+
+  /* zero out whole allocation */
+  alloc_size = clib_mem_size (p);
+  clib_mem_unpoison (p, alloc_size);
+  clib_memset_u8 (p, 0, alloc_size);
+
+  /* fill vector header */
+  v = p + data_offset;
+  _vec_find (v)->len = n_elts;
+  _vec_find (v)->hdr_size = data_offset / VEC_MIN_ALIGN;
+  _vec_find (v)->log2_align = min_log2 (align);
+  if (heap)
     {
-      uword data_offset = vec_get_header_size (v);
-      uword old_data_size = data_offset + _vec_len (v) * elt_sz;
-      new_data_size = data_offset + n_data_bytes;
-      heap = _vec_find (v)->default_heap ? 0 : _vec_heap (v);
-      p = vec_header (v);
-      alloc_size = clib_mem_size (p);
-
-      /* check that we are still dealing with the same vector type */
-      ASSERT (_vec_find (v)->hdr_size * VEC_MIN_ALIGN == data_offset);
-      ASSERT (_vec_find (v)->log2_align == min_log2 (align));
-
-      /* realloc if new size cannot fit into existing allocation */
-      if (alloc_size < new_data_size)
-	{
-	  if (CLIB_VECTOR_GROW_BY_ONE)
-	    alloc_size = n_data_bytes + data_offset;
-	  else
-	    alloc_size = (n_data_bytes * 3) / 2 + data_offset;
-
-	  p = clib_mem_heap_realloc_aligned (heap, p, alloc_size, align);
-	  alloc_size = clib_mem_size (p);
-	  v = p + data_offset;
-	}
-
-      clib_mem_unpoison (p, alloc_size);
-      clib_memset_u8 (p + old_data_size, 0, alloc_size - old_data_size);
+      _vec_find (v)->default_heap = 0;
+      _vec_heap (v) = heap;
     }
   else
-    {
-      /* new allocation */
-      uword data_offset = hdr_sz + sizeof (vec_header_t);
-      data_offset += heap ? sizeof (void *) : 0;
-      data_offset = round_pow2 (data_offset, align);
+    _vec_find (v)->default_heap = 1;
 
-      new_data_size = data_offset + n_data_bytes;
-      p = clib_mem_heap_alloc_aligned (heap, new_data_size, align);
-      alloc_size = clib_mem_size (p);
-      clib_mem_unpoison (p, alloc_size);
-      clib_memset_u8 (p, 0, alloc_size);
+  /* poison extra space given by allocator */
+  clib_mem_poison (p + req_size, alloc_size - req_size);
+  _vec_set_grow_elts (v, (alloc_size - req_size) / elt_sz);
+  return v;
+}
+
+static inline void
+_vec_update_len (void *v, uword n_elts, uword elt_sz, uword n_data_bytes,
+		 uword unused_bytes)
+{
+  _vec_find (v)->len = n_elts;
+  _vec_set_grow_elts (v, unused_bytes / elt_sz);
+  clib_mem_unpoison (v, n_data_bytes);
+  clib_mem_poison (v + n_data_bytes, unused_bytes);
+}
+
+__clib_export void *
+_vec_realloc_internal (void *v, uword n_elts, const vec_attr_t *const attr)
+{
+  uword old_alloc_sz, new_alloc_sz, new_data_size, n_data_bytes, data_offset;
+  uword elt_sz;
+
+  if (PREDICT_FALSE (v == 0))
+    return _vec_alloc_internal (n_elts, attr);
+
+  elt_sz = attr->elt_sz;
+  n_data_bytes = n_elts * elt_sz;
+  data_offset = vec_get_header_size (v);
+  new_data_size = data_offset + n_data_bytes;
+  new_alloc_sz = old_alloc_sz = clib_mem_size (vec_header (v));
+
+  /* realloc if new size cannot fit into existing allocation */
+  if (old_alloc_sz < new_data_size)
+    {
+      uword n_bytes, req_size = new_data_size;
+      void *p = v - data_offset;
+
+      req_size += CLIB_VECTOR_GROW_BY_ONE ? 0 : n_data_bytes / 2;
+
+      p = clib_mem_heap_realloc_aligned (vec_get_heap (v), p, req_size,
+					 vec_get_align (v));
+      new_alloc_sz = clib_mem_size (p);
       v = p + data_offset;
-      _vec_find (v)->hdr_size = data_offset / VEC_MIN_ALIGN;
-      _vec_find (v)->log2_align = min_log2 (align);
-      if (heap)
-	{
-	  _vec_find (v)->default_heap = 0;
-	  _vec_heap (v) = heap;
-	}
-      else
-	_vec_find (v)->default_heap = 1;
+
+      /* zero out new allocation */
+      n_bytes = new_alloc_sz - old_alloc_sz;
+      clib_mem_unpoison (p + old_alloc_sz, n_bytes);
+      clib_memset_u8 (p + old_alloc_sz, 0, n_bytes);
     }
 
-  clib_mem_poison (p + new_data_size, alloc_size - new_data_size);
-  _vec_find (v)->len = n_elts;
+  _vec_update_len (v, n_elts, elt_sz, n_data_bytes,
+		   new_alloc_sz - new_data_size);
   return v;
 }
 
+__clib_export void *
+_vec_resize_internal (void *v, uword n_elts, const vec_attr_t *const attr)
+{
+  uword elt_sz = attr->elt_sz;
+  if (PREDICT_TRUE (v != 0))
+    {
+      uword hs = _vec_find (v)->hdr_size * VEC_MIN_ALIGN;
+      uword alloc_sz = clib_mem_size (v - hs);
+      uword n_data_bytes = elt_sz * n_elts;
+      word unused_bytes = alloc_sz - (n_data_bytes + hs);
+
+      if (PREDICT_TRUE (unused_bytes >= 0))
+	{
+	  _vec_update_len (v, n_elts, elt_sz, n_data_bytes, unused_bytes);
+	  return v;
+	}
+    }
+
+  /* this should emit tail jump and likely avoid stack usage inside this
+   * function */
+  return _vec_realloc_internal (v, n_elts, attr);
+}
+
 __clib_export u32
 vec_len_not_inline (void *v)
 {
diff --git a/src/vppinfra/vec.h b/src/vppinfra/vec.h
index bfb7b1b..f45f45e 100644
--- a/src/vppinfra/vec.h
+++ b/src/vppinfra/vec.h
@@ -101,8 +101,20 @@
     @param align alignment (may be zero)
     @return v_prime pointer to resized vector, may or may not equal v
 */
-void *_vec_realloc (void *v, uword n_elts, uword elt_sz, uword hdr_sz,
-		    uword align, void *heap);
+
+typedef struct
+{
+  void *heap;
+  u32 elt_sz;
+  u16 hdr_sz;
+  u16 align;
+} vec_attr_t;
+
+void *_vec_alloc_internal (uword n_elts, const vec_attr_t *const attr);
+void *_vec_realloc_internal (void *v, uword n_elts,
+			     const vec_attr_t *const attr);
+void *_vec_resize_internal (void *v, uword n_elts,
+			    const vec_attr_t *const attr);
 
 /* calculate minimum alignment out of data natural alignment and provided
  * value, should not be < VEC_MIN_ALIGN */
@@ -139,36 +151,24 @@
   return _vec_heap (v);
 }
 
-static_always_inline void *
-_vec_realloc_inline (void *v, uword n_elts, uword elt_sz, uword hdr_sz,
-		     uword align, void *heap)
+static_always_inline uword
+vec_get_align (void *v)
 {
-  if (PREDICT_TRUE (v != 0))
-    {
-      /* Vector header must start heap object. */
-      ASSERT (clib_mem_heap_is_heap_object (vec_get_heap (v), vec_header (v)));
-
-      /* Typically we'll not need to resize. */
-      if ((n_elts * elt_sz) <= vec_max_bytes (v))
-	{
-	  _vec_set_len (v, n_elts, elt_sz);
-	  return v;
-	}
-    }
-
-  /* Slow path: call helper function. */
-  return _vec_realloc (v, n_elts, elt_sz, hdr_sz, align, heap);
+  return 1ULL << _vec_find (v)->log2_align;
 }
 
 static_always_inline void
 _vec_prealloc (void **vp, uword n_elts, uword hdr_sz, uword align, void *heap,
 	       uword elt_sz)
 {
+  const vec_attr_t va = {
+    .elt_sz = elt_sz, .hdr_sz = hdr_sz, .align = align, .heap = heap
+  };
   void *v;
 
   ASSERT (vp[0] == 0);
 
-  v = _vec_realloc (0, n_elts, elt_sz, hdr_sz, align, heap);
+  v = _vec_alloc_internal (n_elts, &va);
   _vec_set_len (v, 0, elt_sz);
   _vec_update_pointer (vp, v);
 }
@@ -247,9 +247,26 @@
 static_always_inline void
 _vec_resize (void **vp, uword n_add, uword hdr_sz, uword align, uword elt_sz)
 {
-  void *v = vp[0];
-  v = _vec_realloc_inline (v, vec_len (v) + n_add, elt_sz, hdr_sz, align, 0);
-  _vec_update_pointer (vp, v);
+  void *v = *vp;
+  if (PREDICT_FALSE (v == 0))
+    {
+      const vec_attr_t va = { .elt_sz = elt_sz,
+			      .align = align,
+			      .hdr_sz = hdr_sz };
+      *vp = _vec_alloc_internal (n_add, &va);
+      return;
+    }
+
+  if (PREDICT_FALSE (_vec_find (v)->grow_elts < n_add))
+    {
+      const vec_attr_t va = { .elt_sz = elt_sz,
+			      .align = align,
+			      .hdr_sz = hdr_sz };
+      v = _vec_resize_internal (v, _vec_len (v) + n_add, &va);
+      _vec_update_pointer (vp, v);
+    }
+  else
+    _vec_set_len (v, _vec_len (v) + n_add, elt_sz);
 }
 
 #define vec_resize_ha(V, N, H, A)                                             \
@@ -324,7 +341,10 @@
     @return V new vector
 */
 #define vec_new_generic(T, N, H, A, P)                                        \
-  _vec_realloc (0, N, sizeof (T), H, _vec_align ((T *) 0, A), P)
+  _vec_alloc_internal (N, &((vec_attr_t){ .align = _vec_align ((T *) 0, A),   \
+					  .hdr_sz = (H),                      \
+					  .heap = (P),                        \
+					  .elt_sz = sizeof (T) }))
 
 /** \brief Create new vector of given type and length
     (unspecified alignment, no header).
@@ -390,11 +410,12 @@
 _vec_dup (void *v, uword hdr_size, uword align, uword elt_sz)
 {
   uword len = vec_len (v);
+  const vec_attr_t va = { .elt_sz = elt_sz, .hdr_sz = hdr_size, .align = align };
   void *n = 0;
 
   if (len)
     {
-      n = _vec_realloc (0, len, elt_sz, hdr_size, align, 0);
+      n = _vec_alloc_internal (len, &va);
       clib_memcpy_fast (n, v, len * elt_sz);
     }
   return n;
@@ -438,7 +459,8 @@
 static_always_inline void
 _vec_clone (void **v1p, void *v2, uword align, uword elt_sz)
 {
-  v1p[0] = _vec_realloc (0, vec_len (v2), elt_sz, 0, align, 0);
+  const vec_attr_t va = { .elt_sz = elt_sz, .align = align };
+  v1p[0] = _vec_alloc_internal (vec_len (v2), &va);
 }
 #define vec_clone(NEW_V, OLD_V)                                               \
   _vec_clone ((void **) &(NEW_V), OLD_V, _vec_align (NEW_V, 0),               \
@@ -464,14 +486,35 @@
 _vec_validate (void **vp, uword index, uword header_size, uword align,
 	       void *heap, uword elt_sz)
 {
-  void *v = vp[0];
-  uword vl = vec_len (v);
-  if (index >= vl)
+  void *v = *vp;
+  uword vl, n_elts = index + 1;
+
+  if (PREDICT_FALSE (v == 0))
     {
-      v = _vec_realloc_inline (v, index + 1, elt_sz, header_size, align, heap);
-      _vec_zero_elts (v, vl, index - vl + 1, elt_sz);
+      const vec_attr_t va = {
+	.elt_sz = elt_sz, .align = align, .hdr_sz = header_size, .heap = heap
+      };
+      *vp = _vec_alloc_internal (n_elts, &va);
+      return;
+    }
+
+  vl = _vec_len (v);
+
+  if (PREDICT_FALSE (index < vl))
+    return;
+
+  if (PREDICT_FALSE (index >= _vec_find (v)->grow_elts + vl))
+    {
+      const vec_attr_t va = { .elt_sz = elt_sz,
+			      .align = align,
+			      .hdr_sz = header_size };
+      v = _vec_resize_internal (v, n_elts, &va);
       _vec_update_pointer (vp, v);
     }
+  else
+    _vec_set_len (v, n_elts, elt_sz);
+
+  _vec_zero_elts (v, vl, n_elts - vl, elt_sz);
 }
 
 #define vec_validate_hap(V, I, H, A, P)                                       \
@@ -572,10 +615,28 @@
 _vec_add1 (void **vp, uword hdr_sz, uword align, uword elt_sz)
 {
   void *v = vp[0];
-  uword len = vec_len (v);
-  v = _vec_realloc_inline (v, len + 1, elt_sz, hdr_sz, align, 0);
+  uword len;
 
-  _vec_update_pointer (vp, v);
+  if (PREDICT_FALSE (v == 0))
+    {
+      const vec_attr_t va = { .elt_sz = elt_sz,
+			      .align = align,
+			      .hdr_sz = hdr_sz };
+      return *vp = _vec_alloc_internal (1, &va);
+    }
+
+  len = _vec_len (v);
+
+  if (PREDICT_FALSE (_vec_find (v)->grow_elts == 0))
+    {
+      const vec_attr_t va = { .elt_sz = elt_sz,
+			      .align = align,
+			      .hdr_sz = hdr_sz };
+      v = _vec_resize_internal (v, len + 1, &va);
+      _vec_update_pointer (vp, v);
+    }
+  else
+    _vec_set_len (v, len + 1, elt_sz);
 
   return v + len * elt_sz;
 }
@@ -616,11 +677,31 @@
 _vec_add2 (void **vp, void **pp, uword n_add, uword hdr_sz, uword align,
 	   uword elt_sz)
 {
-  void *v = vp[0];
-  uword len = vec_len (vp[0]);
-  v = _vec_realloc_inline (v, len + n_add, elt_sz, hdr_sz, align, 0);
-  _vec_update_pointer (vp, v);
-  pp[0] = v + len * elt_sz;
+  void *v = *vp;
+  uword len;
+
+  if (PREDICT_FALSE (v == 0))
+    {
+      const vec_attr_t va = { .elt_sz = elt_sz,
+			      .align = align,
+			      .hdr_sz = hdr_sz };
+      *vp = *pp = _vec_alloc_internal (n_add, &va);
+      return;
+    }
+
+  len = _vec_len (v);
+  if (PREDICT_FALSE (_vec_find (v)->grow_elts < n_add))
+    {
+      const vec_attr_t va = { .elt_sz = elt_sz,
+			      .align = align,
+			      .hdr_sz = hdr_sz };
+      v = _vec_resize_internal (v, len + n_add, &va);
+      _vec_update_pointer (vp, v);
+    }
+  else
+    _vec_set_len (v, len + n_add, elt_sz);
+
+  *pp = v + len * elt_sz;
 }
 
 #define vec_add2_ha(V, P, N, H, A)                                            \
@@ -663,17 +744,38 @@
 _vec_add (void **vp, void *e, word n_add, uword hdr_sz, uword align,
 	  uword elt_sz)
 {
-  void *v = vp[0];
-  uword len = vec_len (v);
+  void *v = *vp;
+  uword len;
 
   ASSERT (n_add >= 0);
 
   if (n_add < 1)
     return;
 
-  v = _vec_realloc_inline (v, len + n_add, elt_sz, hdr_sz, align, 0);
+  if (PREDICT_FALSE (v == 0))
+    {
+      const vec_attr_t va = { .elt_sz = elt_sz,
+			      .align = align,
+			      .hdr_sz = hdr_sz };
+      *vp = v = _vec_alloc_internal (n_add, &va);
+      clib_memcpy_fast (v, e, n_add * elt_sz);
+      return;
+    }
+
+  len = _vec_len (v);
+
+  if (PREDICT_FALSE (_vec_find (v)->grow_elts < n_add))
+    {
+      const vec_attr_t va = { .elt_sz = elt_sz,
+			      .align = align,
+			      .hdr_sz = hdr_sz };
+      v = _vec_resize_internal (v, len + n_add, &va);
+      _vec_update_pointer (vp, v);
+    }
+  else
+    _vec_set_len (v, len + n_add, elt_sz);
+
   clib_memcpy_fast (v + len * elt_sz, e, n_add * elt_sz);
-  _vec_update_pointer (vp, v);
 }
 
 #define vec_add_ha(V, E, N, H, A)                                             \
@@ -747,11 +849,12 @@
 {
   void *v = vp[0];
   uword len = vec_len (v);
+  const vec_attr_t va = { .elt_sz = elt_sz, .align = align, .hdr_sz = hdr_sz };
 
   ASSERT (ins_pt <= len);
 
-  v = _vec_realloc_inline (v, len + n_insert, elt_sz, hdr_sz, align, 0);
-  clib_memmove (v + elt_sz * (ins_pt + n_insert), v + ins_pt * elt_sz,
+  v = _vec_resize_internal (v, len + n_insert, &va);
+  clib_memmove (v + va.elt_sz * (ins_pt + n_insert), v + ins_pt * elt_sz,
 		(len - ins_pt) * elt_sz);
   _vec_zero_elts (v, ins_pt, n_insert, elt_sz);
   _vec_update_pointer (vp, v);
@@ -839,10 +942,11 @@
 {
   void *v = vp[0];
   uword len = vec_len (v);
+  const vec_attr_t va = { .elt_sz = elt_sz, .align = align, .hdr_sz = hdr_sz };
 
   ASSERT (ins_pt <= len);
 
-  v = _vec_realloc_inline (v, len + n_insert, elt_sz, hdr_sz, align, 0);
+  v = _vec_resize_internal (v, len + n_insert, &va);
   clib_memmove (v + elt_sz * (ins_pt + n_insert), v + ins_pt * elt_sz,
 		(len - ins_pt) * elt_sz);
   _vec_zero_elts (v, ins_pt, n_insert, elt_sz);
@@ -938,7 +1042,8 @@
 
   if (PREDICT_TRUE (len2 > 0))
     {
-      v1 = _vec_realloc_inline (v1, len1 + len2, v2_elt_sz, 0, align, 0);
+      const vec_attr_t va = { .elt_sz = v2_elt_sz, .align = align };
+      v1 = _vec_resize_internal (v1, len1 + len2, &va);
       clib_memcpy_fast (v1 + len1 * v1_elt_sz, v2, len2 * v2_elt_sz);
       _vec_update_pointer (v1p, v1);
     }
@@ -971,7 +1076,8 @@
 
   if (PREDICT_TRUE (len2 > 0))
     {
-      v1 = _vec_realloc_inline (v1, len1 + len2, v2_elt_sz, 0, align, 0);
+      const vec_attr_t va = { .elt_sz = v2_elt_sz, .align = align };
+      v1 = _vec_resize_internal (v1, len1 + len2, &va);
       clib_memmove (v1 + len2 * v2_elt_sz, v1p[0], len1 * v1_elt_sz);
       clib_memcpy_fast (v1, v2, len2 * v2_elt_sz);
       _vec_update_pointer (v1p, v1);
diff --git a/src/vppinfra/vec_bootstrap.h b/src/vppinfra/vec_bootstrap.h
index a94c1a1..5670415 100644
--- a/src/vppinfra/vec_bootstrap.h
+++ b/src/vppinfra/vec_bootstrap.h
@@ -58,7 +58,8 @@
   u8 hdr_size;	      /**< header size divided by VEC_MIN_ALIGN */
   u8 log2_align : 7;  /**< data alignment */
   u8 default_heap : 1; /**< vector uses default heap */
-  u8 vpad[2];	      /**< pad to 8 bytes */
+  u8 grow_elts;	       /**< number of elts vector can grow without realloc */
+  u8 vpad[1];	       /**< pad to 8 bytes */
   u8 vector_data[0];  /**< Vector data . */
 } vec_header_t;
 
@@ -168,18 +169,31 @@
 
 #define vec_max_len(v) _vec_max_len (v, _vec_elt_sz (v))
 
+static_always_inline void
+_vec_set_grow_elts (void *v, uword n_elts)
+{
+  uword max = pow2_mask (BITS (_vec_find (0)->grow_elts));
+
+  if (PREDICT_FALSE (n_elts > max))
+    n_elts = max;
+
+  _vec_find (v)->grow_elts = n_elts;
+}
+
 always_inline void
 _vec_set_len (void *v, uword len, uword elt_sz)
 {
   ASSERT (v);
   ASSERT (len <= _vec_max_len (v, elt_sz));
   uword old_len = _vec_len (v);
+  uword grow_elts = _vec_find (v)->grow_elts;
 
   if (len > old_len)
     clib_mem_unpoison (v + old_len * elt_sz, (len - old_len) * elt_sz);
   else if (len > old_len)
     clib_mem_poison (v + len * elt_sz, (old_len - len) * elt_sz);
 
+  _vec_set_grow_elts (v, old_len + grow_elts - len);
   _vec_find (v)->len = len;
 }