vlib: introduce vlib frame aux data

Type: improvement
Change-Id: I53890a13210cfb0d2b2d9d8cfd9b15118d3bb273
Signed-off-by: Damjan Marion <damarion@cisco.com>
diff --git a/src/vlib/main.c b/src/vlib/main.c
index 2f21995..189884a 100644
--- a/src/vlib/main.c
+++ b/src/vlib/main.c
@@ -45,71 +45,12 @@
 
 #include <vlib/unix/unix.h>
 
-/* Actually allocate a few extra slots of vector data to support
-   speculative vector enqueues which overflow vector data in next frame. */
-#define VLIB_FRAME_SIZE_ALLOC (VLIB_FRAME_SIZE + 4)
-
-always_inline u32
-vlib_frame_bytes (u32 n_scalar_bytes, u32 n_vector_bytes)
-{
-  u32 n_bytes;
-
-  /* Make room for vlib_frame_t plus scalar arguments. */
-  n_bytes = vlib_frame_vector_byte_offset (n_scalar_bytes);
-
-  /* Make room for vector arguments.
-     Allocate a few extra slots of vector data to support
-     speculative vector enqueues which overflow vector data in next frame. */
-#define VLIB_FRAME_SIZE_EXTRA 4
-  n_bytes += (VLIB_FRAME_SIZE + VLIB_FRAME_SIZE_EXTRA) * n_vector_bytes;
-
-  /* Magic number is first 32bit number after vector data.
-     Used to make sure that vector data is never overrun. */
 #define VLIB_FRAME_MAGIC (0xabadc0ed)
-  n_bytes += sizeof (u32);
-
-  /* Pad to cache line. */
-  n_bytes = round_pow2 (n_bytes, CLIB_CACHE_LINE_BYTES);
-
-  return n_bytes;
-}
 
 always_inline u32 *
 vlib_frame_find_magic (vlib_frame_t * f, vlib_node_t * node)
 {
-  void *p = f;
-
-  p += vlib_frame_vector_byte_offset (node->scalar_size);
-
-  p += (VLIB_FRAME_SIZE + VLIB_FRAME_SIZE_EXTRA) * node->vector_size;
-
-  return p;
-}
-
-static inline vlib_frame_size_t *
-get_frame_size_info (vlib_node_main_t * nm,
-		     u32 n_scalar_bytes, u32 n_vector_bytes)
-{
-#ifdef VLIB_SUPPORTS_ARBITRARY_SCALAR_SIZES
-  uword key = (n_scalar_bytes << 16) | n_vector_bytes;
-  uword *p, i;
-
-  p = hash_get (nm->frame_size_hash, key);
-  if (p)
-    i = p[0];
-  else
-    {
-      i = vec_len (nm->frame_sizes);
-      vec_validate (nm->frame_sizes, i);
-      hash_set (nm->frame_size_hash, key, i);
-    }
-
-  return vec_elt_at_index (nm->frame_sizes, i);
-#else
-  ASSERT (vlib_frame_bytes (n_scalar_bytes, n_vector_bytes)
-	  == (vlib_frame_bytes (0, 4)));
-  return vec_elt_at_index (nm->frame_sizes, 0);
-#endif
+  return (void *) f + node->magic_offset;
 }
 
 static vlib_frame_t *
@@ -120,17 +61,21 @@
   vlib_frame_size_t *fs;
   vlib_node_t *to_node;
   vlib_frame_t *f;
-  u32 l, n, scalar_size, vector_size;
+  u32 l, n;
 
   ASSERT (vm == vlib_get_main ());
 
   to_node = vlib_get_node (vm, to_node_index);
 
-  scalar_size = to_node->scalar_size;
-  vector_size = to_node->vector_size;
+  vec_validate (nm->frame_sizes, to_node->frame_size_index);
+  fs = vec_elt_at_index (nm->frame_sizes, to_node->frame_size_index);
 
-  fs = get_frame_size_info (nm, scalar_size, vector_size);
-  n = vlib_frame_bytes (scalar_size, vector_size);
+  if (fs->frame_size == 0)
+    fs->frame_size = to_node->frame_size;
+  else
+    ASSERT (fs->frame_size == to_node->frame_size);
+
+  n = fs->frame_size;
   if ((l = vec_len (fs->free_frames)) > 0)
     {
       /* Allocate from end of free list. */
@@ -139,12 +84,12 @@
     }
   else
     {
-      f = clib_mem_alloc_aligned_no_fail (n, VLIB_FRAME_ALIGN);
+      f = clib_mem_alloc_aligned_no_fail (n, CLIB_CACHE_LINE_BYTES);
     }
 
   /* Poison frame when debugging. */
   if (CLIB_DEBUG > 0)
-    clib_memset (f, 0xfe, n);
+    clib_memset_u8 (f, 0xfe, n);
 
   /* Insert magic number. */
   {
@@ -156,8 +101,9 @@
 
   f->frame_flags = VLIB_FRAME_IS_ALLOCATED | frame_flags;
   f->n_vectors = 0;
-  f->scalar_size = scalar_size;
-  f->vector_size = vector_size;
+  f->scalar_offset = to_node->scalar_offset;
+  f->vector_offset = to_node->vector_offset;
+  f->aux_offset = to_node->aux_offset;
   f->flags = 0;
 
   fs->n_alloc_frames += 1;
@@ -249,7 +195,7 @@
   ASSERT (f->frame_flags & VLIB_FRAME_IS_ALLOCATED);
 
   node = vlib_get_node (vm, r->node_index);
-  fs = get_frame_size_info (nm, node->scalar_size, node->vector_size);
+  fs = vec_elt_at_index (nm->frame_sizes, node->frame_size_index);
 
   ASSERT (f->frame_flags & VLIB_FRAME_IS_ALLOCATED);
 
@@ -271,19 +217,24 @@
 show_frame_stats (vlib_main_t * vm,
 		  unformat_input_t * input, vlib_cli_command_t * cmd)
 {
-  vlib_node_main_t *nm = &vm->node_main;
   vlib_frame_size_t *fs;
 
-  vlib_cli_output (vm, "%=6s%=12s%=12s", "Size", "# Alloc", "# Free");
-  vec_foreach (fs, nm->frame_sizes)
-  {
-    u32 n_alloc = fs->n_alloc_frames;
-    u32 n_free = vec_len (fs->free_frames);
+  vlib_cli_output (vm, "%=8s%=6s%=12s%=12s", "Thread", "Size", "# Alloc",
+		   "# Free");
+  foreach_vlib_main ()
+    {
+      vlib_node_main_t *nm = &this_vlib_main->node_main;
+      vec_foreach (fs, nm->frame_sizes)
+	{
+	  u32 n_alloc = fs->n_alloc_frames;
+	  u32 n_free = vec_len (fs->free_frames);
 
-    if (n_alloc + n_free > 0)
-      vlib_cli_output (vm, "%=6d%=12d%=12d",
-		       fs - nm->frame_sizes, n_alloc, n_free);
-  }
+	  if (n_alloc + n_free > 0)
+	    vlib_cli_output (vm, "%=8d%=6d%=12d%=12d",
+			     this_vlib_main->thread_index, fs->frame_size,
+			     n_alloc, n_free);
+	}
+    }
 
   return 0;
 }
diff --git a/src/vlib/node.c b/src/vlib/node.c
index f4329e7..41b9ee7 100644
--- a/src/vlib/node.c
+++ b/src/vlib/node.c
@@ -333,6 +333,7 @@
 {
   vlib_node_main_t *nm = &vm->node_main;
   vlib_node_t *n;
+  u32 size;
   int i;
 
   if (CLIB_DEBUG > 0)
@@ -400,13 +401,66 @@
   _(type);
   _(flags);
   _(state);
-  _(scalar_size);
-  _(vector_size);
   _(format_buffer);
   _(unformat_buffer);
   _(format_trace);
   _(validate_frame);
 
+  size = round_pow2 (sizeof (vlib_frame_t), VLIB_FRAME_DATA_ALIGN);
+
+  /* scalar data size */
+  if (r->scalar_size)
+    {
+      n->scalar_offset = size;
+      size += round_pow2 (r->scalar_size, VLIB_FRAME_DATA_ALIGN);
+    }
+  else
+    n->scalar_offset = 0;
+
+  /* Vector data size */
+  n->vector_offset = size;
+  size += r->vector_size * VLIB_FRAME_SIZE;
+
+  /* Allocate a few extra slots of vector data to support
+     speculative vector enqueues which overflow vector data in next frame. */
+  size += r->vector_size * VLIB_FRAME_SIZE_EXTRA;
+
+  /* space for VLIB_FRAME_MAGIC */
+  n->magic_offset = size;
+  size += sizeof (u32);
+
+  /* round size to VLIB_FRAME_DATA_ALIGN */
+  size = round_pow2 (size, VLIB_FRAME_DATA_ALIGN);
+
+  if (r->aux_size)
+    {
+      n->aux_offset = size;
+      size += r->aux_size * VLIB_FRAME_SIZE;
+    }
+  else
+    n->aux_offset = 0;
+
+  /* final size */
+  n->frame_size = size = round_pow2 (size, CLIB_CACHE_LINE_BYTES);
+  ASSERT (size <= __UINT16_MAX__);
+
+  vlib_frame_size_t *fs = 0;
+
+  n->frame_size_index = (u16) ~0;
+  vec_foreach (fs, nm->frame_sizes)
+    if (fs->frame_size == size)
+      {
+	n->frame_size_index = fs - nm->frame_sizes;
+	break;
+      }
+
+  if (n->frame_size_index == (u16) ~0)
+    {
+      vec_add2 (nm->frame_sizes, fs, 1);
+      fs->frame_size = size;
+      n->frame_size_index = fs - nm->frame_sizes;
+    }
+
   /* Register error counters. */
   vlib_register_errors (vm, n->index, r->n_errors, r->error_strings,
 			r->error_counters);
@@ -669,10 +723,6 @@
   vlib_node_t *n;
   uword ni;
 
-  nm->frame_sizes = vec_new (vlib_frame_size_t, 1);
-#ifdef VLIB_SUPPORTS_ARBITRARY_SCALAR_SIZES
-  nm->frame_size_hash = hash_create (0, sizeof (uword));
-#endif
   nm->flags |= VLIB_NODE_MAIN_RUNTIME_STARTED;
 
   /* Generate sibling relationships */
diff --git a/src/vlib/node.h b/src/vlib/node.h
index 75a0adb..66a9992 100644
--- a/src/vlib/node.h
+++ b/src/vlib/node.h
@@ -149,7 +149,7 @@
   u8 protocol_hint;
 
   /* Size of scalar and vector arguments in bytes. */
-  u16 scalar_size, vector_size;
+  u8 scalar_size, vector_size, aux_size;
 
   /* Number of error codes used by this node. */
   u16 n_errors;
@@ -309,7 +309,8 @@
   u16 n_errors;
 
   /* Size of scalar and vector arguments in bytes. */
-  u16 scalar_size, vector_size;
+  u16 frame_size, scalar_offset, vector_offset, magic_offset, aux_offset;
+  u16 frame_size_index;
 
   /* Handle/index in error heap for this node. */
   u32 error_heap_handle;
@@ -367,7 +368,10 @@
 
 /* Max number of vector elements to process at once per node. */
 #define VLIB_FRAME_SIZE 256
-#define VLIB_FRAME_ALIGN CLIB_CACHE_LINE_BYTES
+/* Number of extra elements allocated at the end of the vector. */
+#define VLIB_FRAME_SIZE_EXTRA 4
+/* Frame data alignment */
+#define VLIB_FRAME_DATA_ALIGN 16
 
 /* Calling frame (think stack frame) for a node. */
 typedef struct vlib_frame_t
@@ -378,11 +382,8 @@
   /* User flags. Used for sending hints to the next node. */
   u16 flags;
 
-  /* Number of scalar bytes in arguments. */
-  u8 scalar_size;
-
-  /* Number of bytes per vector argument. */
-  u8 vector_size;
+  /* Scalar, vector and aux offsets in this frame. */
+  u16 scalar_offset, vector_offset, aux_offset;
 
   /* Number of vector elements currently in frame. */
   u16 n_vectors;
@@ -521,10 +522,15 @@
   /* Number of allocated frames for this scalar/vector size. */
   u32 n_alloc_frames;
 
+  /* Frame size in bytes */
+  u16 frame_size;
+
   /* Vector of free frames for this scalar/vector size. */
   vlib_frame_t **free_frames;
 } vlib_frame_size_t;
 
+STATIC_ASSERT_SIZEOF (vlib_frame_size_t, 16);
+
 typedef struct
 {
   /* Users opaque value for event type. */
@@ -721,9 +727,6 @@
   /* Current counts of nodes in each state. */
   u32 input_node_counts_by_state[VLIB_N_NODE_STATE];
 
-  /* Hash of (scalar_size,vector_size) to frame_sizes index. */
-  uword *frame_size_hash;
-
   /* Per-size frame allocation information. */
   vlib_frame_size_t *frame_sizes;
 
diff --git a/src/vlib/node_funcs.h b/src/vlib/node_funcs.h
index b1d5c7b..46db46d 100644
--- a/src/vlib/node_funcs.h
+++ b/src/vlib/node_funcs.h
@@ -283,16 +283,6 @@
   f->frame_flags |= VLIB_FRAME_NO_APPEND;
 }
 
-/* Byte alignment for vector arguments. */
-#define VLIB_FRAME_VECTOR_ALIGN (1 << 4)
-
-always_inline u32
-vlib_frame_vector_byte_offset (u32 scalar_size)
-{
-  return round_pow2 (sizeof (vlib_frame_t) + scalar_size,
-		     VLIB_FRAME_VECTOR_ALIGN);
-}
-
 /** \brief Get pointer to frame vector data.
  @param f vlib_frame_t pointer
  @return pointer to first vector element in frame
@@ -300,7 +290,19 @@
 always_inline void *
 vlib_frame_vector_args (vlib_frame_t * f)
 {
-  return (void *) f + vlib_frame_vector_byte_offset (f->scalar_size);
+  ASSERT (f->vector_offset);
+  return (void *) f + f->vector_offset;
+}
+
+/** \brief Get pointer to frame vector aux data.
+ @param f vlib_frame_t pointer
+ @return pointer to first vector aux data element in frame
+*/
+always_inline void *
+vlib_frame_aux_args (vlib_frame_t *f)
+{
+  ASSERT (f->aux_offset);
+  return (void *) f + f->aux_offset;
 }
 
 /** \brief Get pointer to frame scalar data.
@@ -314,7 +316,8 @@
 always_inline void *
 vlib_frame_scalar_args (vlib_frame_t * f)
 {
-  return vlib_frame_vector_args (f) - f->scalar_size;
+  ASSERT (f->scalar_offset);
+  return (void *) f + f->scalar_offset;
 }
 
 always_inline vlib_next_frame_t *
diff --git a/src/vlib/threads.c b/src/vlib/threads.c
index f45e935..760aa8b 100644
--- a/src/vlib/threads.c
+++ b/src/vlib/threads.c
@@ -626,8 +626,9 @@
 
 	      vm_clone->thread_index = worker_thread_index;
 	      vm_clone->heap_base = w->thread_mheap;
-	      vm_clone->heap_aligned_base = (void *)
-		(((uword) w->thread_mheap) & ~(VLIB_FRAME_ALIGN - 1));
+	      vm_clone->heap_aligned_base =
+		(void *) (((uword) w->thread_mheap) &
+			  ~(CLIB_CACHE_LINE_BYTES - 1));
 	      vm_clone->pending_rpc_requests = 0;
 	      vec_validate (vm_clone->pending_rpc_requests, 0);
 	      _vec_len (vm_clone->pending_rpc_requests) = 0;
@@ -730,10 +731,7 @@
 						     CLIB_CACHE_LINE_BYTES);
 
 	      /* Create per-thread frame freelist */
-	      nm_clone->frame_sizes = vec_new (vlib_frame_size_t, 1);
-#ifdef VLIB_SUPPORTS_ARBITRARY_SCALAR_SIZES
-	      nm_clone->frame_size_hash = hash_create (0, sizeof (uword));
-#endif
+	      nm_clone->frame_sizes = 0;
 	      nm_clone->node_by_error = nm->node_by_error;
 
 	      /* Packet trace buffers are guaranteed to be empty, nothing to do here */
diff --git a/src/vlib/unix/main.c b/src/vlib/unix/main.c
index 4ef9665..69959fd 100644
--- a/src/vlib/unix/main.c
+++ b/src/vlib/unix/main.c
@@ -704,8 +704,8 @@
   vm->argv = (u8 **) argv;
   vgm->name = argv[0];
   vm->heap_base = clib_mem_get_heap ();
-  vm->heap_aligned_base = (void *)
-    (((uword) vm->heap_base) & ~(VLIB_FRAME_ALIGN - 1));
+  vm->heap_aligned_base =
+    (void *) (((uword) vm->heap_base) & ~(CLIB_CACHE_LINE_BYTES - 1));
   ASSERT (vm->heap_base);
 
   clib_time_init (&vm->clib_time);