vlib: multiarch vlib_frame_queue_dequeue()

The fact that it needs to copy buffer indices justifies moving it into
the multiarch buffer functions.

Type: improvement
Change-Id: I5eb815ccc4cca0ef70b092eb83a49b713efdcbeb
Signed-off-by: Damjan Marion <damarion@cisco.com>
diff --git a/src/vlib/buffer_funcs.c b/src/vlib/buffer_funcs.c
index a0edd7e..fcef2d8 100644
--- a/src/vlib/buffer_funcs.c
+++ b/src/vlib/buffer_funcs.c
@@ -361,6 +361,127 @@
 
 CLIB_MARCH_FN_REGISTRATION (vlib_buffer_enqueue_to_thread_fn);
 
+/*
+ * Pull valid elements off the frame queue of thread vm->thread_index and
+ * hand their buffer indices to fqm->node_index in new frames, stopping
+ * at fq->vector_threshold. Returns the number of elements consumed.
+ */
+u32 __clib_section (".vlib_frame_queue_dequeue_fn")
+CLIB_MULTIARCH_FN (vlib_frame_queue_dequeue_fn)
+(vlib_main_t *vm, vlib_frame_queue_main_t *fqm)
+{
+  u32 thread_id = vm->thread_index;
+  vlib_frame_queue_t *fq = fqm->vlib_frame_queues[thread_id];
+  vlib_frame_queue_elt_t *elt;
+  u32 *from, *to;
+  vlib_frame_t *f;
+  int msg_type;
+  int processed = 0;
+  u32 vectors = 0;
+
+  ASSERT (fq);
+  ASSERT (vm == vlib_global_main.vlib_mains[thread_id]);
+
+  if (PREDICT_FALSE (fqm->node_index == ~0))
+    return 0;
+  /*
+   * When fq->trace is set, snapshot the queue state for this thread
+   */
+  if (PREDICT_FALSE (fq->trace))
+    {
+      frame_queue_trace_t *fqt;
+      frame_queue_nelt_counter_t *fqh;
+      u32 elix;
+
+      fqt = &fqm->frame_queue_traces[thread_id];
+
+      fqt->nelts = fq->nelts;
+      fqt->head = fq->head;
+      fqt->head_hint = fq->head_hint;
+      fqt->tail = fq->tail;
+      fqt->threshold = fq->vector_threshold;
+      fqt->n_in_use = fqt->tail - fqt->head;
+      if (fqt->n_in_use >= fqt->nelts)
+	{
+	  // clamp to nelts - 1 before it is used as the histogram index
+	  fqt->n_in_use = fqt->nelts - 1;
+	}
+
+      /* Record the number of elements in use in the histogram */
+      fqh = &fqm->frame_queue_histogram[thread_id];
+      fqh->count[fqt->n_in_use]++;
+
+      /* Snapshot n_vectors of every slot, starting at head + 1 */
+      for (elix = 0; elix < fqt->nelts; elix++)
+	{
+	  elt = fq->elts + ((fq->head + 1 + elix) & (fq->nelts - 1));
+	  if (1 || elt->valid)
+	    {
+	      fqt->n_vectors[elix] = elt->n_vectors;
+	    }
+	}
+      fqt->written = 1;
+    }
+
+  while (1)
+    {
+      vlib_buffer_t *b;
+      if (fq->head == fq->tail)
+	{
+	  fq->head_hint = fq->head;
+	  return processed;
+	}
+
+      elt = fq->elts + ((fq->head + 1) & (fq->nelts - 1));
+
+      if (!elt->valid)
+	{
+	  fq->head_hint = fq->head;
+	  return processed;
+	}
+
+      from = elt->buffer_index;
+      msg_type = elt->msg_type;
+
+      ASSERT (msg_type == VLIB_FRAME_QUEUE_ELT_DISPATCH_FRAME);
+      ASSERT (elt->n_vectors <= VLIB_FRAME_SIZE);
+
+      f = vlib_get_frame_to_node (vm, fqm->node_index);
+
+      /* If the first buffer is traced, set the frame trace flag */
+      b = vlib_get_buffer (vm, from[0]);
+      if (b->flags & VLIB_BUFFER_IS_TRACED)
+	f->frame_flags |= VLIB_NODE_FLAG_TRACE;
+
+      to = vlib_frame_vector_args (f);
+
+      vlib_buffer_copy_indices (to, from, elt->n_vectors);
+
+      vectors += elt->n_vectors;
+      f->n_vectors = elt->n_vectors;
+      vlib_put_frame_to_node (vm, fqm->node_index, f);
+
+      elt->valid = 0;
+      elt->n_vectors = 0;
+      elt->msg_type = 0xfefefefe;
+      CLIB_MEMORY_BARRIER (); /* slot is reset before head advances */
+      fq->head++;
+      processed++;
+
+      /*
+       * Stop once this call has handed off vector_threshold vectors
+       */
+      if (vectors >= fq->vector_threshold)
+	{
+	  fq->head_hint = fq->head;
+	  return processed;
+	}
+    }
+  ASSERT (0); /* not reached: the loop above only exits via return */
+  return processed;
+}
+CLIB_MARCH_FN_REGISTRATION (vlib_frame_queue_dequeue_fn);
+
 #ifndef CLIB_MARCH_VARIANT
 vlib_buffer_func_main_t vlib_buffer_func_main;
 
@@ -374,6 +495,8 @@
     CLIB_MARCH_FN_POINTER (vlib_buffer_enqueue_to_single_next_fn);
   bfm->buffer_enqueue_to_thread_fn =
     CLIB_MARCH_FN_POINTER (vlib_buffer_enqueue_to_thread_fn);
+  bfm->frame_queue_dequeue_fn =
+    CLIB_MARCH_FN_POINTER (vlib_frame_queue_dequeue_fn);
   return 0;
 }
 
diff --git a/src/vlib/buffer_funcs.h b/src/vlib/buffer_funcs.h
index 7829986..d579e1b 100644
--- a/src/vlib/buffer_funcs.h
+++ b/src/vlib/buffer_funcs.h
@@ -62,11 +62,16 @@
 typedef u32 (vlib_buffer_enqueue_to_thread_fn_t) (
   vlib_main_t *vm, u32 frame_queue_index, u32 *buffer_indices,
   u16 *thread_indices, u32 n_packets, int drop_on_congestion);
+
+typedef u32 (vlib_frame_queue_dequeue_fn_t) (vlib_main_t *vm,
+					     vlib_frame_queue_main_t *fqm);
+
 typedef struct
 {
   vlib_buffer_enqueue_to_next_fn_t *buffer_enqueue_to_next_fn;
   vlib_buffer_enqueue_to_single_next_fn_t *buffer_enqueue_to_single_next_fn;
   vlib_buffer_enqueue_to_thread_fn_t *buffer_enqueue_to_thread_fn;
+  vlib_frame_queue_dequeue_fn_t *frame_queue_dequeue_fn;
 } vlib_buffer_func_main_t;
 
 extern vlib_buffer_func_main_t vlib_buffer_func_main;
diff --git a/src/vlib/main.c b/src/vlib/main.c
index f833aa2..c7c4aba 100644
--- a/src/vlib/main.c
+++ b/src/vlib/main.c
@@ -1581,6 +1581,8 @@
       if (PREDICT_FALSE (vm->check_frame_queues + frame_queue_check_counter))
 	{
 	  u32 processed = 0;
+	  vlib_frame_queue_dequeue_fn_t *fn =
+	    vlib_buffer_func_main.frame_queue_dequeue_fn;
 
 	  if (vm->check_frame_queues)
 	    {
@@ -1589,7 +1591,7 @@
 	    }
 
 	  vec_foreach (fqm, tm->frame_queue_mains)
-	    processed += vlib_frame_queue_dequeue (vm, fqm);
+	    processed += (fn) (vm, fqm);
 
 	  /* No handoff queue work found? */
 	  if (processed)
diff --git a/src/vlib/threads.c b/src/vlib/threads.c
index 11d5a72..c65460e 100644
--- a/src/vlib/threads.c
+++ b/src/vlib/threads.c
@@ -1644,145 +1644,6 @@
   return;
 }
 
-/*
- * Check the frame queue to see if any frames are available.
- * If so, pull the packets off the frames and put them to
- * the handoff node.
- */
-int
-vlib_frame_queue_dequeue (vlib_main_t * vm, vlib_frame_queue_main_t * fqm)
-{
-  u32 thread_id = vm->thread_index;
-  vlib_frame_queue_t *fq = fqm->vlib_frame_queues[thread_id];
-  vlib_frame_queue_elt_t *elt;
-  u32 *from, *to;
-  vlib_frame_t *f;
-  int msg_type;
-  int processed = 0;
-  u32 n_left_to_node;
-  u32 vectors = 0;
-
-  ASSERT (fq);
-  ASSERT (vm == vlib_global_main.vlib_mains[thread_id]);
-
-  if (PREDICT_FALSE (fqm->node_index == ~0))
-    return 0;
-  /*
-   * Gather trace data for frame queues
-   */
-  if (PREDICT_FALSE (fq->trace))
-    {
-      frame_queue_trace_t *fqt;
-      frame_queue_nelt_counter_t *fqh;
-      u32 elix;
-
-      fqt = &fqm->frame_queue_traces[thread_id];
-
-      fqt->nelts = fq->nelts;
-      fqt->head = fq->head;
-      fqt->head_hint = fq->head_hint;
-      fqt->tail = fq->tail;
-      fqt->threshold = fq->vector_threshold;
-      fqt->n_in_use = fqt->tail - fqt->head;
-      if (fqt->n_in_use >= fqt->nelts)
-	{
-	  // if beyond max then use max
-	  fqt->n_in_use = fqt->nelts - 1;
-	}
-
-      /* Record the number of elements in use in the histogram */
-      fqh = &fqm->frame_queue_histogram[thread_id];
-      fqh->count[fqt->n_in_use]++;
-
-      /* Record a snapshot of the elements in use */
-      for (elix = 0; elix < fqt->nelts; elix++)
-	{
-	  elt = fq->elts + ((fq->head + 1 + elix) & (fq->nelts - 1));
-	  if (1 || elt->valid)
-	    {
-	      fqt->n_vectors[elix] = elt->n_vectors;
-	    }
-	}
-      fqt->written = 1;
-    }
-
-  while (1)
-    {
-      vlib_buffer_t *b;
-      if (fq->head == fq->tail)
-	{
-	  fq->head_hint = fq->head;
-	  return processed;
-	}
-
-      elt = fq->elts + ((fq->head + 1) & (fq->nelts - 1));
-
-      if (!elt->valid)
-	{
-	  fq->head_hint = fq->head;
-	  return processed;
-	}
-
-      from = elt->buffer_index;
-      msg_type = elt->msg_type;
-
-      ASSERT (msg_type == VLIB_FRAME_QUEUE_ELT_DISPATCH_FRAME);
-      ASSERT (elt->n_vectors <= VLIB_FRAME_SIZE);
-
-      f = vlib_get_frame_to_node (vm, fqm->node_index);
-
-      /* If the first vector is traced, set the frame trace flag */
-      b = vlib_get_buffer (vm, from[0]);
-      if (b->flags & VLIB_BUFFER_IS_TRACED)
-	f->frame_flags |= VLIB_NODE_FLAG_TRACE;
-
-      to = vlib_frame_vector_args (f);
-
-      n_left_to_node = elt->n_vectors;
-
-      while (n_left_to_node >= 4)
-	{
-	  to[0] = from[0];
-	  to[1] = from[1];
-	  to[2] = from[2];
-	  to[3] = from[3];
-	  to += 4;
-	  from += 4;
-	  n_left_to_node -= 4;
-	}
-
-      while (n_left_to_node > 0)
-	{
-	  to[0] = from[0];
-	  to++;
-	  from++;
-	  n_left_to_node--;
-	}
-
-      vectors += elt->n_vectors;
-      f->n_vectors = elt->n_vectors;
-      vlib_put_frame_to_node (vm, fqm->node_index, f);
-
-      elt->valid = 0;
-      elt->n_vectors = 0;
-      elt->msg_type = 0xfefefefe;
-      CLIB_MEMORY_BARRIER ();
-      fq->head++;
-      processed++;
-
-      /*
-       * Limit the number of packets pushed into the graph
-       */
-      if (vectors >= fq->vector_threshold)
-	{
-	  fq->head_hint = fq->head;
-	  return processed;
-	}
-    }
-  ASSERT (0);
-  return processed;
-}
-
 void
 vlib_worker_thread_fn (void *arg)
 {
diff --git a/src/vlib/threads.h b/src/vlib/threads.h
index 1da9d22..9d9d387 100644
--- a/src/vlib/threads.h
+++ b/src/vlib/threads.h
@@ -179,9 +179,6 @@
 			      u32 frame_queue_index, vlib_frame_t * frame,
 			      vlib_frame_queue_msg_type_t type);
 
-int
-vlib_frame_queue_dequeue (vlib_main_t * vm, vlib_frame_queue_main_t * fqm);
-
 void vlib_worker_thread_node_runtime_update (void);
 
 void vlib_create_worker_threads (vlib_main_t * vm, int n,