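Move RPC calls off the binary API input queue

Worker threads no longer push pending RPC requests onto the
shared-memory input queue one at a time. They append them to
vlib_global_main's pending_rpc_requests vector under pending_rpc_lock,
and the API processing loop drains that vector through the new
vl_mem_api_handle_rpc().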

Change-Id: I2476e3e916a42b41d1e66bfc1ec4f8c4264c1720
Signed-off-by: Dave Barach <dbarach@cisco.com>
diff --git a/src/vlibmemory/memory_api.c b/src/vlibmemory/memory_api.c
index a444ec7..7a7644a 100644
--- a/src/vlibmemory/memory_api.c
+++ b/src/vlibmemory/memory_api.c
@@ -103,6 +103,14 @@
 	  break;
 	}
     }
+  if (vec_len (vm->pending_rpc_requests))
+    {
+      vm->queue_signal_pending = 1;
+      vm->api_queue_nonempty = 1;
+      vlib_process_signal_event (vm, vl_api_clnt_node.index,
+				 /* event_type */ QUEUE_SIGNAL_EVENT,
+				 /* event_data */ 0);
+    }
 }
 
 /*
@@ -704,6 +712,32 @@
 }
 
 int
+vl_mem_api_handle_rpc (vlib_main_t * vm, vlib_node_runtime_t * node)
+{
+  api_main_t *am = &api_main;
+  int i;
+  uword *tmp, mp;
+
+  /*
+   * Swap the pending and processing vectors while holding the lock;
+   * the lock is released before any handler runs, avoiding deadlock.
+   */
+  clib_spinlock_lock_if_init (&vm->pending_rpc_lock);
+  tmp = vm->processing_rpc_requests;
+  vec_reset_length (tmp);	/* recycled as the new pending vector */
+  vm->processing_rpc_requests = vm->pending_rpc_requests;
+  vm->pending_rpc_requests = tmp;
+  clib_spinlock_unlock_if_init (&vm->pending_rpc_lock);
+
+  for (i = 0; i < vec_len (vm->processing_rpc_requests); i++)
+    {
+      mp = vm->processing_rpc_requests[i];
+      vl_msg_api_handler_with_vm_node (am, (void *) mp, vm, node);
+    }
+  return 0;			/* unconditional; no status is computed */
+}
+
+int
 vl_mem_api_handle_msg_private (vlib_main_t * vm, vlib_node_runtime_t * node,
 			       u32 reg_index)
 {
diff --git a/src/vlibmemory/memory_api.h b/src/vlibmemory/memory_api.h
index 4cda04b..f658006 100644
--- a/src/vlibmemory/memory_api.h
+++ b/src/vlibmemory/memory_api.h
@@ -32,6 +32,8 @@
 int vl_mem_api_handle_msg_main (vlib_main_t * vm, vlib_node_runtime_t * node);
 int vl_mem_api_handle_msg_private (vlib_main_t * vm,
 				   vlib_node_runtime_t * node, u32 reg_index);
+int vl_mem_api_handle_rpc (vlib_main_t * vm, vlib_node_runtime_t * node);
+
 vl_api_registration_t *vl_mem_api_client_index_to_registration (u32 handle);
 void vl_mem_api_enable_disable (vlib_main_t * vm, int yesno);
 u32 vl_api_memclnt_create_internal (char *, svm_queue_t *);
diff --git a/src/vlibmemory/vlib_api.c b/src/vlibmemory/vlib_api.c
index b72f133..16e6402 100644
--- a/src/vlibmemory/vlib_api.c
+++ b/src/vlibmemory/vlib_api.c
@@ -346,7 +346,8 @@
       start_time = vlib_time_now (vm);
       while (1)
 	{
-	  if (vl_mem_api_handle_msg_main (vm, node))
+	  if (vl_mem_api_handle_rpc (vm, node)
+	      || vl_mem_api_handle_msg_main (vm, node))
 	    {
 	      vm->api_queue_nonempty = 0;
 	      VL_MEM_API_LOG_Q_LEN ("q-underflow: len %d", 0);
@@ -564,36 +565,14 @@
 void
 vl_api_send_pending_rpc_requests (vlib_main_t * vm)
 {
-  api_main_t *am = &api_main;
-  vl_shmem_hdr_t *shmem_hdr = am->shmem_hdr;
-  svm_queue_t *q;
-  int i;
+  vlib_main_t *vm_global = &vlib_global_main;
 
-  /*
-   * Use the "normal" control-plane mechanism for the main thread.
-   * Well, almost. if the main input queue is full, we cannot
-   * block. Otherwise, we can expect a barrier sync timeout.
-   */
-  q = shmem_hdr->vl_input_queue;
+  ASSERT (vm != vm_global);	/* else the reset below drops every RPC */
 
-  for (i = 0; i < vec_len (vm->pending_rpc_requests); i++)
-    {
-      while (pthread_mutex_trylock (&q->mutex))
-	vlib_worker_thread_barrier_check ();
-
-      while (PREDICT_FALSE (svm_queue_is_full (q)))
-	{
-	  pthread_mutex_unlock (&q->mutex);
-	  vlib_worker_thread_barrier_check ();
-	  while (pthread_mutex_trylock (&q->mutex))
-	    vlib_worker_thread_barrier_check ();
-	}
-
-      vl_msg_api_send_shmem_nolock (q, (u8 *) (vm->pending_rpc_requests + i));
-
-      pthread_mutex_unlock (&q->mutex);
-    }
-  _vec_len (vm->pending_rpc_requests) = 0;
+  clib_spinlock_lock_if_init (&vm_global->pending_rpc_lock);
+  vec_append (vm_global->pending_rpc_requests, vm->pending_rpc_requests);
+  vec_reset_length (vm->pending_rpc_requests);
+  clib_spinlock_unlock_if_init (&vm_global->pending_rpc_lock);
 }
 
 always_inline void
@@ -601,6 +580,7 @@
 				    u8 force_rpc)
 {
   vl_api_rpc_call_t *mp;
+  vlib_main_t *vm_global = &vlib_global_main;
   vlib_main_t *vm = vlib_get_main ();
 
   /* Main thread and not a forced RPC: call the function directly */
@@ -626,7 +606,12 @@
   mp->function = pointer_to_uword (fp);
   mp->need_barrier_sync = 1;
 
+  /* Add to the pending vector. Thread 0's vector is shared: lock it. */
+  if (vm == vm_global)
+    clib_spinlock_lock_if_init (&vm_global->pending_rpc_lock);
   vec_add1 (vm->pending_rpc_requests, (uword) mp);
+  if (vm == vm_global)
+    clib_spinlock_unlock_if_init (&vm_global->pending_rpc_lock);
 }
 
 /*