Refork worker thread data structures in parallel (VPP-970)

Change the rebuilding of worker thread clone data structures
to run in parallel on the workers, instead of serially
on the main thread.

Change-Id: Ib76bcfbef1e51f2399972090f4057be7aaa84e08
Signed-off-by: Colin Tregenza Dancer <ctd@metaswitch.com>
diff --git a/src/vlib/threads.h b/src/vlib/threads.h
index 572ce77..c3f1cad 100644
--- a/src/vlib/threads.h
+++ b/src/vlib/threads.h
@@ -102,6 +102,7 @@
   vlib_thread_registration_t *registration;
   u8 *name;
   u64 barrier_sync_count;
+  volatile u32 *node_reforks_required;
 
   long lwp;
   int lcore_id;
@@ -180,6 +181,7 @@
 
 void vlib_worker_thread_barrier_sync (vlib_main_t * vm);
 void vlib_worker_thread_barrier_release (vlib_main_t * vm);
+void vlib_worker_thread_node_refork (void);
 
 static_always_inline uword
 vlib_get_thread_index (void)
@@ -369,6 +371,15 @@
       if (CLIB_DEBUG > 0)
 	vm->parked_at_barrier = 0;
       clib_smp_atomic_add (vlib_worker_threads->workers_at_barrier, -1);
+
+      if (PREDICT_FALSE (*vlib_worker_threads->node_reforks_required))
+	{
+	  vlib_worker_thread_node_refork ();
+	  clib_smp_atomic_add (vlib_worker_threads->node_reforks_required,
+			       -1);
+	  while (*vlib_worker_threads->node_reforks_required)
+	    ;
+	}
     }
 }