Stat segment / client: "show run" works now

Seems to have minimal-to-zero performance consequences. Data appears
accurate: results match the debug CLI output. Checked at low rates and
at 27 MPPS sprayed across two worker threads.

Change-Id: I09ede5150b88a91547feeee448a2854997613004
Signed-off-by: Dave Barach <dave@barachs.net>
diff --git a/src/vlib/cli.c b/src/vlib/cli.c
index f684289..ca8d2ab 100644
--- a/src/vlib/cli.c
+++ b/src/vlib/cli.c
@@ -811,7 +811,7 @@
 
   while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
     {
-      if (!unformat (line_input, "%U", unformat_vlib_enable_disable, &enable))
+      if (unformat (line_input, "%U", unformat_vlib_enable_disable, &enable))
 	;
       else if (unformat (line_input, "api-segment"))
 	api_segment = 1;
diff --git a/src/vlib/node.c b/src/vlib/node.c
index cc1732b..805c69e 100644
--- a/src/vlib/node.c
+++ b/src/vlib/node.c
@@ -563,20 +563,20 @@
     }
 }
 
-vlib_node_t ***
-vlib_node_get_nodes (vlib_main_t * vm, u32 max_threads, int include_stats)
+void
+vlib_node_get_nodes (vlib_main_t * vm, u32 max_threads, int include_stats,
+		     int barrier_sync, vlib_node_t **** node_dupsp,
+		     vlib_main_t *** stat_vmsp)
 {
   vlib_node_main_t *nm = &vm->node_main;
   vlib_node_t *n;
-  static vlib_node_t ***node_dups;
+  vlib_node_t ***node_dups = *node_dupsp;
   vlib_node_t **nodes;
-  static vlib_main_t **stat_vms;
+  vlib_main_t **stat_vms = *stat_vmsp;
   vlib_main_t *stat_vm;
   uword i, j;
   u32 threads_to_serialize;
 
-  vec_reset_length (node_dups);
-
   if (vec_len (stat_vms) == 0)
     {
       for (i = 0; i < vec_len (vlib_mains); i++)
@@ -589,11 +589,14 @@
 
   threads_to_serialize = clib_min (max_threads, vec_len (stat_vms));
 
+  vec_validate (node_dups, threads_to_serialize - 1);
+
   /*
    * Barrier sync across stats scraping.
    * Otherwise, the counts will be grossly inaccurate.
    */
-  vlib_worker_thread_barrier_sync (vm);
+  if (barrier_sync)
+    vlib_worker_thread_barrier_sync (vm);
 
   for (j = 0; j < threads_to_serialize; j++)
     {
@@ -609,12 +612,17 @@
 	    }
 	}
 
-      nodes = vec_dup (nm->nodes);
-      vec_add1 (node_dups, nodes);
+      nodes = node_dups[j];
+      vec_validate (nodes, vec_len (nm->nodes) - 1);
+      clib_memcpy (nodes, nm->nodes, vec_len (nm->nodes) * sizeof (nodes[0]));
+      node_dups[j] = nodes;
     }
-  vlib_worker_thread_barrier_release (vm);
 
-  return node_dups;
+  if (barrier_sync)
+    vlib_worker_thread_barrier_release (vm);
+
+  *node_dupsp = node_dups;
+  *stat_vmsp = stat_vms;
 }
 
 clib_error_t *
diff --git a/src/vlib/node_funcs.h b/src/vlib/node_funcs.h
index 547f09b..bb302f7 100644
--- a/src/vlib/node_funcs.h
+++ b/src/vlib/node_funcs.h
@@ -1127,8 +1127,10 @@
 /**
  * Get list of nodes
  */
-vlib_node_t ***vlib_node_get_nodes (vlib_main_t * vm, u32 max_threads,
-				    int include_stats);
+void
+vlib_node_get_nodes (vlib_main_t * vm, u32 max_threads, int include_stats,
+		     int barrier_sync, vlib_node_t **** node_dupsp,
+		     vlib_main_t *** stat_vmsp);
 
 /* Query node given name. */
 vlib_node_t *vlib_get_node_by_name (vlib_main_t * vm, u8 * name);
diff --git a/src/vlib/threads.c b/src/vlib/threads.c
index bbe94c7..487c501 100644
--- a/src/vlib/threads.c
+++ b/src/vlib/threads.c
@@ -1492,6 +1492,18 @@
 
 }
 
+void vlib_stat_segment_lock (void) __attribute__ ((weak));
+void
+vlib_stat_segment_lock (void)
+{
+}
+
+void vlib_stat_segment_unlock (void) __attribute__ ((weak));
+void
+vlib_stat_segment_unlock (void)
+{
+}
+
 void
 vlib_worker_thread_barrier_release (vlib_main_t * vm)
 {
@@ -1521,6 +1533,13 @@
   /* Update (all) node runtimes before releasing the barrier, if needed */
   if (vm->need_vlib_worker_thread_node_runtime_update)
     {
+      /*
+       * Lock stat segment here, so we're safe when
+       * rebuilding the stat segment node clones from the
+       * stat thread...
+       */
+      vlib_stat_segment_lock ();
+
       /* Do stats elements on main thread */
       worker_thread_node_runtime_update_internal ();
       vm->need_vlib_worker_thread_node_runtime_update = 0;
@@ -1562,6 +1581,7 @@
 	      os_panic ();
 	    }
 	}
+      vlib_stat_segment_unlock ();
     }
 
   t_closed_total = now - vm->barrier_epoch;