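vlib: improve summary vector-rate statistics

Replace the per-main-loop circular buffers of vector and node counts
(and the "wraps" global) with cumulative internal node vector and call
counters, plus snapshots taken when the rate is cleared. The node
runtime CLI summary now prints the internal node vector rate computed
from those counters, and threads.c uses the largest internal node frame
recorded since the last main loop counter increment.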

Type: refactor

Signed-off-by: Dave Barach <dave@barachs.net>
Change-Id: I4b77879b0a84fdec3c1518a972cf003d5135222d
Signed-off-by: Ole Troan <ot@cisco.com>
diff --git a/src/vlib/main.c b/src/vlib/main.c
index f49790d..dc11128 100644
--- a/src/vlib/main.c
+++ b/src/vlib/main.c
@@ -52,8 +52,6 @@
    speculative vector enqueues which overflow vector data in next frame. */
 #define VLIB_FRAME_SIZE_ALLOC (VLIB_FRAME_SIZE + 4)
 
-u32 wraps;
-
 always_inline u32
 vlib_frame_bytes (u32 n_scalar_bytes, u32 n_vector_bytes)
 {
@@ -1376,6 +1374,12 @@
 				   VLIB_NODE_TYPE_INTERNAL,
 				   VLIB_NODE_STATE_POLLING,
 				   f, last_time_stamp);
+  /* Internal node vector-rate accounting, for summary stats */
+  vm->internal_node_vectors += f->n_vectors;
+  vm->internal_node_calls++;
+  vm->internal_node_last_vectors_per_main_loop =
+    (f->n_vectors > vm->internal_node_last_vectors_per_main_loop) ?
+    f->n_vectors : vm->internal_node_last_vectors_per_main_loop;
 
   f->frame_flags &= ~(VLIB_FRAME_PENDING | VLIB_FRAME_NO_APPEND);
 
@@ -1915,7 +1919,6 @@
 	    }
 	}
       vlib_increment_main_loop_counter (vm);
-
       /* Record time stamp in case there are no enabled nodes and above
          calls do not update time stamp. */
       cpu_time_now = clib_cpu_time_now ();
diff --git a/src/vlib/main.h b/src/vlib/main.h
index 2b50b4e..e230ddf 100644
--- a/src/vlib/main.h
+++ b/src/vlib/main.h
@@ -95,11 +95,14 @@
   u32 main_loop_vectors_processed;
   u32 main_loop_nodes_processed;
 
-  /* Circular buffer of input node vector counts.
-     Indexed by low bits of
-     (main_loop_count >> VLIB_LOG2_INPUT_VECTORS_PER_MAIN_LOOP). */
-  u32 vector_counts_per_main_loop[2];
-  u32 node_counts_per_main_loop[2];
+  /* Internal node vectors, calls: running totals and last-clear snapshots */
+  u64 internal_node_vectors;
+  u64 internal_node_calls;
+  u64 internal_node_vectors_last_clear;
+  u64 internal_node_calls_last_clear;
+
+  /* Largest internal node frame since the last main loop count increment */
+  u32 internal_node_last_vectors_per_main_loop;
 
   /* Main loop hw / sw performance counters */
   void (**vlib_node_runtime_perf_counter_cbs) (struct vlib_main_t *,
@@ -323,75 +326,46 @@
   vlib_panic_with_error (vm, 0);
 }
 
-always_inline u32
-vlib_vector_input_stats_index (vlib_main_t * vm, word delta)
-{
-  u32 i;
-  i = vm->main_loop_count >> VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE;
-  ASSERT (is_pow2 (ARRAY_LEN (vm->vector_counts_per_main_loop)));
-  return (i + delta) & (ARRAY_LEN (vm->vector_counts_per_main_loop) - 1);
-}
 
-/* Estimate input rate based on previous
-   2^VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE
-   samples. */
-always_inline u32
-vlib_last_vectors_per_main_loop (vlib_main_t * vm)
-{
-  u32 i = vlib_vector_input_stats_index (vm, -1);
-  u32 n = vm->vector_counts_per_main_loop[i];
-  return n >> VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE;
-}
-
-/* Total ave vector count per iteration of main loop. */
 always_inline f64
-vlib_last_vectors_per_main_loop_as_f64 (vlib_main_t * vm)
+vlib_internal_node_vector_rate (vlib_main_t * vm)
 {
-  u32 i = vlib_vector_input_stats_index (vm, -1);
-  u32 v = vm->vector_counts_per_main_loop[i];
-  return (f64) v / (f64) (1 << VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE);
+  u64 vectors;
+  u64 calls;
+
+  calls = vm->internal_node_calls - vm->internal_node_calls_last_clear;
+
+  if (PREDICT_FALSE (calls == 0))
+    return 0.0;
+
+  vectors = vm->internal_node_vectors - vm->internal_node_vectors_last_clear;
+
+  return (f64) vectors / (f64) calls;
 }
 
-/* Total ave vectors/node count per iteration of main loop. */
-always_inline f64
-vlib_last_vector_length_per_node (vlib_main_t * vm)
+always_inline void
+vlib_clear_internal_node_vector_rate (vlib_main_t * vm)
 {
-  u32 i = vlib_vector_input_stats_index (vm, -1);
-  u32 v = vm->vector_counts_per_main_loop[i];
-  u32 n = vm->node_counts_per_main_loop[i];
-  return n == 0 ? 0 : (f64) v / (f64) n;
+  vm->internal_node_calls_last_clear = vm->internal_node_calls;
+  vm->internal_node_vectors_last_clear = vm->internal_node_vectors;
 }
 
-extern u32 wraps;
-
 always_inline void
 vlib_increment_main_loop_counter (vlib_main_t * vm)
 {
-  u32 i, c, n, v, is_wrap;
-
-  c = vm->main_loop_count++;
-
-  is_wrap = (c & pow2_mask (VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE)) == 0;
-
-  if (is_wrap)
-    wraps++;
-
-  i = vlib_vector_input_stats_index (vm, /* delta */ is_wrap);
-
-  v = is_wrap ? 0 : vm->vector_counts_per_main_loop[i];
-  n = is_wrap ? 0 : vm->node_counts_per_main_loop[i];
-
-  v += vm->main_loop_vectors_processed;
-  n += vm->main_loop_nodes_processed;
-  vm->main_loop_vectors_processed = 0;
-  vm->main_loop_nodes_processed = 0;
-  vm->vector_counts_per_main_loop[i] = v;
-  vm->node_counts_per_main_loop[i] = n;
+  vm->main_loop_count++;
+  vm->internal_node_last_vectors_per_main_loop = 0;
 
   if (PREDICT_FALSE (vm->main_loop_exit_now))
     clib_longjmp (&vm->main_loop_exit, VLIB_MAIN_LOOP_EXIT_CLI);
 }
 
+always_inline u32
+vlib_last_vectors_per_main_loop (vlib_main_t * vm)
+{
+  return vm->internal_node_last_vectors_per_main_loop;
+}
+
 always_inline void vlib_set_queue_signal_callback
   (vlib_main_t * vm, void (*fp) (vlib_main_t *))
 {
diff --git a/src/vlib/node_cli.c b/src/vlib/node_cli.c
index 58b63c3..5f0617d 100644
--- a/src/vlib/node_cli.c
+++ b/src/vlib/node_cli.c
@@ -299,6 +299,13 @@
   return s;
 }
 
+f64 vlib_get_stat_segment_update_rate (void) __attribute__ ((weak));
+f64
+vlib_get_stat_segment_update_rate (void)
+{
+  return 1e70;
+}
+
 static clib_error_t *
 show_node_runtime (vlib_main_t * vm,
 		   unformat_input_t * input, vlib_cli_command_t * cmd)
@@ -308,8 +315,7 @@
   f64 time_now;
   u32 node_index;
   vlib_node_t ***node_dups = 0;
-  f64 *vectors_per_main_loop = 0;
-  f64 *last_vector_length_per_node = 0;
+  f64 *internal_node_vector_rates = 0;
 
   time_now = vlib_time_now (vm);
 
@@ -367,10 +373,8 @@
 	  nodes = vec_dup (nm->nodes);
 
 	  vec_add1 (node_dups, nodes);
-	  vec_add1 (vectors_per_main_loop,
-		    vlib_last_vectors_per_main_loop_as_f64 (stat_vm));
-	  vec_add1 (last_vector_length_per_node,
-		    vlib_last_vector_length_per_node (stat_vm));
+	  vec_add1 (internal_node_vector_rates,
+		    vlib_internal_node_vector_rate (stat_vm));
 	}
       vlib_worker_thread_barrier_release (vm);
 
@@ -434,15 +438,11 @@
 	  dt = time_now - nm->time_last_runtime_stats_clear;
 	  vlib_cli_output
 	    (vm,
-	     "Time %.1f, average vectors/node %.2f, last %d main loops %.2f per node %.2f"
-	     "\n  vector rates in %.4e, out %.4e, drop %.4e, punt %.4e",
+	     "Time %.1f, %f sec internal node vector rate %.2f \n"
+	     "  vector rates in %.4e, out %.4e, drop %.4e, punt %.4e",
 	     dt,
-	     (n_internal_calls > 0
-	      ? (f64) n_internal_vectors / (f64) n_internal_calls
-	      : 0),
-	     1 << VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE,
-	     vectors_per_main_loop[j],
-	     last_vector_length_per_node[j],
+	     vlib_get_stat_segment_update_rate (),
+	     internal_node_vector_rates[j],
 	     (f64) n_input / dt,
 	     (f64) n_output / dt, (f64) n_drop / dt, (f64) n_punt / dt);
 
@@ -465,8 +465,7 @@
 	}
       vec_free (stat_vms);
       vec_free (node_dups);
-      vec_free (vectors_per_main_loop);
-      vec_free (last_vector_length_per_node);
+      vec_free (internal_node_vector_rates);
     }
 
   return 0;
diff --git a/src/vlib/threads.c b/src/vlib/threads.c
index 7454d5a..07e1d79 100644
--- a/src/vlib/threads.c
+++ b/src/vlib/threads.c
@@ -1429,7 +1429,7 @@
   for (i = 1; i < vec_len (vlib_mains); i++)
     max_vector_rate =
       clib_max (max_vector_rate,
-		vlib_last_vectors_per_main_loop_as_f64 (vlib_mains[i]));
+		(f64) vlib_last_vectors_per_main_loop (vlib_mains[i]));
 
   vlib_worker_threads[0].barrier_sync_count++;