vlib: calculate per-worker loops/second metric

Use exponential smoothing. Each sample has a half-life of 1
second. reported_rate(t) = reported_rate(t-1) * K + rate(t)*(1-K)

Sample every 20ms, i.e. 50 samples per second
  K = exp (-1.0/20.0);
  K = 0.95;

Type: feature

Signed-off-by: Dave Barach <dave@barachs.net>
Change-Id: I9aea5dd5fecfaefffb78245316adb4bf62eb2bd4
diff --git a/src/vlib/main.c b/src/vlib/main.c
index 94218d3..f723d10 100644
--- a/src/vlib/main.c
+++ b/src/vlib/main.c
@@ -1683,6 +1683,7 @@
   vlib_thread_main_t *tm = vlib_get_thread_main ();
   uword i;
   u64 cpu_time_now;
+  f64 now;
   vlib_frame_queue_main_t *fqm;
   u32 *last_node_runtime_indices = 0;
   u32 frame_queue_check_counter = 0;
@@ -1926,6 +1927,33 @@
       /* Record time stamp in case there are no enabled nodes and above
          calls do not update time stamp. */
       cpu_time_now = clib_cpu_time_now ();
+      vm->loops_this_reporting_interval++;
+      now = clib_time_now_internal (&vm->clib_time, cpu_time_now);
+      /* Time to update loops_per_second? */
+      if (PREDICT_FALSE (now >= vm->loop_interval_end))
+	{
+	  /* Next sample ends in 20ms */
+	  if (vm->loop_interval_start)
+	    {
+	      f64 this_loops_per_second;
+
+	      this_loops_per_second =
+		((f64) vm->loops_this_reporting_interval) / (now -
+							     vm->loop_interval_start);
+
+	      vm->loops_per_second =
+		vm->loops_per_second * vm->damping_constant +
+		(1.0 - vm->damping_constant) * this_loops_per_second;
+	      if (vm->loops_per_second != 0.0)
+		vm->seconds_per_loop = 1.0 / vm->loops_per_second;
+	      else
+		vm->seconds_per_loop = 0.0;
+	    }
+	  /* New interval starts now, and ends in 20ms */
+	  vm->loop_interval_start = now;
+	  vm->loop_interval_end = now + 2e-4;
+	  vm->loops_this_reporting_interval = 0;
+	}
     }
 }
 
@@ -2122,6 +2150,16 @@
   if ((error = vlib_call_all_config_functions (vm, input, 0 /* is_early */ )))
     goto done;
 
+  /*
+   * Use exponential smoothing, with a half-life of 1 second
+   * reported_rate(t) = reported_rate(t-1) * K + rate(t)*(1-K)
+   *
+   * Sample every 20ms, aka 50 samples per second
+   * K = exp (-1.0/20.0);
+   * K = 0.95
+   */
+  vm->damping_constant = exp (-1.0 / 20.0);
+
   /* Sort per-thread init functions before we start threads */
   vlib_sort_init_exit_functions (&vm->worker_init_function_registrations);
 
diff --git a/src/vlib/main.h b/src/vlib/main.h
index af6539e..0e65c81 100644
--- a/src/vlib/main.h
+++ b/src/vlib/main.h
@@ -251,6 +251,14 @@
    */
   int need_vlib_worker_thread_node_runtime_update;
 
+  /* Dispatch loop time accounting */
+  u64 loops_this_reporting_interval;
+  f64 loop_interval_end;
+  f64 loop_interval_start;
+  f64 loops_per_second;
+  f64 seconds_per_loop;
+  f64 damping_constant;
+
   /*
    * Barrier epoch - Set to current time, each time barrier_sync or
    * barrier_release is called with zero recursion.
diff --git a/src/vlib/node_cli.c b/src/vlib/node_cli.c
index bf18702..1ba0575 100644
--- a/src/vlib/node_cli.c
+++ b/src/vlib/node_cli.c
@@ -442,11 +442,12 @@
 	  dt = time_now - nm->time_last_runtime_stats_clear;
 	  vlib_cli_output
 	    (vm,
-	     "Time %.1f, %f sec internal node vector rate %.2f \n"
+	     "Time %.1f, %f sec internal node vector rate %.2f loops/sec %.2f\n"
 	     "  vector rates in %.4e, out %.4e, drop %.4e, punt %.4e",
 	     dt,
 	     vlib_get_stat_segment_update_rate (),
 	     internal_node_vector_rates[j],
+	     stat_vm->loops_per_second,
 	     (f64) n_input / dt,
 	     (f64) n_output / dt, (f64) n_drop / dt, (f64) n_punt / dt);