vlib: improve summary vector-rate statistics
Type: refactor
Signed-off-by: Dave Barach <dave@barachs.net>
Change-Id: I4b77879b0a84fdec3c1518a972cf003d5135222d
Signed-off-by: Ole Troan <ot@cisco.com>
diff --git a/src/vlib/main.c b/src/vlib/main.c
index f49790d..dc11128 100644
--- a/src/vlib/main.c
+++ b/src/vlib/main.c
@@ -52,8 +52,6 @@
speculative vector enqueues which overflow vector data in next frame. */
#define VLIB_FRAME_SIZE_ALLOC (VLIB_FRAME_SIZE + 4)
-u32 wraps;
-
always_inline u32
vlib_frame_bytes (u32 n_scalar_bytes, u32 n_vector_bytes)
{
@@ -1376,6 +1374,12 @@
VLIB_NODE_TYPE_INTERNAL,
VLIB_NODE_STATE_POLLING,
f, last_time_stamp);
+ /* Internal node vector-rate accounting, for summary stats: running
+ vector / call totals, plus the largest frame seen in this main loop */
+ vm->internal_node_vectors += f->n_vectors;
+ vm->internal_node_calls++;
+ vm->internal_node_last_vectors_per_main_loop =
+ (f->n_vectors > vm->internal_node_last_vectors_per_main_loop) ?
+ f->n_vectors : vm->internal_node_last_vectors_per_main_loop;
f->frame_flags &= ~(VLIB_FRAME_PENDING | VLIB_FRAME_NO_APPEND);
@@ -1915,7 +1919,6 @@
}
}
vlib_increment_main_loop_counter (vm);
-
/* Record time stamp in case there are no enabled nodes and above
calls do not update time stamp. */
cpu_time_now = clib_cpu_time_now ();
diff --git a/src/vlib/main.h b/src/vlib/main.h
index 2b50b4e..e230ddf 100644
--- a/src/vlib/main.h
+++ b/src/vlib/main.h
@@ -95,11 +95,14 @@
u32 main_loop_vectors_processed;
u32 main_loop_nodes_processed;
- /* Circular buffer of input node vector counts.
- Indexed by low bits of
- (main_loop_count >> VLIB_LOG2_INPUT_VECTORS_PER_MAIN_LOOP). */
- u32 vector_counts_per_main_loop[2];
- u32 node_counts_per_main_loop[2];
+ /* Internal node vector / call totals, and snapshots taken at last clear */
+ u64 internal_node_vectors;
+ u64 internal_node_calls;
+ u64 internal_node_vectors_last_clear;
+ u64 internal_node_calls_last_clear;
+
+ /* Largest internal node frame seen in the current main loop; zeroed each loop */
+ u32 internal_node_last_vectors_per_main_loop;
/* Main loop hw / sw performance counters */
void (**vlib_node_runtime_perf_counter_cbs) (struct vlib_main_t *,
@@ -323,75 +326,46 @@
vlib_panic_with_error (vm, 0);
}
-always_inline u32
-vlib_vector_input_stats_index (vlib_main_t * vm, word delta)
-{
- u32 i;
- i = vm->main_loop_count >> VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE;
- ASSERT (is_pow2 (ARRAY_LEN (vm->vector_counts_per_main_loop)));
- return (i + delta) & (ARRAY_LEN (vm->vector_counts_per_main_loop) - 1);
-}
-/* Estimate input rate based on previous
- 2^VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE
- samples. */
-always_inline u32
-vlib_last_vectors_per_main_loop (vlib_main_t * vm)
-{
- u32 i = vlib_vector_input_stats_index (vm, -1);
- u32 n = vm->vector_counts_per_main_loop[i];
- return n >> VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE;
-}
-
-/* Total ave vector count per iteration of main loop. */
+/* Average vectors per internal node call since the counters were last
+   cleared. Returns 0.0 when no calls have been recorded since then. */
always_inline f64
-vlib_last_vectors_per_main_loop_as_f64 (vlib_main_t * vm)
+vlib_internal_node_vector_rate (vlib_main_t * vm)
{
- u32 i = vlib_vector_input_stats_index (vm, -1);
- u32 v = vm->vector_counts_per_main_loop[i];
- return (f64) v / (f64) (1 << VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE);
+ u64 vectors;
+ u64 calls;
+
+ calls = vm->internal_node_calls - vm->internal_node_calls_last_clear;
+
+ /* No calls since the last clear: avoid dividing by zero */
+ if (PREDICT_FALSE (calls == 0))
+ return 0.0;
+
+ vectors = vm->internal_node_vectors - vm->internal_node_vectors_last_clear;
+
+ return (f64) vectors / (f64) calls;
}
-/* Total ave vectors/node count per iteration of main loop. */
-always_inline f64
-vlib_last_vector_length_per_node (vlib_main_t * vm)
+/* Restart the vector-rate measurement: snapshot the running call and
+   vector totals so that only later activity is counted. */
+always_inline void
+vlib_clear_internal_node_vector_rate (vlib_main_t * vm)
{
- u32 i = vlib_vector_input_stats_index (vm, -1);
- u32 v = vm->vector_counts_per_main_loop[i];
- u32 n = vm->node_counts_per_main_loop[i];
- return n == 0 ? 0 : (f64) v / (f64) n;
+ vm->internal_node_calls_last_clear = vm->internal_node_calls;
+ vm->internal_node_vectors_last_clear = vm->internal_node_vectors;
}
-extern u32 wraps;
-
+/* Bump the main loop count, reset the per-loop maximum frame size, and
+   longjmp out of the main loop if an exit has been requested. */
always_inline void
vlib_increment_main_loop_counter (vlib_main_t * vm)
{
- u32 i, c, n, v, is_wrap;
-
- c = vm->main_loop_count++;
-
- is_wrap = (c & pow2_mask (VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE)) == 0;
-
- if (is_wrap)
- wraps++;
-
- i = vlib_vector_input_stats_index (vm, /* delta */ is_wrap);
-
- v = is_wrap ? 0 : vm->vector_counts_per_main_loop[i];
- n = is_wrap ? 0 : vm->node_counts_per_main_loop[i];
-
- v += vm->main_loop_vectors_processed;
- n += vm->main_loop_nodes_processed;
- vm->main_loop_vectors_processed = 0;
- vm->main_loop_nodes_processed = 0;
- vm->vector_counts_per_main_loop[i] = v;
- vm->node_counts_per_main_loop[i] = n;
+ vm->main_loop_count++;
+ /* Start a fresh per-loop maximum for the next iteration */
+ vm->internal_node_last_vectors_per_main_loop = 0;
if (PREDICT_FALSE (vm->main_loop_exit_now))
clib_longjmp (&vm->main_loop_exit, VLIB_MAIN_LOOP_EXIT_CLI);
}
+/* Largest internal node frame size recorded so far in the current main
+   loop iteration. The field is zeroed once per loop, so this may read 0. */
+always_inline u32
+vlib_last_vectors_per_main_loop (vlib_main_t * vm)
+{
+ return vm->internal_node_last_vectors_per_main_loop;
+}
+
always_inline void vlib_set_queue_signal_callback
(vlib_main_t * vm, void (*fp) (vlib_main_t *))
{
diff --git a/src/vlib/node_cli.c b/src/vlib/node_cli.c
index 58b63c3..5f0617d 100644
--- a/src/vlib/node_cli.c
+++ b/src/vlib/node_cli.c
@@ -299,6 +299,13 @@
return s;
}
+/* Weak default: a non-weak definition of this symbol elsewhere in the
+   link replaces it. The value is printed as the "%f sec" interval in the
+   show_node_runtime summary line.
+   NOTE(review): 1e70 looks like a placeholder meaning "no real update
+   rate available" - confirm against the overriding definition. */
+f64 vlib_get_stat_segment_update_rate (void) __attribute__ ((weak));
+f64
+vlib_get_stat_segment_update_rate (void)
+{
+ return 1e70;
+}
+
static clib_error_t *
show_node_runtime (vlib_main_t * vm,
unformat_input_t * input, vlib_cli_command_t * cmd)
@@ -308,8 +315,7 @@
f64 time_now;
u32 node_index;
vlib_node_t ***node_dups = 0;
- f64 *vectors_per_main_loop = 0;
- f64 *last_vector_length_per_node = 0;
+ f64 *internal_node_vector_rates = 0;
time_now = vlib_time_now (vm);
@@ -367,10 +373,8 @@
nodes = vec_dup (nm->nodes);
vec_add1 (node_dups, nodes);
- vec_add1 (vectors_per_main_loop,
- vlib_last_vectors_per_main_loop_as_f64 (stat_vm));
- vec_add1 (last_vector_length_per_node,
- vlib_last_vector_length_per_node (stat_vm));
+ vec_add1 (internal_node_vector_rates,
+ vlib_internal_node_vector_rate (stat_vm));
}
vlib_worker_thread_barrier_release (vm);
@@ -434,15 +438,11 @@
dt = time_now - nm->time_last_runtime_stats_clear;
vlib_cli_output
(vm,
- "Time %.1f, average vectors/node %.2f, last %d main loops %.2f per node %.2f"
- "\n vector rates in %.4e, out %.4e, drop %.4e, punt %.4e",
+ "Time %.1f, %f sec internal node vector rate %.2f \n"
+ " vector rates in %.4e, out %.4e, drop %.4e, punt %.4e",
dt,
- (n_internal_calls > 0
- ? (f64) n_internal_vectors / (f64) n_internal_calls
- : 0),
- 1 << VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE,
- vectors_per_main_loop[j],
- last_vector_length_per_node[j],
+ vlib_get_stat_segment_update_rate (),
+ internal_node_vector_rates[j],
(f64) n_input / dt,
(f64) n_output / dt, (f64) n_drop / dt, (f64) n_punt / dt);
@@ -465,8 +465,7 @@
}
vec_free (stat_vms);
vec_free (node_dups);
- vec_free (vectors_per_main_loop);
- vec_free (last_vector_length_per_node);
+ vec_free (internal_node_vector_rates);
}
return 0;
diff --git a/src/vlib/threads.c b/src/vlib/threads.c
index 7454d5a..07e1d79 100644
--- a/src/vlib/threads.c
+++ b/src/vlib/threads.c
@@ -1429,7 +1429,7 @@
for (i = 1; i < vec_len (vlib_mains); i++)
max_vector_rate =
clib_max (max_vector_rate,
- vlib_last_vectors_per_main_loop_as_f64 (vlib_mains[i]));
+ (f64) vlib_last_vectors_per_main_loop (vlib_mains[i]));
vlib_worker_threads[0].barrier_sync_count++;