perfmon: refactor perf metric support

Refactoring perf metric support to remove branching on bundle type in
the dispatch wrapper. This change includes caching the rdpmc index at
perfmon_start(), so that the mmap_page.index doesn't need to be looked
up each time. It also excludes the effects of mmap_page.index.

This patch prepares the path for bundles that support general, fixed and
metrics counters simultaneously.

Type: refactor

Signed-off-by: Ray Kinsella <mdr@ashroe.eu>
Change-Id: I9c5b4917bd02fea960e546e8558452c4362eabc4
diff --git a/src/plugins/perfmon/dispatch_wrapper.c b/src/plugins/perfmon/dispatch_wrapper.c
index 5e53417..64df7f5 100644
--- a/src/plugins/perfmon/dispatch_wrapper.c
+++ b/src/plugins/perfmon/dispatch_wrapper.c
@@ -25,77 +25,41 @@
 
 #include <perfmon/perfmon.h>
 
-static_always_inline u64
-perfmon_mmap_read_pmc1 (const struct perf_event_mmap_page *mmap_page)
-{
-  u64 count;
-  u32 seq;
-
-  /* See documentation in /usr/include/linux/perf_event.h, for more details
-   * but the 2 main important things are:
-   *  1) if seq != mmap_page->lock, it means the kernel is currently updating
-   *     the user page and we need to read it again
-   *  2) if idx == 0, it means the perf event is currently turned off and we
-   *     just need to read the kernel-updated 'offset', otherwise we must also
-   *     add the current hw value (hence rdmpc) */
-  do
-    {
-      u32 idx;
-
-      seq = mmap_page->lock;
-      CLIB_COMPILER_BARRIER ();
-
-      idx = mmap_page->index;
-      count = mmap_page->offset;
-      if (idx)
-	count += _rdpmc (idx - 1);
-
-      CLIB_COMPILER_BARRIER ();
-    }
-  while (mmap_page->lock != seq);
-
-  return count;
-}
-
 static_always_inline void
-perfmon_mmap_read_pmcs (u64 *counters,
-			struct perf_event_mmap_page **mmap_pages,
-			u8 n_counters)
+perfmon_read_pmcs (u64 *counters, u32 *indexes, u8 n_counters)
 {
   switch (n_counters)
     {
-    default:
     case 12:
-      counters[11] = perfmon_mmap_read_pmc1 (mmap_pages[11]);
+      counters[11] = _rdpmc (indexes[11] - 1);
     case 11:
-      counters[10] = perfmon_mmap_read_pmc1 (mmap_pages[10]);
+      counters[10] = _rdpmc (indexes[10] - 1);
     case 10:
-      counters[9] = perfmon_mmap_read_pmc1 (mmap_pages[9]);
+      counters[9] = _rdpmc (indexes[9] - 1);
     case 9:
-      counters[8] = perfmon_mmap_read_pmc1 (mmap_pages[8]);
+      counters[8] = _rdpmc (indexes[8] - 1);
     case 8:
-      counters[7] = perfmon_mmap_read_pmc1 (mmap_pages[7]);
+      counters[7] = _rdpmc (indexes[7] - 1);
     case 7:
-      counters[6] = perfmon_mmap_read_pmc1 (mmap_pages[6]);
+      counters[6] = _rdpmc (indexes[6] - 1);
     case 6:
-      counters[5] = perfmon_mmap_read_pmc1 (mmap_pages[5]);
+      counters[5] = _rdpmc (indexes[5] - 1);
     case 5:
-      counters[4] = perfmon_mmap_read_pmc1 (mmap_pages[4]);
+      counters[4] = _rdpmc (indexes[4] - 1);
     case 4:
-      counters[3] = perfmon_mmap_read_pmc1 (mmap_pages[3]);
+      counters[3] = _rdpmc (indexes[3] - 1);
     case 3:
-      counters[2] = perfmon_mmap_read_pmc1 (mmap_pages[2]);
+      counters[2] = _rdpmc (indexes[2] - 1);
     case 2:
-      counters[1] = perfmon_mmap_read_pmc1 (mmap_pages[1]);
+      counters[1] = _rdpmc (indexes[1] - 1);
     case 1:
-      counters[0] = perfmon_mmap_read_pmc1 (mmap_pages[0]);
-      break;
+      counters[0] = _rdpmc (indexes[0] - 1);
     }
 }
 
 uword
-perfmon_dispatch_wrapper_mmap (vlib_main_t *vm, vlib_node_runtime_t *node,
-			       vlib_frame_t *frame)
+perfmon_dispatch_wrapper (vlib_main_t *vm, vlib_node_runtime_t *node,
+			  vlib_frame_t *frame)
 {
   perfmon_main_t *pm = &perfmon_main;
   perfmon_thread_runtime_t *rt =
@@ -105,15 +69,17 @@
 
   u8 n_events = rt->n_events;
 
-  u64 before[PERF_MAX_EVENTS];
-  u64 after[PERF_MAX_EVENTS];
+  struct
+  {
+    u64 t[2][PERF_MAX_EVENTS];
+  } samples;
   uword rv;
 
   clib_prefetch_load (s);
 
-  perfmon_mmap_read_pmcs (&before[0], rt->mmap_pages, n_events);
+  perfmon_read_pmcs (&samples.t[0][0], &rt->indexes[0], n_events);
   rv = node->function (vm, node, frame);
-  perfmon_mmap_read_pmcs (&after[0], rt->mmap_pages, n_events);
+  perfmon_read_pmcs (&samples.t[1][0], &rt->indexes[0], n_events);
 
   if (rv == 0)
     return rv;
@@ -122,110 +88,17 @@
   s->n_packets += rv;
 
   for (int i = 0; i < n_events; i++)
-    s->value[i] += after[i] - before[i];
-
-  return rv;
-}
-
-static_always_inline void
-perfmon_metric_read_pmcs (u64 *counters, int *pmc_index, u8 n_counters)
-{
-  switch (n_counters)
     {
-    default:
-    case 12:
-      counters[11] = _rdpmc (pmc_index[11]);
-    case 11:
-      counters[10] = _rdpmc (pmc_index[10]);
-    case 10:
-      counters[9] = _rdpmc (pmc_index[9]);
-    case 9:
-      counters[8] = _rdpmc (pmc_index[8]);
-    case 8:
-      counters[7] = _rdpmc (pmc_index[7]);
-    case 7:
-      counters[6] = _rdpmc (pmc_index[6]);
-    case 6:
-      counters[5] = _rdpmc (pmc_index[5]);
-    case 5:
-      counters[4] = _rdpmc (pmc_index[4]);
-    case 4:
-      counters[3] = _rdpmc (pmc_index[3]);
-    case 3:
-      counters[2] = _rdpmc (pmc_index[2]);
-    case 2:
-      counters[1] = _rdpmc (pmc_index[1]);
-    case 1:
-      counters[0] = _rdpmc (pmc_index[0]);
-      break;
+      if (!(rt->preserve_samples & 1 << i))
+	{
+	  s->value[i] += samples.t[1][i] - samples.t[0][i];
+	}
+      else
+	{
+	  s->t[0].value[i] = samples.t[0][i];
+	  s->t[1].value[i] = samples.t[1][i];
+	}
     }
-}
-
-static_always_inline int
-perfmon_metric_index (perfmon_bundle_t *b, u8 i)
-{
-  return (int) (b->metrics[i]);
-}
-
-uword
-perfmon_dispatch_wrapper_metrics (vlib_main_t *vm, vlib_node_runtime_t *node,
-				  vlib_frame_t *frame)
-{
-  perfmon_main_t *pm = &perfmon_main;
-  perfmon_thread_runtime_t *rt =
-    vec_elt_at_index (pm->thread_runtimes, vm->thread_index);
-  perfmon_node_stats_t *s =
-    vec_elt_at_index (rt->node_stats, node->node_index);
-
-  u8 n_events = rt->n_events;
-
-  u64 before[PERF_MAX_EVENTS];
-  int pmc_index[PERF_MAX_EVENTS];
-  uword rv;
-
-  clib_prefetch_load (s);
-
-  switch (n_events)
-    {
-    default:
-    case 12:
-      pmc_index[11] = perfmon_metric_index (rt->bundle, 11);
-    case 11:
-      pmc_index[10] = perfmon_metric_index (rt->bundle, 10);
-    case 10:
-      pmc_index[9] = perfmon_metric_index (rt->bundle, 9);
-    case 9:
-      pmc_index[8] = perfmon_metric_index (rt->bundle, 8);
-    case 8:
-      pmc_index[7] = perfmon_metric_index (rt->bundle, 7);
-    case 7:
-      pmc_index[6] = perfmon_metric_index (rt->bundle, 6);
-    case 6:
-      pmc_index[5] = perfmon_metric_index (rt->bundle, 5);
-    case 5:
-      pmc_index[4] = perfmon_metric_index (rt->bundle, 4);
-    case 4:
-      pmc_index[3] = perfmon_metric_index (rt->bundle, 3);
-    case 3:
-      pmc_index[2] = perfmon_metric_index (rt->bundle, 2);
-    case 2:
-      pmc_index[1] = perfmon_metric_index (rt->bundle, 1);
-    case 1:
-      pmc_index[0] = perfmon_metric_index (rt->bundle, 0);
-      break;
-    }
-
-  perfmon_metric_read_pmcs (&before[0], pmc_index, n_events);
-  rv = node->function (vm, node, frame);
-
-  clib_memcpy_fast (&s->t[0].value[0], &before, sizeof (before));
-  perfmon_metric_read_pmcs (&s->t[1].value[0], pmc_index, n_events);
-
-  if (rv == 0)
-    return rv;
-
-  s->n_calls += 1;
-  s->n_packets += rv;
 
   return rv;
 }
diff --git a/src/plugins/perfmon/intel/bundle/topdown_metrics.c b/src/plugins/perfmon/intel/bundle/topdown_metrics.c
index af1c108..a28c4e7 100644
--- a/src/plugins/perfmon/intel/bundle/topdown_metrics.c
+++ b/src/plugins/perfmon/intel/bundle/topdown_metrics.c
@@ -122,16 +122,13 @@
   .name = "topdown-level1",
   .description = "Top-down Microarchitecture Analysis Level 1",
   .source = "intel-core",
-  .offset_type = PERFMON_OFFSET_TYPE_METRICS,
   .events[0] = INTEL_CORE_E_TOPDOWN_SLOTS,
   .events[1] = INTEL_CORE_E_TOPDOWN_L1_RETIRING_METRIC,
   .events[2] = INTEL_CORE_E_TOPDOWN_L1_BAD_SPEC_METRIC,
   .events[3] = INTEL_CORE_E_TOPDOWN_L1_FE_BOUND_METRIC,
   .events[4] = INTEL_CORE_E_TOPDOWN_L1_BE_BOUND_METRIC,
   .n_events = 5,
-  .metrics[0] = RDPMC_SLOTS | FIXED_COUNTER_SLOTS,
-  .metrics[1] = RDPMC_METRICS | METRIC_COUNTER_TOPDOWN_L1_L2,
-  .n_metrics = 2,
+  .preserve_samples = 0x1F,
   .cpu_supports = topdown_lvl1_cpu_supports,
   .n_cpu_supports = ARRAY_LEN (topdown_lvl1_cpu_supports),
   .format_fn = format_topdown_lvl1,
@@ -251,7 +248,6 @@
   .name = "topdown-level2",
   .description = "Top-down Microarchitecture Analysis Level 2",
   .source = "intel-core",
-  .offset_type = PERFMON_OFFSET_TYPE_METRICS,
   .events[0] = INTEL_CORE_E_TOPDOWN_SLOTS,
   .events[1] = INTEL_CORE_E_TOPDOWN_L1_RETIRING_METRIC,
   .events[2] = INTEL_CORE_E_TOPDOWN_L1_BAD_SPEC_METRIC,
@@ -262,9 +258,7 @@
   .events[7] = INTEL_CORE_E_TOPDOWN_L2_FETCHLAT_METRIC,
   .events[8] = INTEL_CORE_E_TOPDOWN_L2_MEMBOUND_METRIC,
   .n_events = 9,
-  .metrics[0] = RDPMC_SLOTS | FIXED_COUNTER_SLOTS,
-  .metrics[1] = RDPMC_METRICS | METRIC_COUNTER_TOPDOWN_L1_L2,
-  .n_metrics = 2,
+  .preserve_samples = 0x1FF,
   .cpu_supports = topdown_lvl2_cpu_supports,
   .n_cpu_supports = ARRAY_LEN (topdown_lvl2_cpu_supports),
   .format_fn = format_topdown_lvl2,
diff --git a/src/plugins/perfmon/perfmon.c b/src/plugins/perfmon/perfmon.c
index 799659d..0917892 100644
--- a/src/plugins/perfmon/perfmon.c
+++ b/src/plugins/perfmon/perfmon.c
@@ -196,6 +196,7 @@
 	  rt->bundle = b;
 	  rt->n_events = b->n_events;
 	  rt->n_nodes = n_nodes;
+	  rt->preserve_samples = b->preserve_samples;
 	  vec_validate_aligned (rt->node_stats, n_nodes - 1,
 				CLIB_CACHE_LINE_BYTES);
 	}
@@ -212,6 +213,33 @@
   return err;
 }
 
+static_always_inline u32
+perfmon_mmap_read_index (const struct perf_event_mmap_page *mmap_page)
+{
+  u32 idx;
+  u32 seq;
+
+  /* See documentation in /usr/include/linux/perf_event.h, for more details
+   * but the 2 main important things are:
+   *  1) if seq != mmap_page->lock, it means the kernel is currently updating
+   *     the user page and we need to read it again
+   *  2) if idx == 0, it means the perf event is currently turned off and
+   *     there is no hw counter to read; otherwise the counter is read by
+   *     passing idx - 1 to rdpmc (only the index is fetched here) */
+  do
+    {
+      seq = mmap_page->lock;
+      CLIB_COMPILER_BARRIER ();
+
+      idx = mmap_page->index;
+
+      CLIB_COMPILER_BARRIER ();
+    }
+  while (mmap_page->lock != seq);
+
+  return idx;
+}
+
 clib_error_t *
 perfmon_start (vlib_main_t *vm, perfmon_bundle_t *b)
 {
@@ -238,20 +266,28 @@
     }
   if (b->active_type == PERFMON_BUNDLE_TYPE_NODE)
     {
+      for (int i = 0; i < vec_len (pm->thread_runtimes); i++)
+	{
+	  perfmon_thread_runtime_t *tr;
+	  tr = vec_elt_at_index (pm->thread_runtimes, i);
 
-      vlib_node_function_t *funcs[PERFMON_OFFSET_TYPE_MAX];
-#define _(type, pfunc) funcs[type] = pfunc;
+	  for (int j = 0; j < b->n_events; j++)
+	    {
+	      tr->indexes[j] = perfmon_mmap_read_index (tr->mmap_pages[j]);
 
-      foreach_permon_offset_type
-#undef _
-
-	ASSERT (funcs[b->offset_type]);
+	      /* if a zero index is returned generate error */
+	      if (!tr->indexes[j])
+		{
+		  perfmon_reset (vm);
+		  return clib_error_return (0, "invalid rdpmc index");
+		}
+	    }
+	}
 
       for (int i = 0; i < vlib_get_n_threads (); i++)
 	vlib_node_set_dispatch_wrapper (vlib_get_main_by_index (i),
-					funcs[b->offset_type]);
+					perfmon_dispatch_wrapper);
     }
-
   pm->sample_time = vlib_time_now (vm);
   pm->is_running = 1;
 
diff --git a/src/plugins/perfmon/perfmon.h b/src/plugins/perfmon/perfmon.h
index 0b46e52..5003e59 100644
--- a/src/plugins/perfmon/perfmon.h
+++ b/src/plugins/perfmon/perfmon.h
@@ -49,13 +49,6 @@
 
 } perfmon_bundle_type_flag_t;
 
-typedef enum
-{
-  PERFMON_OFFSET_TYPE_MMAP,
-  PERFMON_OFFSET_TYPE_METRICS,
-  PERFMON_OFFSET_TYPE_MAX,
-} perfmon_offset_type_t;
-
 typedef struct
 {
   u32 type_from_instance : 1;
@@ -85,12 +78,7 @@
 } perfmon_instance_type_t;
 
 struct perfmon_source;
-vlib_node_function_t perfmon_dispatch_wrapper_mmap;
-vlib_node_function_t perfmon_dispatch_wrapper_metrics;
-
-#define foreach_permon_offset_type                                            \
-  _ (PERFMON_OFFSET_TYPE_MMAP, perfmon_dispatch_wrapper_mmap)                 \
-  _ (PERFMON_OFFSET_TYPE_METRICS, perfmon_dispatch_wrapper_metrics)
+vlib_node_function_t perfmon_dispatch_wrapper;
 
 typedef clib_error_t *(perfmon_source_init_fn_t) (vlib_main_t *vm,
 						  struct perfmon_source *);
@@ -131,12 +119,10 @@
   };
   perfmon_bundle_type_t active_type;
 
-  perfmon_offset_type_t offset_type;
   u32 events[PERF_MAX_EVENTS];
   u32 n_events;
 
-  u32 metrics[PERF_MAX_EVENTS];
-  u32 n_metrics;
+  u16 preserve_samples;
 
   perfmon_cpu_supports_t *cpu_supports;
   u32 n_cpu_supports;
@@ -180,6 +166,8 @@
   u16 n_nodes;
   perfmon_node_stats_t *node_stats;
   perfmon_bundle_t *bundle;
+  u32 indexes[PERF_MAX_EVENTS];
+  u16 preserve_samples;
   struct perf_event_mmap_page *mmap_pages[PERF_MAX_EVENTS];
 } perfmon_thread_runtime_t;