perfmon: top down level 1 support

Add a perfmon node bundle that reports level 1 of the Top-down
Microarchitecture Analysis Method (TMAM) on ICX.

Type: improvement

Signed-off-by: Ray Kinsella <mdr@ashroe.eu>
Change-Id: I48a9a9ff6a72efc28eaf0cb11ef39fb62cebb126
diff --git a/src/plugins/perfmon/CMakeLists.txt b/src/plugins/perfmon/CMakeLists.txt
index c6113aa..af0bd3c 100644
--- a/src/plugins/perfmon/CMakeLists.txt
+++ b/src/plugins/perfmon/CMakeLists.txt
@@ -30,4 +30,5 @@
   intel/bundle/cache_hit_miss.c
   intel/bundle/branch_mispred.c
   intel/bundle/power_license.c
+  intel/bundle/topdown_metrics.c
 )
diff --git a/src/plugins/perfmon/cli.c b/src/plugins/perfmon/cli.c
index 39e0319..f667877 100644
--- a/src/plugins/perfmon/cli.c
+++ b/src/plugins/perfmon/cli.c
@@ -128,7 +128,8 @@
     vlib_cli_output (vm, "%U\n", format_perfmon_bundle, 0, 0);
 
   for (int i = 0; i < vec_len (vb); i++)
-    vlib_cli_output (vm, "%U\n", format_perfmon_bundle, vb[i], verbose);
+    if (!vb[i]->cpu_supports || vb[i]->cpu_supports ())
+      vlib_cli_output (vm, "%U\n", format_perfmon_bundle, vb[i], verbose);
 
   vec_free (vb);
   return 0;
@@ -290,7 +291,8 @@
   n_instances = vec_len (it->instances);
   vec_validate (readings, n_instances - 1);
 
-  for (int i = 0; i < n_instances; i++)
+  /* Only perform read() for THREAD or SYSTEM bundles */
+  for (int i = 0; i < n_instances && b->type != PERFMON_BUNDLE_TYPE_NODE; i++)
     {
       in = vec_elt_at_index (it->instances, i);
       r = vec_elt_at_index (readings, i);
@@ -340,6 +342,7 @@
 		table_set_cell_align (t, col, -1, TTAA_RIGHT);
 		table_set_cell_fg_color (t, col, -1, TTAC_CYAN);
 		clib_memcpy_fast (&ns, tr->node_stats + j, sizeof (ns));
+
 		for (int j = 0; j < n_row; j++)
 		  table_format_cell (t, col, j, "%U", b->format_fn, &ns, j);
 	      }
diff --git a/src/plugins/perfmon/dispatch_wrapper.c b/src/plugins/perfmon/dispatch_wrapper.c
index 4ae9c77..fe0a449 100644
--- a/src/plugins/perfmon/dispatch_wrapper.c
+++ b/src/plugins/perfmon/dispatch_wrapper.c
@@ -13,6 +13,7 @@
  * limitations under the License.
  */
 
+#include "vppinfra/string.h"
 #include <vnet/vnet.h>
 
 #include <vlibapi/api.h>
@@ -49,24 +50,32 @@
 }
 
 static_always_inline int
-perfmon_calc_pmc_index (perfmon_thread_runtime_t *tr, u8 i)
+perfmon_calc_mmap_offset (perfmon_thread_runtime_t *tr, u8 i)
 {
   return (int) (tr->mmap_pages[i]->index + tr->mmap_pages[i]->offset);
 }
 
+static_always_inline int
+perfmon_metric_index (perfmon_bundle_t *b, u8 i)
+{
+  return (int) (b->metrics[i]); /* i-th counter selector of the bundle */
+}
+
 uword
-perfmon_dispatch_wrapper (vlib_main_t *vm, vlib_node_runtime_t *node,
-			  vlib_frame_t *frame)
+perfmon_dispatch_wrapper_mmap (vlib_main_t *vm, vlib_node_runtime_t *node,
+			       vlib_frame_t *frame)
 {
   perfmon_main_t *pm = &perfmon_main;
   perfmon_thread_runtime_t *rt =
     vec_elt_at_index (pm->thread_runtimes, vm->thread_index);
   perfmon_node_stats_t *s =
     vec_elt_at_index (rt->node_stats, node->node_index);
+
   u8 n_events = rt->n_events;
-  int pmc_index[PERF_MAX_EVENTS];
+
   u64 before[PERF_MAX_EVENTS];
   u64 after[PERF_MAX_EVENTS];
+  int pmc_index[PERF_MAX_EVENTS];
   uword rv;
 
   clib_prefetch_load (s);
@@ -75,33 +84,87 @@
     {
     default:
     case 7:
-      pmc_index[6] = perfmon_calc_pmc_index (rt, 6);
+      pmc_index[6] = perfmon_calc_mmap_offset (rt, 6);
     case 6:
-      pmc_index[5] = perfmon_calc_pmc_index (rt, 5);
+      pmc_index[5] = perfmon_calc_mmap_offset (rt, 5);
     case 5:
-      pmc_index[4] = perfmon_calc_pmc_index (rt, 4);
+      pmc_index[4] = perfmon_calc_mmap_offset (rt, 4);
     case 4:
-      pmc_index[3] = perfmon_calc_pmc_index (rt, 3);
+      pmc_index[3] = perfmon_calc_mmap_offset (rt, 3);
     case 3:
-      pmc_index[2] = perfmon_calc_pmc_index (rt, 2);
+      pmc_index[2] = perfmon_calc_mmap_offset (rt, 2);
     case 2:
-      pmc_index[1] = perfmon_calc_pmc_index (rt, 1);
+      pmc_index[1] = perfmon_calc_mmap_offset (rt, 1);
     case 1:
-      pmc_index[0] = perfmon_calc_pmc_index (rt, 0);
+      pmc_index[0] = perfmon_calc_mmap_offset (rt, 0);
       break;
     }
 
-  perfmon_read_pmcs (before, pmc_index, n_events);
+  perfmon_read_pmcs (&before[0], pmc_index, n_events);
   rv = node->function (vm, node, frame);
-  perfmon_read_pmcs (after, pmc_index, n_events);
+  perfmon_read_pmcs (&after[0], pmc_index, n_events);
 
   if (rv == 0)
     return rv;
 
   s->n_calls += 1;
   s->n_packets += rv;
+
   for (int i = 0; i < n_events; i++)
     s->value[i] += after[i] - before[i];
 
   return rv;
 }
+
+uword
+perfmon_dispatch_wrapper_metrics (vlib_main_t *vm, vlib_node_runtime_t *node,
+				  vlib_frame_t *frame)
+{
+  perfmon_main_t *pm = &perfmon_main;
+  perfmon_thread_runtime_t *rt =
+    vec_elt_at_index (pm->thread_runtimes, vm->thread_index);
+  perfmon_node_stats_t *s =
+    vec_elt_at_index (rt->node_stats, node->node_index);
+  /* brackets node->function () with two reads of the bundle's metrics */
+  u8 n_events = rt->n_events;
+
+  u64 before[PERF_MAX_EVENTS];
+  int pmc_index[PERF_MAX_EVENTS];
+  uword rv;
+
+  clib_prefetch_load (s);
+  /* intentional fall-through: case N fills pmc_index[N - 1] down to [0] */
+  switch (n_events)
+    {
+    default:
+    case 7:
+      pmc_index[6] = perfmon_metric_index (rt->bundle, 6);
+    case 6:
+      pmc_index[5] = perfmon_metric_index (rt->bundle, 5);
+    case 5:
+      pmc_index[4] = perfmon_metric_index (rt->bundle, 4);
+    case 4:
+      pmc_index[3] = perfmon_metric_index (rt->bundle, 3);
+    case 3:
+      pmc_index[2] = perfmon_metric_index (rt->bundle, 2);
+    case 2:
+      pmc_index[1] = perfmon_metric_index (rt->bundle, 1);
+    case 1:
+      pmc_index[0] = perfmon_metric_index (rt->bundle, 0);
+      break;
+    }
+
+  perfmon_read_pmcs (&before[0], pmc_index, n_events);
+  rv = node->function (vm, node, frame);
+  /* t[0] <- values read before the node ran, t[1] <- values read after */
+  clib_memcpy_fast (&s->t[0].value[0], &before, sizeof (before));
+  perfmon_read_pmcs (&s->t[1].value[0], pmc_index, n_events);
+  /* the snapshots above are stored even when rv == 0 */
+  if (rv == 0)
+    return rv;
+
+  s->n_calls += 1;
+  s->n_packets += rv;
+
+  return rv;
+}
diff --git a/src/plugins/perfmon/intel/bundle/topdown_metrics.c b/src/plugins/perfmon/intel/bundle/topdown_metrics.c
new file mode 100644
index 0000000..386f384
--- /dev/null
+++ b/src/plugins/perfmon/intel/bundle/topdown_metrics.c
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2021 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <perfmon/perfmon.h>
+#include <perfmon/intel/core.h>
+
+/* i-th 8-bit field of the 64-bit value m */
+#define GET_METRIC(m, i)  (((m) >> ((i) * 8)) & 0xff)
+/* i-th 32-bit field of the 64-bit value m */
+#define GET_RATIO(m, i)	  (((m) >> ((i) * 32)) & 0xffffffff)
+#define RDPMC_FIXED_SLOTS (1 << 30) /* fixed slots */
+#define RDPMC_L1_METRICS  (1 << 29) /* l1 metric counters */
+
+#define FIXED_COUNTER_SLOTS	  3
+#define METRIC_COUNTER_TOPDOWN_L1 0
+
+/* byte position of each level-1 category within the metrics value */
+typedef enum
+{
+  TOPDOWN_E_METRIC_RETIRING = 0,
+  TOPDOWN_E_METRIC_BAD_SPEC,
+  TOPDOWN_E_METRIC_FE_BOUND,
+  TOPDOWN_E_METRIC_BE_BOUND,
+} topdown_lvl1_counters_t;
+
+/* position of each event of this bundle in the per-node value[] arrays */
+typedef enum
+{
+  TOPDOWN_SLOTS = 0,
+  TOPDOWN_METRICS,
+} topdown_lvl1_metrics_t;
+
+/*
+ * Percentage of the slots elapsed between the two snapshots held in ns
+ * (t[0] and t[1]) that is attributed to level-1 category e.
+ * Returns 0 when no slots elapsed between the snapshots.
+ */
+static_always_inline f32
+topdown_lvl1_parse_row (perfmon_node_stats_t *ns, topdown_lvl1_counters_t e)
+{
+  f64 slots_t0 =
+    ns->t[0].value[TOPDOWN_SLOTS] *
+    ((f64) GET_METRIC (ns->t[0].value[TOPDOWN_METRICS], e) / 0xff);
+  f64 slots_t1 =
+    ns->t[1].value[TOPDOWN_SLOTS] *
+    ((f64) GET_METRIC (ns->t[1].value[TOPDOWN_METRICS], e) / 0xff);
+  u64 slots_delta =
+    ns->t[1].value[TOPDOWN_SLOTS] - ns->t[0].value[TOPDOWN_SLOTS];
+
+  /* identical slot counts in both snapshots: avoid dividing by zero */
+  if (slots_delta == 0)
+    return 0;
+
+  return ((slots_t1 - slots_t0) / slots_delta) * 100;
+}
+
+/*
+ * Bundle format function; args: perfmon_node_stats_t *, int row.
+ * row selects the column, in the order given by .column_headers below.
+ */
+static u8 *
+format_topdown_lvl1 (u8 *s, va_list *args)
+{
+  perfmon_node_stats_t *st = va_arg (*args, perfmon_node_stats_t *);
+  u64 row = va_arg (*args, int);
+
+  switch (row)
+    {
+    case 0: /* not stalled = bad speculation + retiring */
+      s = format (s, "%f",
+		  topdown_lvl1_parse_row (st, TOPDOWN_E_METRIC_BAD_SPEC) +
+		    topdown_lvl1_parse_row (st, TOPDOWN_E_METRIC_RETIRING));
+      break;
+    case 1: /* stalled = backend bound + frontend bound */
+      s = format (s, "%f",
+		  topdown_lvl1_parse_row (st, TOPDOWN_E_METRIC_BE_BOUND) +
+		    topdown_lvl1_parse_row (st, TOPDOWN_E_METRIC_FE_BOUND));
+      break;
+    case 2:
+      s = format (s, "%f",
+		  topdown_lvl1_parse_row (st, TOPDOWN_E_METRIC_RETIRING));
+      break;
+    case 3:
+      s = format (s, "%f",
+		  topdown_lvl1_parse_row (st, TOPDOWN_E_METRIC_BAD_SPEC));
+      break;
+    case 4:
+      s = format (s, "%f",
+		  topdown_lvl1_parse_row (st, TOPDOWN_E_METRIC_FE_BOUND));
+      break;
+    case 5:
+      s = format (s, "%f",
+		  topdown_lvl1_parse_row (st, TOPDOWN_E_METRIC_BE_BOUND));
+      break;
+    }
+  return s;
+}
+
+PERFMON_REGISTER_BUNDLE (topdown_lvl1) = {
+  .name = "topdown-level1",
+  .description = "Top-down Microarchitecture Analysis Level 1",
+  .source = "intel-core",
+  .type = PERFMON_BUNDLE_TYPE_NODE,
+  .offset_type = PERFMON_OFFSET_TYPE_METRICS,
+  .events[0] = INTEL_CORE_E_TOPDOWN_SLOTS,
+  .events[1] = INTEL_CORE_E_TOPDOWN_L1_METRICS,
+  .metrics[0] = RDPMC_FIXED_SLOTS | FIXED_COUNTER_SLOTS,
+  .metrics[1] = RDPMC_L1_METRICS | METRIC_COUNTER_TOPDOWN_L1,
+  .n_events = 2,
+  .cpu_supports = clib_cpu_supports_avx512_bitalg,
+  .format_fn = format_topdown_lvl1,
+  .column_headers = PERFMON_STRINGS ("% NS", "% ST", "% NS.RT", "% NS.BS",
+				     "% ST.FE", "% ST.BE"),
+  .footer = "Not Stalled (NS),STalled (ST),\n"
+	    " Retiring (RT), Bad Speculation (BS),\n"
+	    " FrontEnd bound (FE), BackEnd bound (BE)",
+};
diff --git a/src/plugins/perfmon/intel/core.c b/src/plugins/perfmon/intel/core.c
index b4267e8..cef6f32 100644
--- a/src/plugins/perfmon/intel/core.c
+++ b/src/plugins/perfmon/intel/core.c
@@ -20,12 +20,12 @@
 
 static perfmon_event_t events[] = {
 #define _(event, umask, edge, any, inv, cmask, n, suffix, desc)               \
-  [INTEL_CORE_E_##n##_##suffix] = {                                           \
-    .type = PERF_TYPE_RAW,                                                    \
-    .config = PERF_INTEL_CODE (event, umask, edge, any, inv, cmask),          \
-    .name = #n "." #suffix,                                                   \
-    .description = desc,                                                      \
-  },
+  [INTEL_CORE_E_##n##_##suffix] = { .type = PERF_TYPE_RAW,                    \
+				    .config = PERF_INTEL_CODE (               \
+				      event, umask, edge, any, inv, cmask),   \
+				    .name = #n "." #suffix,                   \
+				    .description = desc,                      \
+				    .exclude_kernel = 1 },
 
   foreach_perf_intel_core_event
 #undef _
diff --git a/src/plugins/perfmon/intel/core.h b/src/plugins/perfmon/intel/core.h
index 01945d2..cd5c31b 100644
--- a/src/plugins/perfmon/intel/core.h
+++ b/src/plugins/perfmon/intel/core.h
@@ -27,6 +27,10 @@
      "Core cycles when the thread is not in halt state")                      \
   _ (0x00, 0x03, 0, 0, 0, 0x00, CPU_CLK_UNHALTED, REF_TSC,                    \
      "Reference cycles when the core is not in halt state.")                  \
+  _ (0x00, 0x04, 0, 0, 0, 0x00, TOPDOWN, SLOTS,                               \
+     "TMA slots available for an unhalted logical processor.")                \
+  _ (0x00, 0x80, 0, 0, 0, 0x00, TOPDOWN, L1_METRICS,                          \
+     "TMA slots metrics for an unhalted logical processor.")                  \
   _ (0x03, 0x02, 0, 0, 0, 0x00, LD_BLOCKS, STORE_FORWARD,                     \
      "Loads blocked due to overlapping with a preceding store that cannot be" \
      " forwarded.")                                                           \
diff --git a/src/plugins/perfmon/perfmon.c b/src/plugins/perfmon/perfmon.c
index f9402f8..46c8cf9 100644
--- a/src/plugins/perfmon/perfmon.c
+++ b/src/plugins/perfmon/perfmon.c
@@ -193,6 +193,7 @@
 	{
 	  perfmon_thread_runtime_t *rt;
 	  rt = vec_elt_at_index (pm->thread_runtimes, i);
+	  rt->bundle = b;
 	  rt->n_events = b->n_events;
 	  rt->n_nodes = n_nodes;
 	  vec_validate_aligned (rt->node_stats, n_nodes - 1,
@@ -235,11 +236,20 @@
 	  return clib_error_return_unix (0, "ioctl(PERF_EVENT_IOC_ENABLE)");
 	}
     }
-  if (pm->active_bundle->type == PERFMON_BUNDLE_TYPE_NODE)
+  if (b->type == PERFMON_BUNDLE_TYPE_NODE)
     {
+      /* zero-init so the ASSERT catches an offset type with no wrapper */
+      vlib_node_function_t *funcs[PERFMON_OFFSET_TYPE_MAX] = { 0 };
+#define _(type, pfunc) funcs[type] = pfunc;
+
+      foreach_permon_offset_type
+#undef _
+
+	ASSERT (funcs[b->offset_type]);
+
       for (int i = 0; i < vlib_get_n_threads (); i++)
 	vlib_node_set_dispatch_wrapper (vlib_get_main_by_index (i),
-					perfmon_dispatch_wrapper);
+					funcs[b->offset_type]);
     }
 
   pm->sample_time = vlib_time_now (vm);
diff --git a/src/plugins/perfmon/perfmon.h b/src/plugins/perfmon/perfmon.h
index f24a23b..3d0bc06 100644
--- a/src/plugins/perfmon/perfmon.h
+++ b/src/plugins/perfmon/perfmon.h
@@ -20,6 +20,7 @@
 #include <vppinfra/clib.h>
 #include <vppinfra/format.h>
 #include <vppinfra/error.h>
+#include <vppinfra/cpu.h>
 #include <vlib/vlib.h>
 
 #define PERF_MAX_EVENTS 7 /* 3 fixed and 4 programmable */
@@ -32,6 +33,13 @@
   PERFMON_BUNDLE_TYPE_SYSTEM,
 } perfmon_bundle_type_t;
 
+typedef enum
+{
+  PERFMON_OFFSET_TYPE_MMAP,    /* counter index comes from the mmap page */
+  PERFMON_OFFSET_TYPE_METRICS, /* counter index comes from bundle metrics[] */
+  PERFMON_OFFSET_TYPE_MAX,     /* number of offset types, not a real type */
+} perfmon_offset_type_t;
+
 typedef struct
 {
   u32 type_from_instance : 1;
@@ -61,7 +69,12 @@
 } perfmon_instance_type_t;
 
 struct perfmon_source;
-vlib_node_function_t perfmon_dispatch_wrapper;
+vlib_node_function_t perfmon_dispatch_wrapper_mmap;
+vlib_node_function_t perfmon_dispatch_wrapper_metrics;
+
+#define foreach_permon_offset_type                                            \
+  _ (PERFMON_OFFSET_TYPE_MMAP, perfmon_dispatch_wrapper_mmap)                 \
+  _ (PERFMON_OFFSET_TYPE_METRICS, perfmon_dispatch_wrapper_metrics)
 
 typedef clib_error_t *(perfmon_source_init_fn_t) (vlib_main_t *vm,
 						  struct perfmon_source *);
@@ -78,8 +91,10 @@
 } perfmon_source_t;
 
 struct perfmon_bundle;
+
 typedef clib_error_t *(perfmon_bundle_init_fn_t) (vlib_main_t *vm,
 						  struct perfmon_bundle *);
+
 typedef struct perfmon_bundle
 {
   char *name;
@@ -87,7 +102,9 @@
   char *source;
   char *footer;
   perfmon_bundle_type_t type;
+  perfmon_offset_type_t offset_type;
   u32 events[PERF_MAX_EVENTS];
+  u32 metrics[PERF_MAX_EVENTS];
   u32 n_events;
 
   perfmon_bundle_init_fn_t *init_fn;
@@ -95,6 +112,7 @@
   char **column_headers;
   char **raw_column_headers;
   format_function_t *format_fn;
+  clib_cpu_supports_func_t cpu_supports;
 
   /* do not set manually */
   perfmon_source_t *src;
@@ -114,7 +132,14 @@
   CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
   u64 n_calls;
   u64 n_packets;
-  u64 value[PERF_MAX_EVENTS];
+  union
+  {
+    struct
+    {
+      u64 value[PERF_MAX_EVENTS];
+    } t[2];
+    u64 value[PERF_MAX_EVENTS * 2];
+  };
 } perfmon_node_stats_t;
 
 typedef struct
@@ -122,6 +147,7 @@
   u8 n_events;
   u16 n_nodes;
   perfmon_node_stats_t *node_stats;
+  perfmon_bundle_t *bundle;
   struct perf_event_mmap_page *mmap_pages[PERF_MAX_EVENTS];
 } perfmon_thread_runtime_t;
 
diff --git a/src/vppinfra/cpu.h b/src/vppinfra/cpu.h
index 6925d58..8d2465b 100644
--- a/src/vppinfra/cpu.h
+++ b/src/vppinfra/cpu.h
@@ -185,6 +185,7 @@
   return 1;
 }
 
+typedef int (*clib_cpu_supports_func_t) (void); /* CPU feature check */
 
 #define _(flag, func, reg, bit) \
 static inline int							\