acl: implement counters

Implement per-ACL-number counters in the stats segment.
They are created when the ACL is created, and they are
incremented in the dataplane using the new inline function,
which takes the packet size as an extra parameter.
Counting in the shared segment adds a noticeable overhead,
so also add an API to turn the counters on and off.

Type: feature

Change-Id: I8af7b0c31a3d986b68089eb52452aed45df66c7b
Signed-off-by: Andrew Yourtchenko <ayourtch@gmail.com>
diff --git a/src/plugins/acl/acl.api b/src/plugins/acl/acl.api
index cde46c4..0c230c5 100644
--- a/src/plugins/acl/acl.api
+++ b/src/plugins/acl/acl.api
@@ -547,3 +547,15 @@
   u16 whitelist[count];
 };
 
+/** \brief Enable or disable incrementing ACL counters in stats segment by interface processing
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param enable - true to have interface ACL processing increment the counters, false to stop
+*/
+
+autoreply define acl_stats_intf_counters_enable
+{
+  u32 client_index;
+  u32 context;
+  bool enable;
+};
diff --git a/src/plugins/acl/acl.c b/src/plugins/acl/acl.c
index 9ebb349..24dd53b 100644
--- a/src/plugins/acl/acl.c
+++ b/src/plugins/acl/acl.c
@@ -86,7 +86,8 @@
 _(MACIP_ACL_INTERFACE_LIST_DUMP, macip_acl_interface_list_dump) \
 _(ACL_INTERFACE_SET_ETYPE_WHITELIST, acl_interface_set_etype_whitelist) \
 _(ACL_INTERFACE_ETYPE_WHITELIST_DUMP, acl_interface_etype_whitelist_dump) \
-_(ACL_PLUGIN_GET_CONN_TABLE_MAX_ENTRIES,acl_plugin_get_conn_table_max_entries)
+_(ACL_PLUGIN_GET_CONN_TABLE_MAX_ENTRIES,acl_plugin_get_conn_table_max_entries) \
+_(ACL_STATS_INTF_COUNTERS_ENABLE, acl_stats_intf_counters_enable)
 
 
 /* *INDENT-OFF* */
@@ -373,6 +374,33 @@
 }
 
 
+/* Make sure ACL #acl_index has zeroed stats counters, one per rule (+1). */
+static void
+validate_and_reset_acl_counters (acl_main_t * am, u32 acl_index)
+{
+  u32 i, old_len;
+
+  /* counters are set as vectors [acl#] pointing to vectors of [acl rule] */
+  acl_plugin_counter_lock (am);
+  old_len = vec_len (am->combined_acl_counters);
+  vec_validate (am->combined_acl_counters, acl_index);
+
+  /* name only the newly added entries: the stats path is set once per index */
+  for (i = old_len; i < vec_len (am->combined_acl_counters); i++)
+    {
+      am->combined_acl_counters[i].name = 0;
+      am->combined_acl_counters[i].stat_segment_name = (void *)
+	format (0, "/acl/%d/matches%c", i, 0);
+    }
+
+  /* Always (re)size and zero the counters of the ACL being added/replaced,
+     using its own rule count; validating index rule_count (one past the
+     last rule) guarantees at least one counter even for an empty ACL. */
+  vlib_validate_combined_counter (&am->combined_acl_counters[acl_index],
+				  vec_len (am->acls[acl_index].rules));
+  vlib_clear_combined_counters (&am->combined_acl_counters[acl_index]);
+  acl_plugin_counter_unlock (am);
+}
 
 static int
 acl_add_list (u32 count, vl_api_acl_rule_t rules[],
@@ -465,6 +493,11 @@
       policy_notify_acl_change (am, *acl_list_index);
     }
 
+  /* stats segment expects global heap, so restore it temporarily */
+  clib_mem_set_heap (oldheap);
+  validate_and_reset_acl_counters (am, *acl_list_index);
+  oldheap = acl_set_heap (am);
+
   /* notify the lookup contexts about the ACL changes */
   acl_plugin_lookup_context_notify_acl_change (*acl_list_index);
   clib_mem_set_heap (oldheap);
@@ -662,6 +695,16 @@
 }
 
 static int
+acl_stats_intf_counters_enable_disable (acl_main_t * am, int enable_disable)
+{
+  /* latch the flag that the interface ACL dataplane node checks per match */
+  am->interface_acl_counters_enabled = enable_disable;
+
+  /* storing the flag cannot fail */
+  return 0;
+}
+
+static int
 acl_interface_inout_enable_disable (acl_main_t * am, u32 sw_if_index,
 				    int is_input, int enable_disable)
 {
@@ -1893,6 +1936,21 @@
   REPLY_MACRO (VL_API_ACL_DEL_REPLY);
 }
 
+
+static void
+  vl_api_acl_stats_intf_counters_enable_t_handler
+  (vl_api_acl_stats_intf_counters_enable_t * mp)
+{
+  acl_main_t *am = &acl_main;
+  vl_api_acl_stats_intf_counters_enable_reply_t *rmp;
+  int rv;
+  /* 'enable' is a bool flag in acl.api, not a 32-bit integer: no byte swap */
+  rv = acl_stats_intf_counters_enable_disable (am, mp->enable);
+  /* reply with the id generated for this message's own autoreply (uses rv) */
+  REPLY_MACRO (VL_API_ACL_STATS_INTF_COUNTERS_ENABLE_REPLY);
+}
+
+
 static void
 vl_api_acl_interface_add_del_t_handler (vl_api_acl_interface_add_del_t * mp)
 {
@@ -3390,6 +3448,8 @@
       show_applied_info = 1;
       show_bihash = 1;
     }
+  vlib_cli_output (vm, "Stats counters enabled for interface ACLs: %d",
+		   acl_main.interface_acl_counters_enabled);
   if (show_mask_type)
     acl_plugin_show_tables_mask_type ();
   if (show_acl_hash_info)
@@ -3659,6 +3719,10 @@
     acl_plugin.register_user_module ("interface ACL", "sw_if_index",
 				     "is_input");
 
+  am->acl_counter_lock = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES,
+						 CLIB_CACHE_LINE_BYTES);
+  am->acl_counter_lock[0] = 0;	/* should be no need */
+
   return error;
 }
 
diff --git a/src/plugins/acl/acl.h b/src/plugins/acl/acl.h
index 5365091..229ee05 100644
--- a/src/plugins/acl/acl.h
+++ b/src/plugins/acl/acl.h
@@ -27,6 +27,7 @@
 #include <vppinfra/bihash_48_8.h>
 #include <vppinfra/bihash_40_8.h>
 #include <vppinfra/bihash_16_8.h>
+#include <vlib/counter.h>
 
 #include "types.h"
 #include "fa_node.h"
@@ -34,7 +35,7 @@
 #include "lookup_context.h"
 
 #define  ACL_PLUGIN_VERSION_MAJOR 1
-#define  ACL_PLUGIN_VERSION_MINOR 3
+#define  ACL_PLUGIN_VERSION_MINOR 4
 
 #define UDP_SESSION_IDLE_TIMEOUT_SEC 600
 #define TCP_SESSION_IDLE_TIMEOUT_SEC (3600*24)
@@ -314,6 +315,11 @@
   vnet_main_t * vnet_main;
   /* logging */
   vlib_log_class_t log_default;
+  /* acl counters exposed via stats segment */
+  volatile u32 *acl_counter_lock;
+  vlib_combined_counter_main_t *combined_acl_counters;
+  /* enable/disable ACL counters for interface processing */
+  u32 interface_acl_counters_enabled;
 } acl_main_t;
 
 #define acl_log_err(...) \
@@ -326,6 +332,21 @@
   vlib_log(VLIB_LOG_LEVEL_INFO, acl_main.log_default, __VA_ARGS__)
 
 
+static inline void
+acl_plugin_counter_lock (acl_main_t * am)
+{
+  volatile u32 *lock_word = am->acl_counter_lock;
+  while (lock_word && clib_atomic_test_and_set (lock_word))
+    ;				/* busy-wait; skipped if no lock is allocated */
+}
+
+static inline void acl_plugin_counter_unlock (acl_main_t * am)
+{
+  volatile u32 *lock_word = am->acl_counter_lock;
+  if (lock_word)		/* nothing to release if no lock is allocated */
+    clib_atomic_release (lock_word);
+}
+
 
 #define foreach_acl_eh                                          \
    _(HOPBYHOP , 0  , "IPv6ExtHdrHopByHop")                      \
diff --git a/src/plugins/acl/dataplane_node.c b/src/plugins/acl/dataplane_node.c
index 0bdcc85..c738f66 100644
--- a/src/plugins/acl/dataplane_node.c
+++ b/src/plugins/acl/dataplane_node.c
@@ -565,6 +565,11 @@
   u32 *sw_if_index;
   fa_5tuple_t *fa_5tuple;
   u64 *hash;
+  /* for the delayed counters */
+  u32 saved_matched_acl_index = 0;
+  u32 saved_matched_ace_index = 0;
+  u32 saved_packet_count = 0;
+  u32 saved_byte_count = 0;
 
   from = vlib_frame_vector_args (frame);
   error_node = vlib_node_get_runtime (vm, node->node_index);
@@ -690,13 +695,34 @@
 		  am->output_lc_index_by_sw_if_index[sw_if_index[0]];
 
 	      action = 0;	/* deny by default */
-	      acl_plugin_match_5tuple_inline (am, lc_index0,
-					      (fa_5tuple_opaque_t *) &
-					      fa_5tuple[0], is_ip6, &action,
-					      &match_acl_pos,
-					      &match_acl_in_index,
-					      &match_rule_index,
-					      &trace_bitmap);
+	      int is_match = acl_plugin_match_5tuple_inline (am, lc_index0,
+							     (fa_5tuple_opaque_t *) & fa_5tuple[0], is_ip6,
+							     &action,
+							     &match_acl_pos,
+							     &match_acl_in_index,
+							     &match_rule_index,
+							     &trace_bitmap);
+	      if (PREDICT_FALSE
+		  (is_match && am->interface_acl_counters_enabled))
+		{
+		  u32 buf_len = vlib_buffer_length_in_chain (vm, b[0]);
+		  vlib_increment_combined_counter (am->combined_acl_counters +
+						   saved_matched_acl_index,
+						   thread_index,
+						   saved_matched_ace_index,
+						   saved_packet_count,
+						   saved_byte_count);
+		  saved_matched_acl_index = match_acl_in_index;
+		  saved_matched_ace_index = match_rule_index;
+		  saved_packet_count = 1;
+		  saved_byte_count = buf_len;
+		  /* prefetch the counter that we are going to increment */
+		  vlib_prefetch_combined_counter (am->combined_acl_counters +
+						  saved_matched_acl_index,
+						  thread_index,
+						  saved_matched_ace_index);
+		}
+
 	      b[0]->error = error_node->errors[action];
 
 	      if (1 == action)
@@ -778,6 +804,16 @@
 
   vlib_buffer_enqueue_to_next (vm, node, from, pw->nexts, frame->n_vectors);
 
+  /*
+   * if we had an acl match then we have a counter to increment.
+   * else it is all zeroes, so this will be harmless.
+   */
+  vlib_increment_combined_counter (am->combined_acl_counters +
+				   saved_matched_acl_index,
+				   thread_index,
+				   saved_matched_ace_index,
+				   saved_packet_count, saved_byte_count);
+
   vlib_node_increment_counter (vm, node->node_index,
 			       ACL_FA_ERROR_ACL_CHECK, frame->n_vectors);
   vlib_node_increment_counter (vm, node->node_index,
diff --git a/src/plugins/acl/public_inlines.h b/src/plugins/acl/public_inlines.h
index 03b6401..6b69bce 100644
--- a/src/plugins/acl/public_inlines.h
+++ b/src/plugins/acl/public_inlines.h
@@ -682,5 +682,46 @@
 }
 
 
+always_inline int
+acl_plugin_match_5tuple_inline_and_count (void *p_acl_main, u32 lc_index,
+                                           fa_5tuple_opaque_t * pkt_5tuple,
+                                           int is_ip6, u8 * r_action,
+                                           u32 * r_acl_pos_p,
+                                           u32 * r_acl_match_p,
+                                           u32 * r_rule_match_p,
+                                           u32 * trace_bitmap,
+					   u32 packet_size)
+{
+  /* Match pkt_5tuple in lookup context lc_index; on a hit, count one packet
+     of packet_size bytes against the matched ACL/rule. Returns the result. */
+  acl_main_t *am = p_acl_main;
+  fa_5tuple_t *tuple = (fa_5tuple_t *) pkt_5tuple;
+  int matched;
+
+  tuple->pkt.lc_index = lc_index;
+  /*
+   * tuplemerge does not take fragments into account, so the hash lookup
+   * is used only for packets that are not non-first fragments; those
+   * fragments, and everything when hash matching is off, match linearly.
+   */
+  if (PREDICT_TRUE (am->use_hash_acl_matching)
+      && !PREDICT_FALSE (tuple->pkt.is_nonfirst_fragment))
+    matched = hash_multi_acl_match_5tuple (p_acl_main, lc_index, tuple, is_ip6, r_action,
+                                 r_acl_pos_p, r_acl_match_p, r_rule_match_p, trace_bitmap);
+  else
+    matched = linear_multi_acl_match_5tuple (p_acl_main, lc_index, tuple, is_ip6, r_action,
+                                 r_acl_pos_p, r_acl_match_p, r_rule_match_p, trace_bitmap);
+
+  if (PREDICT_TRUE (matched))
+    {
+      u16 thread_index = os_get_thread_index ();
+      vlib_increment_combined_counter (am->combined_acl_counters + *r_acl_match_p,
+                                       thread_index, *r_rule_match_p, 1, packet_size);
+    }
+  return matched;
+}
+
+
+
 
 #endif
diff --git a/test/test_acl_plugin.py b/test/test_acl_plugin.py
index 1ca74d1..eca0231 100644
--- a/test/test_acl_plugin.py
+++ b/test/test_acl_plugin.py
@@ -253,7 +253,7 @@
             self.vapi.acl_interface_set_acl_list(sw_if_index=i.sw_if_index,
                                                  n_input=1,
                                                  acls=[reply.acl_index])
-        return
+        return reply.acl_index
 
     def apply_rules_to(self, rules, tag=b'', sw_if_index=0xFFFFFFFF):
         reply = self.vapi.acl_add_replace(acl_index=4294967295, r=rules,
@@ -264,7 +264,7 @@
         self.vapi.acl_interface_set_acl_list(sw_if_index=sw_if_index,
                                              n_input=1,
                                              acls=[reply.acl_index])
-        return
+        return reply.acl_index
 
     def etype_whitelist(self, whitelist, n_input):
         # Apply whitelists on all the interfaces
@@ -647,10 +647,27 @@
                      0, self.proto[self.IP][self.TCP]))
 
         # Apply rules
-        self.apply_rules(rules, b"permit per-flow")
+        acl_idx = self.apply_rules(rules, b"permit per-flow")
+
+        # enable counters
+        reply = self.vapi.papi.acl_stats_intf_counters_enable(enable=1)
 
         # Traffic should still pass
         self.run_verify_test(self.IP, self.IPV4, -1)
+
+        matches = self.statistics.get_counter('/acl/%d/matches' % acl_idx)
+        self.logger.info("stat segment counters: %s" % repr(matches))
+        cli = "show acl-plugin acl"
+        self.logger.info(self.vapi.ppcli(cli))
+        cli = "show acl-plugin tables"
+        self.logger.info(self.vapi.ppcli(cli))
+
+        total_hits = matches[0][0]['packets'] + matches[0][1]['packets']
+        self.assertEqual(total_hits, 64)
+
+        # disable counters
+        reply = self.vapi.papi.acl_stats_intf_counters_enable(enable=0)
+
         self.logger.info("ACLP_TEST_FINISH_0002")
 
     def test_0003_acl_deny_apply(self):
@@ -666,13 +683,26 @@
                                       self.PORTS_ALL, 0))
 
         # Apply rules
-        self.apply_rules(rules, b"deny per-flow;permit all")
+        acl_idx = self.apply_rules(rules, b"deny per-flow;permit all")
+
+        # enable counters
+        reply = self.vapi.papi.acl_stats_intf_counters_enable(enable=1)
 
         # Traffic should not pass
         self.run_verify_negat_test(self.IP, self.IPV4,
                                    self.proto[self.IP][self.UDP])
+
+        matches = self.statistics.get_counter('/acl/%d/matches' % acl_idx)
+        self.logger.info("stat segment counters: %s" % repr(matches))
+        cli = "show acl-plugin acl"
+        self.logger.info(self.vapi.ppcli(cli))
+        cli = "show acl-plugin tables"
+        self.logger.info(self.vapi.ppcli(cli))
+        self.assertEqual(matches[0][0]['packets'], 64)
+        # disable counters
+        reply = self.vapi.papi.acl_stats_intf_counters_enable(enable=0)
         self.logger.info("ACLP_TEST_FINISH_0003")
-        # self.assertEqual(1, 0)
+        # self.assertEqual(1, 0)
 
     def test_0004_vpp624_permit_icmpv4(self):
         """ VPP_624 permit ICMPv4