ethernet: fix DMAC check and skip unnecessary ones (VPP-1868)

Fix and optimize the DMAC check in the ethernet-input node to make use
of NICs or drivers which support L3 DMAC-filtering mode, so that the
DMAC check can be safely bypassed for interfaces/sub-interfaces in L3
mode.
Checking whether an interface is in L3 DMAC-filtering state, in order
to avoid the DMAC check, requires the following:
a) Fix the interface driver init sequence for devices which support L3
   DMAC-filtering so that they indicate this capability and initialize
   the interface to L3 DMAC-filtering state.
b) Fix the ethernet_set_flags() function and the associated
   flag_change() callback functions registered by various drivers in
   the interface infra to provide proper L3 DMAC-filtering status.
Maintain an interface/sub-interface L3 config count so that DMAC checks
can be bypassed if L3 forwarding is not set up on any main interface or
sub-interface.

Type: fix
Ticket: VPP-1868

Signed-off-by: John Lo <loj@cisco.com>
Change-Id: I204d90459c13e9e486cfcba4e64e3d479bc9f2ae
diff --git a/src/vnet/devices/af_packet/af_packet.c b/src/vnet/devices/af_packet/af_packet.c
index 22ddf4e..455e76b 100644
--- a/src/vnet/devices/af_packet/af_packet.c
+++ b/src/vnet/devices/af_packet/af_packet.c
@@ -67,7 +67,7 @@
   af_packet_if_t *apif =
     pool_elt_at_index (apm->interfaces, hi->dev_instance);
 
-  if (ETHERNET_INTERFACE_FLAG_MTU == (flags & ETHERNET_INTERFACE_FLAG_MTU))
+  if (flags == ETHERNET_INTERFACE_FLAG_MTU)
     {
       s = format (0, "/sys/class/net/%s/mtu%c", apif->host_if_name, 0);
 
diff --git a/src/vnet/ethernet/ethernet.h b/src/vnet/ethernet/ethernet.h
index 2f544bb..afb9329 100644
--- a/src/vnet/ethernet/ethernet.h
+++ b/src/vnet/ethernet/ethernet.h
@@ -137,15 +137,22 @@
 {
   u32 flags;
 
-  /* Accept all packets (promiscuous mode). */
-#define ETHERNET_INTERFACE_FLAG_ACCEPT_ALL (1 << 0)
-#define ETHERNET_INTERFACE_FLAG_CONFIG_PROMISC(flags) \
-  (((flags) & ~ETHERNET_INTERFACE_FLAG_ACCEPT_ALL) == 0)
+  /* Top 16 bits for status and bottom 16 bits for set operation */
+#define ETHERNET_INTERFACE_FLAGS_STATUS_MASK  (0xffff0000)
+#define ETHERNET_INTERFACE_FLAGS_SET_OPN_MASK (0x0000ffff)
+
+  /* Interface driver/hw is in L3/non-promiscuous mode so packet DMAC
+     would already be filtered */
+#define ETHERNET_INTERFACE_FLAG_STATUS_L3 (1 << 16)
+
+  /* Set interface to default L3 mode */
+#define ETHERNET_INTERFACE_FLAG_DEFAULT_L3 0
+
+  /* Set interface to accept all packets (promiscuous mode). */
+#define ETHERNET_INTERFACE_FLAG_ACCEPT_ALL 1
 
   /* Change MTU on interface from hw interface structure */
-#define ETHERNET_INTERFACE_FLAG_MTU (1 << 1)
-#define ETHERNET_INTERFACE_FLAG_CONFIG_MTU(flags) \
-  ((flags) & ETHERNET_INTERFACE_FLAG_MTU)
+#define ETHERNET_INTERFACE_FLAG_MTU        2
 
   /* Callback, e.g. to turn on/off promiscuous mode */
   ethernet_flag_change_function_t *flag_change;
diff --git a/src/vnet/ethernet/interface.c b/src/vnet/ethernet/interface.c
index 629f190..7b11fda 100644
--- a/src/vnet/ethernet/interface.c
+++ b/src/vnet/ethernet/interface.c
@@ -428,16 +428,43 @@
   ethernet_main_t *em = &ethernet_main;
   vnet_hw_interface_t *hi;
   ethernet_interface_t *ei;
+  u32 opn_flags = flags & ETHERNET_INTERFACE_FLAGS_SET_OPN_MASK;
 
   hi = vnet_get_hw_interface (vnm, hw_if_index);
 
   ASSERT (hi->hw_class_index == ethernet_hw_interface_class.index);
 
   ei = pool_elt_at_index (em->interfaces, hi->hw_instance);
-  ei->flags = flags;
+
+  /* preserve status bits and update last set operation bits */
+  ei->flags = (ei->flags & ETHERNET_INTERFACE_FLAGS_STATUS_MASK) | opn_flags;
+
   if (ei->flag_change)
-    return ei->flag_change (vnm, hi, flags);
-  return (u32) ~ 0;
+    {
+      switch (opn_flags)
+	{
+	case ETHERNET_INTERFACE_FLAG_DEFAULT_L3:
+	  if (hi->flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_MAC_FILTER)
+	    {
+	      if (ei->flag_change (vnm, hi, opn_flags) != ~0)
+		{
+		  ei->flags |= ETHERNET_INTERFACE_FLAG_STATUS_L3;
+		  return 0;
+		}
+	      ei->flags &= ~ETHERNET_INTERFACE_FLAG_STATUS_L3;
+	      return ~0;
+	    }
+	  /* fall through */
+	case ETHERNET_INTERFACE_FLAG_ACCEPT_ALL:
+	  ei->flags &= ~ETHERNET_INTERFACE_FLAG_STATUS_L3;
+	  /* fall through */
+	case ETHERNET_INTERFACE_FLAG_MTU:
+	  return ei->flag_change (vnm, hi, opn_flags);
+	default:
+	  return ~0;
+	}
+    }
+  return ~0;
 }
 
 /**
diff --git a/src/vnet/ethernet/node.c b/src/vnet/ethernet/node.c
index 3c4330e..e26c361 100644
--- a/src/vnet/ethernet/node.c
+++ b/src/vnet/ethernet/node.c
@@ -220,9 +220,10 @@
   if (matched)
     {
       // Perform L3 my-mac filter
-      // A unicast packet arriving on an L3 interface must have a dmac matching the interface mac.
-      // This is required for promiscuous mode, else we will forward packets we aren't supposed to.
-      if (!(*is_l2))
+      // A unicast packet arriving on an L3 interface must have a dmac
+      // matching the interface mac. If interface has STATUS_L3 bit set
+      // mac filter is already done.
+      if (!(*is_l2 || (hi->flags & ETHERNET_INTERFACE_FLAG_STATUS_L3)))
 	{
 	  u64 dmacs[2];
 	  u8 dmacs_bad[2];
@@ -241,7 +242,6 @@
 	    ethernet_input_inline_dmac_check (hi, dmacs, dmacs_bad,
 					      1 /* n_packets */ , ei0,
 					      0 /* have_sec_dmac */ );
-
 	  if (dmacs_bad[0])
 	    *error0 = ETHERNET_ERROR_L3_MAC_MISMATCH;
 	}
@@ -1085,29 +1085,35 @@
   subint_config_t *subint0 = &intf0->untagged_subint;
 
   int main_is_l3 = (subint0->flags & SUBINT_CONFIG_L2) == 0;
-  int promisc = (ei->flags & ETHERNET_INTERFACE_FLAG_ACCEPT_ALL) != 0;
+  int int_is_l3 = ei->flags & ETHERNET_INTERFACE_FLAG_STATUS_L3;
 
   if (main_is_l3)
     {
-      /* main interface is L3, we dont expect tagged packets and interface
-         is not in promisc node, so we dont't need to check DMAC */
-      int is_l3 = 1;
-
-      if (promisc == 0)
-	eth_input_process_frame (vm, node, hi, from, n_pkts, is_l3,
-				 ip4_cksum_ok, 0);
+      if (int_is_l3 ||		/* DMAC filter already done by NIC */
+	  ((hi->l2_if_count != 0) && (hi->l3_if_count == 0)))
+	{			/* All L2 usage - DMAC check not needed */
+	  eth_input_process_frame (vm, node, hi, from, n_pkts,
+				   /*is_l3 */ 1, ip4_cksum_ok, 0);
+	}
       else
-	/* subinterfaces and promisc mode so DMAC check is needed */
-	eth_input_process_frame (vm, node, hi, from, n_pkts, is_l3,
-				 ip4_cksum_ok, 1);
+	{			/* DMAC check needed for L3 */
+	  eth_input_process_frame (vm, node, hi, from, n_pkts,
+				   /*is_l3 */ 1, ip4_cksum_ok, 1);
+	}
       return;
     }
   else
     {
-      /* untagged packets are treated as L2 */
-      int is_l3 = 0;
-      eth_input_process_frame (vm, node, hi, from, n_pkts, is_l3,
-			       ip4_cksum_ok, 1);
+      if (hi->l3_if_count == 0)
+	{			/* All L2 usage - DMAC check not needed */
+	  eth_input_process_frame (vm, node, hi, from, n_pkts,
+				   /*is_l3 */ 0, ip4_cksum_ok, 0);
+	}
+      else
+	{			/* DMAC check needed for L3 */
+	  eth_input_process_frame (vm, node, hi, from, n_pkts,
+				   /*is_l3 */ 0, ip4_cksum_ok, 1);
+	}
       return;
     }
 }
@@ -1325,6 +1331,9 @@
 		}
 	      else
 		{
+		  if (hi->flags & ETHERNET_INTERFACE_FLAG_STATUS_L3)
+		    goto skip_dmac_check01;
+
 		  dmacs[0] = *(u64 *) e0;
 		  dmacs[1] = *(u64 *) e1;
 
@@ -1346,6 +1355,7 @@
 		  if (dmacs_bad[1])
 		    error1 = ETHERNET_ERROR_L3_MAC_MISMATCH;
 
+		skip_dmac_check01:
 		  vlib_buffer_advance (b0, sizeof (ethernet_header_t));
 		  determine_next_node (em, variant, 0, type0, b0,
 				       &error0, &next0);
@@ -1563,6 +1573,9 @@
 		}
 	      else
 		{
+		  if (hi->flags & ETHERNET_INTERFACE_FLAG_STATUS_L3)
+		    goto skip_dmac_check0;
+
 		  dmacs[0] = *(u64 *) e0;
 
 		  if (ei && vec_len (ei->secondary_addrs))
@@ -1581,6 +1594,7 @@
 		  if (dmacs_bad[0])
 		    error0 = ETHERNET_ERROR_L3_MAC_MISMATCH;
 
+		skip_dmac_check0:
 		  vlib_buffer_advance (b0, sizeof (ethernet_header_t));
 		  determine_next_node (em, variant, 0, type0, b0,
 				       &error0, &next0);
diff --git a/src/vnet/interface.h b/src/vnet/interface.h
index ee64a81..5f4fd06 100644
--- a/src/vnet/interface.h
+++ b/src/vnet/interface.h
@@ -503,6 +503,9 @@
 
   /* non-broadcast multiple access */
   VNET_HW_INTERFACE_FLAG_NBMA = (1 << 19),
+
+  /* hw/driver can switch between l2-promisc and l3-dmac-filter modes */
+  VNET_HW_INTERFACE_FLAG_SUPPORTS_MAC_FILTER = (1 << 20),
 } vnet_hw_interface_flags_t;
 
 #define VNET_HW_INTERFACE_FLAG_DUPLEX_SHIFT 1
@@ -570,8 +573,9 @@
   /* Hash table mapping sub interface id to sw_if_index. */
   uword *sub_interface_sw_if_index_by_id;
 
-  /* Count of number of L2 subinterfaces */
+  /* Count of number of L2 and L3 subinterfaces */
   u32 l2_if_count;
+  u32 l3_if_count;
 
   /* Bonded interface info -
      0       - not a bonded interface nor a slave
diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c
index 231912b..b76da0a 100644
--- a/src/vnet/ip/ip4_forward.c
+++ b/src/vnet/ip/ip4_forward.c
@@ -640,6 +640,8 @@
 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
 {
   ip4_main_t *im = &ip4_main;
+  vnet_main_t *vnm = vnet_get_main ();
+  vnet_hw_interface_t *hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
 
   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
 
@@ -664,6 +666,11 @@
   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
 			       sw_if_index, !is_enable, 0, 0);
 
+  if (is_enable)
+    hi->l3_if_count++;
+  else if (hi->l3_if_count)
+    hi->l3_if_count--;
+
   {
     ip4_enable_disable_interface_callback_t *cb;
     vec_foreach (cb, im->enable_disable_interface_callbacks)
diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c
index 207d968..e4d8fc0 100644
--- a/src/vnet/ip/ip6_forward.c
+++ b/src/vnet/ip/ip6_forward.c
@@ -239,6 +239,8 @@
 ip6_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
 {
   ip6_main_t *im = &ip6_main;
+  vnet_main_t *vnm = vnet_get_main ();
+  vnet_hw_interface_t *hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
 
   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
 
@@ -264,6 +266,11 @@
 
   vnet_feature_enable_disable ("ip6-multicast", "ip6-not-enabled",
 			       sw_if_index, !is_enable, 0, 0);
+
+  if (is_enable)
+    hi->l3_if_count++;
+  else if (hi->l3_if_count)
+    hi->l3_if_count--;
 }
 
 /* get first interface address */
diff --git a/src/vnet/mpls/interface.c b/src/vnet/mpls/interface.c
index 46d80f0..0742312 100644
--- a/src/vnet/mpls/interface.c
+++ b/src/vnet/mpls/interface.c
@@ -42,6 +42,8 @@
                                   u8 is_api)
 {
   fib_node_index_t lfib_index;
+  vnet_main_t *vnm = vnet_get_main ();
+  vnet_hw_interface_t *hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
 
   vec_validate_init_empty (mm->mpls_enabled_by_sw_if_index, sw_if_index, 0);
 
@@ -79,6 +81,11 @@
   vnet_feature_enable_disable ("mpls-input", "mpls-not-enabled",
                                sw_if_index, !is_enable, 0, 0);
 
+  if (is_enable)
+    hi->l3_if_count++;
+  else if (hi->l3_if_count)
+    hi->l3_if_count--;
+
   return (0);
 }