Improve dpdk-input node to handle drivers not setting mbuf PTYPE

For drivers that do not provide dpdk rte_mbuf PTYPE information,
check ethernet header Etype to accelerate IP4 and IP6 forwarding
path. Update packet trace for dpdk-input node to provide more info
from DPDK rte_mbuf offload flags and packet types.

Change-Id: I207158797a155305314d002726c0af97b8cb0eb3
Signed-off-by: John Lo <loj@cisco.com>
diff --git a/vnet/vnet/devices/dpdk/dpdk.h b/vnet/vnet/devices/dpdk/dpdk.h
index 771c91f..3669bc5 100644
--- a/vnet/vnet/devices/dpdk/dpdk.h
+++ b/vnet/vnet/devices/dpdk/dpdk.h
@@ -189,13 +189,13 @@
   i8 cpu_socket;
 
   u16 flags;
-#define DPDK_DEVICE_FLAG_ADMIN_UP       (1 << 0)
-#define DPDK_DEVICE_FLAG_PROMISC        (1 << 1)
-#define DPDK_DEVICE_FLAG_PMD            (1 << 2)
-#define DPDK_DEVICE_FLAG_MAYBE_MULTISEG (1 << 3)
-
-#define DPDK_DEVICE_FLAG_HAVE_SUBIF     (1 << 5)
-#define DPDK_DEVICE_FLAG_HQOS           (1 << 6)
+#define DPDK_DEVICE_FLAG_ADMIN_UP           (1 << 0)
+#define DPDK_DEVICE_FLAG_PROMISC            (1 << 1)
+#define DPDK_DEVICE_FLAG_PMD                (1 << 2)
+#define DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE (1 << 3)
+#define DPDK_DEVICE_FLAG_MAYBE_MULTISEG     (1 << 4)
+#define DPDK_DEVICE_FLAG_HAVE_SUBIF         (1 << 5)
+#define DPDK_DEVICE_FLAG_HQOS               (1 << 6)
 
   u16 nb_tx_desc;
     CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
diff --git a/vnet/vnet/devices/dpdk/format.c b/vnet/vnet/devices/dpdk/format.c
index 212f109..0b8a693 100644
--- a/vnet/vnet/devices/dpdk/format.c
+++ b/vnet/vnet/devices/dpdk/format.c
@@ -79,20 +79,39 @@
   _(DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM, "outer-ipv4-cksum") \
   _(DEV_TX_OFFLOAD_QINQ_INSERT, "qinq-insert")
 
+#if RTE_VERSION < RTE_VERSION_NUM(16, 11, 0, 0)
+/* New ol_flags bits added in DPDK-16.11 */
+#define PKT_RX_IP_CKSUM_GOOD    (1ULL << 7)
+#define PKT_RX_L4_CKSUM_GOOD    (1ULL << 8)
+#endif
+
 #define foreach_dpdk_pkt_rx_offload_flag                                \
   _ (PKT_RX_VLAN_PKT, "RX packet is a 802.1q VLAN packet")              \
   _ (PKT_RX_RSS_HASH, "RX packet with RSS hash result")                 \
   _ (PKT_RX_FDIR, "RX packet with FDIR infos")                          \
   _ (PKT_RX_L4_CKSUM_BAD, "L4 cksum of RX pkt. is not OK")              \
   _ (PKT_RX_IP_CKSUM_BAD, "IP cksum of RX pkt. is not OK")              \
+  _ (PKT_RX_VLAN_STRIPPED, "RX packet VLAN tag stripped")               \
+  _ (PKT_RX_IP_CKSUM_GOOD, "IP cksum of RX pkt. is valid")              \
+  _ (PKT_RX_L4_CKSUM_GOOD, "L4 cksum of RX pkt. is valid")              \
   _ (PKT_RX_IEEE1588_PTP, "RX IEEE1588 L2 Ethernet PT Packet")          \
-  _ (PKT_RX_IEEE1588_TMST, "RX IEEE1588 L2/L4 timestamped packet")
+  _ (PKT_RX_IEEE1588_TMST, "RX IEEE1588 L2/L4 timestamped packet")      \
+  _ (PKT_RX_QINQ_STRIPPED, "RX packet QinQ tags stripped")
+
+#if RTE_VERSION < RTE_VERSION_NUM(16, 11, 0, 0)
+/* PTYPE added in DPDK-16.11 */
+#define RTE_PTYPE_L2_ETHER_VLAN             0x00000006
+#define RTE_PTYPE_L2_ETHER_QINQ             0x00000007
+#endif
 
 #define foreach_dpdk_pkt_type                                           \
   _ (L2, ETHER, "Ethernet packet")                                      \
   _ (L2, ETHER_TIMESYNC, "Ethernet packet for time sync")               \
   _ (L2, ETHER_ARP, "ARP packet")                                       \
   _ (L2, ETHER_LLDP, "LLDP (Link Layer Discovery Protocol) packet")     \
+  _ (L2, ETHER_NSH, "NSH (Network Service Header) packet")              \
+  _ (L2, ETHER_VLAN, "VLAN packet")                                     \
+  _ (L2, ETHER_QINQ, "QinQ packet")                                     \
   _ (L3, IPV4, "IPv4 packet without extension headers")                 \
   _ (L3, IPV4_EXT, "IPv4 packet with extension headers")                \
   _ (L3, IPV4_EXT_UNKNOWN, "IPv4 packet with or without extension headers") \
@@ -642,7 +661,8 @@
     s = format (s, "\n%U%U", format_white_space, indent,
 		format_dpdk_pkt_offload_flags, &mb->ol_flags);
 
-  if (mb->ol_flags & PKT_RX_VLAN_PKT)
+  if ((mb->ol_flags & PKT_RX_VLAN_PKT) &&
+      ((mb->ol_flags & (PKT_RX_VLAN_STRIPPED | PKT_RX_QINQ_STRIPPED)) == 0))
     {
       ethernet_vlan_header_tv_t *vlan_hdr =
 	((ethernet_vlan_header_tv_t *) & (eth_hdr->type));
diff --git a/vnet/vnet/devices/dpdk/init.c b/vnet/vnet/devices/dpdk/init.c
index c9e7dc8..0448c15 100755
--- a/vnet/vnet/devices/dpdk/init.c
+++ b/vnet/vnet/devices/dpdk/init.c
@@ -473,6 +473,7 @@
 	      /* Cisco VIC */
 	    case VNET_DPDK_PMD_ENIC:
 	      rte_eth_link_get_nowait (i, &l);
+	      xd->flags |= DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE;
 	      xd->nb_rx_desc = DPDK_NB_RX_DESC_ENIC;
 	      if (l.link_speed == 40000)
 		{
@@ -489,6 +490,7 @@
 	      /* Intel Fortville */
 	    case VNET_DPDK_PMD_I40E:
 	    case VNET_DPDK_PMD_I40EVF:
+	      xd->flags |= DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE;
 	      xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G;
 	      xd->nb_rx_desc = DPDK_NB_RX_DESC_40GE;
 	      xd->nb_tx_desc = DPDK_NB_TX_DESC_40GE;
@@ -561,6 +563,7 @@
 	      break;
 
 	    case VNET_DPDK_PMD_BOND:
+	      xd->flags |= DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE;
 	      xd->port_type = VNET_DPDK_PORT_TYPE_ETH_BOND;
 	      break;
 
diff --git a/vnet/vnet/devices/dpdk/node.c b/vnet/vnet/devices/dpdk/node.c
index 4e66877..e541cdb 100644
--- a/vnet/vnet/devices/dpdk/node.c
+++ b/vnet/vnet/devices/dpdk/node.c
@@ -34,14 +34,61 @@
 #undef _
 };
 
-#if RTE_VERSION >= RTE_VERSION_NUM(16, 11, 0, 0)
+always_inline int
+vlib_buffer_is_ip4 (vlib_buffer_t * b)
+{
+  ethernet_header_t *h = (ethernet_header_t *) b->data;
+  return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP4));
+}
+
+always_inline int
+vlib_buffer_is_ip6 (vlib_buffer_t * b)
+{
+  ethernet_header_t *h = (ethernet_header_t *) b->data;
+  return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP6));
+}
+
+always_inline int
+vlib_buffer_is_mpls (vlib_buffer_t * b)
+{
+  ethernet_header_t *h = (ethernet_header_t *) b->data;
+  return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS_UNICAST));
+}
+
+#if RTE_VERSION < RTE_VERSION_NUM(16, 11, 0, 0)
+/* New ol_flags bits added in DPDK-16.11 */
+#define PKT_RX_IP_CKSUM_GOOD    (1ULL << 7)
+#endif
+
+always_inline u32
+dpdk_rx_next_from_etype (struct rte_mbuf * mb, vlib_buffer_t * b0)
+{
+  if (PREDICT_TRUE (vlib_buffer_is_ip4 (b0)))
+    if (PREDICT_TRUE ((mb->ol_flags & PKT_RX_IP_CKSUM_GOOD) != 0))
+      return VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT;
+    else
+      return VNET_DEVICE_INPUT_NEXT_IP4_INPUT;
+  else if (PREDICT_TRUE (vlib_buffer_is_ip6 (b0)))
+    return VNET_DEVICE_INPUT_NEXT_IP6_INPUT;
+  else if (PREDICT_TRUE (vlib_buffer_is_mpls (b0)))
+    return VNET_DEVICE_INPUT_NEXT_MPLS_INPUT;
+  else
+    return VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+}
+
 always_inline int
 dpdk_mbuf_is_vlan (struct rte_mbuf *mb)
 {
+#if RTE_VERSION >= RTE_VERSION_NUM(16, 11, 0, 0)
   return (mb->packet_type & RTE_PTYPE_L2_ETHER_VLAN) ==
     RTE_PTYPE_L2_ETHER_VLAN;
-}
+#else
+  return
+    (mb->ol_flags &
+     (PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED | PKT_RX_QINQ_STRIPPED)) ==
+    PKT_RX_VLAN_PKT;
 #endif
+}
 
 always_inline int
 dpdk_mbuf_is_ip4 (struct rte_mbuf *mb)
@@ -55,35 +102,19 @@
   return RTE_ETH_IS_IPV6_HDR (mb->packet_type) != 0;
 }
 
-always_inline int
-vlib_buffer_is_mpls (vlib_buffer_t * b)
+always_inline u32
+dpdk_rx_next_from_mb (struct rte_mbuf * mb, vlib_buffer_t * b0)
 {
-  ethernet_header_t *h = (ethernet_header_t *) b->data;
-  return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS_UNICAST));
-}
-
-always_inline void
-dpdk_rx_next_from_mb (struct rte_mbuf *mb, vlib_buffer_t * b0, u32 * next0)
-{
-  u32 n0;
-
-#if RTE_VERSION < RTE_VERSION_NUM(16, 11, 0, 0)
-  if (PREDICT_FALSE
-      ((mb->ol_flags & (PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED)) ==
-       PKT_RX_VLAN_PKT))
-#else
   if (PREDICT_FALSE (dpdk_mbuf_is_vlan (mb)))
-#endif
-    n0 = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+    return VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
   else if (PREDICT_TRUE (dpdk_mbuf_is_ip4 (mb)))
-    n0 = VNET_DEVICE_INPUT_NEXT_IP4_INPUT;
+    return VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT;
   else if (PREDICT_TRUE (dpdk_mbuf_is_ip6 (mb)))
-    n0 = VNET_DEVICE_INPUT_NEXT_IP6_INPUT;
+    return VNET_DEVICE_INPUT_NEXT_IP6_INPUT;
   else if (PREDICT_TRUE (vlib_buffer_is_mpls (b0)))
-    n0 = VNET_DEVICE_INPUT_NEXT_MPLS_INPUT;
+    return VNET_DEVICE_INPUT_NEXT_MPLS_INPUT;
   else
-    n0 = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
-  *next0 = n0;
+    return dpdk_rx_next_from_etype (mb, b0);
 }
 
 always_inline void
@@ -127,10 +158,12 @@
 
       if (PREDICT_FALSE (xd->per_interface_next_index != ~0))
 	next0 = xd->per_interface_next_index;
-      else if (PREDICT_FALSE (xd->flags & DPDK_DEVICE_FLAG_HAVE_SUBIF))
-	next0 = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+      else if (PREDICT_TRUE
+	       ((xd->flags & DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE) != 0))
+	next0 = dpdk_rx_next_from_mb (mb, b0);
       else
-	dpdk_rx_next_from_mb (mb, b0, &next0);
+	next0 = dpdk_rx_next_from_etype (mb, b0);
+
       dpdk_rx_error_from_mb (mb, &next0, &error0);
 
       vlib_trace_buffer (vm, node, next0, b0, /* follow_chain */ 0);
@@ -350,17 +383,20 @@
 	    {
 	      next0 = next1 = next2 = next3 = xd->per_interface_next_index;
 	    }
-	  else if (PREDICT_FALSE (xd->flags & DPDK_DEVICE_FLAG_HAVE_SUBIF))
+	  else if (PREDICT_TRUE
+		   ((xd->flags & DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE) != 0))
 	    {
-	      next0 = next1 = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
-	      next2 = next3 = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+	      next0 = dpdk_rx_next_from_mb (mb0, b0);
+	      next1 = dpdk_rx_next_from_mb (mb1, b1);
+	      next2 = dpdk_rx_next_from_mb (mb2, b2);
+	      next3 = dpdk_rx_next_from_mb (mb3, b3);
 	    }
 	  else
 	    {
-	      dpdk_rx_next_from_mb (mb0, b0, &next0);
-	      dpdk_rx_next_from_mb (mb1, b1, &next1);
-	      dpdk_rx_next_from_mb (mb2, b2, &next2);
-	      dpdk_rx_next_from_mb (mb3, b3, &next3);
+	      next0 = dpdk_rx_next_from_etype (mb0, b0);
+	      next1 = dpdk_rx_next_from_etype (mb1, b1);
+	      next2 = dpdk_rx_next_from_etype (mb2, b2);
+	      next3 = dpdk_rx_next_from_etype (mb3, b3);
 	    }
 
 	  if (PREDICT_FALSE (or_ol_flags & PKT_RX_IP_CKSUM_BAD))
@@ -475,10 +511,12 @@
 
 	  if (PREDICT_FALSE (xd->per_interface_next_index != ~0))
 	    next0 = xd->per_interface_next_index;
-	  else if (PREDICT_FALSE (xd->flags & DPDK_DEVICE_FLAG_HAVE_SUBIF))
-	    next0 = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+	  else if (PREDICT_TRUE
+		   ((xd->flags & DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE) != 0))
+	    next0 = dpdk_rx_next_from_mb (mb0, b0);
 	  else
-	    dpdk_rx_next_from_mb (mb0, b0, &next0);
+	    next0 = dpdk_rx_next_from_etype (mb0, b0);
+
 	  dpdk_rx_error_from_mb (mb0, &next0, &error0);
 	  b0->error = node->errors[error0];