Improve dpdk-input node to handle drivers not setting mbuf PTYPE
For drivers that do not provide dpdk rte_mbuf PTYPE information,
check ethernet header Etype to accelerate IP4 and IP6 forwarding
path. Update packet trace for dpdk-input node to provide more info
from DPDK rte_mbuf offload flags and packet types.
Change-Id: I207158797a155305314d002726c0af97b8cb0eb3
Signed-off-by: John Lo <loj@cisco.com>
diff --git a/vnet/vnet/devices/dpdk/dpdk.h b/vnet/vnet/devices/dpdk/dpdk.h
index 771c91f..3669bc5 100644
--- a/vnet/vnet/devices/dpdk/dpdk.h
+++ b/vnet/vnet/devices/dpdk/dpdk.h
@@ -189,13 +189,13 @@
i8 cpu_socket;
u16 flags;
-#define DPDK_DEVICE_FLAG_ADMIN_UP (1 << 0)
-#define DPDK_DEVICE_FLAG_PROMISC (1 << 1)
-#define DPDK_DEVICE_FLAG_PMD (1 << 2)
-#define DPDK_DEVICE_FLAG_MAYBE_MULTISEG (1 << 3)
-
-#define DPDK_DEVICE_FLAG_HAVE_SUBIF (1 << 5)
-#define DPDK_DEVICE_FLAG_HQOS (1 << 6)
+#define DPDK_DEVICE_FLAG_ADMIN_UP (1 << 0)
+#define DPDK_DEVICE_FLAG_PROMISC (1 << 1)
+#define DPDK_DEVICE_FLAG_PMD (1 << 2)
+#define DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE (1 << 3)
+#define DPDK_DEVICE_FLAG_MAYBE_MULTISEG (1 << 4)
+#define DPDK_DEVICE_FLAG_HAVE_SUBIF (1 << 5)
+#define DPDK_DEVICE_FLAG_HQOS (1 << 6)
u16 nb_tx_desc;
CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
diff --git a/vnet/vnet/devices/dpdk/format.c b/vnet/vnet/devices/dpdk/format.c
index 212f109..0b8a693 100644
--- a/vnet/vnet/devices/dpdk/format.c
+++ b/vnet/vnet/devices/dpdk/format.c
@@ -79,20 +79,39 @@
_(DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM, "outer-ipv4-cksum") \
_(DEV_TX_OFFLOAD_QINQ_INSERT, "qinq-insert")
+#if RTE_VERSION < RTE_VERSION_NUM(16, 11, 0, 0)
+/* New ol_flags bits added in DPDK-16.11 */
+#define PKT_RX_IP_CKSUM_GOOD (1ULL << 7)
+#define PKT_RX_L4_CKSUM_GOOD (1ULL << 8)
+#endif
+
#define foreach_dpdk_pkt_rx_offload_flag \
_ (PKT_RX_VLAN_PKT, "RX packet is a 802.1q VLAN packet") \
_ (PKT_RX_RSS_HASH, "RX packet with RSS hash result") \
_ (PKT_RX_FDIR, "RX packet with FDIR infos") \
_ (PKT_RX_L4_CKSUM_BAD, "L4 cksum of RX pkt. is not OK") \
_ (PKT_RX_IP_CKSUM_BAD, "IP cksum of RX pkt. is not OK") \
+ _ (PKT_RX_VLAN_STRIPPED, "RX packet VLAN tag stripped") \
+ _ (PKT_RX_IP_CKSUM_GOOD, "IP cksum of RX pkt. is valid") \
+ _ (PKT_RX_L4_CKSUM_GOOD, "L4 cksum of RX pkt. is valid") \
_ (PKT_RX_IEEE1588_PTP, "RX IEEE1588 L2 Ethernet PT Packet") \
- _ (PKT_RX_IEEE1588_TMST, "RX IEEE1588 L2/L4 timestamped packet")
+ _ (PKT_RX_IEEE1588_TMST, "RX IEEE1588 L2/L4 timestamped packet") \
+ _ (PKT_RX_QINQ_STRIPPED, "RX packet QinQ tags stripped")
+
+#if RTE_VERSION < RTE_VERSION_NUM(16, 11, 0, 0)
+/* PTYPE added in DPDK-16.11 */
+#define RTE_PTYPE_L2_ETHER_VLAN 0x00000006
+#define RTE_PTYPE_L2_ETHER_QINQ 0x00000007
+#endif
#define foreach_dpdk_pkt_type \
_ (L2, ETHER, "Ethernet packet") \
_ (L2, ETHER_TIMESYNC, "Ethernet packet for time sync") \
_ (L2, ETHER_ARP, "ARP packet") \
_ (L2, ETHER_LLDP, "LLDP (Link Layer Discovery Protocol) packet") \
+ _ (L2, ETHER_NSH, "NSH (Network Service Header) packet") \
+ _ (L2, ETHER_VLAN, "VLAN packet") \
+ _ (L2, ETHER_QINQ, "QinQ packet") \
_ (L3, IPV4, "IPv4 packet without extension headers") \
_ (L3, IPV4_EXT, "IPv4 packet with extension headers") \
_ (L3, IPV4_EXT_UNKNOWN, "IPv4 packet with or without extension headers") \
@@ -642,7 +661,8 @@
s = format (s, "\n%U%U", format_white_space, indent,
format_dpdk_pkt_offload_flags, &mb->ol_flags);
- if (mb->ol_flags & PKT_RX_VLAN_PKT)
+ if ((mb->ol_flags & PKT_RX_VLAN_PKT) &&
+ ((mb->ol_flags & (PKT_RX_VLAN_STRIPPED | PKT_RX_QINQ_STRIPPED)) == 0))
{
ethernet_vlan_header_tv_t *vlan_hdr =
((ethernet_vlan_header_tv_t *) & (eth_hdr->type));
diff --git a/vnet/vnet/devices/dpdk/init.c b/vnet/vnet/devices/dpdk/init.c
index c9e7dc8..0448c15 100755
--- a/vnet/vnet/devices/dpdk/init.c
+++ b/vnet/vnet/devices/dpdk/init.c
@@ -473,6 +473,7 @@
/* Cisco VIC */
case VNET_DPDK_PMD_ENIC:
rte_eth_link_get_nowait (i, &l);
+ xd->flags |= DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE;
xd->nb_rx_desc = DPDK_NB_RX_DESC_ENIC;
if (l.link_speed == 40000)
{
@@ -489,6 +490,7 @@
/* Intel Fortville */
case VNET_DPDK_PMD_I40E:
case VNET_DPDK_PMD_I40EVF:
+ xd->flags |= DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE;
xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G;
xd->nb_rx_desc = DPDK_NB_RX_DESC_40GE;
xd->nb_tx_desc = DPDK_NB_TX_DESC_40GE;
@@ -561,6 +563,7 @@
break;
case VNET_DPDK_PMD_BOND:
+ xd->flags |= DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE;
xd->port_type = VNET_DPDK_PORT_TYPE_ETH_BOND;
break;
diff --git a/vnet/vnet/devices/dpdk/node.c b/vnet/vnet/devices/dpdk/node.c
index 4e66877..e541cdb 100644
--- a/vnet/vnet/devices/dpdk/node.c
+++ b/vnet/vnet/devices/dpdk/node.c
@@ -34,14 +34,61 @@
#undef _
};
-#if RTE_VERSION >= RTE_VERSION_NUM(16, 11, 0, 0)
+always_inline int
+vlib_buffer_is_ip4 (vlib_buffer_t * b)
+{
+ ethernet_header_t *h = (ethernet_header_t *) b->data;
+ return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP4));
+}
+
+always_inline int
+vlib_buffer_is_ip6 (vlib_buffer_t * b)
+{
+ ethernet_header_t *h = (ethernet_header_t *) b->data;
+ return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP6));
+}
+
+always_inline int
+vlib_buffer_is_mpls (vlib_buffer_t * b)
+{
+ ethernet_header_t *h = (ethernet_header_t *) b->data;
+ return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS_UNICAST));
+}
+
+#if RTE_VERSION < RTE_VERSION_NUM(16, 11, 0, 0)
+/* New ol_flags bits added in DPDK-16.11 */
+#define PKT_RX_IP_CKSUM_GOOD (1ULL << 7)
+#endif
+
+always_inline u32
+dpdk_rx_next_from_etype (struct rte_mbuf * mb, vlib_buffer_t * b0)
+{
+ if (PREDICT_TRUE (vlib_buffer_is_ip4 (b0)))
+ if (PREDICT_TRUE ((mb->ol_flags & PKT_RX_IP_CKSUM_GOOD) != 0))
+ return VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT;
+ else
+ return VNET_DEVICE_INPUT_NEXT_IP4_INPUT;
+ else if (PREDICT_TRUE (vlib_buffer_is_ip6 (b0)))
+ return VNET_DEVICE_INPUT_NEXT_IP6_INPUT;
+ else if (PREDICT_TRUE (vlib_buffer_is_mpls (b0)))
+ return VNET_DEVICE_INPUT_NEXT_MPLS_INPUT;
+ else
+ return VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+}
+
always_inline int
dpdk_mbuf_is_vlan (struct rte_mbuf *mb)
{
+#if RTE_VERSION >= RTE_VERSION_NUM(16, 11, 0, 0)
return (mb->packet_type & RTE_PTYPE_L2_ETHER_VLAN) ==
RTE_PTYPE_L2_ETHER_VLAN;
-}
+#else
+ return
+ (mb->ol_flags &
+ (PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED | PKT_RX_QINQ_STRIPPED)) ==
+ PKT_RX_VLAN_PKT;
#endif
+}
always_inline int
dpdk_mbuf_is_ip4 (struct rte_mbuf *mb)
@@ -55,35 +102,19 @@
return RTE_ETH_IS_IPV6_HDR (mb->packet_type) != 0;
}
-always_inline int
-vlib_buffer_is_mpls (vlib_buffer_t * b)
+always_inline u32
+dpdk_rx_next_from_mb (struct rte_mbuf * mb, vlib_buffer_t * b0)
{
- ethernet_header_t *h = (ethernet_header_t *) b->data;
- return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS_UNICAST));
-}
-
-always_inline void
-dpdk_rx_next_from_mb (struct rte_mbuf *mb, vlib_buffer_t * b0, u32 * next0)
-{
- u32 n0;
-
-#if RTE_VERSION < RTE_VERSION_NUM(16, 11, 0, 0)
- if (PREDICT_FALSE
- ((mb->ol_flags & (PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED)) ==
- PKT_RX_VLAN_PKT))
-#else
if (PREDICT_FALSE (dpdk_mbuf_is_vlan (mb)))
-#endif
- n0 = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+ return VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
else if (PREDICT_TRUE (dpdk_mbuf_is_ip4 (mb)))
- n0 = VNET_DEVICE_INPUT_NEXT_IP4_INPUT;
+ return VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT;
else if (PREDICT_TRUE (dpdk_mbuf_is_ip6 (mb)))
- n0 = VNET_DEVICE_INPUT_NEXT_IP6_INPUT;
+ return VNET_DEVICE_INPUT_NEXT_IP6_INPUT;
else if (PREDICT_TRUE (vlib_buffer_is_mpls (b0)))
- n0 = VNET_DEVICE_INPUT_NEXT_MPLS_INPUT;
+ return VNET_DEVICE_INPUT_NEXT_MPLS_INPUT;
else
- n0 = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
- *next0 = n0;
+ return dpdk_rx_next_from_etype (mb, b0);
}
always_inline void
@@ -127,10 +158,12 @@
if (PREDICT_FALSE (xd->per_interface_next_index != ~0))
next0 = xd->per_interface_next_index;
- else if (PREDICT_FALSE (xd->flags & DPDK_DEVICE_FLAG_HAVE_SUBIF))
- next0 = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+ else if (PREDICT_TRUE
+ ((xd->flags & DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE) != 0))
+ next0 = dpdk_rx_next_from_mb (mb, b0);
else
- dpdk_rx_next_from_mb (mb, b0, &next0);
+ next0 = dpdk_rx_next_from_etype (mb, b0);
+
dpdk_rx_error_from_mb (mb, &next0, &error0);
vlib_trace_buffer (vm, node, next0, b0, /* follow_chain */ 0);
@@ -350,17 +383,20 @@
{
next0 = next1 = next2 = next3 = xd->per_interface_next_index;
}
- else if (PREDICT_FALSE (xd->flags & DPDK_DEVICE_FLAG_HAVE_SUBIF))
+ else if (PREDICT_TRUE
+ ((xd->flags & DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE) != 0))
{
- next0 = next1 = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
- next2 = next3 = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+ next0 = dpdk_rx_next_from_mb (mb0, b0);
+ next1 = dpdk_rx_next_from_mb (mb1, b1);
+ next2 = dpdk_rx_next_from_mb (mb2, b2);
+ next3 = dpdk_rx_next_from_mb (mb3, b3);
}
else
{
- dpdk_rx_next_from_mb (mb0, b0, &next0);
- dpdk_rx_next_from_mb (mb1, b1, &next1);
- dpdk_rx_next_from_mb (mb2, b2, &next2);
- dpdk_rx_next_from_mb (mb3, b3, &next3);
+ next0 = dpdk_rx_next_from_etype (mb0, b0);
+ next1 = dpdk_rx_next_from_etype (mb1, b1);
+ next2 = dpdk_rx_next_from_etype (mb2, b2);
+ next3 = dpdk_rx_next_from_etype (mb3, b3);
}
if (PREDICT_FALSE (or_ol_flags & PKT_RX_IP_CKSUM_BAD))
@@ -475,10 +511,12 @@
if (PREDICT_FALSE (xd->per_interface_next_index != ~0))
next0 = xd->per_interface_next_index;
- else if (PREDICT_FALSE (xd->flags & DPDK_DEVICE_FLAG_HAVE_SUBIF))
- next0 = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+ else if (PREDICT_TRUE
+ ((xd->flags & DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE) != 0))
+ next0 = dpdk_rx_next_from_mb (mb0, b0);
else
- dpdk_rx_next_from_mb (mb0, b0, &next0);
+ next0 = dpdk_rx_next_from_etype (mb0, b0);
+
dpdk_rx_error_from_mb (mb0, &next0, &error0);
b0->error = node->errors[error0];