vxlan-gpe: improve encap performance
This patch improves performance by prefetching encap header area
and taking full advantage of optimized function vlib_get_buffers.
After applying the patch, the function vxlan_gpe_encap can save
4.1 clocks/pkt from 41.7 to 37.6 clocks/pkt on Skylake.
Change-Id: I85d486b21a2524d64f2e246dfb4183539ec2532d
Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
diff --git a/src/vnet/vxlan-gpe/encap.c b/src/vnet/vxlan-gpe/encap.c
index 9006b1b..1cca415 100644
--- a/src/vnet/vxlan-gpe/encap.c
+++ b/src/vnet/vxlan-gpe/encap.c
@@ -153,6 +153,7 @@
u32 pkts_encapsulated = 0;
u32 thread_index = vm->thread_index;
u32 stats_sw_if_index, stats_n_packets, stats_n_bytes;
+ vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
from = vlib_frame_vector_args (from_frame);
n_left_from = from_frame->n_vectors;
@@ -160,6 +161,7 @@
next_index = node->cached_next_index;
stats_sw_if_index = node->runtime_data[0];
stats_n_packets = stats_n_bytes = 0;
+ vlib_get_buffers (vm, from, bufs, n_left_from);
while (n_left_from > 0)
{
@@ -174,23 +176,19 @@
while (n_left_from >= 4 && n_left_to_next >= 2)
{
u32 bi0, bi1;
- vlib_buffer_t *b0, *b1;
u32 next0, next1;
next0 = next1 = VXLAN_GPE_ENCAP_NEXT_IP4_LOOKUP;
/* Prefetch next iteration. */
{
- vlib_buffer_t *p2, *p3;
+ vlib_prefetch_buffer_header (b[2], LOAD);
+ vlib_prefetch_buffer_header (b[3], LOAD);
- p2 = vlib_get_buffer (vm, from[2]);
- p3 = vlib_get_buffer (vm, from[3]);
-
- vlib_prefetch_buffer_header (p2, LOAD);
- vlib_prefetch_buffer_header (p3, LOAD);
-
- CLIB_PREFETCH (p2->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
- CLIB_PREFETCH (p3->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (b[2]->data - CLIB_CACHE_LINE_BYTES,
+ 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (b[3]->data - CLIB_CACHE_LINE_BYTES,
+ 2 * CLIB_CACHE_LINE_BYTES, LOAD);
}
bi0 = from[0];
@@ -202,25 +200,22 @@
n_left_to_next -= 2;
n_left_from -= 2;
- b0 = vlib_get_buffer (vm, bi0);
- b1 = vlib_get_buffer (vm, bi1);
-
/* get the flag "is_ip4" */
- if (sw_if_index0 != vnet_buffer (b0)->sw_if_index[VLIB_TX])
+ if (sw_if_index0 != vnet_buffer (b[0])->sw_if_index[VLIB_TX])
{
- sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+ sw_if_index0 = vnet_buffer (b[0])->sw_if_index[VLIB_TX];
hi0 =
vnet_get_sup_hw_interface (vnm,
- vnet_buffer (b0)->sw_if_index
+ vnet_buffer (b[0])->sw_if_index
[VLIB_TX]);
t0 = pool_elt_at_index (ngm->tunnels, hi0->dev_instance);
is_ip4_0 = (t0->flags & VXLAN_GPE_TUNNEL_IS_IPV4);
}
/* get the flag "is_ip4" */
- if (sw_if_index1 != vnet_buffer (b1)->sw_if_index[VLIB_TX])
+ if (sw_if_index1 != vnet_buffer (b[1])->sw_if_index[VLIB_TX])
{
- if (sw_if_index0 == vnet_buffer (b1)->sw_if_index[VLIB_TX])
+ if (sw_if_index0 == vnet_buffer (b[1])->sw_if_index[VLIB_TX])
{
sw_if_index1 = sw_if_index0;
hi1 = hi0;
@@ -229,10 +224,10 @@
}
else
{
- sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_TX];
+ sw_if_index1 = vnet_buffer (b[1])->sw_if_index[VLIB_TX];
hi1 =
vnet_get_sup_hw_interface (vnm,
- vnet_buffer (b1)->sw_if_index
+ vnet_buffer (b[1])->sw_if_index
[VLIB_TX]);
t1 = pool_elt_at_index (ngm->tunnels, hi1->dev_instance);
is_ip4_1 = (t1->flags & VXLAN_GPE_TUNNEL_IS_IPV4);
@@ -241,24 +236,24 @@
if (PREDICT_TRUE (is_ip4_0 == is_ip4_1))
{
- vxlan_gpe_encap_two_inline (ngm, b0, b1, t0, t1, &next0, &next1,
- is_ip4_0);
+ vxlan_gpe_encap_two_inline (ngm, b[0], b[1], t0, t1, &next0,
+ &next1, is_ip4_0);
}
else
{
- vxlan_gpe_encap_one_inline (ngm, b0, t0, &next0, is_ip4_0);
- vxlan_gpe_encap_one_inline (ngm, b1, t1, &next1, is_ip4_1);
+ vxlan_gpe_encap_one_inline (ngm, b[0], t0, &next0, is_ip4_0);
+ vxlan_gpe_encap_one_inline (ngm, b[1], t1, &next1, is_ip4_1);
}
/* Reset to look up tunnel partner in the configured FIB */
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = t0->encap_fib_index;
- vnet_buffer (b1)->sw_if_index[VLIB_TX] = t1->encap_fib_index;
- vnet_buffer (b0)->sw_if_index[VLIB_RX] = sw_if_index0;
- vnet_buffer (b1)->sw_if_index[VLIB_RX] = sw_if_index1;
+ vnet_buffer (b[0])->sw_if_index[VLIB_TX] = t0->encap_fib_index;
+ vnet_buffer (b[1])->sw_if_index[VLIB_TX] = t1->encap_fib_index;
+ vnet_buffer (b[0])->sw_if_index[VLIB_RX] = sw_if_index0;
+ vnet_buffer (b[1])->sw_if_index[VLIB_RX] = sw_if_index1;
pkts_encapsulated += 2;
- len0 = vlib_buffer_length_in_chain (vm, b0);
- len1 = vlib_buffer_length_in_chain (vm, b1);
+ len0 = vlib_buffer_length_in_chain (vm, b[0]);
+ len1 = vlib_buffer_length_in_chain (vm, b[1]);
stats_n_packets += 2;
stats_n_bytes += len0 + len1;
@@ -297,19 +292,20 @@
}
}
- if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
{
vxlan_gpe_encap_trace_t *tr =
- vlib_add_trace (vm, node, b0, sizeof (*tr));
+ vlib_add_trace (vm, node, b[0], sizeof (*tr));
tr->tunnel_index = t0 - ngm->tunnels;
}
- if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
+ if (PREDICT_FALSE (b[1]->flags & VLIB_BUFFER_IS_TRACED))
{
- vxlan_gpe_encap_trace_t *tr = vlib_add_trace (vm, node, b1,
+ vxlan_gpe_encap_trace_t *tr = vlib_add_trace (vm, node, b[1],
sizeof (*tr));
tr->tunnel_index = t1 - ngm->tunnels;
}
+ b += 2;
vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
n_left_to_next, bi0, bi1, next0,
@@ -319,7 +315,6 @@
while (n_left_from > 0 && n_left_to_next > 0)
{
u32 bi0;
- vlib_buffer_t *b0;
u32 next0 = VXLAN_GPE_ENCAP_NEXT_IP4_LOOKUP;
bi0 = from[0];
@@ -329,15 +324,13 @@
n_left_from -= 1;
n_left_to_next -= 1;
- b0 = vlib_get_buffer (vm, bi0);
-
/* get the flag "is_ip4" */
- if (sw_if_index0 != vnet_buffer (b0)->sw_if_index[VLIB_TX])
+ if (sw_if_index0 != vnet_buffer (b[0])->sw_if_index[VLIB_TX])
{
- sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+ sw_if_index0 = vnet_buffer (b[0])->sw_if_index[VLIB_TX];
hi0 =
vnet_get_sup_hw_interface (vnm,
- vnet_buffer (b0)->sw_if_index
+ vnet_buffer (b[0])->sw_if_index
[VLIB_TX]);
t0 = pool_elt_at_index (ngm->tunnels, hi0->dev_instance);
@@ -345,14 +338,14 @@
is_ip4_0 = (t0->flags & VXLAN_GPE_TUNNEL_IS_IPV4);
}
- vxlan_gpe_encap_one_inline (ngm, b0, t0, &next0, is_ip4_0);
+ vxlan_gpe_encap_one_inline (ngm, b[0], t0, &next0, is_ip4_0);
/* Reset to look up tunnel partner in the configured FIB */
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = t0->encap_fib_index;
- vnet_buffer (b0)->sw_if_index[VLIB_RX] = sw_if_index0;
+ vnet_buffer (b[0])->sw_if_index[VLIB_TX] = t0->encap_fib_index;
+ vnet_buffer (b[0])->sw_if_index[VLIB_RX] = sw_if_index0;
pkts_encapsulated++;
- len0 = vlib_buffer_length_in_chain (vm, b0);
+ len0 = vlib_buffer_length_in_chain (vm, b[0]);
stats_n_packets += 1;
stats_n_bytes += len0;
@@ -375,12 +368,14 @@
stats_n_bytes = len0;
stats_sw_if_index = sw_if_index0;
}
- if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
{
- vxlan_gpe_encap_trace_t *tr = vlib_add_trace (vm, node, b0,
+ vxlan_gpe_encap_trace_t *tr = vlib_add_trace (vm, node, b[0],
sizeof (*tr));
tr->tunnel_index = t0 - ngm->tunnels;
}
+ b += 1;
+
vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
n_left_to_next, bi0, next0);
}