Simplify adjacency rewrite code
Using memcpy instead of complex specific copy logic. This simplify
the implementation and also improve perf slightly.
Also move adjacency data from tail to head of buffer, which improves
cache locality (header and data share the same cacheline)
Finally, fix VxLAN which used to workaround vnet_rewrite logic.
Change-Id: I770ddad9846f7ee505aa99ad417e6a61d5cbbefa
Signed-off-by: Benoît Ganne <bganne@cisco.com>
diff --git a/src/vnet/vxlan/encap.c b/src/vnet/vxlan/encap.c
index fe2374c..b797624 100644
--- a/src/vnet/vxlan/encap.c
+++ b/src/vnet/vxlan/encap.c
@@ -92,7 +92,6 @@
u8 const underlay_hdr_len = is_ip4 ?
sizeof(ip4_vxlan_header_t) : sizeof(ip6_vxlan_header_t);
- u8 const rw_hdr_offset = sizeof t0->rewrite_data - underlay_hdr_len;
u16 const l3_len = is_ip4 ? sizeof(ip4_header_t) : sizeof(ip6_header_t);
u32 const csum_flags = is_ip4 ?
VNET_BUFFER_F_OFFLOAD_IP_CKSUM | VNET_BUFFER_F_IS_IP4 |
@@ -173,6 +172,7 @@
ASSERT(t0->rewrite_header.data_bytes == underlay_hdr_len);
ASSERT(t1->rewrite_header.data_bytes == underlay_hdr_len);
+ vnet_rewrite_two_headers(*t0, *t1, vlib_buffer_get_current(b0), vlib_buffer_get_current(b1), underlay_hdr_len);
vlib_buffer_advance (b0, -underlay_hdr_len);
vlib_buffer_advance (b1, -underlay_hdr_len);
@@ -185,12 +185,6 @@
void * underlay0 = vlib_buffer_get_current(b0);
void * underlay1 = vlib_buffer_get_current(b1);
- /* vnet_rewrite_two_header writes only in (uword) 8 bytes chunks
- * and discards the first 4 bytes of the (36 bytes ip4 underlay) rewrite
- * use memcpy as a workaround */
- clib_memcpy_fast(underlay0, t0->rewrite_header.data + rw_hdr_offset, underlay_hdr_len);
- clib_memcpy_fast(underlay1, t1->rewrite_header.data + rw_hdr_offset, underlay_hdr_len);
-
ip4_header_t * ip4_0, * ip4_1;
qos_bits_t ip4_0_tos = 0, ip4_1_tos = 0;
ip6_header_t * ip6_0, * ip6_1;
@@ -354,15 +348,11 @@
vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpoi_idx0;
ASSERT(t0->rewrite_header.data_bytes == underlay_hdr_len);
+ vnet_rewrite_one_header(*t0, vlib_buffer_get_current(b0), underlay_hdr_len);
vlib_buffer_advance (b0, -underlay_hdr_len);
void * underlay0 = vlib_buffer_get_current(b0);
- /* vnet_rewrite_one_header writes only in (uword) 8 bytes chunks
- * and discards the first 4 bytes of the (36 bytes ip4 underlay) rewrite
- * use memcpy as a workaround */
- clib_memcpy_fast(underlay0, t0->rewrite_header.data + rw_hdr_offset, underlay_hdr_len);
-
u32 len0 = vlib_buffer_length_in_chain (vm, b0);
u16 payload_l0 = clib_host_to_net_u16 (len0 - l3_len);