128-bit SIMD version of vlib_get_buffers

Change-Id: I1a28ddf535c80ecf4ba4bf31659ff2fead1d8a64
Signed-off-by: Damjan Marion <damarion@cisco.com>
diff --git a/src/vlib/buffer_funcs.h b/src/vlib/buffer_funcs.h
index 205eaa3..667063c 100644
--- a/src/vlib/buffer_funcs.h
+++ b/src/vlib/buffer_funcs.h
@@ -97,6 +97,14 @@
       u64x4 b0 = u32x4_extend_to_u64x4 (u32x4_load_unaligned (bi));
       /* shift and add to get vlib_buffer_t pointer */
       u64x4_store_unaligned ((b0 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b);
+#elif defined (CLIB_HAVE_VEC128) && defined (__x86_64__)
+      u64x2 off = u64x2_splat (buffer_main.buffer_mem_start + offset);
+      u32x4 bi4 = u32x4_load_unaligned (bi);
+      u64x2 b0 = u32x4_extend_to_u64x2 ((u32x4) bi4);
+      bi4 = u32x4_shuffle (bi4, 2, 3, 0, 1);
+      u64x2 b1 = u32x4_extend_to_u64x2 ((u32x4) bi4);
+      u64x2_store_unaligned ((b0 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b);
+      u64x2_store_unaligned ((b1 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b + 2);
 #else
       b[0] = ((u8 *) vlib_get_buffer (vm, bi[0])) + offset;
       b[1] = ((u8 *) vlib_get_buffer (vm, bi[1])) + offset;