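add vlib_prefetch_buffer_data(...) macro

Add vlib_prefetch_buffer_data(b, type) to vlib/buffer.h. It prefetches
CLIB_CACHE_LINE_BYTES starting at vlib_buffer_get_current(b), and
replaces the open-coded CLIB_PREFETCH (vlib_buffer_get_current (...))
calls in the following nodes:

- bonding/node.c: straight replacement; the requested prefetch size
  was already CLIB_CACHE_LINE_BYTES.
- ip4/ip4_input.c: the requested prefetch size changes from
  sizeof (ip4_header_t) to CLIB_CACHE_LINE_BYTES.
- l2/l2_rw.c: the dual loop now requires n_left_from >= 6, prefetches
  the buffer headers of from[4]/from[5] and the buffer data of
  from[2]/from[3], and no longer uses the prefetch_size value derived
  from the classify table (skip_n_vectors + match_n_vectors).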

Change-Id: Iba750a41262cc028ad0363fff78cc219e4a33538
Signed-off-by: Damjan Marion <damarion@cisco.com>
diff --git a/src/vlib/buffer.h b/src/vlib/buffer.h
index 99f40e8..493d111 100644
--- a/src/vlib/buffer.h
+++ b/src/vlib/buffer.h
@@ -185,6 +185,8 @@
 */
 
 #define vlib_prefetch_buffer_header(b,type) CLIB_PREFETCH (b, 64, type)
+#define vlib_prefetch_buffer_data(b,type) \
+  CLIB_PREFETCH (vlib_buffer_get_current(b), CLIB_CACHE_LINE_BYTES, type)
 
 always_inline void
 vlib_buffer_struct_is_sane (vlib_buffer_t * b)
diff --git a/src/vnet/bonding/node.c b/src/vnet/bonding/node.c
index 2bc2dc6..d945069 100644
--- a/src/vnet/bonding/node.c
+++ b/src/vnet/bonding/node.c
@@ -202,14 +202,10 @@
       /* Prefetch next iteration */
       if (PREDICT_TRUE (n_left >= 16))
 	{
-	  CLIB_PREFETCH (vlib_buffer_get_current (b[8]),
-			 CLIB_CACHE_LINE_BYTES, LOAD);
-	  CLIB_PREFETCH (vlib_buffer_get_current (b[9]),
-			 CLIB_CACHE_LINE_BYTES, LOAD);
-	  CLIB_PREFETCH (vlib_buffer_get_current (b[10]),
-			 CLIB_CACHE_LINE_BYTES, LOAD);
-	  CLIB_PREFETCH (vlib_buffer_get_current (b[11]),
-			 CLIB_CACHE_LINE_BYTES, LOAD);
+	  vlib_prefetch_buffer_data (b[8], LOAD);
+	  vlib_prefetch_buffer_data (b[9], LOAD);
+	  vlib_prefetch_buffer_data (b[10], LOAD);
+	  vlib_prefetch_buffer_data (b[11], LOAD);
 
 	  vlib_prefetch_buffer_header (b[12], LOAD);
 	  vlib_prefetch_buffer_header (b[13], LOAD);
diff --git a/src/vnet/ip/ip4_input.c b/src/vnet/ip/ip4_input.c
index 1928a66..696c4b6 100644
--- a/src/vnet/ip/ip4_input.c
+++ b/src/vnet/ip/ip4_input.c
@@ -163,14 +163,10 @@
 	  vlib_prefetch_buffer_header (b[10], LOAD);
 	  vlib_prefetch_buffer_header (b[11], LOAD);
 
-	  CLIB_PREFETCH (vlib_buffer_get_current (b[4]),
-			 sizeof (ip4_header_t), LOAD);
-	  CLIB_PREFETCH (vlib_buffer_get_current (b[5]),
-			 sizeof (ip4_header_t), LOAD);
-	  CLIB_PREFETCH (vlib_buffer_get_current (b[6]),
-			 sizeof (ip4_header_t), LOAD);
-	  CLIB_PREFETCH (vlib_buffer_get_current (b[7]),
-			 sizeof (ip4_header_t), LOAD);
+	  vlib_prefetch_buffer_data (b[4], LOAD);
+	  vlib_prefetch_buffer_data (b[5], LOAD);
+	  vlib_prefetch_buffer_data (b[6], LOAD);
+	  vlib_prefetch_buffer_data (b[7], LOAD);
 	}
 
       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] = ~0;
diff --git a/src/vnet/l2/l2_rw.c b/src/vnet/l2/l2_rw.c
index 5b0034c..ac83483 100644
--- a/src/vnet/l2/l2_rw.c
+++ b/src/vnet/l2/l2_rw.c
@@ -164,7 +164,6 @@
   u32 n_left_from, *from, *to_next, next_index;
   vnet_classify_main_t *vcm = &vnet_classify_main;
   f64 now = vlib_time_now (vlib_get_main ());
-  u32 prefetch_size = 0;
 
   from = vlib_frame_vector_args (frame);
   n_left_from = frame->n_vectors;	/* number of packets to process */
@@ -177,7 +176,7 @@
       /* get space to enqueue frame to graph node "next_index" */
       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
 
-      while (n_left_from >= 4 && n_left_to_next >= 2)
+      while (n_left_from >= 6 && n_left_to_next >= 2)
 	{
 	  u32 bi0, next0, sw_if_index0, rwe_index0;
 	  u32 bi1, next1, sw_if_index1, rwe_index1;
@@ -190,14 +189,16 @@
 	  l2_rw_entry_t *rwe0, *rwe1;
 
 	  {
-	    vlib_buffer_t *p2, *p3;
+	    vlib_buffer_t *p2, *p3, *p4, *p5;
 	    p2 = vlib_get_buffer (vm, from[2]);
 	    p3 = vlib_get_buffer (vm, from[3]);
+	    p4 = vlib_get_buffer (vm, from[4]);
+	    p5 = vlib_get_buffer (vm, from[5]);
 
-	    vlib_prefetch_buffer_header (p2, LOAD);
-	    vlib_prefetch_buffer_header (p3, LOAD);
-	    CLIB_PREFETCH (vlib_buffer_get_current (p2), prefetch_size, LOAD);
-	    CLIB_PREFETCH (vlib_buffer_get_current (p3), prefetch_size, LOAD);
+	    vlib_prefetch_buffer_header (p4, LOAD);
+	    vlib_prefetch_buffer_header (p5, LOAD);
+	    vlib_prefetch_buffer_data (p2, LOAD);
+	    vlib_prefetch_buffer_data (p3, LOAD);
 	  }
 
 	  bi0 = from[0];
@@ -220,8 +221,6 @@
 	  config1 = l2_rw_get_config (sw_if_index1);	/*TODO: check sw_if_index0 value */
 	  t0 = pool_elt_at_index (vcm->tables, config0->table_index);
 	  t1 = pool_elt_at_index (vcm->tables, config1->table_index);
-	  prefetch_size =
-	    (t1->skip_n_vectors + t1->match_n_vectors) * sizeof (u32x4);
 
 	  hash0 = vnet_classify_hash_packet (t0, (u8 *) h0);
 	  hash1 = vnet_classify_hash_packet (t1, (u8 *) h1);