Convert GRE nodes to new buffer APIs and multiarch

Change-Id: I3b3c8333287bb704ac7b0bbc81b3dbb059e8d2ac
Signed-off-by: Benoît Ganne <bganne@cisco.com>
diff --git a/src/vnet/gre/gre.c b/src/vnet/gre/gre.c
index 028eefb..72c76fc 100644
--- a/src/vnet/gre/gre.c
+++ b/src/vnet/gre/gre.c
@@ -19,6 +19,8 @@
 #include <vnet/gre/gre.h>
 #include <vnet/adj/adj_midchain.h>
 
+extern gre_main_t gre_main;
+
 #ifndef CLIB_MARCH_VARIANT
 gre_main_t gre_main;
 
@@ -56,7 +58,10 @@
   ip46_address_t dst;
 } gre_tx_trace_t;
 
-static u8 *
+extern u8 *format_gre_tx_trace (u8 * s, va_list * args);
+
+#ifndef CLIB_MARCH_VARIANT
+u8 *
 format_gre_tx_trace (u8 * s, va_list * args)
 {
   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
@@ -70,7 +75,6 @@
   return s;
 }
 
-#ifndef CLIB_MARCH_VARIANT
 u8 *
 format_gre_protocol (u8 * s, va_list * args)
 {
@@ -324,7 +328,6 @@
 }
 #endif /* CLIB_MARCH_VARIANT */
 
-
 typedef enum
 {
   GRE_ENCAP_NEXT_L2_MIDCHAIN,
@@ -335,173 +338,145 @@
  * @brief TX function. Only called for L2 payload including TEB or ERSPAN.
  *        L3 traffic uses the adj-midchains.
  */
-VLIB_NODE_FN (gre_encap_node) (vlib_main_t * vm,
-			       vlib_node_runtime_t * node,
+VLIB_NODE_FN (gre_encap_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
 			       vlib_frame_t * frame)
 {
   gre_main_t *gm = &gre_main;
-  vnet_main_t *vnm = gm->vnet_main;
-  u32 next_index;
-  u32 *from, *to_next, n_left_from, n_left_to_next;
-  u32 sw_if_index0 = ~0;
-  u32 sw_if_index1 = ~0;
-  adj_index_t adj_index0 = ADJ_INDEX_INVALID;
-  adj_index_t adj_index1 = ADJ_INDEX_INVALID;
-  gre_tunnel_t *gt0 = NULL;
-  gre_tunnel_t *gt1 = NULL;
+  u32 *from, n_left_from;
+  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
+  u32 sw_if_index[2] = { ~0, ~0 };
+  const gre_tunnel_t *gt[2] = { 0 };
+  adj_index_t adj_index[2] = { ADJ_INDEX_INVALID, ADJ_INDEX_INVALID };
 
-  /* Vector of buffer / pkt indices we're supposed to process */
   from = vlib_frame_vector_args (frame);
-
-  /* Number of buffers / pkts */
   n_left_from = frame->n_vectors;
+  vlib_get_buffers (vm, from, bufs, n_left_from);
 
-  /* Speculatively send the first buffer to the last disposition we used */
-  next_index = GRE_ENCAP_NEXT_L2_MIDCHAIN;
-
-  while (n_left_from > 0)
+  while (n_left_from >= 2)
     {
-      /* set up to enqueue to our disposition with index = next_index */
-      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
 
-      while (n_left_from >= 4 && n_left_to_next >= 2)
+      if (PREDICT_FALSE
+	  (sw_if_index[0] != vnet_buffer (b[0])->sw_if_index[VLIB_TX]))
 	{
-	  u32 bi0 = from[0];
-	  u32 bi1 = from[1];
-	  vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
-	  vlib_buffer_t *b1 = vlib_get_buffer (vm, bi1);
-
-	  to_next[0] = bi0;
-	  to_next[1] = bi1;
-	  from += 2;
-	  to_next += 2;
-	  n_left_to_next -= 2;
-	  n_left_from -= 2;
-
-	  if (sw_if_index0 != vnet_buffer (b0)->sw_if_index[VLIB_TX])
-	    {
-	      sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
-	      vnet_hw_interface_t *hi0 =
-		vnet_get_sup_hw_interface (vnm, sw_if_index0);
-	      gt0 = &gm->tunnels[hi0->dev_instance];
-	      adj_index0 = gt0->l2_adj_index;
-	    }
-
-	  if (sw_if_index1 != vnet_buffer (b1)->sw_if_index[VLIB_TX])
-	    {
-	      if (sw_if_index0 == vnet_buffer (b1)->sw_if_index[VLIB_TX])
-		{
-		  sw_if_index1 = sw_if_index0;
-		  gt1 = gt0;
-		  adj_index1 = adj_index0;
-		}
-	      else
-		{
-		  sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_TX];
-		  vnet_hw_interface_t *hi1 =
-		    vnet_get_sup_hw_interface (vnm, sw_if_index1);
-		  gt1 = &gm->tunnels[hi1->dev_instance];
-		  adj_index1 = gt1->l2_adj_index;
-		}
-	    }
-
-	  vnet_buffer (b0)->ip.adj_index[VLIB_TX] = adj_index0;
-	  vnet_buffer (b1)->ip.adj_index[VLIB_TX] = adj_index1;
-
-	  if (PREDICT_FALSE (gt0->type == GRE_TUNNEL_TYPE_ERSPAN))
-	    {
-	      /* Encap GRE seq# and ERSPAN type II header */
-	      vlib_buffer_advance (b0, -sizeof (erspan_t2_t));
-	      erspan_t2_t *h0 = vlib_buffer_get_current (b0);
-	      u32 seq_num = clib_atomic_fetch_add (&gt0->gre_sn->seq_num, 1);
-	      u64 hdr = clib_host_to_net_u64 (ERSPAN_HDR2);
-	      h0->seq_num = clib_host_to_net_u32 (seq_num);
-	      h0->t2_u64 = hdr;
-	      h0->t2.cos_en_t_session |=
-		clib_host_to_net_u16 (gt0->session_id);
-	    }
-	  if (PREDICT_FALSE (gt1->type == GRE_TUNNEL_TYPE_ERSPAN))
-	    {
-	      /* Encap GRE seq# and ERSPAN type II header */
-	      vlib_buffer_advance (b1, -sizeof (erspan_t2_t));
-	      erspan_t2_t *h1 = vlib_buffer_get_current (b1);
-	      u32 seq_num = clib_atomic_fetch_add (&gt1->gre_sn->seq_num, 1);
-	      u64 hdr = clib_host_to_net_u64 (ERSPAN_HDR2);
-	      h1->seq_num = clib_host_to_net_u32 (seq_num);
-	      h1->t2_u64 = hdr;
-	      h1->t2.cos_en_t_session |=
-		clib_host_to_net_u16 (gt1->session_id);
-	    }
-
-	  if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
-	    {
-	      gre_tx_trace_t *tr0 = vlib_add_trace (vm, node,
-						    b0, sizeof (*tr0));
-	      tr0->tunnel_id = gt0 - gm->tunnels;
-	      tr0->src = gt0->tunnel_src;
-	      tr0->dst = gt0->tunnel_dst.fp_addr;
-	      tr0->length = vlib_buffer_length_in_chain (vm, b0);
-	    }
-	  if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
-	    {
-	      gre_tx_trace_t *tr1 = vlib_add_trace (vm, node,
-						    b1, sizeof (*tr1));
-	      tr1->tunnel_id = gt1 - gm->tunnels;
-	      tr1->src = gt1->tunnel_src;
-	      tr1->dst = gt1->tunnel_dst.fp_addr;
-	      tr1->length = vlib_buffer_length_in_chain (vm, b1);
-	    }
+	  const vnet_hw_interface_t *hi;
+	  sw_if_index[0] = vnet_buffer (b[0])->sw_if_index[VLIB_TX];
+	  hi = vnet_get_sup_hw_interface (gm->vnet_main, sw_if_index[0]);
+	  gt[0] = &gm->tunnels[hi->dev_instance];
+	  adj_index[0] = gt[0]->l2_adj_index;
+	}
+      if (PREDICT_FALSE
+	  (sw_if_index[1] != vnet_buffer (b[1])->sw_if_index[VLIB_TX]))
+	{
+	  const vnet_hw_interface_t *hi;
+	  sw_if_index[1] = vnet_buffer (b[1])->sw_if_index[VLIB_TX];
+	  hi = vnet_get_sup_hw_interface (gm->vnet_main, sw_if_index[1]);
+	  gt[1] = &gm->tunnels[hi->dev_instance];
+	  adj_index[1] = gt[1]->l2_adj_index;
 	}
 
-      while (n_left_from > 0 && n_left_to_next > 0)
+      vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = adj_index[0];
+      vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = adj_index[1];
+
+      if (PREDICT_FALSE (gt[0]->type == GRE_TUNNEL_TYPE_ERSPAN))
 	{
-	  u32 bi0 = from[0];
-	  vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
-
-	  to_next[0] = bi0;
-	  from += 1;
-	  to_next += 1;
-	  n_left_from -= 1;
-	  n_left_to_next -= 1;
-
-	  if (sw_if_index0 != vnet_buffer (b0)->sw_if_index[VLIB_TX])
-	    {
-	      sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
-	      vnet_hw_interface_t *hi0 =
-		vnet_get_sup_hw_interface (vnm, sw_if_index0);
-	      gt0 = &gm->tunnels[hi0->dev_instance];
-	      adj_index0 = gt0->l2_adj_index;
-	    }
-
-	  vnet_buffer (b0)->ip.adj_index[VLIB_TX] = adj_index0;
-
-	  if (PREDICT_FALSE (gt0->type == GRE_TUNNEL_TYPE_ERSPAN))
-	    {
-	      /* Encap GRE seq# and ERSPAN type II header */
-	      vlib_buffer_advance (b0, -sizeof (erspan_t2_t));
-	      erspan_t2_t *h0 = vlib_buffer_get_current (b0);
-	      u32 seq_num = clib_atomic_fetch_add (&gt0->gre_sn->seq_num, 1);
-	      u64 hdr = clib_host_to_net_u64 (ERSPAN_HDR2);
-	      h0->seq_num = clib_host_to_net_u32 (seq_num);
-	      h0->t2_u64 = hdr;
-	      h0->t2.cos_en_t_session |=
-		clib_host_to_net_u16 (gt0->session_id);
-	    }
-
-	  if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
-	    {
-	      gre_tx_trace_t *tr = vlib_add_trace (vm, node,
-						   b0, sizeof (*tr));
-	      tr->tunnel_id = gt0 - gm->tunnels;
-	      tr->src = gt0->tunnel_src;
-	      tr->dst = gt0->tunnel_dst.fp_addr;
-	      tr->length = vlib_buffer_length_in_chain (vm, b0);
-	    }
+	  /* Encap GRE seq# and ERSPAN type II header */
+	  erspan_t2_t *h0;
+	  u32 seq_num;
+	  u64 hdr;
+	  vlib_buffer_advance (b[0], -sizeof (erspan_t2_t));
+	  h0 = vlib_buffer_get_current (b[0]);
+	  seq_num = clib_atomic_fetch_add (&gt[0]->gre_sn->seq_num, 1);
+	  hdr = clib_host_to_net_u64 (ERSPAN_HDR2);
+	  h0->seq_num = clib_host_to_net_u32 (seq_num);
+	  h0->t2_u64 = hdr;
+	  h0->t2.cos_en_t_session |= clib_host_to_net_u16 (gt[0]->session_id);
+	}
+      if (PREDICT_FALSE (gt[1]->type == GRE_TUNNEL_TYPE_ERSPAN))
+	{
+	  /* Encap GRE seq# and ERSPAN type II header */
+	  erspan_t2_t *h0;
+	  u32 seq_num;
+	  u64 hdr;
+	  vlib_buffer_advance (b[1], -sizeof (erspan_t2_t));
+	  h0 = vlib_buffer_get_current (b[1]);
+	  seq_num = clib_atomic_fetch_add (&gt[1]->gre_sn->seq_num, 1);
+	  hdr = clib_host_to_net_u64 (ERSPAN_HDR2);
+	  h0->seq_num = clib_host_to_net_u32 (seq_num);
+	  h0->t2_u64 = hdr;
+	  h0->t2.cos_en_t_session |= clib_host_to_net_u16 (gt[1]->session_id);
 	}
 
-      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+      if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
+	{
+	  gre_tx_trace_t *tr = vlib_add_trace (vm, node,
+					       b[0], sizeof (*tr));
+	  tr->tunnel_id = gt[0] - gm->tunnels;
+	  tr->src = gt[0]->tunnel_src;
+	  tr->dst = gt[0]->tunnel_dst.fp_addr;
+	  tr->length = vlib_buffer_length_in_chain (vm, b[0]);
+	}
+      if (PREDICT_FALSE (b[1]->flags & VLIB_BUFFER_IS_TRACED))
+	{
+	  gre_tx_trace_t *tr = vlib_add_trace (vm, node,
+					       b[1], sizeof (*tr));
+	  tr->tunnel_id = gt[1] - gm->tunnels;
+	  tr->src = gt[1]->tunnel_src;
+	  tr->dst = gt[1]->tunnel_dst.fp_addr;
+	  tr->length = vlib_buffer_length_in_chain (vm, b[1]);
+	}
+
+      b += 2;
+      n_left_from -= 2;
     }
 
+  while (n_left_from >= 1)
+    {
+
+      if (PREDICT_FALSE
+	  (sw_if_index[0] != vnet_buffer (b[0])->sw_if_index[VLIB_TX]))
+	{
+	  const vnet_hw_interface_t *hi;
+	  sw_if_index[0] = vnet_buffer (b[0])->sw_if_index[VLIB_TX];
+	  hi = vnet_get_sup_hw_interface (gm->vnet_main, sw_if_index[0]);
+	  gt[0] = &gm->tunnels[hi->dev_instance];
+	  adj_index[0] = gt[0]->l2_adj_index;
+	}
+
+      vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = adj_index[0];
+
+      if (PREDICT_FALSE (gt[0]->type == GRE_TUNNEL_TYPE_ERSPAN))
+	{
+	  /* Encap GRE seq# and ERSPAN type II header */
+	  erspan_t2_t *h0;
+	  u32 seq_num;
+	  u64 hdr;
+	  vlib_buffer_advance (b[0], -sizeof (erspan_t2_t));
+	  h0 = vlib_buffer_get_current (b[0]);
+	  seq_num = clib_atomic_fetch_add (&gt[0]->gre_sn->seq_num, 1);
+	  hdr = clib_host_to_net_u64 (ERSPAN_HDR2);
+	  h0->seq_num = clib_host_to_net_u32 (seq_num);
+	  h0->t2_u64 = hdr;
+	  h0->t2.cos_en_t_session |= clib_host_to_net_u16 (gt[0]->session_id);
+	}
+
+      if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
+	{
+	  gre_tx_trace_t *tr = vlib_add_trace (vm, node,
+					       b[0], sizeof (*tr));
+	  tr->tunnel_id = gt[0] - gm->tunnels;
+	  tr->src = gt[0]->tunnel_src;
+	  tr->dst = gt[0]->tunnel_dst.fp_addr;
+	  tr->length = vlib_buffer_length_in_chain (vm, b[0]);
+	}
+
+      b += 1;
+      n_left_from -= 1;
+    }
+
+  vlib_buffer_enqueue_to_single_next (vm, node, from,
+				      GRE_ENCAP_NEXT_L2_MIDCHAIN,
+				      frame->n_vectors);
+
   vlib_node_increment_counter (vm, node->node_index,
 			       GRE_ERROR_PKTS_ENCAP, frame->n_vectors);
 
@@ -530,6 +505,7 @@
 };
 /* *INDENT-ON* */
 
+#ifndef CLIB_MARCH_VARIANT
 static u8 *
 format_gre_tunnel_name (u8 * s, va_list * args)
 {
@@ -566,7 +542,6 @@
 #endif
 };
 
-#ifndef CLIB_MARCH_VARIANT
 VNET_HW_INTERFACE_CLASS (gre_hw_interface_class) = {
   .name = "GRE",
   .format_header = format_gre_header_with_length,
@@ -576,6 +551,7 @@
   .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
 };
 /* *INDENT-ON* */
+#endif /* CLIB_MARCH_VARIANT */
 
 static void
 add_protocol (gre_main_t * gm, gre_protocol_t protocol, char *protocol_name)
@@ -637,8 +613,6 @@
 
 VLIB_INIT_FUNCTION (gre_init);
 
-#endif /* CLIB_MARCH_VARIANT */
-
 /*
  * fd.io coding-style-patch-verification: ON
  *
diff --git a/src/vnet/gre/node.c b/src/vnet/gre/node.c
index ad3c8c6..ff74d1b 100644
--- a/src/vnet/gre/node.c
+++ b/src/vnet/gre/node.c
@@ -43,10 +43,12 @@
   u32 length;
   ip46_address_t src;
   ip46_address_t dst;
-  u8 is_ipv6;
 } gre_rx_trace_t;
 
-static u8 *
+extern u8 *format_gre_rx_trace (u8 * s, va_list * args);
+
+#ifndef CLIB_MARCH_VARIANT
+u8 *
 format_gre_rx_trace (u8 * s, va_list * args)
 {
   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
@@ -59,6 +61,7 @@
 	      format_ip46_address, &t->dst, IP46_TYPE_ANY);
   return s;
 }
+#endif /* CLIB_MARCH_VARIANT */
 
 typedef struct
 {
@@ -67,488 +70,363 @@
   u16 *next_by_protocol;
 } gre_input_runtime_t;
 
+always_inline void		/* Add a gre_rx_trace_t record for buffer b, filled from the IP header selected by is_ipv6. */
+gre_trace (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_buffer_t * b,
+	   u32 tun_sw_if_index, const ip6_header_t * ip6,
+	   const ip4_header_t * ip4, int is_ipv6)
+{				/* Only one of ip6 / ip4 is dereferenced: ip6 when is_ipv6 is non-zero, ip4 otherwise. */
+  gre_rx_trace_t *tr = vlib_add_trace (vm, node,
+				       b, sizeof (*tr));
+  tr->tunnel_id = tun_sw_if_index;	/* stored exactly as passed in by the caller */
+  if (is_ipv6)
+    {
+      tr->length = ip6->payload_length;	/* header field copied as-is, no byte-order conversion here */
+      tr->src.ip6.as_u64[0] = ip6->src_address.as_u64[0];
+      tr->src.ip6.as_u64[1] = ip6->src_address.as_u64[1];
+      tr->dst.ip6.as_u64[0] = ip6->dst_address.as_u64[0];
+      tr->dst.ip6.as_u64[1] = ip6->dst_address.as_u64[1];
+    }
+  else
+    {
+      tr->length = ip4->length;	/* header field copied as-is, no byte-order conversion here */
+      tr->src.as_u64[0] = tr->src.as_u64[1] = 0;	/* zero both u64 words of src before writing its ip4 member */
+      tr->dst.as_u64[0] = tr->dst.as_u64[1] = 0;	/* same for dst */
+      tr->src.ip4.as_u32 = ip4->src_address.as_u32;
+      tr->dst.ip4.as_u32 = ip4->dst_address.as_u32;
+    }
+}
+
+always_inline void		/* Look up the tunnel for key; on a hit refresh the caller's cache, on a miss mark the buffer for drop. */
+gre_tunnel_get (const gre_main_t * gm, vlib_node_runtime_t * node,
+		vlib_buffer_t * b, u16 * next, const gre_tunnel_key_t * key,
+		gre_tunnel_key_t * cached_key, u32 * tun_sw_if_index,
+		u32 * cached_tun_sw_if_index, int is_ipv6)
+{
+  const uword *p;		/* hash lookup result; NULL means no tunnel is registered for this key */
+  p = is_ipv6 ? hash_get_mem (gm->tunnel_by_key6, &key->gtk_v6)
+    : hash_get_mem (gm->tunnel_by_key4, &key->gtk_v4);
+  if (PREDICT_FALSE (!p))
+    {				/* miss: cached_key and *cached_tun_sw_if_index are left untouched */
+      *next = GRE_INPUT_NEXT_DROP;
+      b->error = node->errors[GRE_ERROR_NO_SUCH_TUNNEL];
+      *tun_sw_if_index = ~0;
+    }
+  else
+    {				/* hit: *next and b->error are not modified in this branch */
+      const gre_tunnel_t *tun;
+      tun = pool_elt_at_index (gm->tunnels, *p);	/* *p is used as the index into the gm->tunnels pool */
+      *cached_tun_sw_if_index = *tun_sw_if_index = tun->sw_if_index;
+      if (is_ipv6)		/* remember the matching key variant for the caller's next comparison */
+	cached_key->gtk_v6 = key->gtk_v6;
+      else
+	cached_key->gtk_v4 = key->gtk_v4;
+    }
+}
+
 always_inline uword
 gre_input (vlib_main_t * vm,
-	   vlib_node_runtime_t * node, vlib_frame_t * from_frame, u8 is_ipv6)
+	   vlib_node_runtime_t * node, vlib_frame_t * frame,
+	   const int is_ipv6)
 {
   gre_main_t *gm = &gre_main;
-  __attribute__ ((unused)) u32 n_left_from, next_index, *from, *to_next;
-  gre_tunnel_key_t cached_tunnel_key;
+  u32 *from, n_left_from;
+  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
+  u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
+  u16 cached_protocol = ~0;
+  u32 cached_next_index = SPARSE_VEC_INVALID_INDEX;
+  u32 cached_tun_sw_if_index = ~0;
+  gre_tunnel_key_t cached_key;
 
-  u32 cached_tunnel_sw_if_index = ~0, tunnel_sw_if_index = ~0;
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+  vlib_get_buffers (vm, from, bufs, n_left_from);
 
-  u32 thread_index = vm->thread_index;
-  u32 len;
-  vnet_interface_main_t *im = &gm->vnet_main->interface_main;
-
-  if (!is_ipv6)
-    clib_memset (&cached_tunnel_key.gtk_v4, 0xff,
-		 sizeof (cached_tunnel_key.gtk_v4));
+  if (is_ipv6)
+    clib_memset (&cached_key.gtk_v6, 0xff, sizeof (cached_key.gtk_v6));
   else
-    clib_memset (&cached_tunnel_key.gtk_v6, 0xff,
-		 sizeof (cached_tunnel_key.gtk_v6));
+    clib_memset (&cached_key.gtk_v4, 0xff, sizeof (cached_key.gtk_v4));
 
-  from = vlib_frame_vector_args (from_frame);
-  n_left_from = from_frame->n_vectors;
-
-  next_index = node->cached_next_index;
-
-  while (n_left_from > 0)
+  while (n_left_from >= 2)
     {
-      u32 n_left_to_next;
+      const ip6_header_t *ip6[2];
+      const ip4_header_t *ip4[2];
+      const gre_header_t *gre[2];
+      u32 nidx[2];
+      next_info_t ni[2];
+      u8 type[2];
+      u16 version[2];
+      u32 len[2];
+      gre_tunnel_key_t key[2];
+      u8 matched[2];
+      u32 tun_sw_if_index[2];
 
-      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
-      while (n_left_from >= 4 && n_left_to_next >= 2)
+      if (PREDICT_TRUE (n_left_from >= 6))
 	{
-	  u32 bi0, bi1;
-	  vlib_buffer_t *b0, *b1;
-	  gre_header_t *h0, *h1;
-	  u16 version0, version1;
-	  int verr0, verr1;
-	  u32 i0, i1, next0, next1, protocol0, protocol1;
-	  ip4_header_t *ip4_0, *ip4_1;
-	  ip6_header_t *ip6_0, *ip6_1;
-	  gre_tunnel_key_t key0, key1;
-
-	  /* Prefetch next iteration. */
-	  {
-	    vlib_buffer_t *p2, *p3;
-
-	    p2 = vlib_get_buffer (vm, from[2]);
-	    p3 = vlib_get_buffer (vm, from[3]);
-
-	    vlib_prefetch_buffer_header (p2, LOAD);
-	    vlib_prefetch_buffer_header (p3, LOAD);
-
-	    CLIB_PREFETCH (p2->data, sizeof (h0[0]), LOAD);
-	    CLIB_PREFETCH (p3->data, sizeof (h1[0]), LOAD);
-	  }
-
-	  bi0 = from[0];
-	  bi1 = from[1];
-	  to_next[0] = bi0;
-	  to_next[1] = bi1;
-	  from += 2;
-	  to_next += 2;
-	  n_left_to_next -= 2;
-	  n_left_from -= 2;
-
-	  b0 = vlib_get_buffer (vm, bi0);
-	  b1 = vlib_get_buffer (vm, bi1);
-
-	  if (!is_ipv6)
-	    {
-	      /* ip4_local hands us the ip header, not the gre header */
-	      ip4_0 = vlib_buffer_get_current (b0);
-	      ip4_1 = vlib_buffer_get_current (b1);
-
-	      vlib_buffer_advance (b0, sizeof (*ip4_0));
-	      vlib_buffer_advance (b1, sizeof (*ip4_1));
-	    }
-	  else
-	    {
-	      /* ip6_local hands us the ip header, not the gre header */
-	      ip6_0 = vlib_buffer_get_current (b0);
-	      ip6_1 = vlib_buffer_get_current (b1);
-
-	      vlib_buffer_advance (b0, sizeof (*ip6_0));
-	      vlib_buffer_advance (b1, sizeof (*ip6_1));
-	    }
-
-	  h0 = vlib_buffer_get_current (b0);
-	  h1 = vlib_buffer_get_current (b1);
-
-	  /* Index sparse array with network byte order. */
-	  protocol0 = h0->protocol;
-	  protocol1 = h1->protocol;
-	  sparse_vec_index2 (gm->next_by_protocol, protocol0, protocol1,
-			     &i0, &i1);
-	  next0 = vec_elt (gm->next_by_protocol, i0).next_index;
-	  next1 = vec_elt (gm->next_by_protocol, i1).next_index;
-	  u8 ttype0 = vec_elt (gm->next_by_protocol, i0).tunnel_type;
-	  u8 ttype1 = vec_elt (gm->next_by_protocol, i1).tunnel_type;
-
-	  b0->error =
-	    node->errors[i0 ==
-			 SPARSE_VEC_INVALID_INDEX ? GRE_ERROR_UNKNOWN_PROTOCOL
-			 : GRE_ERROR_NONE];
-	  b1->error =
-	    node->errors[i1 ==
-			 SPARSE_VEC_INVALID_INDEX ? GRE_ERROR_UNKNOWN_PROTOCOL
-			 : GRE_ERROR_NONE];
-
-	  version0 = clib_net_to_host_u16 (h0->flags_and_version);
-	  verr0 = version0 & GRE_VERSION_MASK;
-	  version1 = clib_net_to_host_u16 (h1->flags_and_version);
-	  verr1 = version1 & GRE_VERSION_MASK;
-
-	  b0->error = verr0 ? node->errors[GRE_ERROR_UNSUPPORTED_VERSION]
-	    : b0->error;
-	  next0 = verr0 ? GRE_INPUT_NEXT_DROP : next0;
-	  b1->error = verr1 ? node->errors[GRE_ERROR_UNSUPPORTED_VERSION]
-	    : b1->error;
-	  next1 = verr1 ? GRE_INPUT_NEXT_DROP : next1;
-
-
-	  /* RPF check for ip4/ip6 input */
-	  if (PREDICT_TRUE (next0 > GRE_INPUT_NEXT_DROP))
-	    {
-	      if (is_ipv6)
-		{
-		  gre_mk_key6 (&ip6_0->dst_address,
-			       &ip6_0->src_address,
-			       vnet_buffer (b0)->ip.fib_index,
-			       ttype0, 0, &key0.gtk_v6);
-		}
-	      else
-		{
-		  gre_mk_key4 (ip4_0->dst_address,
-			       ip4_0->src_address,
-			       vnet_buffer (b0)->ip.fib_index,
-			       ttype0, 0, &key0.gtk_v4);
-		}
-
-	      if ((!is_ipv6 && !gre_match_key4 (&cached_tunnel_key.gtk_v4,
-						&key0.gtk_v4)) ||
-		  (is_ipv6 && !gre_match_key6 (&cached_tunnel_key.gtk_v6,
-					       &key0.gtk_v6)))
-		{
-		  gre_tunnel_t *t;
-		  uword *p;
-
-		  if (!is_ipv6)
-		    {
-		      p = hash_get_mem (gm->tunnel_by_key4, &key0.gtk_v4);
-		    }
-		  else
-		    {
-		      p = hash_get_mem (gm->tunnel_by_key6, &key0.gtk_v6);
-		    }
-		  if (!p)
-		    {
-		      next0 = GRE_INPUT_NEXT_DROP;
-		      b0->error = node->errors[GRE_ERROR_NO_SUCH_TUNNEL];
-		      goto drop0;
-		    }
-		  t = pool_elt_at_index (gm->tunnels, p[0]);
-		  tunnel_sw_if_index = t->sw_if_index;
-
-		  cached_tunnel_sw_if_index = tunnel_sw_if_index;
-		  if (!is_ipv6)
-		    {
-		      cached_tunnel_key.gtk_v4 = key0.gtk_v4;
-		    }
-		  else
-		    {
-		      cached_tunnel_key.gtk_v6 = key0.gtk_v6;
-		    }
-		}
-	      else
-		{
-		  tunnel_sw_if_index = cached_tunnel_sw_if_index;
-		}
-	    }
-	  else
-	    {
-	      next0 = GRE_INPUT_NEXT_DROP;
-	      goto drop0;
-	    }
-	  len = vlib_buffer_length_in_chain (vm, b0);
-	  vlib_increment_combined_counter (im->combined_sw_if_counters
-					   + VNET_INTERFACE_COUNTER_RX,
-					   thread_index,
-					   tunnel_sw_if_index,
-					   1 /* packets */ ,
-					   len /* bytes */ );
-
-	  vnet_buffer (b0)->sw_if_index[VLIB_RX] = tunnel_sw_if_index;
-
-	drop0:
-	  if (PREDICT_TRUE (next1 > GRE_INPUT_NEXT_DROP))
-	    {
-	      if (is_ipv6)
-		{
-		  gre_mk_key6 (&ip6_1->dst_address,
-			       &ip6_1->src_address,
-			       vnet_buffer (b1)->ip.fib_index,
-			       ttype1, 0, &key1.gtk_v6);
-		}
-	      else
-		{
-		  gre_mk_key4 (ip4_1->dst_address,
-			       ip4_1->src_address,
-			       vnet_buffer (b1)->ip.fib_index,
-			       ttype1, 0, &key1.gtk_v4);
-		}
-
-	      if ((!is_ipv6 && !gre_match_key4 (&cached_tunnel_key.gtk_v4,
-						&key1.gtk_v4)) ||
-		  (is_ipv6 && !gre_match_key6 (&cached_tunnel_key.gtk_v6,
-					       &key1.gtk_v6)))
-		{
-		  gre_tunnel_t *t;
-		  uword *p;
-
-		  if (!is_ipv6)
-		    {
-		      p = hash_get_mem (gm->tunnel_by_key4, &key1.gtk_v4);
-		    }
-		  else
-		    {
-		      p = hash_get_mem (gm->tunnel_by_key6, &key1.gtk_v6);
-		    }
-		  if (!p)
-		    {
-		      next1 = GRE_INPUT_NEXT_DROP;
-		      b1->error = node->errors[GRE_ERROR_NO_SUCH_TUNNEL];
-		      goto drop1;
-		    }
-		  t = pool_elt_at_index (gm->tunnels, p[0]);
-		  tunnel_sw_if_index = t->sw_if_index;
-
-		  cached_tunnel_sw_if_index = tunnel_sw_if_index;
-		  if (!is_ipv6)
-		    {
-		      cached_tunnel_key.gtk_v4 = key1.gtk_v4;
-		    }
-		  else
-		    {
-		      cached_tunnel_key.gtk_v6 = key1.gtk_v6;
-		    }
-		}
-	      else
-		{
-		  tunnel_sw_if_index = cached_tunnel_sw_if_index;
-		}
-	    }
-	  else
-	    {
-	      next1 = GRE_INPUT_NEXT_DROP;
-	      goto drop1;
-	    }
-	  len = vlib_buffer_length_in_chain (vm, b1);
-	  vlib_increment_combined_counter (im->combined_sw_if_counters
-					   + VNET_INTERFACE_COUNTER_RX,
-					   thread_index,
-					   tunnel_sw_if_index,
-					   1 /* packets */ ,
-					   len /* bytes */ );
-
-	  vnet_buffer (b1)->sw_if_index[VLIB_RX] = tunnel_sw_if_index;
-
-	drop1:
-	  if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
-	    {
-	      gre_rx_trace_t *tr = vlib_add_trace (vm, node,
-						   b0, sizeof (*tr));
-	      tr->tunnel_id = tunnel_sw_if_index;
-	      if (!is_ipv6)
-		{
-		  tr->length = ip4_0->length;
-		  tr->src.ip4.as_u32 = ip4_0->src_address.as_u32;
-		  tr->dst.ip4.as_u32 = ip4_0->dst_address.as_u32;
-		}
-	      else
-		{
-		  tr->length = ip6_0->payload_length;
-		  tr->src.ip6.as_u64[0] = ip6_0->src_address.as_u64[0];
-		  tr->src.ip6.as_u64[1] = ip6_0->src_address.as_u64[1];
-		  tr->dst.ip6.as_u64[0] = ip6_0->dst_address.as_u64[0];
-		  tr->dst.ip6.as_u64[1] = ip6_0->dst_address.as_u64[1];
-		}
-	    }
-
-	  if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
-	    {
-	      gre_rx_trace_t *tr = vlib_add_trace (vm, node,
-						   b1, sizeof (*tr));
-	      tr->tunnel_id = tunnel_sw_if_index;
-	      if (!is_ipv6)
-		{
-		  tr->length = ip4_1->length;
-		  tr->src.ip4.as_u32 = ip4_1->src_address.as_u32;
-		  tr->dst.ip4.as_u32 = ip4_1->dst_address.as_u32;
-		}
-	      else
-		{
-		  tr->length = ip6_1->payload_length;
-		  tr->src.ip6.as_u64[0] = ip6_1->src_address.as_u64[0];
-		  tr->src.ip6.as_u64[1] = ip6_1->src_address.as_u64[1];
-		  tr->dst.ip6.as_u64[0] = ip6_1->dst_address.as_u64[0];
-		  tr->dst.ip6.as_u64[1] = ip6_1->dst_address.as_u64[1];
-		}
-	    }
-
-	  vlib_buffer_advance (b0, sizeof (*h0));
-	  vlib_buffer_advance (b1, sizeof (*h1));
-
-	  vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
-					   to_next, n_left_to_next,
-					   bi0, bi1, next0, next1);
+	  vlib_prefetch_buffer_data (b[2], LOAD);
+	  vlib_prefetch_buffer_data (b[3], LOAD);
+	  vlib_prefetch_buffer_header (b[4], STORE);
+	  vlib_prefetch_buffer_header (b[5], STORE);
 	}
 
-      while (n_left_from > 0 && n_left_to_next > 0)
+      if (is_ipv6)
 	{
-	  u32 bi0;
-	  vlib_buffer_t *b0;
-	  gre_header_t *h0;
-	  ip4_header_t *ip4_0;
-	  ip6_header_t *ip6_0;
-	  u16 version0;
-	  int verr0;
-	  u32 i0, next0;
-	  gre_tunnel_key_t key0;
-
-	  bi0 = from[0];
-	  to_next[0] = bi0;
-	  from += 1;
-	  to_next += 1;
-	  n_left_from -= 1;
-	  n_left_to_next -= 1;
-
-	  b0 = vlib_get_buffer (vm, bi0);
-	  ip4_0 = vlib_buffer_get_current (b0);
-	  ip6_0 = (void *) ip4_0;
-
-	  if (!is_ipv6)
-	    {
-	      vlib_buffer_advance (b0, sizeof (*ip4_0));
-	    }
-	  else
-	    {
-	      vlib_buffer_advance (b0, sizeof (*ip6_0));
-	    }
-
-	  h0 = vlib_buffer_get_current (b0);
-
-	  i0 = sparse_vec_index (gm->next_by_protocol, h0->protocol);
-	  next0 = vec_elt (gm->next_by_protocol, i0).next_index;
-	  u8 ttype0 = vec_elt (gm->next_by_protocol, i0).tunnel_type;
-
-	  b0->error =
-	    node->errors[i0 == SPARSE_VEC_INVALID_INDEX
-			 ? GRE_ERROR_UNKNOWN_PROTOCOL : GRE_ERROR_NONE];
-
-	  version0 = clib_net_to_host_u16 (h0->flags_and_version);
-	  verr0 = version0 & GRE_VERSION_MASK;
-	  b0->error = verr0 ? node->errors[GRE_ERROR_UNSUPPORTED_VERSION]
-	    : b0->error;
-	  next0 = verr0 ? GRE_INPUT_NEXT_DROP : next0;
-
-
-	  /* For IP payload we need to find source interface
-	     so we can increase counters and help forward node to
-	     pick right FIB */
-	  /* RPF check for ip4/ip6 input */
-	  if (PREDICT_TRUE (next0 > GRE_INPUT_NEXT_DROP))
-	    {
-	      if (is_ipv6)
-		{
-		  gre_mk_key6 (&ip6_0->dst_address,
-			       &ip6_0->src_address,
-			       vnet_buffer (b0)->ip.fib_index,
-			       ttype0, 0, &key0.gtk_v6);
-		}
-	      else
-		{
-		  gre_mk_key4 (ip4_0->dst_address,
-			       ip4_0->src_address,
-			       vnet_buffer (b0)->ip.fib_index,
-			       ttype0, 0, &key0.gtk_v4);
-		}
-
-	      if ((!is_ipv6 && !gre_match_key4 (&cached_tunnel_key.gtk_v4,
-						&key0.gtk_v4)) ||
-		  (is_ipv6 && !gre_match_key6 (&cached_tunnel_key.gtk_v6,
-					       &key0.gtk_v6)))
-		{
-		  gre_tunnel_t *t;
-		  uword *p;
-
-		  if (!is_ipv6)
-		    {
-		      p = hash_get_mem (gm->tunnel_by_key4, &key0.gtk_v4);
-		    }
-		  else
-		    {
-		      p = hash_get_mem (gm->tunnel_by_key6, &key0.gtk_v6);
-		    }
-		  if (!p)
-		    {
-		      next0 = GRE_INPUT_NEXT_DROP;
-		      b0->error = node->errors[GRE_ERROR_NO_SUCH_TUNNEL];
-		      goto drop;
-		    }
-		  t = pool_elt_at_index (gm->tunnels, p[0]);
-		  tunnel_sw_if_index = t->sw_if_index;
-
-		  cached_tunnel_sw_if_index = tunnel_sw_if_index;
-		  if (!is_ipv6)
-		    {
-		      cached_tunnel_key.gtk_v4 = key0.gtk_v4;
-		    }
-		  else
-		    {
-		      cached_tunnel_key.gtk_v6 = key0.gtk_v6;
-		    }
-		}
-	      else
-		{
-		  tunnel_sw_if_index = cached_tunnel_sw_if_index;
-		}
-	    }
-	  else
-	    {
-	      next0 = GRE_INPUT_NEXT_DROP;
-	      goto drop;
-	    }
-	  len = vlib_buffer_length_in_chain (vm, b0);
-	  vlib_increment_combined_counter (im->combined_sw_if_counters
-					   + VNET_INTERFACE_COUNTER_RX,
-					   thread_index,
-					   tunnel_sw_if_index,
-					   1 /* packets */ ,
-					   len /* bytes */ );
-
-	  vnet_buffer (b0)->sw_if_index[VLIB_RX] = tunnel_sw_if_index;
-
-	drop:
-	  if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
-	    {
-	      gre_rx_trace_t *tr = vlib_add_trace (vm, node,
-						   b0, sizeof (*tr));
-	      tr->tunnel_id = tunnel_sw_if_index;
-	      if (!is_ipv6)
-		{
-		  tr->length = ip4_0->length;
-		  tr->src.ip4.as_u32 = ip4_0->src_address.as_u32;
-		  tr->dst.ip4.as_u32 = ip4_0->dst_address.as_u32;
-		}
-	      else
-		{
-		  tr->length = ip6_0->payload_length;
-		  tr->src.ip6.as_u64[0] = ip6_0->src_address.as_u64[0];
-		  tr->src.ip6.as_u64[1] = ip6_0->src_address.as_u64[1];
-		  tr->dst.ip6.as_u64[0] = ip6_0->dst_address.as_u64[0];
-		  tr->dst.ip6.as_u64[1] = ip6_0->dst_address.as_u64[1];
-		}
-	    }
-
-	  vlib_buffer_advance (b0, sizeof (*h0));
-
-	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
-					   to_next, n_left_to_next,
-					   bi0, next0);
+	  /* ip6_local hands us the ip header, not the gre header */
+	  ip6[0] = vlib_buffer_get_current (b[0]);
+	  ip6[1] = vlib_buffer_get_current (b[1]);
+	  gre[0] = (void *) (ip6[0] + 1);
+	  gre[1] = (void *) (ip6[1] + 1);
+	  vlib_buffer_advance (b[0], sizeof (*ip6[0]) + sizeof (*gre[0]));
+	  vlib_buffer_advance (b[1], sizeof (*ip6[0]) + sizeof (*gre[0]));
+	}
+      else
+	{
+	  /* ip4_local hands us the ip header, not the gre header */
+	  ip4[0] = vlib_buffer_get_current (b[0]);
+	  ip4[1] = vlib_buffer_get_current (b[1]);
+	  gre[0] = (void *) (ip4[0] + 1);
+	  gre[1] = (void *) (ip4[1] + 1);
+	  vlib_buffer_advance (b[0], sizeof (*ip4[0]) + sizeof (*gre[0]));
+	  vlib_buffer_advance (b[1], sizeof (*ip4[0]) + sizeof (*gre[0]));
 	}
 
-      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+      if (PREDICT_TRUE (cached_protocol == gre[0]->protocol))
+	{
+	  nidx[0] = cached_next_index;
+	}
+      else
+	{
+	  cached_next_index = nidx[0] =
+	    sparse_vec_index (gm->next_by_protocol, gre[0]->protocol);
+	  cached_protocol = gre[0]->protocol;
+	}
+      if (PREDICT_TRUE (cached_protocol == gre[1]->protocol))
+	{
+	  nidx[1] = cached_next_index;
+	}
+      else
+	{
+	  cached_next_index = nidx[1] =
+	    sparse_vec_index (gm->next_by_protocol, gre[1]->protocol);
+	  cached_protocol = gre[1]->protocol;
+	}
+
+      ni[0] = vec_elt (gm->next_by_protocol, nidx[0]);
+      ni[1] = vec_elt (gm->next_by_protocol, nidx[1]);
+      next[0] = ni[0].next_index;
+      next[1] = ni[1].next_index;
+      type[0] = ni[0].tunnel_type;
+      type[1] = ni[1].tunnel_type;
+
+      b[0]->error = nidx[0] == SPARSE_VEC_INVALID_INDEX
+	? node->errors[GRE_ERROR_UNKNOWN_PROTOCOL]
+	: node->errors[GRE_ERROR_NONE];
+      b[1]->error = nidx[1] == SPARSE_VEC_INVALID_INDEX
+	? node->errors[GRE_ERROR_UNKNOWN_PROTOCOL]
+	: node->errors[GRE_ERROR_NONE];
+
+      version[0] = clib_net_to_host_u16 (gre[0]->flags_and_version);
+      version[1] = clib_net_to_host_u16 (gre[1]->flags_and_version);
+      version[0] &= GRE_VERSION_MASK;
+      version[1] &= GRE_VERSION_MASK;
+
+      b[0]->error = version[0]
+	? node->errors[GRE_ERROR_UNSUPPORTED_VERSION] : b[0]->error;
+      next[0] = version[0] ? GRE_INPUT_NEXT_DROP : next[0];
+      b[1]->error = version[1]
+	? node->errors[GRE_ERROR_UNSUPPORTED_VERSION] : b[1]->error;
+      next[1] = version[1] ? GRE_INPUT_NEXT_DROP : next[1];
+
+      len[0] = vlib_buffer_length_in_chain (vm, b[0]);
+      len[1] = vlib_buffer_length_in_chain (vm, b[1]);
+
+      if (is_ipv6)
+	{
+	  gre_mk_key6 (&ip6[0]->dst_address,
+		       &ip6[0]->src_address,
+		       vnet_buffer (b[0])->ip.fib_index,
+		       type[0], 0, &key[0].gtk_v6);
+	  gre_mk_key6 (&ip6[1]->dst_address,
+		       &ip6[1]->src_address,
+		       vnet_buffer (b[1])->ip.fib_index,
+		       type[1], 0, &key[1].gtk_v6);
+	  matched[0] = gre_match_key6 (&cached_key.gtk_v6, &key[0].gtk_v6);
+	  matched[1] = gre_match_key6 (&cached_key.gtk_v6, &key[1].gtk_v6);
+	}
+      else
+	{
+	  gre_mk_key4 (ip4[0]->dst_address,
+		       ip4[0]->src_address,
+		       vnet_buffer (b[0])->ip.fib_index,
+		       type[0], 0, &key[0].gtk_v4);
+	  gre_mk_key4 (ip4[1]->dst_address,
+		       ip4[1]->src_address,
+		       vnet_buffer (b[1])->ip.fib_index,
+		       type[1], 0, &key[1].gtk_v4);
+	  matched[0] = gre_match_key4 (&cached_key.gtk_v4, &key[0].gtk_v4);
+	  matched[1] = gre_match_key4 (&cached_key.gtk_v4, &key[1].gtk_v4);
+	}
+
+      tun_sw_if_index[0] = cached_tun_sw_if_index;
+      tun_sw_if_index[1] = cached_tun_sw_if_index;
+      if (PREDICT_FALSE (!matched[0]))
+	gre_tunnel_get (gm, node, b[0], &next[0], &key[0], &cached_key,
+			&tun_sw_if_index[0], &cached_tun_sw_if_index,
+			is_ipv6);
+      if (PREDICT_FALSE (!matched[1]))
+	gre_tunnel_get (gm, node, b[1], &next[1], &key[1], &cached_key,
+			&tun_sw_if_index[1], &cached_tun_sw_if_index,
+			is_ipv6);
+
+      if (PREDICT_TRUE (next[0] > GRE_INPUT_NEXT_DROP))
+	{
+	  vlib_increment_combined_counter (&gm->vnet_main->
+					   interface_main.combined_sw_if_counters
+					   [VNET_INTERFACE_COUNTER_RX],
+					   vm->thread_index,
+					   tun_sw_if_index[0],
+					   1 /* packets */ ,
+					   len[0] /* bytes */ );
+	  vnet_buffer (b[0])->sw_if_index[VLIB_RX] = tun_sw_if_index[0];
+	}
+      if (PREDICT_TRUE (next[1] > GRE_INPUT_NEXT_DROP))
+	{
+	  vlib_increment_combined_counter (&gm->vnet_main->
+					   interface_main.combined_sw_if_counters
+					   [VNET_INTERFACE_COUNTER_RX],
+					   vm->thread_index,
+					   tun_sw_if_index[1],
+					   1 /* packets */ ,
+					   len[1] /* bytes */ );
+	  vnet_buffer (b[1])->sw_if_index[VLIB_RX] = tun_sw_if_index[1];
+	}
+
+      if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
+	gre_trace (vm, node, b[0], tun_sw_if_index[0], ip6[0], ip4[0],
+		   is_ipv6);
+      if (PREDICT_FALSE (b[1]->flags & VLIB_BUFFER_IS_TRACED))
+	gre_trace (vm, node, b[1], tun_sw_if_index[1], ip6[1], ip4[1],
+		   is_ipv6);
+
+      b += 2;
+      next += 2;
+      n_left_from -= 2;
     }
+
+  while (n_left_from >= 1)
+    {
+      const ip6_header_t *ip6[1];
+      const ip4_header_t *ip4[1];
+      const gre_header_t *gre[1];
+      u32 nidx[1];
+      next_info_t ni[1];
+      u8 type[1];
+      u16 version[1];
+      u32 len[1];
+      gre_tunnel_key_t key[1];
+      u8 matched[1];
+      u32 tun_sw_if_index[1];
+
+      if (PREDICT_TRUE (n_left_from >= 3))
+	{
+	  vlib_prefetch_buffer_data (b[1], LOAD);
+	  vlib_prefetch_buffer_header (b[2], STORE);
+	}
+
+      if (is_ipv6)
+	{
+	  /* ip6_local hands us the ip header, not the gre header */
+	  ip6[0] = vlib_buffer_get_current (b[0]);
+	  gre[0] = (void *) (ip6[0] + 1);
+	  vlib_buffer_advance (b[0], sizeof (*ip6[0]) + sizeof (*gre[0]));
+	}
+      else
+	{
+	  /* ip4_local hands us the ip header, not the gre header */
+	  ip4[0] = vlib_buffer_get_current (b[0]);
+	  gre[0] = (void *) (ip4[0] + 1);
+	  vlib_buffer_advance (b[0], sizeof (*ip4[0]) + sizeof (*gre[0]));
+	}
+
+      if (PREDICT_TRUE (cached_protocol == gre[0]->protocol))
+	{
+	  nidx[0] = cached_next_index;
+	}
+      else
+	{
+	  cached_next_index = nidx[0] =
+	    sparse_vec_index (gm->next_by_protocol, gre[0]->protocol);
+	  cached_protocol = gre[0]->protocol;
+	}
+
+      ni[0] = vec_elt (gm->next_by_protocol, nidx[0]);
+      next[0] = ni[0].next_index;
+      type[0] = ni[0].tunnel_type;
+
+      b[0]->error = nidx[0] == SPARSE_VEC_INVALID_INDEX
+	? node->errors[GRE_ERROR_UNKNOWN_PROTOCOL]
+	: node->errors[GRE_ERROR_NONE];
+
+      version[0] = clib_net_to_host_u16 (gre[0]->flags_and_version);
+      version[0] &= GRE_VERSION_MASK;
+
+      b[0]->error = version[0]
+	? node->errors[GRE_ERROR_UNSUPPORTED_VERSION] : b[0]->error;
+      next[0] = version[0] ? GRE_INPUT_NEXT_DROP : next[0];
+
+      len[0] = vlib_buffer_length_in_chain (vm, b[0]);
+
+      if (is_ipv6)
+	{
+	  gre_mk_key6 (&ip6[0]->dst_address,
+		       &ip6[0]->src_address,
+		       vnet_buffer (b[0])->ip.fib_index,
+		       type[0], 0, &key[0].gtk_v6);
+	  matched[0] = gre_match_key6 (&cached_key.gtk_v6, &key[0].gtk_v6);
+	}
+      else
+	{
+	  gre_mk_key4 (ip4[0]->dst_address,
+		       ip4[0]->src_address,
+		       vnet_buffer (b[0])->ip.fib_index,
+		       type[0], 0, &key[0].gtk_v4);
+	  matched[0] = gre_match_key4 (&cached_key.gtk_v4, &key[0].gtk_v4);
+	}
+
+      tun_sw_if_index[0] = cached_tun_sw_if_index;
+      if (PREDICT_FALSE (!matched[0]))
+	gre_tunnel_get (gm, node, b[0], &next[0], &key[0], &cached_key,
+			&tun_sw_if_index[0], &cached_tun_sw_if_index,
+			is_ipv6);
+
+      if (PREDICT_TRUE (next[0] > GRE_INPUT_NEXT_DROP))
+	{
+	  vlib_increment_combined_counter (&gm->vnet_main->
+					   interface_main.combined_sw_if_counters
+					   [VNET_INTERFACE_COUNTER_RX],
+					   vm->thread_index,
+					   tun_sw_if_index[0],
+					   1 /* packets */ ,
+					   len[0] /* bytes */ );
+	  vnet_buffer (b[0])->sw_if_index[VLIB_RX] = tun_sw_if_index[0];
+	}
+
+      if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
+	gre_trace (vm, node, b[0], tun_sw_if_index[0], ip6[0], ip4[0],
+		   is_ipv6);
+
+      b += 1;
+      next += 1;
+      n_left_from -= 1;
+    }
+
+  vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
+
   vlib_node_increment_counter (vm,
-			       !is_ipv6 ? gre4_input_node.index :
-			       gre6_input_node.index, GRE_ERROR_PKTS_DECAP,
-			       from_frame->n_vectors);
-  return from_frame->n_vectors;
+			       is_ipv6 ? gre6_input_node.index :
+			       gre4_input_node.index, GRE_ERROR_PKTS_DECAP,
+			       n_left_from);
+
+  return frame->n_vectors;
 }
 
 VLIB_NODE_FN (gre4_input_node) (vlib_main_t * vm,