Optimize GRE Tunnel and add support for ERSPAN encap

Change GRE tunnel to use the interface type where the same encap
node is used as output node for all GRE tunnels, instead of having
dedicated output and tx node for each tunnel. This allows for more
efficient tunnel creation and deletion at scale, tested with 1000s
of GRE tunnels.

Add support for ERSPAN encap as another tunnel type, in addition
to the existing L3 and TEB types. The GRE ERSPAN encap supported
is type 2, so the GRE encap needs to include a sequence number, and
a GRE-ERSPAN tunnel can be created with a user-specified ERSPAN
session ID. The GRE tunnel lookup hash key is updated to include
tunnel type and session ID, in addition to SIP/DIP and FIB index.
Thus, GRE-ERSPAN tunnel can be created, with the appropriate
session ID, to be used as output interface for SPAN config to
send mirrored packets.

Change interface naming so that all GRE tunnels, irrespective of
tunnel type, use "greN" where N is the instance number. Removed
interface reuse on tunnel creation and deletion to enable unified
tunnel interface name.

Add support of user specified instance on GRE tunnel creation.
Thus, N in the "greN" interface name can optionally be specified
by user via CLI/API.

Optimize GRE tunnel encap DPO stacking to bypass load-balance DPO
node since packets output on a GRE tunnel always belong to the same
flow after 5-tuple hash.

Change-Id: Ifa83915744a1a88045c998604777cc3583f4da52
Signed-off-by: John Lo <loj@cisco.com>
diff --git a/src/vnet/gre/gre.api b/src/vnet/gre/gre.api
index b410ac9..28c9701 100644
--- a/src/vnet/gre/gre.api
+++ b/src/vnet/gre/gre.api
@@ -13,18 +13,32 @@
  * limitations under the License.
  */
 
-option version = "1.0.0";
+option version = "1.0.1";
 
+/** \brief Create or delete a GRE tunnel
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param is_add - Use 1 to create the tunnel, 0 to remove it
+    @param is_ipv6 - Use 0 for IPv4, 1 for IPv6
+    @param tunnel_type - 0: L3, 1: TEB, 2: ERSPAN
+    @param instance - optional unique custom device instance, else ~0.
+    @param src_address - Source IP address
+    @param dst_address - Destination IP address, can be multicast
+    @param outer_fib_id - Encap FIB table ID
+    @param session_id - session for ERSPAN tunnel, range 0-1023
+*/
 define gre_add_del_tunnel
 {
   u32 client_index;
   u32 context;
   u8 is_add;
   u8 is_ipv6;
-  u8 teb;
+  u8 tunnel_type;
+  u32 instance;		/* If non-~0, specifies a custom dev instance */
   u8 src_address[16];
   u8 dst_address[16];
   u32 outer_fib_id;
+  u16 session_id;
 };
 
 define gre_add_del_tunnel_reply
@@ -45,11 +59,13 @@
 {
   u32 context;
   u32 sw_if_index;
+  u32 instance;
   u8 is_ipv6;
-  u8 teb;
+  u8 tunnel_type;
   u8 src_address[16];
   u8 dst_address[16];
   u32 outer_fib_id;
+  u16 session_id;
 };
 
 /*
diff --git a/src/vnet/gre/gre.c b/src/vnet/gre/gre.c
index c049b87..a370292 100644
--- a/src/vnet/gre/gre.c
+++ b/src/vnet/gre/gre.c
@@ -62,7 +62,7 @@
   gre_tx_trace_t *t = va_arg (*args, gre_tx_trace_t *);
 
   s = format (s, "GRE: tunnel %d len %d src %U dst %U",
-	      t->tunnel_id, clib_net_to_host_u16 (t->length),
+	      t->tunnel_id, t->length,
 	      format_ip46_address, &t->src, IP46_TYPE_ANY,
 	      format_ip46_address, &t->dst, IP46_TYPE_ANY);
   return s;
@@ -100,7 +100,7 @@
 
   s = format (s, "GRE %U", format_gre_protocol, p);
 
-  if (max_header_bytes != 0 && header_bytes > max_header_bytes)
+  if (max_header_bytes != 0 && header_bytes < max_header_bytes)
     {
       gre_protocol_info_t *pi = gre_get_protocol_info (gm, p);
       vlib_node_t *node = vlib_get_node (gm->vlib_main, pi->node_index);
@@ -208,6 +208,7 @@
   gre_main_t *gm = &gre_main;
   ip4_and_gre_header_t *h4;
   ip6_and_gre_header_t *h6;
+  gre_header_t *gre;
   u8 *rewrite = NULL;
   gre_tunnel_t *t;
   u32 ti;
@@ -227,9 +228,7 @@
     {
       vec_validate (rewrite, sizeof (*h4) - 1);
       h4 = (ip4_and_gre_header_t *) rewrite;
-      h4->gre.protocol =
-	clib_host_to_net_u16 (gre_proto_from_vnet_link (link_type));
-
+      gre = &h4->gre;
       h4->ip4.ip_version_and_header_length = 0x45;
       h4->ip4.ttl = 254;
       h4->ip4.protocol = IP_PROTOCOL_GRE;
@@ -242,9 +241,7 @@
     {
       vec_validate (rewrite, sizeof (*h6) - 1);
       h6 = (ip6_and_gre_header_t *) rewrite;
-      h6->gre.protocol =
-	clib_host_to_net_u16 (gre_proto_from_vnet_link (link_type));
-
+      gre = &h6->gre;
       h6->ip6.ip_version_traffic_class_and_flow_label =
 	clib_host_to_net_u32 (6 << 28);
       h6->ip6.hop_limit = 255;
@@ -256,6 +253,15 @@
       h6->ip6.dst_address.as_u64[1] = t->tunnel_dst.fp_addr.ip6.as_u64[1];
     }
 
+  if (PREDICT_FALSE (t->type == GRE_TUNNEL_TYPE_ERSPAN))
+    {
+      gre->protocol = clib_host_to_net_u16 (GRE_PROTOCOL_erspan);
+      gre->flags_and_version = clib_host_to_net_u16 (GRE_FLAGS_SEQUENCE);
+    }
+  else
+    gre->protocol =
+      clib_host_to_net_u16 (gre_proto_from_vnet_link (link_type));
+
   return (rewrite);
 }
 
@@ -302,33 +308,43 @@
   t = pool_elt_at_index (gm->tunnels, ti);
   is_ipv6 = t->tunnel_dst.fp_proto == FIB_PROTOCOL_IP6 ? 1 : 0;
 
-  adj_nbr_midchain_update_rewrite (ai, !is_ipv6 ? gre4_fixup : gre6_fixup,
-				   NULL,
-				   (VNET_LINK_ETHERNET ==
-				    adj_get_link_type (ai) ?
-				    ADJ_FLAG_MIDCHAIN_NO_COUNT :
-				    ADJ_FLAG_NONE), gre_build_rewrite (vnm,
-								       sw_if_index,
-								       adj_get_link_type
-								       (ai),
-								       NULL));
+  adj_nbr_midchain_update_rewrite
+    (ai, !is_ipv6 ? gre4_fixup : gre6_fixup, NULL,
+     (VNET_LINK_ETHERNET == adj_get_link_type (ai) ?
+      ADJ_FLAG_MIDCHAIN_NO_COUNT : ADJ_FLAG_NONE),
+     gre_build_rewrite (vnm, sw_if_index, adj_get_link_type (ai), NULL));
 
   gre_tunnel_stack (ai);
 }
 
+
+typedef enum
+{
+  GRE_ENCAP_NEXT_DROP,
+  GRE_ENCAP_NEXT_L2_MIDCHAIN,
+  GRE_ENCAP_N_NEXT,
+} gre_encap_next_t;
+
+#define NEXT_IDX (GRE_ENCAP_NEXT_L2_MIDCHAIN)
+
 /**
- * @brief TX function. Only called L2. L3 traffic uses the adj-midchains
+ * @brief TX function. Only called for L2 payload including TEB or ERSPAN.
+ *        L3 traffic uses the adj-midchains.
  */
 static uword
-gre_interface_tx_inline (vlib_main_t * vm,
-			 vlib_node_runtime_t * node, vlib_frame_t * frame)
+gre_interface_tx (vlib_main_t * vm,
+		  vlib_node_runtime_t * node, vlib_frame_t * frame)
 {
   gre_main_t *gm = &gre_main;
+  vnet_main_t *vnm = gm->vnet_main;
   u32 next_index;
   u32 *from, *to_next, n_left_from, n_left_to_next;
-  vnet_interface_output_runtime_t *rd = (void *) node->runtime_data;
-  const gre_tunnel_t *gt = pool_elt_at_index (gm->tunnels, rd->dev_instance);
-  u8 is_ipv6 = gt->tunnel_dst.fp_proto == FIB_PROTOCOL_IP6 ? 1 : 0;
+  u32 sw_if_index0 = 0;
+  u32 sw_if_index1 = 0;
+  adj_index_t adj_index0 = ADJ_INDEX_INVALID;
+  adj_index_t adj_index1 = ADJ_INDEX_INVALID;
+  gre_tunnel_t *gt0 = NULL;
+  gre_tunnel_t *gt1 = NULL;
 
   /* Vector of buffer / pkt indices we're supposed to process */
   from = vlib_frame_vector_args (frame);
@@ -344,77 +360,193 @@
       /* set up to enqueue to our disposition with index = next_index */
       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
 
-      /*
-       * FIXME DUAL LOOP
-       */
+      while (n_left_from >= 4 && n_left_to_next >= 2)
+	{
+	  u32 bi0 = from[0];
+	  u32 bi1 = from[1];
+	  vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
+	  vlib_buffer_t *b1 = vlib_get_buffer (vm, bi1);
+
+	  to_next[0] = bi0;
+	  to_next[1] = bi1;
+	  from += 2;
+	  to_next += 2;
+	  n_left_to_next -= 2;
+	  n_left_from -= 2;
+
+	  if (sw_if_index0 != vnet_buffer (b0)->sw_if_index[VLIB_TX])
+	    {
+	      sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+	      vnet_hw_interface_t *hi0 =
+		vnet_get_sup_hw_interface (vnm, sw_if_index0);
+	      gt0 = &gm->tunnels[hi0->dev_instance];
+	      adj_index0 = gt0->l2_adj_index;
+	    }
+
+	  if (sw_if_index0 == vnet_buffer (b1)->sw_if_index[VLIB_TX])
+	    {
+	      sw_if_index1 = sw_if_index0;
+	      gt1 = gt0;
+	      adj_index1 = adj_index0;
+	    }
+	  else if (sw_if_index1 != vnet_buffer (b1)->sw_if_index[VLIB_TX])
+	    {
+	      sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_TX];
+	      vnet_hw_interface_t *hi1 =
+		vnet_get_sup_hw_interface (vnm, sw_if_index1);
+	      gt1 = &gm->tunnels[hi1->dev_instance];
+	      adj_index1 = gt1->l2_adj_index;
+	    }
+
+	  vnet_buffer (b0)->ip.adj_index[VLIB_TX] = adj_index0;
+	  vnet_buffer (b1)->ip.adj_index[VLIB_TX] = adj_index1;
+
+	  if (PREDICT_FALSE (gt0->type == GRE_TUNNEL_TYPE_ERSPAN))
+	    {
+	      /* Encap GRE seq# and ERSPAN type II header */
+	      vlib_buffer_advance (b0, -sizeof (erspan_t2_t));
+	      erspan_t2_t *h0 = vlib_buffer_get_current (b0);
+	      u32 seq_num = clib_smp_atomic_add (&gt0->gre_sn->seq_num, 1);
+	      u64 ver1 = clib_host_to_net_u64 (0x1000000000000000);
+	      h0->seq_num = clib_host_to_net_u32 (seq_num);
+	      h0->t2_u64 = ver1;	/* all 0's except ver=1 */
+	      h0->t2.cos_en_t_session =
+		clib_host_to_net_u16 (gt0->session_id);
+	    }
+	  if (PREDICT_FALSE (gt1->type == GRE_TUNNEL_TYPE_ERSPAN))
+	    {
+	      /* Encap GRE seq# and ERSPAN type II header */
+	      vlib_buffer_advance (b1, -sizeof (erspan_t2_t));
+	      erspan_t2_t *h1 = vlib_buffer_get_current (b1);
+	      u32 seq_num = clib_smp_atomic_add (&gt1->gre_sn->seq_num, 1);
+	      u64 ver1 = clib_host_to_net_u64 (0x1000000000000000);
+	      h1->seq_num = clib_host_to_net_u32 (seq_num);
+	      h1->t2_u64 = ver1;	/* all 0's except ver=1 */
+	      h1->t2.cos_en_t_session =
+		clib_host_to_net_u16 (gt1->session_id);
+	    }
+
+	  if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+	    {
+	      gre_tx_trace_t *tr0 = vlib_add_trace (vm, node,
+						    b0, sizeof (*tr0));
+	      tr0->tunnel_id = gt0 - gm->tunnels;
+	      tr0->src = gt0->tunnel_src;
+	      tr0->dst = gt0->tunnel_dst.fp_addr;
+	      tr0->length = vlib_buffer_length_in_chain (vm, b0);
+	    }
+	  if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
+	    {
+	      gre_tx_trace_t *tr1 = vlib_add_trace (vm, node,
+						    b1, sizeof (*tr1));
+	      tr1->tunnel_id = gt1 - gm->tunnels;
+	      tr1->src = gt1->tunnel_src;
+	      tr1->dst = gt1->tunnel_dst.fp_addr;
+	      tr1->length = vlib_buffer_length_in_chain (vm, b1);
+	    }
+
+	  vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+					   to_next, n_left_to_next,
+					   bi0, bi1, NEXT_IDX, NEXT_IDX);
+	}
 
       while (n_left_from > 0 && n_left_to_next > 0)
 	{
-	  vlib_buffer_t *b0;
-	  u32 bi0;
+	  u32 bi0 = from[0];
+	  vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
 
-	  bi0 = from[0];
 	  to_next[0] = bi0;
 	  from += 1;
 	  to_next += 1;
 	  n_left_from -= 1;
 	  n_left_to_next -= 1;
 
-	  b0 = vlib_get_buffer (vm, bi0);
+	  if (sw_if_index0 != vnet_buffer (b0)->sw_if_index[VLIB_TX])
+	    {
+	      sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+	      vnet_hw_interface_t *hi0 =
+		vnet_get_sup_hw_interface (vnm, sw_if_index0);
+	      gt0 = &gm->tunnels[hi0->dev_instance];
+	      adj_index0 = gt0->l2_adj_index;
+	    }
 
-	  vnet_buffer (b0)->ip.adj_index[VLIB_TX] = gt->l2_adj_index;
+	  vnet_buffer (b0)->ip.adj_index[VLIB_TX] = adj_index0;
+
+	  if (PREDICT_FALSE (gt0->type == GRE_TUNNEL_TYPE_ERSPAN))
+	    {
+	      /* Encap GRE seq# and ERSPAN type II header */
+	      vlib_buffer_advance (b0, -sizeof (erspan_t2_t));
+	      erspan_t2_t *h0 = vlib_buffer_get_current (b0);
+	      u32 seq_num = clib_smp_atomic_add (&gt0->gre_sn->seq_num, 1);
+	      u64 ver1 = clib_host_to_net_u64 (0x1000000000000000);
+	      h0->seq_num = clib_host_to_net_u32 (seq_num);
+	      h0->t2_u64 = ver1;	/* all 0's except ver=1 */
+	      h0->t2.cos_en_t_session =
+		clib_host_to_net_u16 (gt0->session_id);
+	    }
 
 	  if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
 	    {
 	      gre_tx_trace_t *tr = vlib_add_trace (vm, node,
 						   b0, sizeof (*tr));
-	      tr->tunnel_id = gt - gm->tunnels;
-	      tr->src = gt->tunnel_src;
-	      tr->dst = gt->tunnel_src;
+	      tr->tunnel_id = gt0 - gm->tunnels;
+	      tr->src = gt0->tunnel_src;
+	      tr->dst = gt0->tunnel_dst.fp_addr;
 	      tr->length = vlib_buffer_length_in_chain (vm, b0);
 	    }
 
 	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
 					   to_next, n_left_to_next,
-					   bi0, gt->l2_tx_arc);
+					   bi0, NEXT_IDX);
 	}
 
       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
     }
 
-  vlib_node_increment_counter (vm, !is_ipv6 ? gre4_input_node.index :
-			       gre6_input_node.index,
+  vlib_node_increment_counter (vm, node->node_index,
 			       GRE_ERROR_PKTS_ENCAP, frame->n_vectors);
 
   return frame->n_vectors;
 }
 
-static uword
-gre_interface_tx (vlib_main_t * vm,
-		  vlib_node_runtime_t * node, vlib_frame_t * frame)
-{
-  return (gre_interface_tx_inline (vm, node, frame));
-}
+static char *gre_error_strings[] = {
+#define gre_error(n,s) s,
+#include "error.def"
+#undef gre_error
+};
 
-static uword
-gre_teb_interface_tx (vlib_main_t * vm,
-		      vlib_node_runtime_t * node, vlib_frame_t * frame)
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (gre_encap_node) =
 {
-  return (gre_interface_tx_inline (vm, node, frame));
-}
+  .function = gre_interface_tx,
+  .name = "gre-encap",
+  .vector_size = sizeof (u32),
+  .format_trace = format_gre_tx_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_errors = GRE_N_ERROR,
+  .error_strings = gre_error_strings,
+  .n_next_nodes = GRE_ENCAP_N_NEXT,
+  .next_nodes = {
+    [GRE_ENCAP_NEXT_DROP] = "error-drop",
+    [GRE_ENCAP_NEXT_L2_MIDCHAIN] = "adj-l2-midchain",
+  },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (gre_encap_node, gre_interface_tx)
+/* *INDENT-ON* */
 
 static u8 *
 format_gre_tunnel_name (u8 * s, va_list * args)
 {
   u32 dev_instance = va_arg (*args, u32);
-  return format (s, "gre%d", dev_instance);
-}
+  gre_main_t *gm = &gre_main;
+  gre_tunnel_t *t;
 
-static u8 *
-format_gre_tunnel_teb_name (u8 * s, va_list * args)
-{
-  u32 dev_instance = va_arg (*args, u32);
-  return format (s, "teb-gre%d", dev_instance);
+  if (dev_instance >= vec_len (gm->tunnels))
+    return format (s, "<improperly-referenced>");
+
+  t = pool_elt_at_index (gm->tunnels, dev_instance);
+  return format (s, "gre%d", t->user_instance);
 }
 
 static u8 *
@@ -433,36 +565,11 @@
   .format_device_name = format_gre_tunnel_name,
   .format_device = format_gre_device,
   .format_tx_trace = format_gre_tx_trace,
-  .tx_function = gre_interface_tx,
   .admin_up_down_function = gre_interface_admin_up_down,
 #ifdef SOON
   .clear counter = 0;
 #endif
 };
-/* *INDENT-ON* */
-
-
-/* *INDENT-OFF* */
-VLIB_DEVICE_TX_FUNCTION_MULTIARCH (gre_device_class,
-                                   gre_interface_tx)
-
-VNET_DEVICE_CLASS (gre_device_teb_class) = {
-  .name = "GRE TEB tunnel device",
-  .format_device_name = format_gre_tunnel_teb_name,
-  .format_device = format_gre_device,
-  .format_tx_trace = format_gre_tx_trace,
-  .tx_function = gre_teb_interface_tx,
-  .admin_up_down_function = gre_interface_admin_up_down,
-#ifdef SOON
-  .clear counter = 0;
-#endif
-};
-
-/* *INDENT-ON* */
-
-/* *INDENT-OFF* */
-VLIB_DEVICE_TX_FUNCTION_MULTIARCH (gre_device_teb_class,
-                                   gre_teb_interface_tx)
 
 VNET_HW_INTERFACE_CLASS (gre_hw_interface_class) = {
   .name = "GRE",
@@ -523,6 +630,8 @@
     hash_create_mem (0, sizeof (gre_tunnel_key4_t), sizeof (uword));
   gm->tunnel_by_key6 =
     hash_create_mem (0, sizeof (gre_tunnel_key6_t), sizeof (uword));
+  gm->seq_num_by_key =
+    hash_create_mem (0, sizeof (gre_sn_key_t), sizeof (uword));
 
 #define _(n,s) add_protocol (gm, GRE_PROTOCOL_##s, #s);
   foreach_gre_protocol
diff --git a/src/vnet/gre/gre.h b/src/vnet/gre/gre.h
index 83bab76..b3b0b54 100644
--- a/src/vnet/gre/gre.h
+++ b/src/vnet/gre/gre.h
@@ -36,6 +36,36 @@
 } gre_error_t;
 
 /**
+ * @brief The GRE tunnel type
+ */
+typedef enum gre_tunnel_type_t_
+{
+  /**
+   * L3 GRE (i.e. this tunnel is in L3 mode)
+   */
+  GRE_TUNNEL_TYPE_L3 = 0,
+  /**
+   * Transparent Ethernet Bridging - the tunnel is in L2 mode
+   */
+  GRE_TUNNEL_TYPE_TEB = 1,
+  /**
+   * ERSPAN type 2 - the tunnel is for port mirror SPAN output. Each tunnel is
+   * associated with a session ID and expected to be used for encap and output
+   * of mirrored packet from a L2 network only. There is no support for
+   * receiving ERSPAN packets from a GRE ERSPAN tunnel in VPP.
+   */
+  GRE_TUNNEL_TYPE_ERSPAN = 2,
+
+  GRE_TUNNEL_TYPE_N
+} gre_tunnel_type_t;
+
+#define GRE_TUNNEL_TYPE_NAMES {    \
+    [GRE_TUNNEL_TYPE_L3] = "L3",   \
+    [GRE_TUNNEL_TYPE_TEB] = "TEB", \
+    [GRE_TUNNEL_TYPE_ERSPAN] = "ERSPAN", \
+}
+
+/**
  * A GRE payload protocol registration
  */
 typedef struct
@@ -46,6 +76,9 @@
   /** GRE protocol type in host byte order. */
   gre_protocol_t protocol;
 
+  /** GRE tunnel type */
+  gre_tunnel_type_t tunnel_type;
+
   /** Node which handles this type. */
   u32 node_index;
 
@@ -54,28 +87,6 @@
 } gre_protocol_info_t;
 
 /**
- * @brief The GRE tunnel type
- */
-typedef enum gre_tunnel_tyoe_t_
-{
-  /**
-   * L3 GRE (i.e. this tunnel is in L3 mode)
-   */
-  GRE_TUNNEL_TYPE_L3,
-  /**
-   * Transparent Ethernet Bridging - the tunnel is in L2 mode
-   */
-  GRE_TUNNEL_TYPE_TEB,
-} gre_tunnel_type_t;
-
-#define GRE_TUNNEL_TYPE_NAMES {    \
-    [GRE_TUNNEL_TYPE_L3] = "L3",   \
-    [GRE_TUNNEL_TYPE_TEB] = "TEB", \
-}
-
-#define GRE_TUNNEL_N_TYPES ((gre_tunnel_type_t)GRE_TUNNEL_TYPE_TEB+1)
-
-/**
  * @brief Key for a IPv4 GRE Tunnel
  */
 typedef struct gre_tunnel_key4_t_
@@ -94,11 +105,12 @@
   };
 
   /**
-   * The FIB table the src,dst addresses are in.
-   * tunnels with the same IP addresses in different FIBs are not
-   * the same tunnel
+   * FIB table index, ERSPAN session ID and tunnel type in u32 bit fields:
+   * - The FIB table index the src,dst addresses are in, top 20 bits
+   * - The Session ID for ERSPAN tunnel type and 0 otherwise, next 10 bits
+   * - Tunnel type, bottom 2 bits
    */
-  u32 gtk_fib_index;
+  u32 gtk_fidx_ssid_type;
 } __attribute__ ((packed)) gre_tunnel_key4_t;
 
 /**
@@ -114,13 +126,22 @@
   ip6_address_t gtk_dst;
 
   /**
-   * The FIB table the src,dst addresses are in.
-   * tunnels with the same IP addresses in different FIBs are not
-   * the same tunnel
+   * FIB table index, ERSPAN session ID and tunnel type in u32 bit fields:
+   * - The FIB table index the src,dst addresses are in, top 20 bits
+   * - The Session ID for ERSPAN tunnel type and 0 otherwise, next 10 bits
+   * - Tunnel type, bottom 2 bits
    */
-  u32 gtk_fib_index;
+  u32 gtk_fidx_ssid_type;
 } __attribute__ ((packed)) gre_tunnel_key6_t;
 
+#define GTK_FIB_INDEX_SHIFT	12
+#define GTK_FIB_INDEX_MASK	0xfffff000
+#define GTK_TYPE_SHIFT		0
+#define GTK_TYPE_MASK		0x3
+#define GTK_SESSION_ID_SHIFT	2
+#define GTK_SESSION_ID_MASK	0xffc
+#define GTK_SESSION_ID_MAX	(GTK_SESSION_ID_MASK >> GTK_SESSION_ID_SHIFT)
+
 /**
  * Union of the two possible key types
  */
@@ -131,6 +152,25 @@
 } gre_tunnel_key_t;
 
 /**
+ * Used for GRE header seq number generation for ERSPAN encap
+ */
+typedef struct
+{
+  u32 seq_num;
+  u32 ref_count;
+} gre_sn_t;
+
+/**
+ * Hash key for GRE header seq number generation for ERSPAN encap
+ */
+typedef struct
+{
+  ip46_address_t src;
+  ip46_address_t dst;
+  u32 fib_index;
+} gre_sn_key_t;
+
+/**
  * @brief A representation of a GRE tunnel
  */
 typedef struct
@@ -176,16 +216,34 @@
   u32 sibling_index;
 
   /**
-   * on a L2 tunnel this is the VLIB arc from the L2-tx to the l2-midchain
-   */
-  u32 l2_tx_arc;
-
-  /**
    * an L2 tunnel always rquires an L2 midchain. cache here for DP.
    */
   adj_index_t l2_adj_index;
+
+  /**
+   * ERSPAN type 2 session ID, least significant 10 bits of u16
+   */
+  u16 session_id;
+
+  /**
+   * GRE header sequence number (SN) used for ERSPAN type 2 header, must be
+   * bumped atomically to be thread safe. As multiple GRE tunnels are created
+   * for the same fib-idx/DIP/SIP with different ERSPAN session number, they all
+   * share the same SN which is kept per FIB/DIP/SIP, as specified by RFC2890.
+   */
+  gre_sn_t *gre_sn;
+
+
+  u32 dev_instance;		/* Real device instance in tunnel vector */
+  u32 user_instance;		/* Instance name being shown to user */
 } gre_tunnel_t;
 
+typedef struct
+{
+  u8 next_index;
+  u8 tunnel_type;
+} next_info_t;
+
 /**
  * @brief GRE related global data
  */
@@ -207,21 +265,19 @@
   uword *protocol_info_by_name, *protocol_info_by_protocol;
 
   /**
-   * Hash mapping ipv4 src/dst addr pair to tunnel
+   * Hash mapping to tunnels with ipv4 src/dst addr
    */
   uword *tunnel_by_key4;
 
   /**
-   * Hash mapping ipv6 src/dst addr pair to tunnel
+   * Hash mapping to tunnels with ipv6 src/dst addr
    */
   uword *tunnel_by_key6;
 
   /**
-   * Free vlib hw_if_indices.
-   * A free list per-tunnel type since the interfaces ctreated are fo different
-   * types and we cannot change the type.
+   * Hash mapping tunnel src/dst addr and fib-idx to sequence number
    */
-  u32 *free_gre_tunnel_hw_if_indices[GRE_TUNNEL_N_TYPES];
+  uword *seq_num_by_key;
 
   /**
    * Mapping from sw_if_index to tunnel index
@@ -230,11 +286,14 @@
 
   /* Sparse vector mapping gre protocol in network byte order
      to next index. */
-  u16 *next_by_protocol;
+  next_info_t *next_by_protocol;
 
   /* convenience */
   vlib_main_t *vlib_main;
   vnet_main_t *vnet_main;
+
+  /* Record used instances */
+  uword *instance_used;
 } gre_main_t;
 
 /**
@@ -266,11 +325,6 @@
 
 extern gre_main_t gre_main;
 
-/* Register given node index to take input for given gre type. */
-void
-gre_register_input_type (vlib_main_t * vm,
-			 gre_protocol_t protocol, u32 node_index);
-
 extern clib_error_t *gre_interface_admin_up_down (vnet_main_t * vnm,
 						  u32 hw_if_index, u32 flags);
 
@@ -284,8 +338,8 @@
 
 extern vlib_node_registration_t gre4_input_node;
 extern vlib_node_registration_t gre6_input_node;
+extern vlib_node_registration_t gre_encap_node;
 extern vnet_device_class_t gre_device_class;
-extern vnet_device_class_t gre_device_teb_class;
 
 /* Parse gre protocol as 0xXXXX or protocol name.
    In either host or network byte order. */
@@ -297,8 +351,8 @@
 unformat_function_t unformat_pg_gre_header;
 
 void
-gre_register_input_protocol (vlib_main_t * vm,
-			     gre_protocol_t protocol, u32 node_index);
+gre_register_input_protocol (vlib_main_t * vm, gre_protocol_t protocol,
+			     u32 node_index, gre_tunnel_type_t tunnel_type);
 
 /* manually added to the interface output node in gre.c */
 #define GRE_OUTPUT_NEXT_LOOKUP	1
@@ -306,24 +360,26 @@
 typedef struct
 {
   u8 is_add;
-
-  ip46_address_t src, dst;
+  u8 tunnel_type;
   u8 is_ipv6;
+  u32 instance;
+  ip46_address_t src, dst;
   u32 outer_fib_id;
-  u8 teb;
+  u16 session_id;
 } vnet_gre_add_del_tunnel_args_t;
 
 int vnet_gre_add_del_tunnel
   (vnet_gre_add_del_tunnel_args_t * a, u32 * sw_if_indexp);
 
 static inline void
-gre_mk_key4 (const ip4_address_t * src,
-	     const ip4_address_t * dst,
-	     u32 fib_index, gre_tunnel_key4_t * key)
+gre_mk_key4 (ip4_address_t src,
+	     ip4_address_t dst,
+	     u32 fib_index, u8 ttype, u16 session_id, gre_tunnel_key4_t * key)
 {
-  key->gtk_src = *src;
-  key->gtk_dst = *dst;
-  key->gtk_fib_index = fib_index;
+  key->gtk_src = src;
+  key->gtk_dst = dst;
+  key->gtk_fidx_ssid_type = ttype |
+    (fib_index << GTK_FIB_INDEX_SHIFT) | (session_id << GTK_SESSION_ID_SHIFT);
 }
 
 static inline int
@@ -331,17 +387,18 @@
 		const gre_tunnel_key4_t * key2)
 {
   return ((key1->gtk_as_u64 == key2->gtk_as_u64) &&
-	  (key1->gtk_fib_index == key2->gtk_fib_index));
+	  (key1->gtk_fidx_ssid_type == key2->gtk_fidx_ssid_type));
 }
 
 static inline void
 gre_mk_key6 (const ip6_address_t * src,
 	     const ip6_address_t * dst,
-	     u32 fib_index, gre_tunnel_key6_t * key)
+	     u32 fib_index, u8 ttype, u16 session_id, gre_tunnel_key6_t * key)
 {
   key->gtk_src = *src;
   key->gtk_dst = *dst;
-  key->gtk_fib_index = fib_index;
+  key->gtk_fidx_ssid_type = ttype |
+    (fib_index << GTK_FIB_INDEX_SHIFT) | (session_id << GTK_SESSION_ID_SHIFT);
 }
 
 static inline int
@@ -352,7 +409,15 @@
 	  (key1->gtk_src.as_u64[1] == key2->gtk_src.as_u64[1]) &&
 	  (key1->gtk_dst.as_u64[0] == key2->gtk_dst.as_u64[0]) &&
 	  (key1->gtk_dst.as_u64[1] == key2->gtk_dst.as_u64[1]) &&
-	  (key1->gtk_fib_index == key2->gtk_fib_index));
+	  (key1->gtk_fidx_ssid_type == key2->gtk_fidx_ssid_type));
+}
+
+static inline void
+gre_mk_sn_key (const gre_tunnel_t * gt, gre_sn_key_t * key)
+{
+  key->src = gt->tunnel_src;
+  key->dst = gt->tunnel_dst.fp_addr;
+  key->fib_index = gt->outer_fib_index;
 }
 
 #endif /* included_gre_h */
diff --git a/src/vnet/gre/gre_api.c b/src/vnet/gre/gre_api.c
index 4dad614..63d4ca4 100644
--- a/src/vnet/gre/gre_api.c
+++ b/src/vnet/gre/gre_api.c
@@ -66,8 +66,10 @@
   memset (a, 0, sizeof (*a));
 
   a->is_add = mp->is_add;
-  a->teb = mp->teb;
+  a->tunnel_type = mp->tunnel_type;
   a->is_ipv6 = mp->is_ipv6;
+  a->instance = ntohl (mp->instance);
+  a->session_id = ntohs (mp->session_id);
 
   /* ip addresses sent in network byte order */
   if (!mp->is_ipv6)
@@ -102,23 +104,25 @@
 
   rmp = vl_msg_api_alloc (sizeof (*rmp));
   memset (rmp, 0, sizeof (*rmp));
-  rmp->_vl_msg_id = ntohs (VL_API_GRE_TUNNEL_DETAILS);
+  rmp->_vl_msg_id = htons (VL_API_GRE_TUNNEL_DETAILS);
   if (!is_ipv6)
     {
       clib_memcpy (rmp->src_address, &(t->tunnel_src.ip4.as_u8), 4);
       clib_memcpy (rmp->dst_address, &(t->tunnel_dst.fp_addr.ip4.as_u8), 4);
       ft = fib_table_get (t->outer_fib_index, FIB_PROTOCOL_IP4);
-      rmp->outer_fib_id = ft->ft_table_id;
+      rmp->outer_fib_id = htonl (ft->ft_table_id);
     }
   else
     {
       clib_memcpy (rmp->src_address, &(t->tunnel_src.ip6.as_u8), 16);
       clib_memcpy (rmp->dst_address, &(t->tunnel_dst.fp_addr.ip6.as_u8), 16);
       ft = fib_table_get (t->outer_fib_index, FIB_PROTOCOL_IP6);
-      rmp->outer_fib_id = ft->ft_table_id;
+      rmp->outer_fib_id = htonl (ft->ft_table_id);
     }
-  rmp->teb = (GRE_TUNNEL_TYPE_TEB == t->type);
+  rmp->tunnel_type = t->type;
+  rmp->instance = htonl (t->user_instance);
   rmp->sw_if_index = htonl (t->sw_if_index);
+  rmp->session_id = htons (t->session_id);
   rmp->context = context;
   rmp->is_ipv6 = is_ipv6;
 
diff --git a/src/vnet/gre/interface.c b/src/vnet/gre/interface.c
index ce9685d..97c4f16 100644
--- a/src/vnet/gre/interface.c
+++ b/src/vnet/gre/interface.c
@@ -31,35 +31,41 @@
 format_gre_tunnel (u8 * s, va_list * args)
 {
   gre_tunnel_t *t = va_arg (*args, gre_tunnel_t *);
-  gre_main_t *gm = &gre_main;
 
-  s = format (s, "[%d] src %U dst %U fib-idx %d sw-if-idx %d ",
-	      t - gm->tunnels,
+  s = format (s, "[%d] instance %d src %U dst %U fib-idx %d sw-if-idx %d ",
+	      t->dev_instance, t->user_instance,
 	      format_ip46_address, &t->tunnel_src, IP46_TYPE_ANY,
 	      format_ip46_address, &t->tunnel_dst.fp_addr, IP46_TYPE_ANY,
 	      t->outer_fib_index, t->sw_if_index);
 
-  s = format (s, "payload %s", gre_tunnel_type_names[t->type]);
+  s = format (s, "payload %s ", gre_tunnel_type_names[t->type]);
+
+  if (t->type == GRE_TUNNEL_TYPE_ERSPAN)
+    s = format (s, "session %d ", t->session_id);
+
+  if (t->type != GRE_TUNNEL_TYPE_L3)
+    s = format (s, "l2-adj-idx %d ", t->l2_adj_index);
 
   return s;
 }
 
 static gre_tunnel_t *
-gre_tunnel_db_find (const ip46_address_t * src,
-		    const ip46_address_t * dst,
-		    u32 out_fib_index, u8 is_ipv6, gre_tunnel_key_t * key)
+gre_tunnel_db_find (const vnet_gre_add_del_tunnel_args_t * a,
+		    u32 outer_fib_index, gre_tunnel_key_t * key)
 {
   gre_main_t *gm = &gre_main;
   uword *p;
 
-  if (!is_ipv6)
+  if (!a->is_ipv6)
     {
-      gre_mk_key4 (&src->ip4, &dst->ip4, out_fib_index, &key->gtk_v4);
+      gre_mk_key4 (a->src.ip4, a->dst.ip4, outer_fib_index,
+		   a->tunnel_type, a->session_id, &key->gtk_v4);
       p = hash_get_mem (gm->tunnel_by_key4, &key->gtk_v4);
     }
   else
     {
-      gre_mk_key6 (&src->ip6, &dst->ip6, out_fib_index, &key->gtk_v6);
+      gre_mk_key6 (&a->src.ip6, &a->dst.ip6, outer_fib_index,
+		   a->tunnel_type, a->session_id, &key->gtk_v6);
       p = hash_get_mem (gm->tunnel_by_key6, &key->gtk_v6);
     }
 
@@ -79,11 +85,11 @@
 
   if (t->tunnel_dst.fp_proto == FIB_PROTOCOL_IP6)
     {
-      hash_set_mem (gm->tunnel_by_key6, &t->key->gtk_v6, t - gm->tunnels);
+      hash_set_mem (gm->tunnel_by_key6, &t->key->gtk_v6, t->dev_instance);
     }
   else
     {
-      hash_set_mem (gm->tunnel_by_key4, &t->key->gtk_v4, t - gm->tunnels);
+      hash_set_mem (gm->tunnel_by_key4, &t->key->gtk_v4, t->dev_instance);
     }
 }
 
@@ -136,23 +142,48 @@
   gt = pool_elt_at_index (gm->tunnels,
 			  gm->tunnel_index_by_sw_if_index[sw_if_index]);
 
-  /*
-   * find the adjacency that is contributed by the FIB entry
-   * that this tunnel resovles via, and use it as the next adj
-   * in the midchain
-   */
-  if (vnet_hw_interface_get_flags (vnet_get_main (),
-				   gt->hw_if_index) &
-      VNET_HW_INTERFACE_FLAG_LINK_UP)
-    {
-      adj_nbr_midchain_stack (ai,
-			      fib_entry_contribute_ip_forwarding
-			      (gt->fib_entry_index));
-    }
-  else
+  if ((vnet_hw_interface_get_flags (vnet_get_main (), gt->hw_if_index) &
+       VNET_HW_INTERFACE_FLAG_LINK_UP) == 0)
     {
       adj_nbr_midchain_unstack (ai);
+      return;
     }
+
+  dpo_id_t tmp = DPO_INVALID;
+  fib_forward_chain_type_t fib_fwd = (FIB_PROTOCOL_IP6 == adj->ia_nh_proto) ?
+    FIB_FORW_CHAIN_TYPE_UNICAST_IP6 : FIB_FORW_CHAIN_TYPE_UNICAST_IP4;
+
+  fib_entry_contribute_forwarding (gt->fib_entry_index, fib_fwd, &tmp);
+  if (DPO_LOAD_BALANCE == tmp.dpoi_type)
+    {
+      /*
+       * post GRE rewrite we will load-balance. However, the GRE encap
+       * is always the same for this adjacency/tunnel and hence the IP/GRE
+       * src,dst hash is always the same result too. So we do that hash now and
+       * stack on the choice.
+       * If the choice is an incomplete adj then we will need a poke when
+       * it becomes complete. This happens since the adj update walk propagates
+       * as far a recursive paths.
+       */
+      const dpo_id_t *choice;
+      load_balance_t *lb;
+      int hash;
+
+      lb = load_balance_get (tmp.dpoi_index);
+
+      if (fib_fwd == FIB_FORW_CHAIN_TYPE_UNICAST_IP4)
+	hash = ip4_compute_flow_hash ((ip4_header_t *) adj_get_rewrite (ai),
+				      lb->lb_hash_config);
+      else
+	hash = ip6_compute_flow_hash ((ip6_header_t *) adj_get_rewrite (ai),
+				      lb->lb_hash_config);
+      choice =
+	load_balance_get_bucket_i (lb, hash & lb->lb_n_buckets_minus_1);
+      dpo_copy (&tmp, choice);
+    }
+
+  adj_nbr_midchain_stack (ai, &tmp);
+  dpo_reset (&tmp);
 }
 
 /**
@@ -230,7 +261,8 @@
 };
 
 static int
-vnet_gre_tunnel_add (vnet_gre_add_del_tunnel_args_t * a, u32 * sw_if_indexp)
+vnet_gre_tunnel_add (vnet_gre_add_del_tunnel_args_t * a,
+		     u32 outer_fib_index, u32 * sw_if_indexp)
 {
   gre_main_t *gm = &gre_main;
   vnet_main_t *vnm = gm->vnet_main;
@@ -239,112 +271,67 @@
   gre_tunnel_t *t;
   vnet_hw_interface_t *hi;
   u32 hw_if_index, sw_if_index;
-  u32 outer_fib_index;
-  u8 address[6];
   clib_error_t *error;
   u8 is_ipv6 = a->is_ipv6;
   gre_tunnel_key_t key;
 
-  if (!is_ipv6)
-    outer_fib_index = ip4_fib_index_from_table_id (a->outer_fib_id);
-  else
-    outer_fib_index = ip6_fib_index_from_table_id (a->outer_fib_id);
-
-  if (~0 == outer_fib_index)
-    return VNET_API_ERROR_NO_SUCH_FIB;
-
-  t =
-    gre_tunnel_db_find (&a->src, &a->dst, outer_fib_index, a->is_ipv6, &key);
-
+  t = gre_tunnel_db_find (a, outer_fib_index, &key);
   if (NULL != t)
-    return VNET_API_ERROR_INVALID_VALUE;
+    return VNET_API_ERROR_IF_ALREADY_EXISTS;
 
   pool_get_aligned (gm->tunnels, t, CLIB_CACHE_LINE_BYTES);
   memset (t, 0, sizeof (*t));
+
+  /* Reconcile the real dev_instance and a possible requested instance */
+  u32 t_idx = t - gm->tunnels;	/* tunnel index (or instance) */
+  u32 u_idx = a->instance;	/* user specified instance */
+  if (u_idx == ~0)
+    u_idx = t_idx;
+  if (hash_get (gm->instance_used, u_idx))
+    {
+      pool_put (gm->tunnels, t);
+      return VNET_API_ERROR_INSTANCE_IN_USE;
+    }
+  hash_set (gm->instance_used, u_idx, 1);
+
+  t->dev_instance = t_idx;	/* actual */
+  t->user_instance = u_idx;	/* name */
   fib_node_init (&t->node, FIB_NODE_TYPE_GRE_TUNNEL);
 
-  if (a->teb)
-    t->type = GRE_TUNNEL_TYPE_TEB;
-  else
-    t->type = GRE_TUNNEL_TYPE_L3;
+  t->type = a->tunnel_type;
+  if (t->type == GRE_TUNNEL_TYPE_ERSPAN)
+    t->session_id = a->session_id;
 
-  if (vec_len (gm->free_gre_tunnel_hw_if_indices[t->type]) > 0)
-    {
-      vnet_interface_main_t *im = &vnm->interface_main;
-
-      hw_if_index = gm->free_gre_tunnel_hw_if_indices[t->type]
-	[vec_len (gm->free_gre_tunnel_hw_if_indices[t->type]) - 1];
-      _vec_len (gm->free_gre_tunnel_hw_if_indices[t->type]) -= 1;
-
-      hi = vnet_get_hw_interface (vnm, hw_if_index);
-      hi->dev_instance = t - gm->tunnels;
-      hi->hw_instance = hi->dev_instance;
-
-      /* clear old stats of freed tunnel before reuse */
-      sw_if_index = hi->sw_if_index;
-      vnet_interface_counter_lock (im);
-      vlib_zero_combined_counter
-	(&im->combined_sw_if_counters[VNET_INTERFACE_COUNTER_TX],
-	 sw_if_index);
-      vlib_zero_combined_counter (&im->combined_sw_if_counters
-				  [VNET_INTERFACE_COUNTER_RX], sw_if_index);
-      vlib_zero_simple_counter (&im->sw_if_counters
-				[VNET_INTERFACE_COUNTER_DROP], sw_if_index);
-      vnet_interface_counter_unlock (im);
-      if (GRE_TUNNEL_TYPE_TEB == t->type)
-	{
-	  t->l2_tx_arc = vlib_node_add_named_next (vlib_get_main (),
-						   hi->tx_node_index,
-						   "adj-l2-midchain");
-	}
-    }
+  if (t->type == GRE_TUNNEL_TYPE_L3)
+    hw_if_index = vnet_register_interface (vnm, gre_device_class.index, t_idx,
+					   gre_hw_interface_class.index,
+					   t_idx);
   else
     {
-      if (GRE_TUNNEL_TYPE_TEB == t->type)
+      /* Default MAC address (d00b:eed0:0000 + low 16 bits of tunnel index) */
+      u8 address[6] = { 0xd0, 0x0b, 0xee, 0xd0, (u8) (t_idx >> 8), (u8) t_idx };
+      error = ethernet_register_interface (vnm, gre_device_class.index, t_idx,
+					   address, &hw_if_index, 0);
+      if (error)
 	{
-	  /* Default MAC address (d00b:eed0:0000 + sw_if_index) */
-	  memset (address, 0, sizeof (address));
-	  address[0] = 0xd0;
-	  address[1] = 0x0b;
-	  address[2] = 0xee;
-	  address[3] = 0xd0;
-	  address[4] = t - gm->tunnels;
-
-	  error = ethernet_register_interface (vnm,
-					       gre_device_teb_class.index,
-					       t - gm->tunnels, address,
-					       &hw_if_index, 0);
-
-	  if (error)
-	    {
-	      clib_error_report (error);
-	      return VNET_API_ERROR_INVALID_REGISTRATION;
-	    }
-	  hi = vnet_get_hw_interface (vnm, hw_if_index);
-
-	  t->l2_tx_arc = vlib_node_add_named_next (vlib_get_main (),
-						   hi->tx_node_index,
-						   "adj-l2-midchain");
+	  clib_error_report (error);
+	  return VNET_API_ERROR_INVALID_REGISTRATION;
 	}
-      else
-	{
-	  hw_if_index = vnet_register_interface (vnm,
-						 gre_device_class.index,
-						 t - gm->tunnels,
-						 gre_hw_interface_class.index,
-						 t - gm->tunnels);
-	}
-      hi = vnet_get_hw_interface (vnm, hw_if_index);
-      sw_if_index = hi->sw_if_index;
     }
 
+  /* Set GRE tunnel interface output node (not used for L3 payload) */
+  vnet_set_interface_output_node (vnm, hw_if_index, gre_encap_node.index);
+
+  hi = vnet_get_hw_interface (vnm, hw_if_index);
+  sw_if_index = hi->sw_if_index;
+
   t->hw_if_index = hw_if_index;
   t->outer_fib_index = outer_fib_index;
   t->sw_if_index = sw_if_index;
   t->l2_adj_index = ADJ_INDEX_INVALID;
 
   vec_validate_init_empty (gm->tunnel_index_by_sw_if_index, sw_if_index, ~0);
-  gm->tunnel_index_by_sw_if_index[sw_if_index] = t - gm->tunnels;
+  gm->tunnel_index_by_sw_if_index[sw_if_index] = t_idx;
 
   if (!is_ipv6)
     {
@@ -378,20 +365,37 @@
   t->tunnel_dst.fp_addr = a->dst;
 
   gre_tunnel_db_add (t, &key);
-
-  t->fib_entry_index =
-    fib_table_entry_special_add (outer_fib_index,
-				 &t->tunnel_dst,
-				 FIB_SOURCE_RR, FIB_ENTRY_FLAG_NONE);
-  t->sibling_index =
-    fib_entry_child_add (t->fib_entry_index,
-			 FIB_NODE_TYPE_GRE_TUNNEL, t - gm->tunnels);
-
-  if (GRE_TUNNEL_TYPE_TEB == t->type)
+  if (t->type == GRE_TUNNEL_TYPE_ERSPAN)
     {
-      t->l2_adj_index = adj_nbr_add_or_lock (t->tunnel_dst.fp_proto,
-					     VNET_LINK_ETHERNET,
-					     &zero_addr, sw_if_index);
+      gre_sn_key_t skey;
+      gre_sn_t *gre_sn;
+
+      gre_mk_sn_key (t, &skey);
+      gre_sn = (gre_sn_t *) hash_get_mem (gm->seq_num_by_key, &skey);
+      if (gre_sn != NULL)
+	{
+	  gre_sn->ref_count++;
+	  t->gre_sn = gre_sn;
+	}
+      else
+	{
+	  gre_sn = clib_mem_alloc (sizeof (gre_sn_t));
+	  gre_sn->seq_num = 0;
+	  gre_sn->ref_count = 1;
+	  t->gre_sn = gre_sn;
+	  hash_set_mem_alloc (&gm->seq_num_by_key, &skey, (uword) gre_sn);
+	}
+    }
+
+  t->fib_entry_index = fib_table_entry_special_add
+    (outer_fib_index, &t->tunnel_dst, FIB_SOURCE_RR, FIB_ENTRY_FLAG_NONE);
+  t->sibling_index = fib_entry_child_add
+    (t->fib_entry_index, FIB_NODE_TYPE_GRE_TUNNEL, t_idx);
+
+  if (t->type != GRE_TUNNEL_TYPE_L3)
+    {
+      t->l2_adj_index = adj_nbr_add_or_lock
+	(t->tunnel_dst.fp_proto, VNET_LINK_ETHERNET, &zero_addr, sw_if_index);
       gre_update_adj (vnm, t->sw_if_index, t->l2_adj_index);
     }
 
@@ -403,38 +407,29 @@
 
 static int
 vnet_gre_tunnel_delete (vnet_gre_add_del_tunnel_args_t * a,
-			u32 * sw_if_indexp)
+			u32 outer_fib_index, u32 * sw_if_indexp)
 {
   gre_main_t *gm = &gre_main;
   vnet_main_t *vnm = gm->vnet_main;
   gre_tunnel_t *t;
   gre_tunnel_key_t key;
   u32 sw_if_index;
-  u32 outer_fib_index;
 
-  if (!a->is_ipv6)
-    outer_fib_index = ip4_fib_index_from_table_id (a->outer_fib_id);
-  else
-    outer_fib_index = ip6_fib_index_from_table_id (a->outer_fib_id);
-
-  if (~0 == outer_fib_index)
-    return VNET_API_ERROR_NO_SUCH_FIB;
-
-  t =
-    gre_tunnel_db_find (&a->src, &a->dst, outer_fib_index, a->is_ipv6, &key);
-
+  t = gre_tunnel_db_find (a, outer_fib_index, &key);
   if (NULL == t)
     return VNET_API_ERROR_NO_SUCH_ENTRY;
 
   sw_if_index = t->sw_if_index;
   vnet_sw_interface_set_flags (vnm, sw_if_index, 0 /* down */ );
+
   /* make sure tunnel is removed from l2 bd or xconnect */
   set_int_l2_mode (gm->vlib_main, vnm, MODE_L3, sw_if_index, 0, 0, 0, 0);
-  vec_add1 (gm->free_gre_tunnel_hw_if_indices[t->type], t->hw_if_index);
   gm->tunnel_index_by_sw_if_index[sw_if_index] = ~0;
 
-  if (GRE_TUNNEL_TYPE_TEB == t->type)
-    adj_unlock (t->l2_adj_index);
+  if (t->type == GRE_TUNNEL_TYPE_L3)
+    vnet_delete_hw_interface (vnm, t->hw_if_index);
+  else
+    ethernet_delete_interface (vnm, t->hw_if_index);
 
   if (t->l2_adj_index != ADJ_INDEX_INVALID)
     adj_unlock (t->l2_adj_index);
@@ -442,6 +437,16 @@
   fib_entry_child_remove (t->fib_entry_index, t->sibling_index);
   fib_table_entry_delete_index (t->fib_entry_index, FIB_SOURCE_RR);
 
+  ASSERT ((t->type != GRE_TUNNEL_TYPE_ERSPAN) || (t->gre_sn != NULL));
+  if ((t->type == GRE_TUNNEL_TYPE_ERSPAN) && (t->gre_sn->ref_count-- == 1))
+    {
+      gre_sn_key_t skey;
+      gre_mk_sn_key (t, &skey);
+      hash_unset_mem_free (&gm->seq_num_by_key, &skey);
+      clib_mem_free (t->gre_sn);
+    }
+
+  hash_unset (gm->instance_used, t->user_instance);
   gre_tunnel_db_remove (t);
   fib_node_deinit (&t->node);
   pool_put (gm->tunnels, t);
@@ -456,10 +461,23 @@
 vnet_gre_add_del_tunnel (vnet_gre_add_del_tunnel_args_t * a,
 			 u32 * sw_if_indexp)
 {
-  if (a->is_add)
-    return (vnet_gre_tunnel_add (a, sw_if_indexp));
+  u32 outer_fib_index;
+
+  if (!a->is_ipv6)
+    outer_fib_index = ip4_fib_index_from_table_id (a->outer_fib_id);
   else
-    return (vnet_gre_tunnel_delete (a, sw_if_indexp));
+    outer_fib_index = ip6_fib_index_from_table_id (a->outer_fib_id);
+
+  if (~0 == outer_fib_index)
+    return VNET_API_ERROR_NO_SUCH_FIB;
+
+  if (a->session_id > GTK_SESSION_ID_MAX)
+    return VNET_API_ERROR_INVALID_SESSION_ID;
+
+  if (a->is_add)
+    return (vnet_gre_tunnel_add (a, outer_fib_index, sw_if_indexp));
+  else
+    return (vnet_gre_tunnel_delete (a, outer_fib_index, sw_if_indexp));
 }
 
 clib_error_t *
@@ -503,8 +521,10 @@
   unformat_input_t _line_input, *line_input = &_line_input;
   vnet_gre_add_del_tunnel_args_t _a, *a = &_a;
   ip46_address_t src, dst;
+  u32 instance = ~0;
   u32 outer_fib_id = 0;
-  u8 teb = 0;
+  gre_tunnel_type_t t_type = GRE_TUNNEL_TYPE_L3;
+  u32 session_id = 0;
   int rv;
   u32 num_m_args = 0;
   u8 is_add = 1;
@@ -521,6 +541,8 @@
     {
       if (unformat (line_input, "del"))
 	is_add = 0;
+      else if (unformat (line_input, "instance %d", &instance))
+	;
       else
 	if (unformat (line_input, "src %U", unformat_ip4_address, &src.ip4))
 	{
@@ -548,7 +570,9 @@
       else if (unformat (line_input, "outer-fib-id %d", &outer_fib_id))
 	;
       else if (unformat (line_input, "teb"))
-	teb = 1;
+	t_type = GRE_TUNNEL_TYPE_TEB;
+      else if (unformat (line_input, "erspan %d", &session_id))
+	t_type = GRE_TUNNEL_TYPE_ERSPAN;
       else
 	{
 	  error = clib_error_return (0, "unknown input `%U'",
@@ -582,9 +606,12 @@
     }
 
   memset (a, 0, sizeof (*a));
+  a->is_add = is_add;
   a->outer_fib_id = outer_fib_id;
-  a->teb = teb;
+  a->tunnel_type = t_type;
+  a->session_id = session_id;
   a->is_ipv6 = ipv6_set;
+  a->instance = instance;
   if (!ipv6_set)
     {
       clib_memcpy (&a->src.ip4, &src.ip4, sizeof (src.ip4));
@@ -596,10 +623,7 @@
       clib_memcpy (&a->dst.ip6, &dst.ip6, sizeof (dst.ip6));
     }
 
-  if (is_add)
-    rv = vnet_gre_tunnel_add (a, &sw_if_index);
-  else
-    rv = vnet_gre_tunnel_delete (a, &sw_if_index);
+  rv = vnet_gre_add_del_tunnel (a, &sw_if_index);
 
   switch (rv)
     {
@@ -607,13 +631,23 @@
       vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name,
 		       vnet_get_main (), sw_if_index);
       break;
-    case VNET_API_ERROR_INVALID_VALUE:
+    case VNET_API_ERROR_IF_ALREADY_EXISTS:
       error = clib_error_return (0, "GRE tunnel already exists...");
       goto done;
     case VNET_API_ERROR_NO_SUCH_FIB:
       error = clib_error_return (0, "outer fib ID %d doesn't exist\n",
 				 outer_fib_id);
       goto done;
+    case VNET_API_ERROR_NO_SUCH_ENTRY:
+      error = clib_error_return (0, "GRE tunnel doesn't exist");
+      goto done;
+    case VNET_API_ERROR_INVALID_SESSION_ID:
+      error = clib_error_return (0, "session ID %d out of range\n",
+				 session_id);
+      goto done;
+    case VNET_API_ERROR_INSTANCE_IN_USE:
+      error = clib_error_return (0, "Instance is in use");
+      goto done;
     default:
       error =
 	clib_error_return (0, "vnet_gre_add_del_tunnel returned %d", rv);
@@ -629,8 +663,8 @@
 /* *INDENT-OFF* */
 VLIB_CLI_COMMAND (create_gre_tunnel_command, static) = {
   .path = "create gre tunnel",
-  .short_help = "create gre tunnel src <addr> dst <addr> "
-                "[outer-fib-id <fib>] [teb] [del]",
+  .short_help = "create gre tunnel src <addr> dst <addr> [instance <n>] "
+                "[outer-fib-id <fib>] [teb | erspan <session-id>] [del]",
   .function = create_gre_tunnel_command_fn,
 };
 /* *INDENT-ON* */
diff --git a/src/vnet/gre/node.c b/src/vnet/gre/node.c
index 7223b01..ee32e60 100644
--- a/src/vnet/gre/node.c
+++ b/src/vnet/gre/node.c
@@ -164,8 +164,10 @@
 	  protocol1 = h1->protocol;
 	  sparse_vec_index2 (gm->next_by_protocol, protocol0, protocol1,
 			     &i0, &i1);
-	  next0 = vec_elt (gm->next_by_protocol, i0);
-	  next1 = vec_elt (gm->next_by_protocol, i1);
+	  next0 = vec_elt (gm->next_by_protocol, i0).next_index;
+	  next1 = vec_elt (gm->next_by_protocol, i1).next_index;
+	  u8 ttype0 = vec_elt (gm->next_by_protocol, i0).tunnel_type;
+	  u8 ttype1 = vec_elt (gm->next_by_protocol, i1).tunnel_type;
 
 	  b0->error =
 	    node->errors[i0 ==
@@ -190,22 +192,21 @@
 
 
 	  /* RPF check for ip4/ip6 input */
-	  if (PREDICT_TRUE (next0 == GRE_INPUT_NEXT_IP4_INPUT
-			    || next0 == GRE_INPUT_NEXT_IP6_INPUT
-			    || next0 == GRE_INPUT_NEXT_ETHERNET_INPUT
-			    || next0 == GRE_INPUT_NEXT_MPLS_INPUT))
+	  if (PREDICT_TRUE (next0 > GRE_INPUT_NEXT_DROP))
 	    {
 	      if (is_ipv6)
 		{
 		  gre_mk_key6 (&ip6_0->dst_address,
 			       &ip6_0->src_address,
-			       vnet_buffer (b0)->ip.fib_index, &key0.gtk_v6);
+			       vnet_buffer (b0)->ip.fib_index,
+			       ttype0, 0, &key0.gtk_v6);
 		}
 	      else
 		{
-		  gre_mk_key4 (&ip4_0->dst_address,
-			       &ip4_0->src_address,
-			       vnet_buffer (b0)->ip.fib_index, &key0.gtk_v4);
+		  gre_mk_key4 (ip4_0->dst_address,
+			       ip4_0->src_address,
+			       vnet_buffer (b0)->ip.fib_index,
+			       ttype0, 0, &key0.gtk_v4);
 		}
 
 	      if ((!is_ipv6 && !gre_match_key4 (&cached_tunnel_key.gtk_v4,
@@ -264,22 +265,21 @@
 	  vnet_buffer (b0)->sw_if_index[VLIB_RX] = tunnel_sw_if_index;
 
 	drop0:
-	  if (PREDICT_TRUE (next1 == GRE_INPUT_NEXT_IP4_INPUT
-			    || next1 == GRE_INPUT_NEXT_IP6_INPUT
-			    || next1 == GRE_INPUT_NEXT_ETHERNET_INPUT
-			    || next1 == GRE_INPUT_NEXT_MPLS_INPUT))
+	  if (PREDICT_TRUE (next1 > GRE_INPUT_NEXT_DROP))
 	    {
 	      if (is_ipv6)
 		{
 		  gre_mk_key6 (&ip6_1->dst_address,
 			       &ip6_1->src_address,
-			       vnet_buffer (b1)->ip.fib_index, &key1.gtk_v6);
+			       vnet_buffer (b1)->ip.fib_index,
+			       ttype1, 0, &key1.gtk_v6);
 		}
 	      else
 		{
-		  gre_mk_key4 (&ip4_1->dst_address,
-			       &ip4_1->src_address,
-			       vnet_buffer (b1)->ip.fib_index, &key1.gtk_v4);
+		  gre_mk_key4 (ip4_1->dst_address,
+			       ip4_1->src_address,
+			       vnet_buffer (b1)->ip.fib_index,
+			       ttype1, 0, &key1.gtk_v4);
 		}
 
 	      if ((!is_ipv6 && !gre_match_key4 (&cached_tunnel_key.gtk_v4,
@@ -423,7 +423,8 @@
 	  h0 = vlib_buffer_get_current (b0);
 
 	  i0 = sparse_vec_index (gm->next_by_protocol, h0->protocol);
-	  next0 = vec_elt (gm->next_by_protocol, i0);
+	  next0 = vec_elt (gm->next_by_protocol, i0).next_index;
+	  u8 ttype0 = vec_elt (gm->next_by_protocol, i0).tunnel_type;
 
 	  b0->error =
 	    node->errors[i0 == SPARSE_VEC_INVALID_INDEX
@@ -440,22 +441,21 @@
 	     so we can increase counters and help forward node to
 	     pick right FIB */
 	  /* RPF check for ip4/ip6 input */
-	  if (PREDICT_TRUE (next0 == GRE_INPUT_NEXT_IP4_INPUT
-			    || next0 == GRE_INPUT_NEXT_IP6_INPUT
-			    || next0 == GRE_INPUT_NEXT_ETHERNET_INPUT
-			    || next0 == GRE_INPUT_NEXT_MPLS_INPUT))
+	  if (PREDICT_TRUE (next0 > GRE_INPUT_NEXT_DROP))
 	    {
 	      if (is_ipv6)
 		{
 		  gre_mk_key6 (&ip6_0->dst_address,
 			       &ip6_0->src_address,
-			       vnet_buffer (b0)->ip.fib_index, &key0.gtk_v6);
+			       vnet_buffer (b0)->ip.fib_index,
+			       ttype0, 0, &key0.gtk_v6);
 		}
 	      else
 		{
-		  gre_mk_key4 (&ip4_0->dst_address,
-			       &ip4_0->src_address,
-			       vnet_buffer (b0)->ip.fib_index, &key0.gtk_v4);
+		  gre_mk_key4 (ip4_0->dst_address,
+			       ip4_0->src_address,
+			       vnet_buffer (b0)->ip.fib_index,
+			       ttype0, 0, &key0.gtk_v4);
 		}
 
 	      if ((!is_ipv6 && !gre_match_key4 (&cached_tunnel_key.gtk_v4,
@@ -592,9 +592,7 @@
   .format_trace = format_gre_rx_trace,
   .unformat_buffer = unformat_gre_header,
 };
-/* *INDENT-ON* */
 
-/* *INDENT-OFF* */
 VLIB_REGISTER_NODE (gre6_input_node) = {
   .function = gre6_input,
   .name = "gre6-input",
@@ -617,17 +615,19 @@
   .format_trace = format_gre_rx_trace,
   .unformat_buffer = unformat_gre_header,
 };
-/* *INDENT-ON* */
 
 VLIB_NODE_FUNCTION_MULTIARCH (gre4_input_node, gre4_input)
 VLIB_NODE_FUNCTION_MULTIARCH (gre6_input_node, gre6_input)
-     void
-       gre_register_input_protocol (vlib_main_t * vm,
-				    gre_protocol_t protocol, u32 node_index)
+/* *INDENT-ON* */
+
+void
+gre_register_input_protocol (vlib_main_t * vm,
+			     gre_protocol_t protocol, u32 node_index,
+			     gre_tunnel_type_t tunnel_type)
 {
   gre_main_t *em = &gre_main;
   gre_protocol_info_t *pi;
-  u16 *n;
+  next_info_t *n;
   u32 i;
 
   {
@@ -638,6 +638,7 @@
 
   pi = gre_get_protocol_info (em, protocol);
   pi->node_index = node_index;
+  pi->tunnel_type = tunnel_type;
   pi->next_index = vlib_node_add_next (vm, gre4_input_node.index, node_index);
   i = vlib_node_add_next (vm, gre6_input_node.index, node_index);
   ASSERT (i == pi->next_index);
@@ -645,7 +646,8 @@
   /* Setup gre protocol -> next index sparse vector mapping. */
   n = sparse_vec_validate (em->next_by_protocol,
 			   clib_host_to_net_u16 (protocol));
-  n[0] = pi->next_index;
+  n->next_index = pi->next_index;
+  n->tunnel_type = tunnel_type;
 }
 
 static void
@@ -689,14 +691,17 @@
   mpls_unicast_input = vlib_get_node_by_name (vm, (u8 *) "mpls-input");
   ASSERT (mpls_unicast_input);
 
-  gre_register_input_protocol (vm, GRE_PROTOCOL_teb, ethernet_input->index);
+  gre_register_input_protocol (vm, GRE_PROTOCOL_teb,
+			       ethernet_input->index, GRE_TUNNEL_TYPE_TEB);
 
-  gre_register_input_protocol (vm, GRE_PROTOCOL_ip4, ip4_input->index);
+  gre_register_input_protocol (vm, GRE_PROTOCOL_ip4,
+			       ip4_input->index, GRE_TUNNEL_TYPE_L3);
 
-  gre_register_input_protocol (vm, GRE_PROTOCOL_ip6, ip6_input->index);
+  gre_register_input_protocol (vm, GRE_PROTOCOL_ip6,
+			       ip6_input->index, GRE_TUNNEL_TYPE_L3);
 
   gre_register_input_protocol (vm, GRE_PROTOCOL_mpls_unicast,
-			       mpls_unicast_input->index);
+			       mpls_unicast_input->index, GRE_TUNNEL_TYPE_L3);
 
   ip4_register_protocol (IP_PROTOCOL_GRE, gre4_input_node.index);
   ip6_register_protocol (IP_PROTOCOL_GRE, gre6_input_node.index);
diff --git a/src/vnet/gre/packet.h b/src/vnet/gre/packet.h
index 64b39f2..034a611 100644
--- a/src/vnet/gre/packet.h
+++ b/src/vnet/gre/packet.h
@@ -24,6 +24,7 @@
 _ (0x6558, teb)                                 \
 _ (0x0806, arp)					\
 _ (0x8847, mpls_unicast)			\
+_ (0x88BE, erspan)				\
 _ (0x894F, nsh)
 
 typedef enum
@@ -54,6 +55,111 @@
   u16 protocol;
 } gre_header_t;
 
+/* From draft-foschiano-erspan-03.txt
+
+   Different frame variants known as "ERSPAN Types" can be
+   distinguished based on the GRE "Protocol Type" field value: Type I
+   and II's value is 0x88BE while Type III's is 0x22EB [ETYPES].
+
+         GRE header for ERSPAN Type II encapsulation (8 octets [34:41])
+       0                   1                   2                   3
+       0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |0|0|0|1|0|00000|000000000|00000|   Protocol Type for ERSPAN    |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |      Sequence Number (increments per packet per session)      |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+   Note that in the above GRE header [RFC1701] out of the C, R, K, S,
+   s, Recur, Flags, Version fields only S (bit 03) may be set to 1. The
+   other fields are always set to zero.
+
+   ERSPAN Type II's frame format also adds a special 8-octet ERSPAN
+   "feature" header on top of the MAC/IPv4/GRE headers to enclose the
+   raw mirrored frames.
+
+   The ERSPAN Type II feature header is described below:
+
+                     ERSPAN Type II header (8 octets [42:49])
+       0                   1                   2                   3
+       0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |  Ver  |          VLAN         | COS | En|T|    Session ID     |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |      Reserved         |                  Index                |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+  The various fields of the above header are described in this table:
+
+   Field         Position    Length          Definition
+                [octet:bit]  (bits)
+
+   Ver            [42:0]       4      ERSPAN Encapsulation version.
+                                      This indicates the version of
+                                      the ERSPAN encapsulation
+                                      specification. Set to 0x1 for
+                                      Type II.
+
+   VLAN           [42:4]      12      Original VLAN of the frame,
+                                      mirrored from the source.
+                                      If the En field is set to 11,
+                                      the value of VLAN is undefined.
+
+   COS            [44:0]       3      Original class of service of the
+                                      frame, mirrored from the source.
+
+   En             [44:3]       2      The trunk encapsulation type
+                                      associated with the ERSPAN source
+                                      port for ingress ERSPAN traffic.
+
+                                      The possible values are:
+                                      00-originally without VLAN tag
+                                      01-originally ISL encapsulated
+                                      10-originally 802.1Q encapsulated
+                                      11-VLAN tag preserved in frame.
+
+   T              [44:5]       1      This bit indicates that the frame
+                                      copy encapsulated in the ERSPAN
+                                      packet has been truncated. This
+                                      occurs if the ERSPAN encapsulated
+                                      frame exceeds the configured MTU.
+
+   Session ID     [44:6]      10      Identification associated with
+   (ERSPAN ID)                        each ERSPAN session. Must be
+                                      unique between the source and the
+                                      receiver(s). (See section below.)
+
+   Reserved       [46:0]      12      All bits are set to zero
+
+   Index          [47:4]      20      A 20 bit index/port number
+                                      associated with the ERSPAN
+                                      traffic's port and
+                                      direction (ingress/egress). N.B.:
+                                      This field is platform dependent.
+*/
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct {
+  u32 seq_num;
+  union
+  {
+    struct
+    {
+      u16 ver_vlan;
+      u16 cos_en_t_session;
+      u32 res_index;
+    } t2;
+    u64 t2_u64;
+  };
+}) erspan_t2_t;
+
+typedef CLIB_PACKED (struct {
+  gre_header_t gre;
+  erspan_t2_t erspan;
+}) erspan_t2_header_t;
+
+/* *INDENT-ON* */
+
 #endif /* included_vnet_gre_packet_h */
 
 /*