Pipes

A pipe resembles a unix pipe. Each end of the pipe is a full
VPP interface.
Pipes can be used for e.g. packet recirculation, inter-BD, etc.

Change-Id: I185bb9fb43dd233ff45da63ac1b85ae2e1ceca16
Signed-off-by: Neale Ranns <neale.ranns@cisco.com>
diff --git a/src/vnet/devices/pipe/pipe.api b/src/vnet/devices/pipe/pipe.api
new file mode 100644
index 0000000..d3dfd16
--- /dev/null
+++ b/src/vnet/devices/pipe/pipe.api
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2018 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** \file
+
+    This file defines vpe control-plane API messages for
+    VPP pipe interfaces
+*/
+
+option version = "1.0.0";
+
+/** \brief Initialize a new pipe interface with the given parameters
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param is_specified - if non-0, a specific user_instance is being requested
+    @param user_instance - requested instance, only used if is_specified is non-0
+*/
+define pipe_create
+{
+  u32 client_index;
+  u32 context;
+  u8 is_specified;
+  u32 user_instance;
+};
+
+/** \brief Reply for pipe create request
+    @param context - returned sender context, to match reply w/ request
+    @param retval - return code
+    @param parent_sw_if_index - software index allocated for the new pipe parent interface
+                                Use the parent interface for link up/down and to delete
+    @param pipe_sw_if_index - the two SW indices that form the ends of the pipe.
+*/
+define pipe_create_reply
+{
+  u32 context;
+  i32 retval;
+  u32 parent_sw_if_index;
+  u32 pipe_sw_if_index[2];
+};
+
+/** \brief Delete pipe interface
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param parent_sw_if_index - software index of the pipe's parent interface
+*/
+autoreply define pipe_delete
+{
+  u32 client_index;
+  u32 context;
+  u32 parent_sw_if_index;
+};
+
+/** \brief Dump pipe interfaces request; one pipe_details is sent per pipe */
+define pipe_dump
+{
+  u32 client_index;
+  u32 context;
+};
+
+/** \brief Reply for pipe dump request
+    @param context - sender context copied from the pipe_dump request
+    @param parent_sw_if_index - software index allocated for the pipe parent interface
+    @param pipe_sw_if_index - the two SW indices that form the ends of the pipe.
+    @param instance - instance allocated
+*/
+define pipe_details
+{
+  u32 context;
+  u32 parent_sw_if_index;
+  u32 pipe_sw_if_index[2];
+  u32 instance;
+};
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/devices/pipe/pipe.c b/src/vnet/devices/pipe/pipe.c
new file mode 100644
index 0000000..29f54e1
--- /dev/null
+++ b/src/vnet/devices/pipe/pipe.c
@@ -0,0 +1,802 @@
+/*
+ * Copyright (c) 2018 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/devices/pipe/pipe.h>
+
+#include <vppinfra/sparse_vec.h>
+
+/**
+ * @file
+ * @brief Pipe Interfaces.
+ *
+ * A pipe interface, like the UNIX pipe, is a pair of interfaces
+ * that are joined.
+ */
+static const pipe_t PIPE_INVALID = {	/* value of an unused pipes[] slot */
+  .sw_if_index = ~0,
+  .subint = {0},
+};
+
+/**
+ * Various 'module' level variables
+ */
+typedef struct pipe_main_t_
+{
+  /**
+   * Bitmap of allocated pipe instance numbers
+   */
+  uword *instances;
+
+  /**
+   * the per-swif-index array of pipes. Each end of the pipe is stored against
+   * its respective sw_if_index
+   */
+  pipe_t *pipes;
+} pipe_main_t;
+
+static pipe_main_t pipe_main;
+
+/*
+ * The pipe rewrite is the same size as an ethernet header (since it
+ * is an ethernet interface and the DP is optimised for writing
+ * sizeof(ethernet_header_t) rewrites). However, there are no MAC addresses
+ * since pipes don't have them. Unsupported link types get no rewrite (NULL).
+ */
+static u8 *
+pipe_build_rewrite (vnet_main_t * vnm,
+		    u32 sw_if_index,
+		    vnet_link_t link_type, const void *dst_address)
+{
+  ethernet_header_t *h;
+  ethernet_type_t type;
+  u8 *rewrite = NULL;
+
+  switch (link_type)
+    {
+#define _(a,b) case VNET_LINK_##a: type = ETHERNET_TYPE_##b; break
+      _(IP4, IP4);
+      _(IP6, IP6);
+      _(MPLS, MPLS);
+      _(ARP, ARP);
+#undef _
+    default:
+      return NULL;
+    }
+
+  /* vec_validate takes the last valid index, not a length, hence the -1 */
+  vec_validate (rewrite, sizeof (ethernet_header_t) - 1);
+
+  h = (ethernet_header_t *) rewrite;
+  h->type = clib_host_to_net_u16 (type);
+  return (rewrite);
+}
+
+/* *INDENT-OFF* */
+VNET_HW_INTERFACE_CLASS (pipe_hw_interface_class) = {
+  .name = "Pipe",
+  .build_rewrite = pipe_build_rewrite,	/* ethertype only, no MACs */
+  .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
+};
+/* *INDENT-ON* */
+
+pipe_t *
+pipe_get (u32 sw_if_index)
+{
+  pipe_main_t *pm = &pipe_main;	/* slots are created on first access */
+  vec_validate_init_empty (pm->pipes, sw_if_index, PIPE_INVALID);
+  return (pm->pipes + sw_if_index);
+}
+
+/*
+ * unformat function: match a hw interface name for which
+ * ethernet_get_interface() finds an entry and store its hw_if_index.
+ */
+uword
+unformat_pipe_interface (unformat_input_t * input, va_list * args)
+{
+  vnet_main_t *vnm = va_arg (*args, vnet_main_t *);
+  u32 *result = va_arg (*args, u32 *);
+  u32 hw_if_index;
+
+  if (!unformat_user (input, unformat_vnet_hw_interface, vnm, &hw_if_index))
+    return 0;
+
+  if (NULL == ethernet_get_interface (&ethernet_main, hw_if_index))
+    return 0;
+
+  *result = hw_if_index;
+  return 1;
+}
+
+#define VNET_PIPE_TX_NEXT_ETHERNET_INPUT VNET_INTERFACE_TX_N_NEXT
+
+/*
+ * The TX function bounces the packets back to pipe-rx with the TX interface
+ * swapped to the RX. Returns the number of packets in the frame.
+ */
+static uword
+pipe_tx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  u32 n_left_from, n_left_to_next, n_copy, *from, *to_next;
+  u32 next_index = VNET_PIPE_TX_NEXT_ETHERNET_INPUT;
+  u32 i, sw_if_index = 0;
+  u32 n_pkts = 0, n_bytes = 0;
+  u32 thread_index = vm->thread_index;
+  vnet_main_t *vnm = vnet_get_main ();
+  vnet_interface_main_t *im = &vnm->interface_main;
+  vlib_buffer_t *b;
+  pipe_t *pipe;
+
+  n_left_from = frame->n_vectors;
+  from = vlib_frame_vector_args (frame);
+
+  while (n_left_from > 0)
+    {
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      n_copy = clib_min (n_left_from, n_left_to_next);
+
+      clib_memcpy (to_next, from, n_copy * sizeof (from[0]));
+      n_left_to_next -= n_copy;
+      n_left_from -= n_copy;
+      /* count per chunk so a second pass does not re-add earlier packets */
+      n_pkts = n_bytes = 0;
+      for (i = 0; i < n_copy; i++)
+	{
+	  b = vlib_get_buffer (vm, from[i]);
+	  sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_TX];
+
+	  pipe = &pipe_main.pipes[sw_if_index];
+	  // Set up RX index to be recv'd by the other end of the pipe
+	  vnet_buffer (b)->sw_if_index[VLIB_RX] = pipe->sw_if_index;
+	  vnet_buffer (b)->sw_if_index[VLIB_TX] = ~0;
+
+	  n_pkts++;
+	  n_bytes += vlib_buffer_length_in_chain (vm, b);
+	}
+      from += n_copy;
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+
+      /* increment TX interface stat (of the chunk's last TX sw_if_index) */
+      vlib_increment_combined_counter (im->combined_sw_if_counters +
+				       VNET_INTERFACE_COUNTER_TX,
+				       thread_index, sw_if_index, n_pkts,
+				       n_bytes);
+    }
+
+  return frame->n_vectors;
+}
+
+static u8 *
+format_pipe_name (u8 * s, va_list * args)
+{
+  u32 instance = va_arg (*args, u32);	/* sole argument: the dev instance */
+  return (format (s, "pipe%d", instance));
+}
+
+static clib_error_t *
+pipe_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
+{
+  u32 id, sw_if_index, hw_flags = 0;
+  vnet_hw_interface_t *hi;
+
+  /* the link state of the parent simply follows its admin state */
+  if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
+    hw_flags = VNET_HW_INTERFACE_FLAG_LINK_UP;
+  vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags);
+  /* pass the same admin flags on to every sub-interface of the parent */
+  hi = vnet_get_hw_interface (vnm, hw_if_index);
+  /* *INDENT-OFF* */
+  hash_foreach (id, sw_if_index, hi->sub_interface_sw_if_index_by_id,
+  ({
+    vnet_sw_interface_set_flags (vnm, sw_if_index, flags);
+  }));
+  /* *INDENT-ON* */
+  return (NULL);
+}
+
+/* *INDENT-OFF* */
+VNET_DEVICE_CLASS (pipe_device_class) = {
+  .name = "Pipe",
+  .format_device_name = format_pipe_name,	/* "pipe<instance>" */
+  .tx_function = pipe_tx,		/* hands packets to pipe-rx */
+  .admin_up_down_function = pipe_admin_up_down,
+};
+/* *INDENT-ON* */
+
+#define foreach_pipe_rx_next                    \
+  _ (DROP, "error-drop")
+
+typedef enum pipe_rx_next_t_
+{
+#define _(s,n) PIPE_RX_NEXT_##s,
+  foreach_pipe_rx_next
+#undef _
+    PIPE_RX_N_NEXT,		/* count of the entries listed above */
+} pipe_rx_next_t;
+
+typedef struct pipe_rx_trace_t_
+{
+  u8 packet_data[32];		/* printed with format_ethernet_header */
+} pipe_rx_trace_t;
+
+static u8 *
+format_pipe_rx_trace (u8 * s, va_list * va)
+{
+  pipe_rx_trace_t *trace;
+
+  /* the vm and node arguments come first and are not needed here */
+  (void) va_arg (*va, vlib_main_t *);
+  (void) va_arg (*va, vlib_node_t *);
+  trace = va_arg (*va, pipe_rx_trace_t *);
+  return (format (s, "%U", format_ethernet_header, trace->packet_data));
+}
+
+/*
+ * The pipe-rx node is a sibling of ethernet-input so steal its
+ * next node mechanism
+ */
+static_always_inline void
+pipe_determine_next_node (ethernet_main_t * em,
+			  u32 is_l20,
+			  u32 type0,
+			  vlib_buffer_t * b0, pipe_rx_next_t * next0)
+{
+  u32 slot;
+  /* an end in L2 mode goes to the L2 path whatever the ethertype */
+  if (is_l20)
+    {
+      *next0 = em->l2_next;
+      return;
+    }
+
+  /* the common L3 ethertypes have cached next nodes */
+  switch (type0)
+    {
+    case ETHERNET_TYPE_IP4:
+      *next0 = em->l3_next.input_next_ip4;
+      return;
+    case ETHERNET_TYPE_IP6:
+      *next0 = em->l3_next.input_next_ip6;
+      return;
+    case ETHERNET_TYPE_MPLS:
+      *next0 = em->l3_next.input_next_mpls;
+      return;
+    }
+
+  if (em->redirect_l3)
+    {
+      // L3 Redirect is on, the cached common next nodes will be
+      // pointing to the redirect node, catch the uncommon types here
+      *next0 = em->redirect_l3_next;
+      return;
+    }
+
+  // uncommon ethertype, check table
+  slot = sparse_vec_index (em->l3_next.input_next_by_type, type0);
+  *next0 = vec_elt (em->l3_next.input_next_by_type, slot);
+  // The table is not populated with LLC values, so check that now.
+  if (type0 < 0x600)
+    *next0 = PIPE_RX_NEXT_DROP;
+}
+
+/* pipe-rx: choose the next node for every packet bounced back by pipe-tx,
+ * based on the L2/L3 mode of the receiving pipe end and the ethertype. */
+static_always_inline uword
+pipe_rx (vlib_main_t * vm,
+	 vlib_node_runtime_t * node, vlib_frame_t * from_frame)
+{
+  u32 n_left_from, next_index, *from, *to_next;
+  u32 n_left_to_next;
+
+  from = vlib_frame_vector_args (from_frame);
+  n_left_from = from_frame->n_vectors;
+
+  if (node->flags & VLIB_NODE_FLAG_TRACE)
+    vlib_trace_frame_buffers_only (vm, node, from, n_left_from,
+				   sizeof (from[0]),
+				   sizeof (pipe_rx_trace_t));
+
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0)
+    {
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      while (n_left_from >= 4 && n_left_to_next >= 2)
+	{
+	  u32 bi0, sw_if_index0, bi1, sw_if_index1;
+	  pipe_rx_next_t next0, next1;
+	  ethernet_header_t *e0, *e1;
+	  vlib_buffer_t *b0, *b1;
+	  pipe_t *pipe0, *pipe1;
+	  u8 is_l20, is_l21;
+	  u16 type0, type1;
+
+	  // Prefetch next iteration
+	  {
+	    vlib_buffer_t *p2, *p3;
+
+	    p2 = vlib_get_buffer (vm, from[2]);
+	    p3 = vlib_get_buffer (vm, from[3]);
+	    vlib_prefetch_buffer_header (p2, STORE);
+	    vlib_prefetch_buffer_header (p3, STORE);
+	    CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, LOAD);
+	    CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, LOAD);
+	  }
+
+	  bi0 = from[0];
+	  to_next[0] = bi0;
+	  bi1 = from[1];
+	  to_next[1] = bi1;
+	  from += 2;
+	  to_next += 2;
+	  n_left_from -= 2;
+	  n_left_to_next -= 2;
+
+	  b0 = vlib_get_buffer (vm, bi0);
+	  b1 = vlib_get_buffer (vm, bi1);
+
+	  e0 = vlib_buffer_get_current (b0);
+	  e1 = vlib_buffer_get_current (b1);
+	  sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+	  sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+	  type0 = clib_net_to_host_u16 (e0->type);
+	  type1 = clib_net_to_host_u16 (e1->type);
+	  pipe0 = &pipe_main.pipes[sw_if_index0];
+	  pipe1 = &pipe_main.pipes[sw_if_index1];
+
+	  vnet_buffer (b0)->l3_hdr_offset =
+	    vnet_buffer (b0)->l2_hdr_offset + vnet_buffer (b0)->l2.l2_len;
+	  vnet_buffer (b1)->l3_hdr_offset =
+	    vnet_buffer (b1)->l2_hdr_offset + vnet_buffer (b1)->l2.l2_len;
+	  b0->flags |= VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
+	  b1->flags |= VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
+
+	  is_l20 = pipe0->subint.flags & SUBINT_CONFIG_L2;
+	  is_l21 = pipe1->subint.flags & SUBINT_CONFIG_L2;
+	  pipe_determine_next_node (&ethernet_main, is_l20, type0, b0,
+				    &next0);
+	  pipe_determine_next_node (&ethernet_main, is_l21, type1, b1,
+				    &next1);
+
+	  if (!is_l20)
+	    vlib_buffer_advance (b0, sizeof (ethernet_header_t));
+	  else
+	    {
+	      u32 eth_start = vnet_buffer (b0)->l2_hdr_offset;
+	      vnet_buffer (b0)->l2.l2_len = b0->current_data - eth_start;
+	    }
+	  if (!is_l21)
+	    vlib_buffer_advance (b1, sizeof (ethernet_header_t));
+	  else
+	    {
+	      u32 eth_start = vnet_buffer (b1)->l2_hdr_offset;
+	      vnet_buffer (b1)->l2.l2_len = b1->current_data - eth_start;
+	    }
+
+	  vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+					   to_next, n_left_to_next,
+					   bi0, bi1, next0, next1);
+	}
+      while (n_left_from > 0 && n_left_to_next > 0)
+	{
+	  u32 bi0, sw_if_index0;
+	  vlib_buffer_t *b0;
+	  pipe_rx_next_t next0;
+	  ethernet_header_t *e0;
+	  pipe_t *pipe0;
+	  u16 type0;
+	  u8 is_l20;
+
+	  bi0 = from[0];
+	  to_next[0] = bi0;
+	  from += 1;
+	  to_next += 1;
+	  n_left_from -= 1;
+	  n_left_to_next -= 1;
+
+	  b0 = vlib_get_buffer (vm, bi0);
+
+	  e0 = vlib_buffer_get_current (b0);
+	  sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+	  type0 = clib_net_to_host_u16 (e0->type);
+	  pipe0 = &pipe_main.pipes[sw_if_index0];
+
+	  vnet_buffer (b0)->l3_hdr_offset =
+	    vnet_buffer (b0)->l2_hdr_offset + vnet_buffer (b0)->l2.l2_len;
+	  b0->flags |= VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
+
+	  is_l20 = pipe0->subint.flags & SUBINT_CONFIG_L2;
+	  pipe_determine_next_node (&ethernet_main, is_l20, type0, b0,
+				    &next0);
+
+	  if (!is_l20)
+	    vlib_buffer_advance (b0, sizeof (ethernet_header_t));
+	  else
+	    {
+	      u32 eth_start = vnet_buffer (b0)->l2_hdr_offset;
+	      vnet_buffer (b0)->l2.l2_len = b0->current_data - eth_start;
+	    }
+
+	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+					   to_next, n_left_to_next,
+					   bi0, next0);
+	}
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  return from_frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (pipe_rx_node) = {
+  .function = pipe_rx,
+  .name = "pipe-rx",
+  /* Takes a vector of packets (u32 buffer indices). */
+  .vector_size = sizeof (u32),
+  .format_trace = format_pipe_rx_trace,
+
+  .sibling_of = "ethernet-input",
+};
+/* *INDENT-ON* */
+
+/*
+ * Maintain a bitmap of allocated pipe instance numbers.
+ */
+#define PIPE_MAX_INSTANCE		(16 * 1024)
+
+/*
+ * Allocate a pipe instance number: 'want' when is_specified is set,
+ * otherwise the first free one. Returns ~0 if the number is out of range
+ * or already in use.
+ */
+static u32
+pipe_instance_alloc (u8 is_specified, u32 want)
+{
+  pipe_main_t *pm = &pipe_main;
+  u32 instance;
+
+  /*
+   * Pick the candidate: the caller's choice, or else a dynamically
+   * allocated instance number.
+   */
+  if (is_specified)
+    instance = want;
+  else
+    instance = clib_bitmap_first_clear (pm->instances);
+
+  /*
+   * In range?
+   */
+  if (instance >= PIPE_MAX_INSTANCE)
+    {
+      return ~0;
+    }
+
+  /*
+   * Already in use? Only a requested number needs this check.
+   */
+  if (is_specified && clib_bitmap_get (pm->instances, instance))
+    {
+      return ~0;
+    }
+
+  /*
+   * Grant allocation request.
+   */
+  pm->instances = clib_bitmap_set (pm->instances, instance, 1);
+  return instance;
+}
+
+static int
+pipe_instance_free (u32 instance)
+{
+  pipe_main_t *pm = &pipe_main;
+
+  /* only an in-range instance that is currently allocated can be freed */
+  if (instance >= PIPE_MAX_INSTANCE ||
+      clib_bitmap_get (pm->instances, instance) == 0)
+    {
+      return -1;
+    }
+
+  pm->instances = clib_bitmap_set (pm->instances, instance, 0);
+
+  return 0;
+}
+
+static clib_error_t *
+pipe_create_sub_interface (vnet_hw_interface_t * hi,
+			   uint32_t sub_id, u32 * sw_if_index)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  vnet_sw_interface_t sw;
+
+  /* a PIPE type sub-interface of the parent 'hi', identified by sub_id */
+  memset (&sw, 0, sizeof (sw));
+  sw.sub.id = sub_id;
+  sw.sup_sw_if_index = hi->sw_if_index;
+  sw.flood_class = VNET_FLOOD_CLASS_NORMAL;
+  sw.type = VNET_SW_INTERFACE_TYPE_PIPE;
+  return (vnet_create_sw_interface (vnm, &sw, sw_if_index));
+}
+
+int
+vnet_create_pipe_interface (u8 is_specified,
+			    u32 user_instance,
+			    u32 * parent_sw_if_index, u32 pipe_sw_if_index[2])
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  vlib_main_t *vm = vlib_get_main ();
+  uint8_t address[6] = { 0 };
+  vnet_hw_interface_t *hi;
+  clib_error_t *error;
+  u32 hw_if_index;
+  u32 instance;
+  u32 slot;
+  u8 have_hw = 0;
+  u32 n_ends = 0;
+
+  ASSERT (parent_sw_if_index);
+
+  /*
+   * Allocate a pipe instance.  Either select one dynamically
+   * or try to use the desired user_instance number.
+   */
+  instance = pipe_instance_alloc (is_specified, user_instance);
+  if (instance == ~0)
+    return VNET_API_ERROR_INVALID_REGISTRATION;
+
+  /*
+   * Default MAC address (0000:0000:0000 + instance) is allocated.
+   * An instance can exceed 255, so it is spread over the last two bytes.
+   */
+  address[4] = instance >> 8;
+  address[5] = instance & 0xff;
+
+  error = ethernet_register_interface (vnm, pipe_device_class.index,
+				       instance, address, &hw_if_index,
+				       /* flag change */ 0);
+  if (error)
+    goto oops;
+  have_hw = 1;
+
+  hi = vnet_get_hw_interface (vnm, hw_if_index);
+  *parent_sw_if_index = hi->sw_if_index;
+  slot = vlib_node_add_named_next_with_slot (vm, hi->tx_node_index,
+					     "pipe-rx",
+					     VNET_PIPE_TX_NEXT_ETHERNET_INPUT);
+  ASSERT (slot == VNET_PIPE_TX_NEXT_ETHERNET_INPUT);
+
+  /*
+   * create two sub-interfaces, one for each end of the pipe, and
+   * register each with the parent as soon as it exists.
+   */
+  for (n_ends = 0; n_ends < 2; n_ends++)
+    {
+      error = pipe_create_sub_interface (hi, n_ends,
+					 &pipe_sw_if_index[n_ends]);
+      if (error)
+	goto oops;
+      hash_set (hi->sub_interface_sw_if_index_by_id, n_ends,
+		pipe_sw_if_index[n_ends]);
+    }
+
+  vec_validate_init_empty (pipe_main.pipes, pipe_sw_if_index[0],
+			   PIPE_INVALID);
+  vec_validate_init_empty (pipe_main.pipes, pipe_sw_if_index[1],
+			   PIPE_INVALID);
+
+  pipe_main.pipes[pipe_sw_if_index[0]].sw_if_index = pipe_sw_if_index[1];
+  pipe_main.pipes[pipe_sw_if_index[1]].sw_if_index = pipe_sw_if_index[0];
+
+  return 0;
+
+oops:
+  /*
+   * Always report a failure to the caller and release everything acquired
+   * so far, using the same calls as vnet_delete_pipe_interface.
+   */
+  while (n_ends > 0)
+    vnet_delete_sub_interface (pipe_sw_if_index[--n_ends]);
+  if (have_hw)
+    ethernet_delete_interface (vnm, hw_if_index);
+  pipe_instance_free (instance);
+  clib_error_report (error);
+  return VNET_API_ERROR_INVALID_REGISTRATION;
+}
+
+typedef struct pipe_hw_walk_ctx_t_
+{
+  pipe_cb_fn_t cb;		/* user callback invoked for each pipe found */
+  void *ctx;			/* opaque user context handed back to cb */
+} pipe_hw_walk_ctx_t;
+
+static walk_rc_t
+pipe_hw_walk (vnet_main_t * vnm, u32 hw_if_index, void *args)
+{
+  u32 pipe_sw_if_index[2] = { ~0, ~0 };
+  pipe_hw_walk_ctx_t *ctx = args;
+  vnet_hw_interface_t *hi;
+  u32 id, sw_if_index;
+
+  /* only hw interfaces of the pipe device class are reported */
+  hi = vnet_get_hw_interface (vnm, hw_if_index);
+  if (hi->dev_class_index != pipe_device_class.index)
+    return (WALK_CONTINUE);
+  /* *INDENT-OFF* */
+  hash_foreach (id, sw_if_index, hi->sub_interface_sw_if_index_by_id,
+  ({
+    /* a pipe has ends 0 and 1 only; never write outside the array */
+    ASSERT(id < 2);
+    if (id < 2)
+      pipe_sw_if_index[id] = sw_if_index;
+  }));
+  /* *INDENT-ON* */
+
+  /* an end that was not found stays ~0; the callback may stop the walk */
+  return (ctx->cb (hi->sw_if_index, pipe_sw_if_index, hi->dev_instance,
+		   ctx->ctx));
+}
+
+void
+pipe_walk (pipe_cb_fn_t fn, void *ctx)
+{
+  pipe_hw_walk_ctx_t wctx;
+
+  ASSERT (fn);
+
+  /* pipe_hw_walk picks out the pipes and hands each one to fn with ctx */
+  wctx.cb = fn;
+  wctx.ctx = ctx;
+  vnet_hw_interface_walk (vnet_get_main (), pipe_hw_walk, &wctx);
+}
+
+/*
+ * CLI handler for "pipe create [instance <instance>]".
+ */
+static clib_error_t *
+create_pipe_interfaces (vlib_main_t * vm,
+			unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  u32 parent_sw_if_index, pipe_sw_if_index[2];
+  u32 user_instance = 0;
+  u8 is_specified = 0;
+
+  /* the only option is "instance <n>"; parsing stops at anything else */
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (!unformat (input, "instance %d", &user_instance))
+	break;
+      is_specified = 1;
+    }
+
+  if (vnet_create_pipe_interface (is_specified, user_instance,
+				  &parent_sw_if_index, pipe_sw_if_index))
+    return clib_error_return (0, "vnet_create_pipe_interface failed");
+
+  /* report the name of the parent interface that was created */
+  vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name,
+		   vnet_get_main (), parent_sw_if_index);
+  return 0;
+}
+
+/*?
+ * Create a pipe interface.
+ *
+ * @cliexpar
+ * The syntax of the command is:
+ * @cliexcmd{pipe create [instance <instance>]}
+ * Example of how to create a pipe interface:
+ * @cliexcmd{pipe create}
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (pipe_create_interface_command, static) = {
+  .path = "pipe create",
+  .short_help = "pipe create [instance <instance>]",
+  .function = create_pipe_interfaces,
+};
+/* *INDENT-ON* */
+
+/* Delete the pipe whose parent interface is sw_if_index. */
+int
+vnet_delete_pipe_interface (u32 sw_if_index)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  vnet_sw_interface_t *si;
+  vnet_hw_interface_t *hi;
+  u32 hw_if_index, instance, id;
+
+  if (pool_is_free_index (vnm->interface_main.sw_interfaces, sw_if_index))
+    return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+  si = vnet_get_sw_interface (vnm, sw_if_index);
+  hw_if_index = si->hw_if_index;
+  hi = vnet_get_hw_interface (vnm, hw_if_index);
+  instance = hi->dev_instance;
+
+  /* never free an instance or tear down an interface that is not a pipe */
+  if (hi->dev_class_index != pipe_device_class.index ||
+      pipe_instance_free (instance) < 0)
+    return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+  /* *INDENT-OFF* */
+  hash_foreach (id, sw_if_index, hi->sub_interface_sw_if_index_by_id,
+  ({
+    vnet_delete_sub_interface(sw_if_index);
+    pipe_main.pipes[sw_if_index] = PIPE_INVALID;
+  }));
+  /* *INDENT-ON* */
+
+  ethernet_delete_interface (vnm, hw_if_index);
+
+  return 0;
+}
+
+/*
+ * CLI handler for "pipe delete <interface>": passes the parsed sw_if_index
+ * to vnet_delete_pipe_interface.
+ */
+static clib_error_t *
+delete_pipe_interfaces (vlib_main_t * vm,
+			unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  u32 sw_if_index = ~0;
+
+  /* consume interface names; the last one parsed is the one deleted */
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (!unformat (input, "%U",
+		     unformat_vnet_sw_interface, vnm, &sw_if_index))
+	break;
+    }
+
+  if (sw_if_index == ~0)
+    return clib_error_return (0, "interface not specified");
+
+  if (vnet_delete_pipe_interface (sw_if_index))
+    return clib_error_return (0, "vnet_delete_pipe_interface failed");
+
+  return 0;
+}
+
+/*?
+ * Delete a pipe interface.
+ *
+ * @cliexpar
+ * The syntax of the command is:
+ * @cliexcmd{pipe delete <interface>}
+ * Example of how to delete a pipe interface:
+ * @cliexcmd{pipe delete pipe0}
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (pipe_delete_interface_command, static) = {
+  .path = "pipe delete",
+  .short_help = "pipe delete <interface>",
+  .function = delete_pipe_interfaces,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/devices/pipe/pipe.h b/src/vnet/devices/pipe/pipe.h
new file mode 100644
index 0000000..ef72934
--- /dev/null
+++ b/src/vnet/devices/pipe/pipe.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2018 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __PIPE_H__
+#define __PIPE_H__
+
+#include <vnet/ethernet/ethernet.h>
+
+/**
+ * representation of one end of a pipe interface
+ */
+typedef struct pipe_t_
+{
+  /** the SW if_index of the other end of the pipe */
+  u32 sw_if_index;
+
+  /** Sub-interface config */
+  subint_config_t subint;
+} pipe_t;
+
+/**
+ * Create a new pipe interface
+ *
+ * @param is_specified Has the user specified a desired instance number
+ * @param user_instance The user's desired instance
+ * @param parent_sw_if_index OUT the created parent interface
+ * @param pipe_sw_if_index OUT the ends of the pipe
+ */
+extern int vnet_create_pipe_interface (u8 is_specified,
+				       u32 user_instance,
+				       u32 * parent_sw_if_index,
+				       u32 pipe_sw_if_index[2]);
+extern int vnet_delete_pipe_interface (u32 parent_sw_if_index);
+
+/**
+ * Get the pipe instance based on one end
+ */
+extern pipe_t *pipe_get (u32 sw_if_index);
+
+/**
+ * Call back function when walking all the pipes
+ */
+typedef walk_rc_t (*pipe_cb_fn_t) (u32 parent_sw_if_index,
+				   u32 pipe_sw_if_index[2],
+				   u32 instance, void *ctx);
+
+/**
+ * Walk all of the pipe interfaces
+ */
+extern void pipe_walk (pipe_cb_fn_t fn, void *ctx);
+
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/devices/pipe/pipe_api.c b/src/vnet/devices/pipe/pipe_api.c
new file mode 100644
index 0000000..46bbea5
--- /dev/null
+++ b/src/vnet/devices/pipe/pipe_api.c
@@ -0,0 +1,177 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2018 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+
+#include <vnet/devices/pipe/pipe.h>
+#include <vnet/vnet_msg_enum.h>
+
+#define vl_typedefs		/* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun		/* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <vnet/vnet_all_api_h.h>
+#undef vl_printfun
+
+#include <vlibapi/api_helper_macros.h>
+vpe_api_main_t vpe_api_main;
+
+#define foreach_vpe_api_msg                                     \
+  _(PIPE_CREATE, pipe_create)                                   \
+  _(PIPE_DELETE, pipe_delete)                                   \
+  _(PIPE_DUMP,   pipe_dump)
+
+static void
+vl_api_pipe_create_t_handler (vl_api_pipe_create_t * mp)
+{
+  vl_api_pipe_create_reply_t *rmp;
+  /* defaults, so a failed create never replies with uninitialised data */
+  u32 parent_sw_if_index = ~0;
+  u32 pipe_sw_if_index[2] = { ~0, ~0 };
+  u32 user_instance = ntohl (mp->user_instance);
+  int rv;
+
+  rv = vnet_create_pipe_interface (mp->is_specified, user_instance,
+				   &parent_sw_if_index, pipe_sw_if_index);
+
+  /* *INDENT-OFF* */
+  REPLY_MACRO2(VL_API_PIPE_CREATE_REPLY,
+  ({
+    rmp->parent_sw_if_index = ntohl (parent_sw_if_index);
+    rmp->pipe_sw_if_index[0] = ntohl (pipe_sw_if_index[0]);
+    rmp->pipe_sw_if_index[1] = ntohl (pipe_sw_if_index[1]);
+  }));
+  /* *INDENT-ON* */
+}
+
+static void
+vl_api_pipe_delete_t_handler (vl_api_pipe_delete_t * mp)
+{
+  u32 parent_sw_if_index = ntohl (mp->parent_sw_if_index);
+  vl_api_pipe_delete_reply_t *rmp;
+  int rv;
+
+  rv = vnet_delete_pipe_interface (parent_sw_if_index);
+  REPLY_MACRO (VL_API_PIPE_DELETE_REPLY);	/* presumably sends rv back */
+}
+
+typedef struct pipe_dump_walk_t_
+{
+  vl_api_registration_t *reg;	/* client the details are sent to */
+  u32 context;			/* context of the dump request, echoed back */
+} pipe_dump_walk_t;
+
+static walk_rc_t
+pipe_send_details (u32 parent_sw_if_index,
+		   u32 pipe_sw_if_index[2], u32 instance, void *args)
+{
+  pipe_dump_walk_t *walk = args;
+  vl_api_pipe_details_t *details = vl_msg_api_alloc (sizeof (*details));
+
+  /* without a message buffer there is no point visiting more pipes */
+  if (NULL == details)
+    return (WALK_STOP);
+
+  details->_vl_msg_id = ntohs (VL_API_PIPE_DETAILS);
+  /* the context is echoed exactly as it arrived in the request */
+  details->context = walk->context;
+
+  details->parent_sw_if_index = ntohl (parent_sw_if_index);
+  details->pipe_sw_if_index[0] = ntohl (pipe_sw_if_index[0]);
+  details->pipe_sw_if_index[1] = ntohl (pipe_sw_if_index[1]);
+  details->instance = ntohl (instance);
+  vl_api_send_msg (walk->reg, (u8 *) details);
+
+  return (WALK_CONTINUE);
+}
+
+static void
+vl_api_pipe_dump_t_handler (vl_api_pipe_dump_t * mp)
+{
+  pipe_dump_walk_t ctx;
+
+  /* no registration for this client index means nobody to reply to */
+  ctx.reg = vl_api_client_index_to_registration (mp->client_index);
+  if (NULL == ctx.reg)
+    return;
+
+  /* the request's context goes into every reply */
+  ctx.context = mp->context;
+
+  /* one pipe_details message is sent per pipe found by the walk */
+  pipe_walk (pipe_send_details, &ctx);
+}
+
+/*
+ * pipe_api_hookup
+ * Add the pipe API message handlers to the table.
+ * vlib has already mapped shared memory and
+ * added the client registration handlers.
+ * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process()
+ */
+#define vl_msg_name_crc_list
+#include <vnet/devices/pipe/pipe.api.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (api_main_t * am)
+{
+#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id);
+  foreach_vl_msg_name_crc_pipe;	/* presumably one _() per pipe.api message */
+#undef _
+}
+
+static clib_error_t *
+pipe_api_hookup (vlib_main_t * vm)
+{
+  api_main_t *am = &api_main;
+
+#define _(N,n)                                                  \
+    vl_msg_api_set_handlers(VL_API_##N, #n,                     \
+                           vl_api_##n##_t_handler,              \
+                           vl_noop_handler,                     \
+                           vl_api_##n##_t_endian,               \
+                           vl_api_##n##_t_print,                \
+                           sizeof(vl_api_##n##_t), 1);
+  foreach_vpe_api_msg;		/* PIPE_CREATE, PIPE_DELETE, PIPE_DUMP */
+#undef _
+
+  /*
+   * Set up the (msg_name, crc, message-id) table
+   */
+  setup_message_id_table (am);
+
+  return 0;
+}
+
+VLIB_API_INIT_FUNCTION (pipe_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ethernet/interface.c b/src/vnet/ethernet/interface.c
index b3ea983..e39ae7b 100644
--- a/src/vnet/ethernet/interface.c
+++ b/src/vnet/ethernet/interface.c
@@ -91,7 +91,8 @@
   u8 *rewrite = NULL;
   u8 is_p2p = 0;
 
-  if (sub_sw->type == VNET_SW_INTERFACE_TYPE_P2P)
+  if ((sub_sw->type == VNET_SW_INTERFACE_TYPE_P2P) ||
+      (sub_sw->type == VNET_SW_INTERFACE_TYPE_PIPE))
     is_p2p = 1;
   if (sub_sw != sup_sw)
     {
@@ -197,7 +198,8 @@
   adj = adj_get (ai);
 
   vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index);
-  if (si->type == VNET_SW_INTERFACE_TYPE_P2P)
+  if ((si->type == VNET_SW_INTERFACE_TYPE_P2P) ||
+      (si->type == VNET_SW_INTERFACE_TYPE_PIPE))
     {
       default_update_adjacency (vnm, sw_if_index, ai);
     }
@@ -748,6 +750,7 @@
 
   si = vnet_get_sw_interface (vnm, sw_if_index);
   if (si->type == VNET_SW_INTERFACE_TYPE_SUB ||
+      si->type == VNET_SW_INTERFACE_TYPE_PIPE ||
       si->type == VNET_SW_INTERFACE_TYPE_P2P)
     {
       vnet_interface_main_t *im = &vnm->interface_main;
diff --git a/src/vnet/ethernet/node.c b/src/vnet/ethernet/node.c
index 55d8285..8667667 100755
--- a/src/vnet/ethernet/node.c
+++ b/src/vnet/ethernet/node.c
@@ -41,6 +41,7 @@
 #include <vnet/pg/pg.h>
 #include <vnet/ethernet/ethernet.h>
 #include <vnet/ethernet/p2p_ethernet.h>
+#include <vnet/devices/pipe/pipe.h>
 #include <vppinfra/sparse_vec.h>
 #include <vnet/l2/l2_bvi.h>
 
@@ -838,6 +839,14 @@
 	subint = vec_elt_at_index (p2pm->p2p_subif_pool, si->p2p.pool_index);
       *flags = SUBINT_CONFIG_P2P;
     }
+  else if (si->type == VNET_SW_INTERFACE_TYPE_PIPE)
+    {
+      pipe_t *pipe;
+
+      pipe = pipe_get (sw_if_index);
+      subint = &pipe->subint;
+      *flags = SUBINT_CONFIG_P2P;
+    }
   else if (si->sub.eth.flags.default_sub)
     {
       subint = &main_intf->default_subint;
@@ -1127,7 +1136,7 @@
     }
   else
     {
-      // Note that config is L3 by defaulty
+      // Note that config is L3 by default
       subint->flags = SUBINT_CONFIG_VALID | match_flags;
       subint->sw_if_index = ~0;	// because interfaces are initially down
     }
diff --git a/src/vnet/interface.c b/src/vnet/interface.c
index 34ad292..a9346a2 100644
--- a/src/vnet/interface.c
+++ b/src/vnet/interface.c
@@ -1042,8 +1042,9 @@
   u32 id, sw_if_index;
 
   hi = vnet_get_hw_interface (vnm, hw_if_index);
-  /* the super first, then the and sub interfaces */
-  fn (vnm, hi->sw_if_index, ctx);
+  /* the super first, then the sub interfaces */
+  if (WALK_STOP == fn (vnm, hi->sw_if_index, ctx))
+    return;
 
   /* *INDENT-OFF* */
   hash_foreach (id, sw_if_index,
@@ -1056,6 +1057,24 @@
 }
 
 void
+vnet_hw_interface_walk (vnet_main_t * vnm,
+			vnet_hw_interface_walk_t fn, void *ctx)
+{
+  vnet_interface_main_t *im = &vnm->interface_main;
+  vnet_hw_interface_t *hi;
+
+  /* Call fn with each HW interface index; WALK_STOP ends the walk. */
+  /* *INDENT-OFF* */
+  pool_foreach (hi, im->hw_interfaces,
+  ({
+    /* return, not break: break would only exit the macro's inner loop */
+    if (WALK_STOP == fn(vnm, hi->hw_if_index, ctx))
+      return;
+  }));
+  /* *INDENT-ON* */
+}
+
+void
 vnet_sw_interface_walk (vnet_main_t * vnm,
 			vnet_sw_interface_walk_t fn, void *ctx)
 {
@@ -1257,7 +1276,8 @@
 vnet_sw_interface_is_p2p (vnet_main_t * vnm, u32 sw_if_index)
 {
   vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index);
-  if (si->type == VNET_SW_INTERFACE_TYPE_P2P)
+  if ((si->type == VNET_SW_INTERFACE_TYPE_P2P) ||
+      (si->type == VNET_SW_INTERFACE_TYPE_PIPE))
     return 1;
 
   vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
diff --git a/src/vnet/interface.h b/src/vnet/interface.h
index 1181c34..f82cf9d 100644
--- a/src/vnet/interface.h
+++ b/src/vnet/interface.h
@@ -578,6 +578,7 @@
   /* A sub-interface. */
   VNET_SW_INTERFACE_TYPE_SUB,
   VNET_SW_INTERFACE_TYPE_P2P,
+  VNET_SW_INTERFACE_TYPE_PIPE,
 } vnet_sw_interface_type_t;
 
 typedef struct
diff --git a/src/vnet/interface_funcs.h b/src/vnet/interface_funcs.h
index c4dd3b7..08af7f2 100644
--- a/src/vnet/interface_funcs.h
+++ b/src/vnet/interface_funcs.h
@@ -82,6 +82,7 @@
 {
   vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, sw_if_index);
   if (sw->type == VNET_SW_INTERFACE_TYPE_SUB ||
+      sw->type == VNET_SW_INTERFACE_TYPE_PIPE ||
       sw->type == VNET_SW_INTERFACE_TYPE_P2P)
     sw = vnet_get_sw_interface (vnm, sw->sup_sw_if_index);
   return sw;
@@ -91,7 +92,8 @@
 vnet_get_sup_hw_interface (vnet_main_t * vnm, u32 sw_if_index)
 {
   vnet_sw_interface_t *sw = vnet_get_sup_sw_interface (vnm, sw_if_index);
-  ASSERT (sw->type == VNET_SW_INTERFACE_TYPE_HARDWARE);
+  ASSERT ((sw->type == VNET_SW_INTERFACE_TYPE_HARDWARE) ||
+	  (sw->type == VNET_SW_INTERFACE_TYPE_PIPE));
   return vnet_get_hw_interface (vnm, sw->hw_if_index);
 }
 
@@ -185,6 +187,20 @@
 void vnet_sw_interface_walk (vnet_main_t * vnm,
 			     vnet_sw_interface_walk_t fn, void *ctx);
 
+
+/**
+ * Callback invoked with each HW interface index during a HW interface walk
+ */
+typedef walk_rc_t (*vnet_hw_interface_walk_t) (vnet_main_t * vnm,
+					       u32 hw_if_index, void *ctx);
+
+/**
+ * @brief
+ * Walk all the HW interfaces, passing each index and ctx to fn
+ */
+void vnet_hw_interface_walk (vnet_main_t * vnm,
+			     vnet_hw_interface_walk_t fn, void *ctx);
+
 /* Register a hardware interface instance. */
 u32 vnet_register_interface (vnet_main_t * vnm,
 			     u32 dev_class_index,
diff --git a/src/vnet/ip/ip6_neighbor.c b/src/vnet/ip/ip6_neighbor.c
index 6d91224..87bda43 100644
--- a/src/vnet/ip/ip6_neighbor.c
+++ b/src/vnet/ip/ip6_neighbor.c
@@ -4190,6 +4190,7 @@
 
 		  sw_if0 = vnet_get_sw_interface (vnm, sw_if_index);
 		  if (sw_if0->type == VNET_SW_INTERFACE_TYPE_SUB ||
+		      sw_if0->type == VNET_SW_INTERFACE_TYPE_PIPE ||
 		      sw_if0->type == VNET_SW_INTERFACE_TYPE_P2P)
 		    {
 		      /* make up  an interface id */
diff --git a/src/vnet/vnet_all_api_h.h b/src/vnet/vnet_all_api_h.h
index 06b76a5..f07be65 100644
--- a/src/vnet/vnet_all_api_h.h
+++ b/src/vnet/vnet_all_api_h.h
@@ -74,6 +74,7 @@
 #include <vnet/qos/qos.api.h>
 #include <vnet/dhcp/dhcp6_pd_client_cp.api.h>
 #include <vnet/dhcp/dhcp6_ia_na_client_cp.api.h>
+#include <vnet/devices/pipe/pipe.api.h>
 
 /*
  * fd.io coding-style-patch-verification: ON