tunnel: support copying TTL and flow label from inner to outer

Type: feature

The added functionality is to support copying TTL and flow label from
inner to outer. The .api was extended to support expressing this and also
to add a common tunnel endpoint type. I find it best to make API changes
in one patch so there are fewer versions of the API.

Signed-off-by: Neale Ranns <neale@graphiant.com>
Change-Id: I755c1e3f4c475058792af39c1abeda92129efb76
diff --git a/src/vnet/gre/gre.c b/src/vnet/gre/gre.c
index d842d68..a355a22 100644
--- a/src/vnet/gre/gre.c
+++ b/src/vnet/gre/gre.c
@@ -340,7 +340,7 @@
   ip0->ip6.payload_length =
     clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) -
 			  sizeof (ip0->ip6));
-  tunnel_encap_fixup_4o6 (flags, (ip4_header_t *) (ip0 + 1), &ip0->ip6);
+  tunnel_encap_fixup_4o6 (flags, b0, (ip4_header_t *) (ip0 + 1), &ip0->ip6);
 }
 
 static void
diff --git a/src/vnet/ip/ip4_packet.h b/src/vnet/ip/ip4_packet.h
index 0653092..513a744 100644
--- a/src/vnet/ip/ip4_packet.h
+++ b/src/vnet/ip/ip4_packet.h
@@ -359,6 +359,18 @@
   return (ip4->tos & IP_PACKET_TC_FIELD_ECN_MASK);
 }
 
+always_inline u8
+ip4_header_get_ttl (const ip4_header_t *ip4)
+{
+  return (ip4->ttl);
+}
+
+always_inline void
+ip4_header_set_ttl (ip4_header_t *ip4, u8 ttl)
+{
+  ip4->ttl = ttl;
+}
+
 always_inline void
 ip4_header_set_df (ip4_header_t * ip4)
 {
diff --git a/src/vnet/ip/ip6_packet.h b/src/vnet/ip/ip6_packet.h
index 1be2cea..7a8c31c 100644
--- a/src/vnet/ip/ip6_packet.h
+++ b/src/vnet/ip/ip6_packet.h
@@ -343,13 +343,6 @@
 	  & IP6_PACKET_ECN_MASK) >> 20;
 }
 
-static_always_inline u32
-ip6_flow_label_network_order (const ip6_header_t *ip6)
-{
-  return (clib_net_to_host_u32 (ip6->ip_version_traffic_class_and_flow_label) &
-	  IP6_PACKET_FL_MASK);
-}
-
 static_always_inline void
 ip6_set_traffic_class_network_order (ip6_header_t * ip6, ip_dscp_t dscp)
 {
@@ -376,10 +369,40 @@
   u32 tmp =
     clib_net_to_host_u32 (ip6->ip_version_traffic_class_and_flow_label);
   tmp &= 0xffcfffff;
-  tmp |= (ecn << 20);
+  tmp |= ((0x3 & ecn) << 20);
   ip6->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32 (tmp);
 }
 
+static_always_inline u32
+ip6_flow_label_network_order (const ip6_header_t *ip6)
+{
+  u32 tmp =
+    clib_net_to_host_u32 (ip6->ip_version_traffic_class_and_flow_label);
+  return (tmp & 0xfffff);
+}
+
+static_always_inline void
+ip6_set_flow_label_network_order (ip6_header_t *ip6, u32 flow_label)
+{
+  u32 tmp =
+    clib_net_to_host_u32 (ip6->ip_version_traffic_class_and_flow_label);
+  tmp &= 0xfff00000;
+  tmp |= flow_label & 0x000fffff;
+  ip6->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32 (tmp);
+}
+
+static_always_inline u32
+ip6_hop_limit_network_order (const ip6_header_t *ip6)
+{
+  return (ip6->hop_limit);
+}
+
+static_always_inline void
+ip6_set_hop_limit_network_order (ip6_header_t *ip6, u8 hop_limit)
+{
+  ip6->hop_limit = hop_limit;
+}
+
 always_inline void *
 ip6_next_header (ip6_header_t * i)
 {
diff --git a/src/vnet/ipip/ipip.c b/src/vnet/ipip/ipip.c
index ca52142..d43bcd1 100644
--- a/src/vnet/ipip/ipip.c
+++ b/src/vnet/ipip/ipip.c
@@ -184,7 +184,7 @@
   ip6->payload_length =
     clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b) -
 			  sizeof (*ip6));
-  tunnel_encap_fixup_4o6 (flags, ((ip4_header_t *) (ip6 + 1)), ip6);
+  tunnel_encap_fixup_4o6 (flags, b, ((ip4_header_t *) (ip6 + 1)), ip6);
 }
 
 static void
@@ -224,7 +224,8 @@
   ip6 = vlib_buffer_get_current (b);
   ip6->payload_length =
     clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b) - sizeof (*ip6));
-  tunnel_encap_fixup_mplso6 (flags, (mpls_unicast_header_t *) (ip6 + 1), ip6);
+  tunnel_encap_fixup_mplso6 (flags, b, (mpls_unicast_header_t *) (ip6 + 1),
+			     ip6);
 }
 
 static void
diff --git a/src/vnet/ipsec/esp_encrypt.c b/src/vnet/ipsec/esp_encrypt.c
index f291c08..08f82db 100644
--- a/src/vnet/ipsec/esp_encrypt.c
+++ b/src/vnet/ipsec/esp_encrypt.c
@@ -768,16 +768,15 @@
 	      else if (VNET_LINK_IP4 == lt)
 		{
 		  *next_hdr_ptr = IP_PROTOCOL_IP_IN_IP;
-		  tunnel_encap_fixup_4o6 (sa0->tunnel_flags,
-					  (const ip4_header_t *) payload,
-					  ip6);
+		  tunnel_encap_fixup_4o6 (sa0->tunnel_flags, b[0],
+					  (const ip4_header_t *) payload, ip6);
 		}
 	      else if (VNET_LINK_MPLS == lt)
 		{
 		  *next_hdr_ptr = IP_PROTOCOL_MPLS_IN_IP;
 		  tunnel_encap_fixup_mplso6 (
-		    sa0->tunnel_flags, (const mpls_unicast_header_t *) payload,
-		    ip6);
+		    sa0->tunnel_flags, b[0],
+		    (const mpls_unicast_header_t *) payload, ip6);
 		}
 	      else
 		ASSERT (0);
diff --git a/src/vnet/tunnel/tunnel.c b/src/vnet/tunnel/tunnel.c
index 38bde34..1bd03eb 100644
--- a/src/vnet/tunnel/tunnel.c
+++ b/src/vnet/tunnel/tunnel.c
@@ -16,6 +16,21 @@
  */
 
 #include <vnet/tunnel/tunnel.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/fib_entry_track.h>
+
+#include <vnet/ip/ip6_inlines.h>
+
+const u8 TUNNEL_ENCAP_DECAP_FLAG_MASK = (
+#define _(a, b, c) TUNNEL_ENCAP_DECAP_FLAG_##a |
+  foreach_tunnel_encap_decap_flag
+#undef _
+  0);
+const u8 TUNNEL_FLAG_MASK = (
+#define _(a, b, c) TUNNEL_FLAG_##a |
+  foreach_tunnel_flag
+#undef _
+  0);
 
 u8 *
 format_tunnel_mode (u8 * s, va_list * args)
@@ -54,10 +69,11 @@
   tunnel_encap_decap_flags_t f = va_arg (*args, int);
 
   if (f == TUNNEL_ENCAP_DECAP_FLAG_NONE)
-    return (format (s, "none"));
+    s = format (s, "none");
 
-#define _(a,b,c) if (f & TUNNEL_ENCAP_DECAP_FLAG_##a) s = format(s, "%s ", b);
-  forech_tunnel_encap_decap_flag
+#define _(a, b, c)                                                            \
+  if (f & TUNNEL_ENCAP_DECAP_FLAG_##a) s = format (s, "%s ", b);
+  foreach_tunnel_encap_decap_flag /* plain ifs: print every flag set */
 #undef _
     return (s);
 }
@@ -71,11 +87,176 @@
   *f |= TUNNEL_ENCAP_DECAP_FLAG_##a;\
   return 1;\
   }
-  forech_tunnel_encap_decap_flag;
+  foreach_tunnel_encap_decap_flag;
 #undef _
   return 0;
 }
 
+u8 *
+format_tunnel_flags (u8 *s, va_list *args)
+{
+  tunnel_flags_t f = va_arg (*args, int);
+
+  if (f == TUNNEL_FLAG_NONE)
+    s = format (s, "none");
+  /* independent tests, not an else-if chain: print every flag that is set */
+#define _(a, b, c) if (f & TUNNEL_FLAG_##a) s = format (s, "%s ", c);
+  foreach_tunnel_flag
+#undef _
+    return (s);
+}
+
+uword
+unformat_tunnel_flags (unformat_input_t *input, va_list *args)
+{
+  tunnel_flags_t *f = va_arg (*args, tunnel_flags_t *);
+#define _(a, b, c)                                                            \
+  if (unformat (input, c))                                                    \
+    {                                                                         \
+      *f |= TUNNEL_FLAG_##a;                                                  \
+      return 1;                                                               \
+    }
+  foreach_tunnel_flag;
+#undef _
+  return 0;
+}
+
+ip_address_family_t
+tunnel_get_af (const tunnel_t *t)
+{
+  return (ip_addr_version (&t->t_src));
+}
+
+void
+tunnel_copy (const tunnel_t *src, tunnel_t *dst)
+{
+  ip_address_copy (&dst->t_dst, &src->t_dst);
+  ip_address_copy (&dst->t_src, &src->t_src);
+
+  dst->t_encap_decap_flags = src->t_encap_decap_flags;
+  dst->t_flags = src->t_flags;
+  dst->t_mode = src->t_mode;
+  dst->t_table_id = src->t_table_id;
+  dst->t_dscp = src->t_dscp;
+  dst->t_hop_limit = src->t_hop_limit;
+  dst->t_fib_index = src->t_fib_index;
+
+  dst->t_flags &= ~TUNNEL_FLAG_RESOLVED;
+  dst->t_fib_entry_index = FIB_NODE_INDEX_INVALID;
+  dst->t_sibling = ~0;
+}
+
+u8 *
+format_tunnel (u8 *s, va_list *args)
+{
+  const tunnel_t *t = va_arg (*args, tunnel_t *);
+  u32 indent = va_arg (*args, u32);
+
+  s = format (s, "%Utable-ID:%d [%U->%U] hop-limit:%d %U %U [%U] [%U]",
+	      format_white_space, indent, t->t_table_id, format_ip_address,
+	      &t->t_src, format_ip_address, &t->t_dst, t->t_hop_limit,
+	      format_tunnel_mode, t->t_mode, format_ip_dscp, t->t_dscp,
+	      format_tunnel_flags, t->t_flags, format_tunnel_encap_decap_flags,
+	      t->t_encap_decap_flags);
+  if (t->t_flags & TUNNEL_FLAG_RESOLVED)
+    s = format (s, " [resolved via fib-entry: %d]", t->t_fib_entry_index);
+
+  return (s);
+}
+
+uword
+unformat_tunnel (unformat_input_t *input, va_list *args)
+{
+  tunnel_t *t = va_arg (*args, tunnel_t *);
+  u32 hop_limit; /* "%d" stores a full int; t_hop_limit is only a u8 */
+
+  if (!unformat (input, "tunnel"))
+    return (0);
+
+  unformat (input, "src %U", unformat_ip_address, &t->t_src);
+  unformat (input, "dst %U", unformat_ip_address, &t->t_dst);
+  unformat (input, "table-id:%d", &t->t_table_id);
+  if (unformat (input, "hop-limit:%d", &hop_limit))
+    t->t_hop_limit = hop_limit;
+  unformat (input, "%U", unformat_ip_dscp, &t->t_dscp);
+  unformat (input, "%U", unformat_tunnel_encap_decap_flags,
+	    &t->t_encap_decap_flags);
+  unformat (input, "%U", unformat_tunnel_flags, &t->t_flags);
+  unformat (input, "%U", unformat_tunnel_mode, &t->t_mode);
+  /* a tunnel's endpoints must be in the same address family */
+  return (ip_addr_version (&t->t_src) == ip_addr_version (&t->t_dst));
+}
+
+int
+tunnel_resolve (tunnel_t *t, fib_node_type_t child_type, index_t child_index)
+{
+  fib_prefix_t pfx;
+
+  ip_address_to_fib_prefix (&t->t_dst, &pfx);
+
+  t->t_fib_index = fib_table_find (pfx.fp_proto, t->t_table_id);
+
+  if (t->t_fib_index == ~((u32) 0))
+    return VNET_API_ERROR_NO_SUCH_FIB;
+
+  t->t_fib_entry_index = fib_entry_track (t->t_fib_index, &pfx, child_type,
+					  child_index, &t->t_sibling);
+
+  t->t_flags |= TUNNEL_FLAG_RESOLVED;
+
+  return (0);
+}
+
+void
+tunnel_unresolve (tunnel_t *t)
+{
+  if (t->t_flags & TUNNEL_FLAG_RESOLVED)
+    fib_entry_untrack (t->t_fib_entry_index, t->t_sibling);
+
+  t->t_flags &= ~TUNNEL_FLAG_RESOLVED;
+}
+
+void
+tunnel_contribute_forwarding (const tunnel_t *t, dpo_id_t *dpo)
+{
+  fib_forward_chain_type_t fct;
+
+  fct = fib_forw_chain_type_from_fib_proto (
+    ip_address_family_to_fib_proto (ip_addr_version (&t->t_src)));
+
+  fib_entry_contribute_forwarding (t->t_fib_entry_index, fct, dpo);
+}
+
+void
+tunnel_build_v6_hdr (const tunnel_t *t, ip_protocol_t next_proto,
+		     ip6_header_t *ip)
+{
+  ip->ip_version_traffic_class_and_flow_label =
+    clib_host_to_net_u32 (0x60000000);
+  ip6_set_dscp_network_order (ip, t->t_dscp);
+
+  /* a configured hop-limit of 0 means "use the default" of 254 */
+  ip->hop_limit = (t->t_hop_limit == 0 ? 254 : t->t_hop_limit);
+  ip6_address_copy (&ip->src_address, &ip_addr_v6 (&t->t_src));
+  ip6_address_copy (&ip->dst_address, &ip_addr_v6 (&t->t_dst));
+
+  ip->protocol = next_proto;
+  ip6_set_flow_label_network_order (
+    ip, ip6_compute_flow_hash (ip, IP_FLOW_HASH_DEFAULT));
+}
+
+void
+tunnel_build_v4_hdr (const tunnel_t *t, ip_protocol_t next_proto,
+		     ip4_header_t *ip)
+{
+  ip->ip_version_and_header_length = 0x45;
+  ip->ttl = (t->t_hop_limit == 0 ? 254 : t->t_hop_limit);
+  ip->src_address.as_u32 = t->t_src.ip.ip4.as_u32;
+  ip->dst_address.as_u32 = t->t_dst.ip.ip4.as_u32;
+  ip->tos = t->t_dscp << 2;
+  ip->protocol = next_proto;
+  ip->checksum = ip4_header_checksum (ip);
+}
 
 /*
  * fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/tunnel/tunnel.h b/src/vnet/tunnel/tunnel.h
index de06b60..acf0e4c 100644
--- a/src/vnet/tunnel/tunnel.h
+++ b/src/vnet/tunnel/tunnel.h
@@ -18,7 +18,8 @@
 #ifndef __TUNNEL_H__
 #define __TUNNEL_H__
 
-#include <vlib/vlib.h>
+#include <vnet/ip/ip_types.h>
+#include <vnet/fib/fib_node.h>
 
 #define foreach_tunnel_mode     \
   _(P2P, "point-to-point")      \
@@ -37,28 +38,90 @@
 /**
  * Keep these idenitical to those in ipip.api
  */
-#define forech_tunnel_encap_decap_flag              \
-  _(NONE, "none", 0x0)                              \
-  _(ENCAP_COPY_DF, "encap-copy-df", 0x1)            \
-  _(ENCAP_SET_DF, "encap-set-df", 0x2)              \
-  _(ENCAP_COPY_DSCP, "encap-copy-dscp", 0x4)        \
-  _(ENCAP_COPY_ECN, "encap-copy-ecn", 0x8)          \
-  _(DECAP_COPY_ECN, "decap-copy-ecn", 0x10)         \
-  _(ENCAP_INNER_HASH, "encap-inner-hash", 0x20)
+#define foreach_tunnel_encap_decap_flag                                       \
+  _ (NONE, "none", 0x0)                                                       \
+  _ (ENCAP_COPY_DF, "encap-copy-df", 0x1)                                     \
+  _ (ENCAP_SET_DF, "encap-set-df", 0x2)                                       \
+  _ (ENCAP_COPY_DSCP, "encap-copy-dscp", 0x4)                                 \
+  _ (ENCAP_COPY_ECN, "encap-copy-ecn", 0x8)                                   \
+  _ (DECAP_COPY_ECN, "decap-copy-ecn", 0x10)                                  \
+  _ (ENCAP_INNER_HASH, "encap-inner-hash", 0x20)                              \
+  _ (ENCAP_COPY_HOP_LIMIT, "encap-copy-hop-limit", 0x40)                      \
+  _ (ENCAP_COPY_FLOW_LABEL, "encap-copy-flow-label", 0x80)
 
 typedef enum tunnel_encap_decap_flags_t_
 {
 #define _(a,b,c) TUNNEL_ENCAP_DECAP_FLAG_##a = c,
-  forech_tunnel_encap_decap_flag
+  foreach_tunnel_encap_decap_flag
 #undef _
 } __clib_packed tunnel_encap_decap_flags_t;
 
-#define TUNNEL_FLAG_MASK (0x1f)
+extern const u8 TUNNEL_ENCAP_DECAP_FLAG_MASK;
 
 extern u8 *format_tunnel_encap_decap_flags (u8 * s, va_list * args);
-extern uword
-unformat_tunnel_encap_decap_flags (unformat_input_t * input, va_list * args);
+extern uword unformat_tunnel_encap_decap_flags (unformat_input_t *input,
+						va_list *args);
+
+#define foreach_tunnel_flag                                                   \
+  _ (RESOLVED, 0, "resolved")                                                 \
+  _ (TRACK_MTU, 1, "track-mtu")
+
+typedef enum tunnel_flags_t_
+{
+  TUNNEL_FLAG_NONE = 0,
+#define _(n, b, s) TUNNEL_FLAG_##n = (1 << b),
+  foreach_tunnel_flag
+#undef _
+} __clib_packed tunnel_flags_t;
+
+extern const u8 TUNNEL_FLAG_MASK;
+
+extern u8 *format_tunnel_flags (u8 *s, va_list *args);
+extern uword unformat_tunnel_flags (unformat_input_t *input, va_list *args);
+
+/**
+ * A representation of an IP tunnel config
+ */
+typedef struct tunnel_t_
+{
+  ip_address_t t_src;
+  ip_address_t t_dst;
+  tunnel_encap_decap_flags_t t_encap_decap_flags;
+  tunnel_flags_t t_flags;
+  tunnel_mode_t t_mode;
+  u32 t_table_id;
+  ip_dscp_t t_dscp;
+  u8 t_hop_limit;
+
+  /**
+   * derived data
+   */
+  u32 t_fib_index;
+
+  fib_node_index_t t_fib_entry_index;
+  u32 t_sibling;
+
+} tunnel_t;
+
+extern u8 *format_tunnel (u8 *s, va_list *args);
+extern uword unformat_tunnel (unformat_input_t *input, va_list *args);
+
+extern void tunnel_copy (const tunnel_t *src, tunnel_t *dst);
+extern int tunnel_resolve (tunnel_t *t, fib_node_type_t child_type,
+			   index_t child_index);
+extern void tunnel_unresolve (tunnel_t *t);
+
+extern ip_address_family_t tunnel_get_af (const tunnel_t *t);
+
+extern void tunnel_contribute_forwarding (const tunnel_t *t, dpo_id_t *dpo);
+
+extern void tunnel_build_v6_hdr (const tunnel_t *t, ip_protocol_t next_proto,
+				 ip6_header_t *ip);
+extern void tunnel_build_v4_hdr (const tunnel_t *t, ip_protocol_t next_proto,
+				 ip4_header_t *ip);
+
 #endif
+
 /*
  * fd.io coding-style-patch-verification: ON
  *
diff --git a/src/vnet/tunnel/tunnel_dp.h b/src/vnet/tunnel/tunnel_dp.h
index deef408..3db22a8 100644
--- a/src/vnet/tunnel/tunnel_dp.h
+++ b/src/vnet/tunnel/tunnel_dp.h
@@ -19,18 +19,21 @@
 #define __TUNNEL_DP_H__
 
 #include <vnet/tunnel/tunnel.h>
+#include <vnet/mpls/mpls_lookup.h>
 
 static_always_inline void
 tunnel_encap_fixup_4o4 (tunnel_encap_decap_flags_t flags,
 			const ip4_header_t * inner, ip4_header_t * outer)
 {
-  if (flags & TUNNEL_ENCAP_DECAP_FLAG_ENCAP_COPY_DSCP)
+  if (PREDICT_FALSE (flags & TUNNEL_ENCAP_DECAP_FLAG_ENCAP_COPY_DSCP))
     ip4_header_set_dscp (outer, ip4_header_get_dscp (inner));
-  if (flags & TUNNEL_ENCAP_DECAP_FLAG_ENCAP_COPY_ECN)
+  if (PREDICT_FALSE (flags & TUNNEL_ENCAP_DECAP_FLAG_ENCAP_COPY_ECN))
     ip4_header_set_ecn (outer, ip4_header_get_ecn (inner));
-  if ((flags & TUNNEL_ENCAP_DECAP_FLAG_ENCAP_COPY_DF) &&
-      ip4_header_get_df (inner))
+  if (PREDICT_FALSE ((flags & TUNNEL_ENCAP_DECAP_FLAG_ENCAP_COPY_DF) &&
+		     ip4_header_get_df (inner)))
     ip4_header_set_df (outer);
+  if (PREDICT_FALSE (flags & TUNNEL_ENCAP_DECAP_FLAG_ENCAP_COPY_HOP_LIMIT))
+    ip4_header_set_ttl (outer, ip4_header_get_ttl (inner));
 }
 
 static_always_inline void
@@ -38,8 +41,8 @@
 				 const ip4_header_t * inner,
 				 ip4_header_t * outer)
 {
-  if (flags & (TUNNEL_ENCAP_DECAP_FLAG_ENCAP_COPY_DSCP |
-	       TUNNEL_ENCAP_DECAP_FLAG_ENCAP_COPY_ECN))
+  if (PREDICT_FALSE (flags & (TUNNEL_ENCAP_DECAP_FLAG_ENCAP_COPY_DSCP |
+			      TUNNEL_ENCAP_DECAP_FLAG_ENCAP_COPY_ECN)))
     {
       ip_csum_t sum = outer->checksum;
       u8 tos = outer->tos;
@@ -53,8 +56,8 @@
 	ip_csum_update (outer->checksum, tos, outer->tos, ip4_header_t, tos);
       outer->checksum = ip_csum_fold (sum);
     }
-  if ((flags & TUNNEL_ENCAP_DECAP_FLAG_ENCAP_COPY_DF) &&
-      ip4_header_get_df (inner))
+  if (PREDICT_FALSE ((flags & TUNNEL_ENCAP_DECAP_FLAG_ENCAP_COPY_DF) &&
+		     ip4_header_get_df (inner)))
     {
       ip_csum_t sum = outer->checksum;
       u16 tos = outer->flags_and_fragment_offset;
@@ -92,10 +95,12 @@
 tunnel_encap_fixup_6o4 (tunnel_encap_decap_flags_t flags,
 			const ip6_header_t * inner, ip4_header_t * outer)
 {
-  if (flags & TUNNEL_ENCAP_DECAP_FLAG_ENCAP_COPY_DSCP)
+  if (PREDICT_FALSE (flags & TUNNEL_ENCAP_DECAP_FLAG_ENCAP_COPY_DSCP))
     ip4_header_set_dscp (outer, ip6_dscp_network_order (inner));
-  if (flags & TUNNEL_ENCAP_DECAP_FLAG_ENCAP_COPY_ECN)
+  if (PREDICT_FALSE (flags & TUNNEL_ENCAP_DECAP_FLAG_ENCAP_COPY_ECN))
     ip4_header_set_ecn (outer, ip6_ecn_network_order ((inner)));
+  if (PREDICT_FALSE (flags & TUNNEL_ENCAP_DECAP_FLAG_ENCAP_COPY_HOP_LIMIT))
+    ip4_header_set_ttl (outer, ip6_hop_limit_network_order (inner));
 }
 
 static_always_inline void
@@ -103,8 +108,8 @@
 				 const ip6_header_t * inner,
 				 ip4_header_t * outer)
 {
-  if (flags & (TUNNEL_ENCAP_DECAP_FLAG_ENCAP_COPY_DSCP |
-	       TUNNEL_ENCAP_DECAP_FLAG_ENCAP_COPY_ECN))
+  if (PREDICT_FALSE (flags & (TUNNEL_ENCAP_DECAP_FLAG_ENCAP_COPY_DSCP |
+			      TUNNEL_ENCAP_DECAP_FLAG_ENCAP_COPY_ECN)))
     {
       ip_csum_t sum = outer->checksum;
       u8 tos = outer->tos;
@@ -124,30 +129,50 @@
 tunnel_encap_fixup_6o6 (tunnel_encap_decap_flags_t flags,
 			const ip6_header_t * inner, ip6_header_t * outer)
 {
-  if (flags & TUNNEL_ENCAP_DECAP_FLAG_ENCAP_COPY_DSCP)
+  if (PREDICT_FALSE (flags & TUNNEL_ENCAP_DECAP_FLAG_ENCAP_COPY_DSCP))
     ip6_set_dscp_network_order (outer, ip6_dscp_network_order (inner));
-  if (flags & TUNNEL_ENCAP_DECAP_FLAG_ENCAP_COPY_ECN)
+  if (PREDICT_FALSE (flags & TUNNEL_ENCAP_DECAP_FLAG_ENCAP_COPY_ECN))
     ip6_set_ecn_network_order (outer, ip6_ecn_network_order (inner));
+  if (PREDICT_FALSE (flags & TUNNEL_ENCAP_DECAP_FLAG_ENCAP_COPY_FLOW_LABEL))
+    ip6_set_flow_label_network_order (outer,
+				      ip6_flow_label_network_order (inner));
+  if (PREDICT_FALSE (flags & TUNNEL_ENCAP_DECAP_FLAG_ENCAP_COPY_HOP_LIMIT))
+    ip6_set_hop_limit_network_order (outer,
+				     ip6_hop_limit_network_order (inner));
 }
 
 static_always_inline void
 tunnel_encap_fixup_4o6 (tunnel_encap_decap_flags_t flags,
-			const ip4_header_t * inner, ip6_header_t * outer)
+			const vlib_buffer_t *b, const ip4_header_t *inner,
+			ip6_header_t *outer)
 {
-  if (flags & TUNNEL_ENCAP_DECAP_FLAG_ENCAP_COPY_DSCP)
+  if (PREDICT_FALSE (flags & TUNNEL_ENCAP_DECAP_FLAG_ENCAP_COPY_DSCP))
     ip6_set_dscp_network_order (outer, ip4_header_get_dscp (inner));
-  if (flags & TUNNEL_ENCAP_DECAP_FLAG_ENCAP_COPY_ECN)
+  if (PREDICT_FALSE (flags & TUNNEL_ENCAP_DECAP_FLAG_ENCAP_COPY_ECN))
     ip6_set_ecn_network_order (outer, ip4_header_get_ecn (inner));
+  if (PREDICT_FALSE (flags & TUNNEL_ENCAP_DECAP_FLAG_ENCAP_COPY_HOP_LIMIT))
+    ip6_set_hop_limit_network_order (outer, ip4_header_get_ttl (inner));
+  if (PREDICT_FALSE (flags & TUNNEL_ENCAP_DECAP_FLAG_ENCAP_COPY_FLOW_LABEL))
+    ip6_set_flow_label_network_order (
+      outer, (0 != vnet_buffer (b)->ip.flow_hash ?
+		vnet_buffer (b)->ip.flow_hash :
+		ip4_compute_flow_hash (inner, IP_FLOW_HASH_DEFAULT)));
 }
 
 static_always_inline void
 tunnel_encap_fixup_mplso6 (tunnel_encap_decap_flags_t flags,
+			   const vlib_buffer_t *b,
 			   const mpls_unicast_header_t *inner,
 			   ip6_header_t *outer)
 {
   if (flags & TUNNEL_ENCAP_DECAP_FLAG_ENCAP_COPY_DSCP)
     ip6_set_dscp_network_order (outer,
 				vnet_mpls_uc_get_exp (inner->label_exp_s_ttl));
+  if (PREDICT_FALSE (flags & TUNNEL_ENCAP_DECAP_FLAG_ENCAP_COPY_FLOW_LABEL))
+    ip6_set_flow_label_network_order (
+      outer, (0 != vnet_buffer (b)->ip.flow_hash ?
+		vnet_buffer (b)->ip.flow_hash :
+		mpls_compute_flow_hash (inner, IP_FLOW_HASH_DEFAULT)));
 }
 
 static_always_inline void
@@ -163,7 +188,7 @@
 tunnel_decap_fixup_4o6 (tunnel_encap_decap_flags_t flags,
 			ip4_header_t * inner, const ip6_header_t * outer)
 {
-  if (flags & TUNNEL_ENCAP_DECAP_FLAG_DECAP_COPY_ECN)
+  if (PREDICT_FALSE (flags & TUNNEL_ENCAP_DECAP_FLAG_DECAP_COPY_ECN))
     ip4_header_set_ecn_w_chksum (inner, ip6_ecn_network_order (outer));
 }
 
@@ -171,7 +196,7 @@
 tunnel_decap_fixup_6o6 (tunnel_encap_decap_flags_t flags,
 			ip6_header_t * inner, const ip6_header_t * outer)
 {
-  if (flags & TUNNEL_ENCAP_DECAP_FLAG_DECAP_COPY_ECN)
+  if (PREDICT_FALSE (flags & TUNNEL_ENCAP_DECAP_FLAG_DECAP_COPY_ECN))
     ip6_set_ecn_network_order (inner, ip6_ecn_network_order (outer));
 }
 
@@ -179,7 +204,7 @@
 tunnel_decap_fixup_6o4 (tunnel_encap_decap_flags_t flags,
 			ip6_header_t * inner, const ip4_header_t * outer)
 {
-  if (flags & TUNNEL_ENCAP_DECAP_FLAG_DECAP_COPY_ECN)
+  if (PREDICT_FALSE (flags & TUNNEL_ENCAP_DECAP_FLAG_DECAP_COPY_ECN))
     ip6_set_ecn_network_order (inner, ip4_header_get_ecn (outer));
 }
 
@@ -187,7 +212,7 @@
 tunnel_decap_fixup_4o4 (tunnel_encap_decap_flags_t flags,
 			ip4_header_t * inner, const ip4_header_t * outer)
 {
-  if (flags & TUNNEL_ENCAP_DECAP_FLAG_DECAP_COPY_ECN)
+  if (PREDICT_FALSE (flags & TUNNEL_ENCAP_DECAP_FLAG_DECAP_COPY_ECN))
     ip4_header_set_ecn_w_chksum (inner, ip4_header_get_ecn (outer));
 }
 
diff --git a/src/vnet/tunnel/tunnel_types.api b/src/vnet/tunnel/tunnel_types.api
index 7ce7099..1426717 100644
--- a/src/vnet/tunnel/tunnel_types.api
+++ b/src/vnet/tunnel/tunnel_types.api
@@ -13,7 +13,10 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-option version = "1.0.0";
+option version = "1.0.1";
+
+import "vnet/interface_types.api";
+import "vnet/ip/ip_types.api";
 
 /**
  * Flags controlling tunnel behaviour
@@ -33,6 +36,10 @@
   TUNNEL_API_ENCAP_DECAP_FLAG_DECAP_COPY_ECN = 0x10,
   /** at encap, compute flow hash on the inner packet for more entropy */
   TUNNEL_API_ENCAP_DECAP_FLAG_ENCAP_INNER_HASH = 0x20 [backwards_compatible],
+  /** at encap, copy the hop-limit/TTL of the payload into the tunnel header */
+  TUNNEL_API_ENCAP_DECAP_FLAG_ENCAP_COPY_HOP_LIMIT = 0x40 [backwards_compatible],
+  /** at encap, copy the Flow Label of the payload into the tunnel header */
+  TUNNEL_API_ENCAP_DECAP_FLAG_ENCAP_COPY_FLOW_LABEL = 0x80 [backwards_compatible],
 };
 
 /**
@@ -46,6 +53,41 @@
   TUNNEL_API_MODE_MP,
 };
 
+/**
+ * tunnel flags
+ */
+enumflag tunnel_flags : u8
+{
+  /**
+   * the tunnel's MTU tracks the MTU of its destination
+   *  Currently NOT IMPLEMENTED (this is an API place-holder)
+   */
+  TUNNEL_API_FLAG_TRACK_MTU = 0x1,
+};
+
+/**
+ * An IP{v4,v6} tunnel.
+ */
+typedef tunnel
+{
+  /** If non-~0, specifies a custom dev instance */
+  u32 instance;
+  /** Tunnel source and destination address */
+  vl_api_address_t src;
+  vl_api_address_t dst;
+  /** ignored on create, set in details/dump */
+  vl_api_interface_index_t sw_if_index;
+  /** The table ID in which the source and destination addresses are valid */
+  u32 table_id;
+  vl_api_tunnel_encap_decap_flags_t encap_decap_flags;
+  vl_api_tunnel_mode_t mode;
+  vl_api_tunnel_flags_t flags;
+  /* DSCP value for the tunnel encap, ignored if ENCAP_COPY_DSCP flag is set */
+  vl_api_ip_dscp_t dscp;
+  /* hop-limit/TTL of the tunnel header; 0 selects the default of 254 */
+  u8 hop_limit;
+};
+
 /*
  * Local Variables:
  * eval: (c-set-style "gnu")
diff --git a/src/vnet/tunnel/tunnel_types_api.c b/src/vnet/tunnel/tunnel_types_api.c
index 1d48178..894eecb 100644
--- a/src/vnet/tunnel/tunnel_types_api.c
+++ b/src/vnet/tunnel/tunnel_types_api.c
@@ -17,6 +17,8 @@
 
 #include <vnet/api_errno.h>
 #include <vnet/tunnel/tunnel_types_api.h>
+#include <vnet/ip/ip_types_api.h>
+#include <vnet/fib/fib_table.h>
 
 #include <vnet/tunnel/tunnel_types.api_enum.h>
 #include <vnet/tunnel/tunnel_types.api_types.h>
@@ -25,12 +27,14 @@
 STATIC_ASSERT (sizeof (vl_api_tunnel_encap_decap_flags_t) ==
 	       sizeof (tunnel_encap_decap_flags_t),
 	       "tunnel API and internal flags enum size differ");
+STATIC_ASSERT (sizeof (vl_api_tunnel_flags_t) == sizeof (tunnel_flags_t),
+	       "tunnel API and internal flags enum size differ");
 
 int
 tunnel_encap_decap_flags_decode (vl_api_tunnel_encap_decap_flags_t f,
 				 tunnel_encap_decap_flags_t * o)
 {
-  if (f & ~TUNNEL_FLAG_MASK)
+  if (f & ~TUNNEL_ENCAP_DECAP_FLAG_MASK)
     /* unknown flags set */
     return (VNET_API_ERROR_INVALID_VALUE_2);
 
@@ -45,6 +49,23 @@
 }
 
 int
+tunnel_flags_decode (vl_api_tunnel_flags_t f, tunnel_flags_t *o)
+{
+  if (f & ~TUNNEL_API_FLAG_TRACK_MTU)
+    /* unknown flags set */
+    return (VNET_API_ERROR_INVALID_VALUE_2);
+  /* API and internal bit values differ: API 0x1 is internal RESOLVED */
+  *o = ((f & TUNNEL_API_FLAG_TRACK_MTU) ? TUNNEL_FLAG_TRACK_MTU : 0);
+  return (0);
+}
+
+vl_api_tunnel_flags_t
+tunnel_flags_encode (tunnel_flags_t f)
+{
+  return ((f & TUNNEL_FLAG_TRACK_MTU) ? TUNNEL_API_FLAG_TRACK_MTU : 0);
+}
+
+int
 tunnel_mode_decode (vl_api_tunnel_mode_t in, tunnel_mode_t * out)
 {
   switch (in)
@@ -78,6 +99,65 @@
   return (out);
 }
 
+int
+tunnel_decode (const vl_api_tunnel_t *in, tunnel_t *out)
+{
+  int rv;
+
+  ip_address_decode2 (&in->src, &out->t_src);
+  ip_address_decode2 (&in->dst, &out->t_dst);
+
+  if (ip_addr_version (&out->t_src) != ip_addr_version (&out->t_dst))
+    return (VNET_API_ERROR_INVALID_PROTOCOL);
+
+  if (0 == ip_address_cmp (&out->t_src, &out->t_dst))
+    return (VNET_API_ERROR_SAME_SRC_DST);
+
+  rv = tunnel_encap_decap_flags_decode (in->encap_decap_flags,
+					&out->t_encap_decap_flags);
+
+  if (rv)
+    return (rv);
+
+  rv = tunnel_mode_decode (in->mode, &out->t_mode);
+
+  if (rv)
+    return (rv);
+
+  rv = tunnel_flags_decode (in->flags, &out->t_flags);
+
+  if (rv)
+    return (rv);
+
+  out->t_table_id = clib_net_to_host_u32 (in->table_id);
+  out->t_fib_index = fib_table_find (
+    ip_address_family_to_fib_proto (ip_addr_version (&out->t_dst)),
+    out->t_table_id);
+
+  if (~0 == out->t_fib_index)
+    return (VNET_API_ERROR_NO_SUCH_FIB);
+
+  out->t_dscp = ip_dscp_decode (in->dscp);
+  out->t_hop_limit = in->hop_limit;
+
+  return (0);
+}
+
+void
+tunnel_encode (const tunnel_t *in, vl_api_tunnel_t *out)
+{
+  ip_address_encode2 (&in->t_src, &out->src);
+  ip_address_encode2 (&in->t_dst, &out->dst);
+
+  out->encap_decap_flags =
+    tunnel_encap_decap_flags_encode (in->t_encap_decap_flags);
+  out->mode = tunnel_mode_encode (in->t_mode);
+  out->flags = tunnel_flags_encode (in->t_flags);
+  out->table_id = clib_host_to_net_u32 (in->t_table_id);
+  out->dscp = ip_dscp_encode (in->t_dscp);
+  out->hop_limit = in->t_hop_limit;
+}
+
 /*
  * fd.io coding-style-patch-verification: ON
  *
diff --git a/src/vnet/tunnel/tunnel_types_api.h b/src/vnet/tunnel/tunnel_types_api.h
index 06439dc..0d32a68 100644
--- a/src/vnet/tunnel/tunnel_types_api.h
+++ b/src/vnet/tunnel/tunnel_types_api.h
@@ -36,6 +36,12 @@
 extern int tunnel_mode_decode (vl_api_tunnel_mode_t in, tunnel_mode_t * out);
 extern vl_api_tunnel_mode_t tunnel_mode_encode (tunnel_mode_t in);
 
+extern int tunnel_flags_decode (vl_api_tunnel_flags_t in, tunnel_flags_t *out);
+extern vl_api_tunnel_flags_t tunnel_flags_encode (tunnel_flags_t in);
+
+extern int tunnel_decode (const vl_api_tunnel_t *in, tunnel_t *out);
+extern void tunnel_encode (const tunnel_t *in, vl_api_tunnel_t *out);
+
 #endif
 
 /*