VPP-1277: IPIP - Copy TOS/TC from inner packet to outer.
Add support for either copying the TOS/TC from the inner packet to the
outer packet, or setting it to a fixed value.
Change-Id: I716a95f875349acec94317b266c8cf9f2f81a785
Signed-off-by: Ole Troan <ot@cisco.com>
diff --git a/src/vnet/ipip/ipip.api b/src/vnet/ipip/ipip.api
index 988eee5..95fc48a 100644
--- a/src/vnet/ipip/ipip.api
+++ b/src/vnet/ipip/ipip.api
@@ -13,17 +13,46 @@
* limitations under the License.
*/
-option version = "1.0.0";
+/**
+ * The IPIP module implements IP{v4,v6} over IP{v4,v6} tunnelling as
+ * described in RFC2473 and to some extent the largely historical
+ * RFC1853. The module also supports an IPv6 over IPv4 automatic
+ * tunnelling mechanism called 6RD (RFC5969).
+ *
+ * The IPIP API module supports a CRD model for adding, deleting and
+ * listing tunnels. A tunnel is represented as an interface in
+ * VPP. The "handle" representing a tunnel is the sw_if_index. As with any
+ * interface, the user must configure an IPv4 and/or IPv6 address on
+ * the interface. This is the inner or payload protocol.
+ *
+ * Tunnel MTU: The tunnel MTU (the payload MTU) is configurable per
+ * protocol. If a tunnel MTU is larger than the path MTU, the outer
+ * packet will be fragmented. Fragmentation support is configurable,
+ * as it can have severe performance issues, and might be used as an
+ * attack vector (the remote side must reassemble.)
+ *
+ * Traffic class / TOS field can either be configured to a fixed
+ * value, or can be copied from the inner to the outer header.
+ * (For now the value ~0, i.e. 0xFF, is reserved to indicate copy.)
+ *
+ * Note:
+ *
+ * - The Tunnel encapsulation limit described in RFC2473 is not
+ * implemented.
+ *
+ * - ICMP proxying is not implemented: ICMP errors received by the
+ * tunnel head-end for the outer packet are currently not relayed
+ * to the original source of the packet.
+ *
+ * - PMTUD / MTU probing and tunnel keepalives are not yet implemented.
+ *
+ */
-/** \brief Create or delete an IPIP tunnel
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param is_ipv6 - Use 0 for IPv4, 1 for IPv6
- @param instance - optional unique custom device instance, else ~0.
- @param src_address - Source IP address
- @param dst_address - Destination IP address, can be multicast
- @param fib_index - Encap FIB table ID
-*/
+option version = "1.1.0";
+
+/**
+ * Create an IP{v4,v6} over IP{v4,v6} tunnel.
+ */
define ipip_add_tunnel
{
u32 client_index;
@@ -33,6 +62,8 @@
u8 src_address[16];
u8 dst_address[16];
u32 fib_index;
+ u8 tc_tos; /* If ~0, the TOS/TC value is copied from
+ inner packet, otherwise set to value */
};
define ipip_add_tunnel_reply
@@ -42,6 +73,9 @@
u32 sw_if_index;
};
+/**
+ * Delete an IP{v4,v6} over IP{v4,v6} tunnel.
+ */
autoreply define ipip_del_tunnel
{
u32 client_index;
@@ -49,6 +83,9 @@
u32 sw_if_index;
};
+/**
+ * Create an IPv6 over IPv4 automatic tunnel (6RD)
+ */
define ipip_6rd_add_tunnel
{
u32 client_index;
@@ -60,6 +97,8 @@
u8 ip6_prefix_len;
u8 ip4_prefix_len;
u8 security_check;
+ u8 tc_tos; /* If ~0, the TOS/TC value is copied from
+ inner packet, otherwise set to value */
};
define ipip_6rd_add_tunnel_reply
@@ -69,6 +108,9 @@
u32 sw_if_index;
};
+/**
+ * Delete an IPv6 over IPv4 automatic tunnel (6RD)
+ */
autoreply define ipip_6rd_del_tunnel
{
u32 client_index;
@@ -76,6 +118,9 @@
u32 sw_if_index;
};
+/**
+ * List all IPIP tunnels
+ */
define ipip_tunnel_dump
{
u32 client_index;
@@ -92,6 +137,7 @@
u8 src_address[16];
u8 dst_address[16];
u32 fib_index;
+ u8 tc_tos;
};
/*
diff --git a/src/vnet/ipip/ipip.c b/src/vnet/ipip/ipip.c
index 82c961c..c39d27d 100644
--- a/src/vnet/ipip/ipip.c
+++ b/src/vnet/ipip/ipip.c
@@ -74,6 +74,8 @@
ip4->src_address.as_u32 = t->tunnel_src.ip4.as_u32;
ip4->dst_address.as_u32 = t->tunnel_dst.ip4.as_u32;
ip4->checksum = ip4_header_checksum (ip4);
+ if (t->tc_tos != 0xFF)
+ ip4->tos = t->tc_tos;
break;
case IPIP_TRANSPORT_IP6:
@@ -81,6 +83,8 @@
ip6 = (ip6_header_t *) rewrite;
ip6->ip_version_traffic_class_and_flow_label =
clib_host_to_net_u32 (6 << 28);
+ if (t->tc_tos != 0xFF)
+ ip6_set_traffic_class_network_order (ip6, t->tc_tos);
ip6->hop_limit = 64;
/* fixup ip6 header length and protocol after-the-fact */
ip6->src_address.as_u64[0] = t->tunnel_src.ip6.as_u64[0];
@@ -88,6 +92,7 @@
ip6->dst_address.as_u64[0] = t->tunnel_dst.ip6.as_u64[0];
ip6->dst_address.as_u64[1] = t->tunnel_dst.ip6.as_u64[1];
break;
+
default:
/* pass through */
;
@@ -100,11 +105,29 @@
const void *data)
{
ip4_header_t *ip4;
+ const ipip_tunnel_t *t = data;
ip4 = vlib_buffer_get_current (b);
ip4->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b));
- ip4->protocol =
- adj->ia_link == VNET_LINK_IP6 ? IP_PROTOCOL_IPV6 : IP_PROTOCOL_IP_IN_IP;
+ switch (adj->ia_link)
+ {
+ case VNET_LINK_IP6:
+ ip4->protocol = IP_PROTOCOL_IPV6;
+ if (t->tc_tos == 0xFF)
+ ip4->tos =
+ ip6_traffic_class_network_order ((const ip6_header_t *) (ip4 + 1));
+ break;
+
+ case VNET_LINK_IP4:
+ ip4->protocol = IP_PROTOCOL_IP_IN_IP;
+ if (t->tc_tos == 0xFF)
+ ip4->tos = ((ip4_header_t *) (ip4 + 1))->tos;
+ break;
+
+ default:
+ break;
+ }
+
ip4->checksum = ip4_header_checksum (ip4);
}
@@ -113,13 +136,32 @@
const void *data)
{
ip6_header_t *ip6;
+ const ipip_tunnel_t *t = data;
ip6 = vlib_buffer_get_current (b);
ip6->payload_length =
clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b) -
sizeof (*ip6));
- ip6->protocol =
- adj->ia_link == VNET_LINK_IP6 ? IP_PROTOCOL_IPV6 : IP_PROTOCOL_IP_IN_IP;
+ switch (adj->ia_link)
+ {
+ case VNET_LINK_IP6:
+ ip6->protocol = IP_PROTOCOL_IPV6;
+ if (t->tc_tos == 0xFF)
+ ip6_set_traffic_class_network_order (ip6,
+ ip6_traffic_class_network_order ((const ip6_header_t *) (ip6 + 1)));
+ break;
+
+ case VNET_LINK_IP4:
+ ip6->protocol = IP_PROTOCOL_IP_IN_IP;
+ if (t->tc_tos == 0xFF)
+ ip6_set_traffic_class_network_order (ip6,
+ ((ip4_header_t *) (ip6 +
+ 1))->tos);
+ break;
+
+ default:
+ break;
+ }
}
static void
@@ -216,7 +258,7 @@
f = t->transport == IPIP_TRANSPORT_IP6 ? ipip6_fixup : ipip4_fixup;
- adj_nbr_midchain_update_rewrite (ai, f, NULL,
+ adj_nbr_midchain_update_rewrite (ai, f, t,
(VNET_LINK_ETHERNET ==
adj_get_link_type (ai) ?
ADJ_FLAG_MIDCHAIN_NO_COUNT :
@@ -420,7 +462,7 @@
int
ipip_add_tunnel (ipip_transport_t transport,
u32 instance, ip46_address_t * src, ip46_address_t * dst,
- u32 fib_index, u32 * sw_if_indexp)
+ u32 fib_index, u8 tc_tos, u32 * sw_if_indexp)
{
ipip_main_t *gm = &ipip_main;
vnet_main_t *vnm = gm->vnet_main;
@@ -467,6 +509,7 @@
t->hw_if_index = hw_if_index;
t->fib_index = fib_index;
t->sw_if_index = sw_if_index;
+ t->tc_tos = tc_tos;
t->transport = transport;
vec_validate_init_empty (gm->tunnel_index_by_sw_if_index, sw_if_index, ~0);
diff --git a/src/vnet/ipip/ipip.h b/src/vnet/ipip/ipip.h
index f52094f..6afb188 100644
--- a/src/vnet/ipip/ipip.h
+++ b/src/vnet/ipip/ipip.h
@@ -81,6 +81,7 @@
u32 sw_if_index;
u32 dev_instance; /* Real device instance in tunnel vector */
u32 user_instance; /* Instance name being shown to user */
+ u8 tc_tos;
union
{
@@ -148,7 +149,7 @@
int ipip_add_tunnel (ipip_transport_t transport, u32 instance,
ip46_address_t * src, ip46_address_t * dst,
- u32 fib_index, u32 * sw_if_indexp);
+ u32 fib_index, u8 tc_tos, u32 * sw_if_indexp);
int ipip_del_tunnel (u32 sw_if_index);
int sixrd_add_tunnel (ip6_address_t * ip6_prefix, u8 ip6_prefix_len,
ip4_address_t * ip4_prefix, u8 ip4_prefix_len,
diff --git a/src/vnet/ipip/ipip_api.c b/src/vnet/ipip/ipip_api.c
index a663329..455792b 100644
--- a/src/vnet/ipip/ipip_api.c
+++ b/src/vnet/ipip/ipip_api.c
@@ -69,7 +69,7 @@
rv = ipip_add_tunnel (mp->is_ipv6 ? IPIP_TRANSPORT_IP6 : IPIP_TRANSPORT_IP4,
ntohl (mp->instance), &src, &dst,
- ntohl (mp->fib_index), &sw_if_index);
+ ntohl (mp->fib_index), mp->tc_tos, &sw_if_index);
/* *INDENT-OFF* */
REPLY_MACRO2(VL_API_IPIP_ADD_TUNNEL_REPLY,
diff --git a/src/vnet/ipip/ipip_cli.c b/src/vnet/ipip/ipip_cli.c
index 45e6451..7a68c20 100644
--- a/src/vnet/ipip/ipip_cli.c
+++ b/src/vnet/ipip/ipip_cli.c
@@ -72,6 +72,7 @@
&src,
&dst,
fib_index,
+ 0,
&sw_if_index);
switch (rv) {