ipsec: enable UDP encap for IPv6 ESP tun protect

Type: improvement

If an SA protecting an IPv6 tunnel interface has UDP encapsulation
enabled, the code in esp_encrypt_inline() inserts a UDP header but does
not set the next protocol or the UDP payload length, so the peer that
receives the packet drops it. Set the next protocol field and the UDP
payload length correctly.

The port(s) for UDP encapsulation of IPsec were not registered for IPv6.
Add this registration for IPv6 SAs when UDP encapsulation is enabled.

Add punt handling for IPv6 IKE on the NAT-T port.
Add registration of linux-cp for the new punt reason.
Add unit tests for IPv6 ESP with UDP encapsulation on tun protect.

Signed-off-by: Matthew Smith <mgsmith@netgate.com>
Change-Id: Ibb28e423ab8c7bcea2c1964782a788a0f4da5268
diff --git a/src/vnet/ipsec/esp_encrypt.c b/src/vnet/ipsec/esp_encrypt.c
index d28f4f5..4ed3bf7 100644
--- a/src/vnet/ipsec/esp_encrypt.c
+++ b/src/vnet/ipsec/esp_encrypt.c
@@ -887,42 +887,40 @@
 	  else
 	    l2_len = 0;
 
+	  u16 len;
+	  len = payload_len_total + hdr_len - l2_len;
+
 	  if (VNET_LINK_IP6 == lt)
 	    {
 	      ip6_header_t *ip6 = (ip6_header_t *) (old_ip_hdr);
 	      if (PREDICT_TRUE (NULL == ext_hdr))
 		{
 		  *next_hdr_ptr = ip6->protocol;
-		  ip6->protocol = IP_PROTOCOL_IPSEC_ESP;
+		  ip6->protocol =
+		    (udp) ? IP_PROTOCOL_UDP : IP_PROTOCOL_IPSEC_ESP;
 		}
 	      else
 		{
 		  *next_hdr_ptr = ext_hdr->next_hdr;
-		  ext_hdr->next_hdr = IP_PROTOCOL_IPSEC_ESP;
+		  ext_hdr->next_hdr =
+		    (udp) ? IP_PROTOCOL_UDP : IP_PROTOCOL_IPSEC_ESP;
 		}
 	      ip6->payload_length =
-		clib_host_to_net_u16 (payload_len_total + hdr_len - l2_len -
-				      sizeof (ip6_header_t));
+		clib_host_to_net_u16 (len - sizeof (ip6_header_t));
 	    }
 	  else if (VNET_LINK_IP4 == lt)
 	    {
-	      u16 len;
 	      ip4_header_t *ip4 = (ip4_header_t *) (old_ip_hdr);
 	      *next_hdr_ptr = ip4->protocol;
-	      len = payload_len_total + hdr_len - l2_len;
-	      if (udp)
-		{
-		  esp_update_ip4_hdr (ip4, len, /* is_transport */ 1, 1);
-		  udp_len = len - ip_len;
-		}
-	      else
-		esp_update_ip4_hdr (ip4, len, /* is_transport */ 1, 0);
+	      esp_update_ip4_hdr (ip4, len, /* is_transport */ 1,
+				  (udp != NULL));
 	    }
 
 	  clib_memcpy_le64 (ip_hdr, old_ip_hdr, ip_len);
 
 	  if (udp)
 	    {
+	      udp_len = len - ip_len;
 	      esp_fill_udp_hdr (sa0, udp, udp_len);
 	    }
 
diff --git a/src/vnet/ipsec/ipsec.c b/src/vnet/ipsec/ipsec.c
index 3c22fbb..2dd077a 100644
--- a/src/vnet/ipsec/ipsec.c
+++ b/src/vnet/ipsec/ipsec.c
@@ -182,14 +182,24 @@
   *out_next_index = vlib_node_add_next (vm, prev_node->index, node->index);
 }
 
+/* Combine port and address family into one udp_port_registrations key. */
+static inline uword
+ipsec_udp_registration_key (u16 port, u8 is_ip4)
+{
+  uword key = (is_ip4) ? AF_IP4 : AF_IP6;
+
+  return key | ((uword) port << 16); /* widen before shifting */
+}
+
 void
-ipsec_unregister_udp_port (u16 port)
+ipsec_unregister_udp_port (u16 port, u8 is_ip4)
 {
   ipsec_main_t *im = &ipsec_main;
   u32 n_regs;
-  uword *p;
+  uword *p, key;
 
-  p = hash_get (im->udp_port_registrations, port);
+  key = ipsec_udp_registration_key (port, is_ip4);
+  p = hash_get (im->udp_port_registrations, key);
 
   ASSERT (p);
 
@@ -197,33 +207,35 @@
 
   if (0 == --n_regs)
     {
-      udp_unregister_dst_port (vlib_get_main (), port, 1);
-      hash_unset (im->udp_port_registrations, port);
+      udp_unregister_dst_port (vlib_get_main (), port, is_ip4);
+      hash_unset (im->udp_port_registrations, key);
     }
   else
     {
-      hash_unset (im->udp_port_registrations, port);
-      hash_set (im->udp_port_registrations, port, n_regs);
+      hash_unset (im->udp_port_registrations, key);
+      hash_set (im->udp_port_registrations, key, n_regs);
     }
 }
 
 void
-ipsec_register_udp_port (u16 port)
+ipsec_register_udp_port (u16 port, u8 is_ip4)
 {
   ipsec_main_t *im = &ipsec_main;
-  u32 n_regs;
-  uword *p;
+  u32 n_regs, node_index;
+  uword *p, key;
 
-  p = hash_get (im->udp_port_registrations, port);
+  key = ipsec_udp_registration_key (port, is_ip4);
+  node_index =
+    (is_ip4) ? ipsec4_tun_input_node.index : ipsec6_tun_input_node.index;
+  p = hash_get (im->udp_port_registrations, key);
 
   n_regs = (p ? p[0] : 0);
 
   if (0 == n_regs++)
-    udp_register_dst_port (vlib_get_main (), port,
-			   ipsec4_tun_input_node.index, 1);
+    udp_register_dst_port (vlib_get_main (), port, node_index, is_ip4);
 
-  hash_unset (im->udp_port_registrations, port);
-  hash_set (im->udp_port_registrations, port, n_regs);
+  hash_unset (im->udp_port_registrations, key);
+  hash_set (im->udp_port_registrations, key, n_regs);
 }
 
 u32
diff --git a/src/vnet/ipsec/ipsec.h b/src/vnet/ipsec/ipsec.h
index fc7b6cd..06bb299 100644
--- a/src/vnet/ipsec/ipsec.h
+++ b/src/vnet/ipsec/ipsec.h
@@ -364,8 +364,8 @@
 clib_error_t *ipsec_rsc_in_use (ipsec_main_t * im);
 void ipsec_set_async_mode (u32 is_enabled);
 
-extern void ipsec_register_udp_port (u16 udp_port);
-extern void ipsec_unregister_udp_port (u16 udp_port);
+extern void ipsec_register_udp_port (u16 udp_port, u8 is_ip4);
+extern void ipsec_unregister_udp_port (u16 udp_port, u8 is_ip4);
 
 extern clib_error_t *ipsec_register_next_header (vlib_main_t *vm,
 						 u8 next_header,
diff --git a/src/vnet/ipsec/ipsec_punt.h b/src/vnet/ipsec/ipsec_punt.h
index afed908..9b9fc80 100644
--- a/src/vnet/ipsec/ipsec_punt.h
+++ b/src/vnet/ipsec/ipsec_punt.h
@@ -20,7 +20,8 @@
 #define foreach_ipsec_punt_reason                                             \
   _ (IP4_SPI_UDP_0, "ipsec4-spi-o-udp-0", IP4_PACKET)                         \
   _ (IP4_NO_SUCH_TUNNEL, "ipsec4-no-such-tunnel", IP4_PACKET)                 \
-  _ (IP6_NO_SUCH_TUNNEL, "ipsec6-no-such-tunnel", IP6_PACKET)
+  _ (IP6_NO_SUCH_TUNNEL, "ipsec6-no-such-tunnel", IP6_PACKET)                 \
+  _ (IP6_SPI_UDP_0, "ipsec6-spi-o-udp-0", IP6_PACKET)
 
 typedef enum ipsec_punt_reason_t_
 {
diff --git a/src/vnet/ipsec/ipsec_sa.c b/src/vnet/ipsec/ipsec_sa.c
index 387d8a7..5c80545 100644
--- a/src/vnet/ipsec/ipsec_sa.c
+++ b/src/vnet/ipsec/ipsec_sa.c
@@ -325,7 +325,8 @@
 	sa->udp_hdr.src_port = clib_host_to_net_u16 (src_port);
 
       if (ipsec_sa_is_set_IS_INBOUND (sa))
-	ipsec_register_udp_port (clib_host_to_net_u16 (sa->udp_hdr.dst_port));
+	ipsec_register_udp_port (clib_host_to_net_u16 (sa->udp_hdr.dst_port),
+				 !ipsec_sa_is_set_IS_TUNNEL_V6 (sa));
     }
 
   hash_set (im->sa_index_by_sa_id, sa->id, sa_index);
@@ -353,7 +354,8 @@
   if (ipsec_sa_is_set_IS_ASYNC (sa))
     vnet_crypto_request_async_mode (0);
   if (ipsec_sa_is_set_UDP_ENCAP (sa) && ipsec_sa_is_set_IS_INBOUND (sa))
-    ipsec_unregister_udp_port (clib_net_to_host_u16 (sa->udp_hdr.dst_port));
+    ipsec_unregister_udp_port (clib_net_to_host_u16 (sa->udp_hdr.dst_port),
+			       !ipsec_sa_is_set_IS_TUNNEL_V6 (sa));
 
   if (ipsec_sa_is_set_IS_TUNNEL (sa) && !ipsec_sa_is_set_IS_INBOUND (sa))
     dpo_reset (&sa->dpo);
diff --git a/src/vnet/ipsec/ipsec_tun.c b/src/vnet/ipsec/ipsec_tun.c
index 543be8a..82f5a11 100644
--- a/src/vnet/ipsec/ipsec_tun.c
+++ b/src/vnet/ipsec/ipsec_tun.c
@@ -101,14 +101,12 @@
   if (0 == ipsec_tun_node_regs[af]++)
     {
       if (AF_IP4 == af)
-	{
-	  ipsec_register_udp_port (UDP_DST_PORT_ipsec);
-	  ip4_register_protocol (IP_PROTOCOL_IPSEC_ESP,
-				 ipsec4_tun_input_node.index);
-	}
+	ip4_register_protocol (IP_PROTOCOL_IPSEC_ESP,
+			       ipsec4_tun_input_node.index);
       else
 	ip6_register_protocol (IP_PROTOCOL_IPSEC_ESP,
 			       ipsec6_tun_input_node.index);
+      ipsec_register_udp_port (UDP_DST_PORT_ipsec, (AF_IP4 == af));
     }
 }
 
@@ -119,12 +117,10 @@
   if (0 == --ipsec_tun_node_regs[af])
     {
       if (AF_IP4 == af)
-	{
-	  ipsec_unregister_udp_port (UDP_DST_PORT_ipsec);
-	  ip4_unregister_protocol (IP_PROTOCOL_IPSEC_ESP);
-	}
+	ip4_unregister_protocol (IP_PROTOCOL_IPSEC_ESP);
       else
 	ip6_unregister_protocol (IP_PROTOCOL_IPSEC_ESP);
+      ipsec_unregister_udp_port (UDP_DST_PORT_ipsec, (AF_IP4 == af));
     }
 }
 
diff --git a/src/vnet/ipsec/ipsec_tun_in.c b/src/vnet/ipsec/ipsec_tun_in.c
index 8e97fbc..eec0362 100644
--- a/src/vnet/ipsec/ipsec_tun_in.c
+++ b/src/vnet/ipsec/ipsec_tun_in.c
@@ -86,11 +86,21 @@
 }
 
 always_inline u16
-ipsec_ip6_if_no_tunnel (vlib_node_runtime_t * node,
-			vlib_buffer_t * b, const esp_header_t * esp)
+ipsec_ip6_if_no_tunnel (vlib_node_runtime_t *node, vlib_buffer_t *b,
+			const esp_header_t *esp, const ip6_header_t *ip6)
 {
-  b->error = node->errors[IPSEC_TUN_ERROR_NO_TUNNEL];
-  b->punt_reason = ipsec_punt_reason[IPSEC_PUNT_IP6_NO_SUCH_TUNNEL];
+  if (PREDICT_FALSE (0 == esp->spi))
+    {
+      b->error = node->errors[IPSEC_TUN_ERROR_SPI_0];
+      b->punt_reason = ipsec_punt_reason[(ip6->protocol == IP_PROTOCOL_UDP ?
+						  IPSEC_PUNT_IP6_SPI_UDP_0 :
+						  IPSEC_PUNT_IP6_NO_SUCH_TUNNEL)];
+    }
+  else
+    {
+      b->error = node->errors[IPSEC_TUN_ERROR_NO_TUNNEL];
+      b->punt_reason = ipsec_punt_reason[IPSEC_PUNT_IP6_NO_SUCH_TUNNEL];
+    }
 
   return VNET_DEVICE_INPUT_NEXT_PUNT;
 }
@@ -164,8 +174,35 @@
       if (is_ip6)
 	{
 	  ip60 = (ip6_header_t *) ip40;
-	  esp0 = (esp_header_t *) (ip60 + 1);
-	  buf_rewind0 = hdr_sz0 = sizeof (ip6_header_t);
+	  if (ip60->protocol == IP_PROTOCOL_UDP)
+	    {
+	      /* NAT UDP port 4500 case, don't advance any more */
+	      esp0 = (esp_header_t *) ((u8 *) ip60 + sizeof (ip6_header_t) +
+				       sizeof (udp_header_t));
+	      hdr_sz0 = 0;
+	      buf_rewind0 = sizeof (ip6_header_t) + sizeof (udp_header_t);
+
+	      const udp_header_t *udp0 =
+		(udp_header_t *) ((u8 *) ip60 + sizeof (ip6_header_t));
+
+	      /* length 9 = sizeof(udp_header) + 1 byte of special SPI */
+	      if (clib_net_to_host_u16 (udp0->length) == 9 &&
+		  esp0->spi_bytes[0] == 0xff)
+		{
+		  b[0]->error = node->errors[IPSEC_TUN_ERROR_NAT_KEEPALIVE];
+
+		  next[0] = VNET_DEVICE_INPUT_NEXT_IP6_DROP;
+		  len0 = 0;
+
+		  vlib_buffer_advance (b[0], -buf_rewind0);
+		  goto trace00;
+		}
+	    }
+	  else
+	    {
+	      esp0 = (esp_header_t *) (ip60 + 1);
+	      buf_rewind0 = hdr_sz0 = sizeof (ip6_header_t);
+	    }
 	}
       else
 	{
@@ -240,7 +277,7 @@
 		}
 	      else
 		{
-		  next[0] = ipsec_ip6_if_no_tunnel (node, b[0], esp0);
+		  next[0] = ipsec_ip6_if_no_tunnel (node, b[0], esp0, ip60);
 		  n_no_tunnel++;
 		  goto trace00;
 		}