ipsec: support UDP encap/decap for NAT traversal

Change-Id: I65c12617ad49e4d5ef242e53988782f0cefa5684
Signed-off-by: Klement Sekera <ksekera@cisco.com>
diff --git a/src/vnet/ipsec/esp.h b/src/vnet/ipsec/esp.h
index 8f7f8fa..73d4326 100644
--- a/src/vnet/ipsec/esp.h
+++ b/src/vnet/ipsec/esp.h
@@ -44,6 +44,14 @@
 
 /* *INDENT-OFF* */
 typedef CLIB_PACKED (struct {
+  ip4_header_t ip4;	/* IPv4 header */
+  udp_header_t udp;	/* UDP header placed between the IPv4 and ESP headers */
+  esp_header_t esp;	/* ESP header, immediately after the UDP header */
+}) ip4_and_udp_and_esp_header_t;	/* header layout: IPv4, then UDP, then ESP */
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct {
   ip6_header_t ip6;
   esp_header_t esp;
 }) ip6_and_esp_header_t;
diff --git a/src/vnet/ipsec/esp_encrypt.c b/src/vnet/ipsec/esp_encrypt.c
index 44a0451..0ce5e54 100644
--- a/src/vnet/ipsec/esp_encrypt.c
+++ b/src/vnet/ipsec/esp_encrypt.c
@@ -18,6 +18,7 @@
 #include <vnet/vnet.h>
 #include <vnet/api_errno.h>
 #include <vnet/ip/ip.h>
+#include <vnet/udp/udp.h>
 
 #include <vnet/ipsec/ipsec.h>
 #include <vnet/ipsec/esp.h>
@@ -65,6 +66,7 @@
 {
   u32 spi;
   u32 seq;
+  u8 udp_encap;
   ipsec_crypto_alg_t crypto_alg;
   ipsec_integ_alg_t integ_alg;
 } esp_encrypt_trace_t;
@@ -77,10 +79,11 @@
   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
   esp_encrypt_trace_t *t = va_arg (*args, esp_encrypt_trace_t *);
 
-  s = format (s, "esp: spi %u seq %u crypto %U integrity %U",
+  s = format (s, "esp: spi %u seq %u crypto %U integrity %U%s",
 	      t->spi, t->seq,
 	      format_ipsec_crypto_alg, t->crypto_alg,
-	      format_ipsec_integ_alg, t->integ_alg);
+	      format_ipsec_integ_alg, t->integ_alg,
+	      t->udp_encap ? " udp-encap-enabled" : "");
   return s;
 }
 
@@ -155,13 +158,14 @@
 	  vlib_buffer_t *i_b0, *o_b0 = 0;
 	  u32 sa_index0;
 	  ipsec_sa_t *sa0;
-	  ip4_and_esp_header_t *ih0, *oh0 = 0;
+	  ip4_and_esp_header_t *oh0 = 0;
 	  ip6_and_esp_header_t *ih6_0, *oh6_0 = 0;
+	  ip4_and_udp_and_esp_header_t *iuh0, *ouh0 = 0;
 	  uword last_empty_buffer;
 	  esp_header_t *o_esp0;
 	  esp_footer_t *f0;
 	  u8 is_ipv6;
-	  u8 ip_hdr_size;
+	  u8 ip_udp_hdr_size;
 	  u8 next_hdr_type;
 	  u32 ip_proto = 0;
 	  u8 transport_mode = 0;
@@ -198,7 +202,7 @@
 	  o_b0 = vlib_get_buffer (vm, o_bi0);
 	  o_b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID;
 	  o_b0->current_data = sizeof (ethernet_header_t);
-	  ih0 = vlib_buffer_get_current (i_b0);
+	  iuh0 = vlib_buffer_get_current (i_b0);
 	  vlib_prefetch_buffer_with_index (vm,
 					   empty_buffers[last_empty_buffer -
 							 1], STORE);
@@ -211,18 +215,18 @@
 
 	  /* is ipv6 */
 	  if (PREDICT_FALSE
-	      ((ih0->ip4.ip_version_and_header_length & 0xF0) == 0x60))
+	      ((iuh0->ip4.ip_version_and_header_length & 0xF0) == 0x60))
 	    {
 	      is_ipv6 = 1;
 	      ih6_0 = vlib_buffer_get_current (i_b0);
-	      ip_hdr_size = sizeof (ip6_header_t);
 	      next_hdr_type = IP_PROTOCOL_IPV6;
 	      oh6_0 = vlib_buffer_get_current (o_b0);
-	      o_esp0 = vlib_buffer_get_current (o_b0) + sizeof (ip6_header_t);
 
 	      oh6_0->ip6.ip_version_traffic_class_and_flow_label =
 		ih6_0->ip6.ip_version_traffic_class_and_flow_label;
 	      oh6_0->ip6.protocol = IP_PROTOCOL_IPSEC_ESP;
+	      ip_udp_hdr_size = sizeof (ip6_header_t);
+	      o_esp0 = vlib_buffer_get_current (o_b0) + ip_udp_hdr_size;
 	      oh6_0->ip6.hop_limit = 254;
 	      oh6_0->ip6.src_address.as_u64[0] =
 		ih6_0->ip6.src_address.as_u64[0];
@@ -232,8 +236,8 @@
 		ih6_0->ip6.dst_address.as_u64[0];
 	      oh6_0->ip6.dst_address.as_u64[1] =
 		ih6_0->ip6.dst_address.as_u64[1];
-	      oh6_0->esp.spi = clib_net_to_host_u32 (sa0->spi);
-	      oh6_0->esp.seq = clib_net_to_host_u32 (sa0->seq);
+	      o_esp0->spi = clib_net_to_host_u32 (sa0->spi);
+	      o_esp0->seq = clib_net_to_host_u32 (sa0->seq);
 	      ip_proto = ih6_0->ip6.protocol;
 
 	      next0 = ESP_ENCRYPT_NEXT_IP6_LOOKUP;
@@ -241,22 +245,37 @@
 	  else
 	    {
 	      is_ipv6 = 0;
-	      ip_hdr_size = sizeof (ip4_header_t);
 	      next_hdr_type = IP_PROTOCOL_IP_IN_IP;
 	      oh0 = vlib_buffer_get_current (o_b0);
-	      o_esp0 = vlib_buffer_get_current (o_b0) + sizeof (ip4_header_t);
+	      ouh0 = vlib_buffer_get_current (o_b0);
 
 	      oh0->ip4.ip_version_and_header_length = 0x45;
-	      oh0->ip4.tos = ih0->ip4.tos;
+	      oh0->ip4.tos = iuh0->ip4.tos;
 	      oh0->ip4.fragment_id = 0;
 	      oh0->ip4.flags_and_fragment_offset = 0;
 	      oh0->ip4.ttl = 254;
-	      oh0->ip4.protocol = IP_PROTOCOL_IPSEC_ESP;
-	      oh0->ip4.src_address.as_u32 = ih0->ip4.src_address.as_u32;
-	      oh0->ip4.dst_address.as_u32 = ih0->ip4.dst_address.as_u32;
-	      oh0->esp.spi = clib_net_to_host_u32 (sa0->spi);
-	      oh0->esp.seq = clib_net_to_host_u32 (sa0->seq);
-	      ip_proto = ih0->ip4.protocol;
+	      if (sa0->udp_encap)
+		{
+		  ouh0->udp.src_port =
+		    clib_host_to_net_u16 (UDP_DST_PORT_ipsec);
+		  ouh0->udp.dst_port =
+		    clib_host_to_net_u16 (UDP_DST_PORT_ipsec);
+		  ouh0->udp.checksum = 0;
+		  ouh0->ip4.protocol = IP_PROTOCOL_UDP;
+		  ip_udp_hdr_size =
+		    sizeof (udp_header_t) + sizeof (ip4_header_t);
+		}
+	      else
+		{
+		  oh0->ip4.protocol = IP_PROTOCOL_IPSEC_ESP;
+		  ip_udp_hdr_size = sizeof (ip4_header_t);
+		}
+	      o_esp0 = vlib_buffer_get_current (o_b0) + ip_udp_hdr_size;
+	      oh0->ip4.src_address.as_u32 = iuh0->ip4.src_address.as_u32;
+	      oh0->ip4.dst_address.as_u32 = iuh0->ip4.dst_address.as_u32;
+	      o_esp0->spi = clib_net_to_host_u32 (sa0->spi);
+	      o_esp0->seq = clib_net_to_host_u32 (sa0->seq);
+	      ip_proto = iuh0->ip4.protocol;
 
 	      next0 = ESP_ENCRYPT_NEXT_IP4_LOOKUP;
 	    }
@@ -299,7 +318,7 @@
 		  vnet_buffer (o_b0)->sw_if_index[VLIB_TX] =
 		    vnet_buffer (i_b0)->sw_if_index[VLIB_TX];
 		}
-	      vlib_buffer_advance (i_b0, ip_hdr_size);
+	      vlib_buffer_advance (i_b0, is_ipv6 ? sizeof (ip6_header_t) : sizeof (ip4_header_t));	/* input carries no UDP encap header: skip its IP header only */
 	    }
 
 	  ASSERT (sa0->crypto_alg < IPSEC_CRYPTO_N_ALG);
@@ -327,7 +346,7 @@
 	      f0->pad_length = pad_bytes;
 	      f0->next_header = next_hdr_type;
 
-	      o_b0->current_length = ip_hdr_size + sizeof (esp_header_t) +
+	      o_b0->current_length = ip_udp_hdr_size + sizeof (esp_header_t) +
 		BLOCK_SIZE * blocks + IV_SIZE;
 
 	      vnet_buffer (o_b0)->sw_if_index[VLIB_RX] =
@@ -338,14 +357,14 @@
 	      RAND_bytes (iv, sizeof (iv));
 
 	      clib_memcpy ((u8 *) vlib_buffer_get_current (o_b0) +
-			   ip_hdr_size + sizeof (esp_header_t), iv,
+			   ip_udp_hdr_size + sizeof (esp_header_t), iv,
 			   em->ipsec_proto_main_crypto_algs[sa0->
 							    crypto_alg].iv_size);
 
 	      esp_encrypt_cbc (sa0->crypto_alg,
 			       (u8 *) vlib_buffer_get_current (i_b0),
 			       (u8 *) vlib_buffer_get_current (o_b0) +
-			       ip_hdr_size + sizeof (esp_header_t) +
+			       ip_udp_hdr_size + sizeof (esp_header_t) +
 			       IV_SIZE, BLOCK_SIZE * blocks,
 			       sa0->crypto_key, iv);
 	    }
@@ -354,7 +373,7 @@
 					     sa0->integ_key_len,
 					     (u8 *) o_esp0,
 					     o_b0->current_length -
-					     ip_hdr_size,
+					     ip_udp_hdr_size,
 					     vlib_buffer_get_current (o_b0) +
 					     o_b0->current_length,
 					     sa0->use_esn, sa0->seq_hi);
@@ -371,6 +390,12 @@
 	      oh0->ip4.length =
 		clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, o_b0));
 	      oh0->ip4.checksum = ip4_header_checksum (&oh0->ip4);
+	      if (sa0->udp_encap)
+		{
+		  /* ip4.length is in network order; swap before subtracting */
+		  ouh0->udp.length = clib_host_to_net_u16 (clib_net_to_host_u16
+		    (oh0->ip4.length) - ip4_header_bytes (&oh0->ip4));
+		}
 	    }
 
 	  if (transport_mode)
@@ -387,6 +412,7 @@
 		    vlib_add_trace (vm, node, o_b0, sizeof (*tr));
 		  tr->spi = sa0->spi;
 		  tr->seq = sa0->seq - 1;
+		  tr->udp_encap = sa0->udp_encap;
 		  tr->crypto_alg = sa0->crypto_alg;
 		  tr->integ_alg = sa0->integ_alg;
 		}
diff --git a/src/vnet/ipsec/ipsec.api b/src/vnet/ipsec/ipsec.api
index 5b8b04d..0765839 100644
--- a/src/vnet/ipsec/ipsec.api
+++ b/src/vnet/ipsec/ipsec.api
@@ -130,6 +130,7 @@
     @param is_tunnel_ipv6 - IPsec tunnel mode is IPv6 if non-zero, else IPv4 tunnel only valid if is_tunnel is non-zero
     @param tunnel_src_address - IPsec tunnel source address IPv6 if is_tunnel_ipv6 is non-zero, else IPv4. Only valid if is_tunnel is non-zero
     @param tunnel_dst_address - IPsec tunnel destination address IPv6 if is_tunnel_ipv6 is non-zero, else IPv4. Only valid if is_tunnel is non-zero
+    @param udp_encap - enable UDP encapsulation for NAT traversal
 
     To be added:
      Anti-replay
@@ -163,6 +164,7 @@
   u8 is_tunnel_ipv6;
   u8 tunnel_src_address[16];
   u8 tunnel_dst_address[16];
+  u8 udp_encap;
 };
 
 /** \brief IPsec: Update Security Association keys
@@ -587,6 +589,7 @@
     @param last_seq_hi - high 32 bits of highest ESN received inbound
     @param replay_window - bit map of seq nums received relative to last_seq if using anti-replay
     @param total_data_size - total bytes sent or received
+    @param udp_encap - 1 if UDP encap enabled, 0 otherwise
 */
 define ipsec_sa_details {
   u32 context;
@@ -618,6 +621,7 @@
   u64 replay_window;
 
   u64 total_data_size;
+  u8 udp_encap;
 };
 
 /** \brief Set key on IPsec interface
diff --git a/src/vnet/ipsec/ipsec.c b/src/vnet/ipsec/ipsec.c
index 4894022..928cafd 100644
--- a/src/vnet/ipsec/ipsec.c
+++ b/src/vnet/ipsec/ipsec.c
@@ -19,12 +19,14 @@
 #include <vnet/api_errno.h>
 #include <vnet/ip/ip.h>
 #include <vnet/interface.h>
+#include <vnet/udp/udp.h>
 
 #include <vnet/ipsec/ipsec.h>
 #include <vnet/ipsec/ikev2.h>
 #include <vnet/ipsec/esp.h>
 #include <vnet/ipsec/ah.h>
 
+
 ipsec_main_t ipsec_main;
 
 u32
@@ -411,7 +413,8 @@
 }
 
 int
-ipsec_add_del_sa (vlib_main_t * vm, ipsec_sa_t * new_sa, int is_add)
+ipsec_add_del_sa (vlib_main_t * vm, ipsec_sa_t * new_sa, int is_add,
+		  u8 udp_encap)
 {
   ipsec_main_t *im = &ipsec_main;
   ipsec_sa_t *sa = 0;
@@ -450,6 +453,7 @@
       pool_get (im->sad, sa);
       clib_memcpy (sa, new_sa, sizeof (*sa));
       sa_index = sa - im->sad;
+      sa->udp_encap = udp_encap ? 1 : 0;
       hash_set (im->sa_index_by_sa_id, sa->id, sa_index);
       if (im->cb.add_del_sa_sess_cb)
 	{
diff --git a/src/vnet/ipsec/ipsec.h b/src/vnet/ipsec/ipsec.h
index bf9fab2..5b54832 100644
--- a/src/vnet/ipsec/ipsec.h
+++ b/src/vnet/ipsec/ipsec.h
@@ -127,6 +127,7 @@
 
   u8 is_tunnel;
   u8 is_tunnel_ip6;
+  u8 udp_encap;
   ip46_address_t tunnel_src_addr;
   ip46_address_t tunnel_dst_addr;
 
@@ -318,7 +319,8 @@
 int ipsec_add_del_spd (vlib_main_t * vm, u32 spd_id, int is_add);
 int ipsec_add_del_policy (vlib_main_t * vm, ipsec_policy_t * policy,
 			  int is_add);
-int ipsec_add_del_sa (vlib_main_t * vm, ipsec_sa_t * new_sa, int is_add);
+int ipsec_add_del_sa (vlib_main_t * vm, ipsec_sa_t * new_sa, int is_add,
+		      u8 udp_encap);
 int ipsec_set_sa_key (vlib_main_t * vm, ipsec_sa_t * sa_update);
 
 u32 ipsec_get_sa_index_by_sa_id (u32 sa_id);
diff --git a/src/vnet/ipsec/ipsec_api.c b/src/vnet/ipsec/ipsec_api.c
index 74f993a..fc7d481 100644
--- a/src/vnet/ipsec/ipsec_api.c
+++ b/src/vnet/ipsec/ipsec_api.c
@@ -241,7 +241,7 @@
       goto out;
     }
 
-  rv = ipsec_add_del_sa (vm, &sa, mp->is_add);
+  rv = ipsec_add_del_sa (vm, &sa, mp->is_add, mp->udp_encap);
 #else
   rv = VNET_API_ERROR_UNIMPLEMENTED;
   goto out;
@@ -457,6 +457,7 @@
   if (sa->use_anti_replay)
     mp->replay_window = clib_host_to_net_u64 (sa->replay_window);
   mp->total_data_size = clib_host_to_net_u64 (sa->total_data_size);
+  mp->udp_encap = sa->udp_encap;
 
   vl_api_send_msg (reg, (u8 *) mp);
 }
diff --git a/src/vnet/ipsec/ipsec_cli.c b/src/vnet/ipsec/ipsec_cli.c
index ef57d52..238ef97 100644
--- a/src/vnet/ipsec/ipsec_cli.c
+++ b/src/vnet/ipsec/ipsec_cli.c
@@ -176,7 +176,7 @@
 	goto done;
     }
 
-  ipsec_add_del_sa (vm, &sa, is_add);
+  ipsec_add_del_sa (vm, &sa, is_add, 0 /* enable nat traversal */ );
 
 done:
   unformat_free (line_input);
@@ -451,9 +451,10 @@
   /* *INDENT-OFF* */
   pool_foreach (sa, im->sad, ({
     if (sa->id) {
-      vlib_cli_output(vm, "sa %u spi %u mode %s protocol %s", sa->id, sa->spi,
+      vlib_cli_output(vm, "sa %u spi %u mode %s protocol %s%s", sa->id, sa->spi,
                       sa->is_tunnel ? "tunnel" : "transport",
-                      sa->protocol ? "esp" : "ah");
+                      sa->protocol ? "esp" : "ah",
+		      sa->udp_encap ? " udp-encap-enabled" : "");
       if (sa->protocol == IPSEC_PROTOCOL_ESP) {
         vlib_cli_output(vm, "  crypto alg %U%s%U integrity alg %U%s%U",
                         format_ipsec_crypto_alg, sa->crypto_alg,
diff --git a/src/vnet/ipsec/ipsec_input.c b/src/vnet/ipsec/ipsec_input.c
index 9aa5654..08269d0 100644
--- a/src/vnet/ipsec/ipsec_input.c
+++ b/src/vnet/ipsec/ipsec_input.c
@@ -216,7 +216,9 @@
 
 	  ip0 = vlib_buffer_get_current (b0);
 
-	  if (PREDICT_TRUE (ip0->protocol == IP_PROTOCOL_IPSEC_ESP))
+	  if (PREDICT_TRUE
+	      (ip0->protocol == IP_PROTOCOL_IPSEC_ESP
+	       || ip0->protocol == IP_PROTOCOL_UDP))
 	    {
 #if 0
 	      clib_warning
@@ -228,6 +230,13 @@
 #endif
 
 	      esp0 = (esp_header_t *) ((u8 *) ip0 + ip4_header_bytes (ip0));
+	      if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_UDP))
+		{
+		  esp0 =
+		    (esp_header_t *) ((u8 *) esp0 + sizeof (udp_header_t));
+		}
+	      /* FIXME TODO missing check whether there is enough data inside
+	       * IP/UDP to contain ESP header & stuff ? */
 	      p0 = ipsec_input_protect_policy_match (spd0,
 						     clib_net_to_host_u32
 						     (ip0->src_address.
@@ -245,7 +254,7 @@
 		  vnet_buffer (b0)->ipsec.sad_index = p0->sa_index;
 		  vnet_buffer (b0)->ipsec.flags = 0;
 		  next0 = im->esp_decrypt_next_index;
-		  vlib_buffer_advance (b0, ip4_header_bytes (ip0));
+		  vlib_buffer_advance (b0, ((u8 *) esp0 - (u8 *) ip0));
 		  goto trace0;
 		}
 
@@ -255,7 +264,8 @@
 		{
 		  ipsec_input_trace_t *tr =
 		    vlib_add_trace (vm, node, b0, sizeof (*tr));
-		  if (ip0->protocol == IP_PROTOCOL_IPSEC_ESP)
+		  if (ip0->protocol == IP_PROTOCOL_IPSEC_ESP ||
+		      ip0->protocol == IP_PROTOCOL_UDP)
 		    {
 		      if (p0)
 			tr->sa_id = p0->sa_id;
diff --git a/src/vnet/udp/udp.h b/src/vnet/udp/udp.h
index c60dea0..0382bc2 100644
--- a/src/vnet/udp/udp.h
+++ b/src/vnet/udp/udp.h
@@ -53,6 +53,7 @@
 _ (3785, bfd_echo4)                             \
 _ (4341, lisp_gpe)                              \
 _ (4342, lisp_cp)                          	\
+_ (4500, ipsec)                                 \
 _ (4739, ipfix)                                 \
 _ (4789, vxlan)					\
 _ (4789, vxlan6)				\