udp session: jumbo frames and configurable mtu

Type: improvement

Signed-off-by: Florin Coras <fcoras@cisco.com>
Change-Id: I6b750bef5df0f8544e05177ccd480f87a020832d
diff --git a/src/vnet/session/application_interface.h b/src/vnet/session/application_interface.h
index b410c63..d09432d 100644
--- a/src/vnet/session/application_interface.h
+++ b/src/vnet/session/application_interface.h
@@ -694,8 +694,13 @@
 
   svm_fifo_peek (f, 0, sizeof (ph), (u8 *) & ph);
   ASSERT (ph.data_length >= ph.data_offset);
-  svm_fifo_peek (f, sizeof (ph), sizeof (*at), (u8 *) at);
 
+  /* Check if we have the full dgram */
+  if (max_deq < (ph.data_length + SESSION_CONN_HDR_LEN)
+      && len >= ph.data_length)
+    return 0;
+
+  svm_fifo_peek (f, sizeof (ph), sizeof (*at), (u8 *) at);
   len = clib_min (len, ph.data_length - ph.data_offset);
   rv = svm_fifo_peek (f, ph.data_offset + SESSION_CONN_HDR_LEN, len, buf);
   if (peek)
diff --git a/src/vnet/udp/udp.c b/src/vnet/udp/udp.c
index 48d518a..daab453 100644
--- a/src/vnet/udp/udp.c
+++ b/src/vnet/udp/udp.c
@@ -196,6 +196,7 @@
   listener->c_proto = TRANSPORT_PROTO_UDP;
   listener->c_s_index = session_index;
   listener->c_fib_index = lcl->fib_index;
+  listener->mss = um->default_mtu - sizeof (udp_header_t);
   listener->flags |= UDP_CONN_F_OWNS_PORT | UDP_CONN_F_LISTEN;
   lcl_ext = (transport_endpoint_cfg_t *) lcl;
   if (lcl_ext->transport_flags & TRANSPORT_CFG_F_CONNECTED)
@@ -409,10 +410,14 @@
 udp_session_send_params (transport_connection_t * tconn,
 			 transport_send_params_t * sp)
 {
+  udp_connection_t *uc;
+
+  uc = udp_get_connection_from_transport (tconn);
+
   /* No constraint on TX window */
   sp->snd_space = ~0;
   /* TODO figure out MTU of output interface */
-  sp->snd_mss = 1460;
+  sp->snd_mss = uc->mss;
   sp->tx_offset = 0;
   sp->flags = 0;
   return 0;
@@ -423,6 +428,7 @@
 {
   vlib_main_t *vm = vlib_get_main ();
   u32 thread_index = vm->thread_index;
+  udp_main_t *um = &udp_main;
   ip46_address_t lcl_addr;
   udp_connection_t *uc;
   u16 lcl_port;
@@ -483,6 +489,7 @@
   uc->c_is_ip4 = rmt->is_ip4;
   uc->c_proto = TRANSPORT_PROTO_UDP;
   uc->c_fib_index = rmt->fib_index;
+  uc->mss = rmt->mss ? rmt->mss : (um->default_mtu - sizeof (udp_header_t));
   uc->flags |= UDP_CONN_F_OWNS_PORT;
   if (rmt->transport_flags & TRANSPORT_CFG_F_CONNECTED)
     uc->flags |= UDP_CONN_F_CONNECTED;
@@ -639,6 +646,8 @@
     vlib_node_add_next (vm, udp4_local_node.index, udp4_input_node.index);
   um->local_to_input_edge[UDP_IP6] =
     vlib_node_add_next (vm, udp6_local_node.index, udp6_input_node.index);
+
+  um->default_mtu = 1500;
   return 0;
 }
 
@@ -650,6 +659,24 @@
 };
 /* *INDENT-ON* */
 
+/* Handler for the "udp" config section: accepts "mtu <n>", range-checked */
+static clib_error_t *
+udp_config_fn (vlib_main_t * vm, unformat_input_t * input)
+{
+  u32 tmp;			/* validated before narrowing to u16 */
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (!unformat (input, "mtu %u", &tmp))
+	return clib_error_return (0, "unknown input `%U'",
+				  format_unformat_error, input);
+      if (tmp <= sizeof (udp_header_t) || tmp > 0xffff)
+	return clib_error_return (0, "invalid mtu %u", tmp);
+      udp_main.default_mtu = tmp;	/* mss is mtu minus udp header */
+    }
+  return 0;
+}
+
+VLIB_CONFIG_FUNCTION (udp_config_fn, "udp");
 
 static clib_error_t *
 show_udp_punt_fn (vlib_main_t * vm, unformat_input_t * input,
diff --git a/src/vnet/udp/udp.h b/src/vnet/udp/udp.h
index 9e1aad6..f7985c9 100644
--- a/src/vnet/udp/udp.h
+++ b/src/vnet/udp/udp.h
@@ -63,6 +63,7 @@
   transport_connection_t connection;	/**< must be first */
   clib_spinlock_t rx_lock;		/**< rx fifo lock */
   u8 flags;				/**< connection flags */
+  u16 mss;				/**< connection mss */
 } udp_connection_t;
 
 #define foreach_udp4_dst_port			\
@@ -171,6 +172,7 @@
   clib_spinlock_t *peekers_write_locks;
   udp_connection_t *listener_pool;
 
+  u16 default_mtu;
 } udp_main_t;
 
 extern udp_main_t udp_main;
diff --git a/src/vnet/udp/udp_input.c b/src/vnet/udp/udp_input.c
index 4b22cbe..1e1c60b 100644
--- a/src/vnet/udp/udp_input.c
+++ b/src/vnet/udp/udp_input.c
@@ -208,6 +208,7 @@
 	      child0->c_rmt_port = udp0->src_port;
 	      child0->c_is_ip4 = is_ip4;
 	      child0->c_fib_index = tc0->fib_index;
+	      child0->mss = uc0->mss;
 	      child0->flags |= UDP_CONN_F_CONNECTED;
 
 	      if (session_stream_accept (&child0->connection,
@@ -238,7 +239,14 @@
 	  error0 = UDP_ERROR_FIFO_FULL;
 	  goto trace0;
 	}
-      hdr0.data_length = b0->current_length = data_len;
+
+      hdr0.data_length = data_len;
+      if (PREDICT_TRUE (!(b0->flags & VLIB_BUFFER_NEXT_PRESENT)))
+	b0->current_length = data_len;
+      else
+	b0->total_length_not_including_first_buffer = data_len
+	  - b0->current_length;
+
       hdr0.data_offset = 0;
       ip_set (&hdr0.lcl_ip, lcl_addr, is_ip4);
       ip_set (&hdr0.rmt_ip, rmt_addr, is_ip4);