udp session: jumbo frames and configurable mtu
Type: improvement
Signed-off-by: Florin Coras <fcoras@cisco.com>
Change-Id: I6b750bef5df0f8544e05177ccd480f87a020832d
diff --git a/src/vnet/session/application_interface.h b/src/vnet/session/application_interface.h
index b410c63..d09432d 100644
--- a/src/vnet/session/application_interface.h
+++ b/src/vnet/session/application_interface.h
@@ -694,8 +694,13 @@
svm_fifo_peek (f, 0, sizeof (ph), (u8 *) & ph);
ASSERT (ph.data_length >= ph.data_offset);
- svm_fifo_peek (f, sizeof (ph), sizeof (*at), (u8 *) at);
+ /* Check if we have the full dgram */
+ if (max_deq < (ph.data_length + SESSION_CONN_HDR_LEN)
+ && len >= ph.data_length)
+ return 0;
+
+ svm_fifo_peek (f, sizeof (ph), sizeof (*at), (u8 *) at);
len = clib_min (len, ph.data_length - ph.data_offset);
rv = svm_fifo_peek (f, ph.data_offset + SESSION_CONN_HDR_LEN, len, buf);
if (peek)
diff --git a/src/vnet/udp/udp.c b/src/vnet/udp/udp.c
index 48d518a..daab453 100644
--- a/src/vnet/udp/udp.c
+++ b/src/vnet/udp/udp.c
@@ -196,6 +196,7 @@
listener->c_proto = TRANSPORT_PROTO_UDP;
listener->c_s_index = session_index;
listener->c_fib_index = lcl->fib_index;
+ listener->mss = um->default_mtu - sizeof (udp_header_t);
listener->flags |= UDP_CONN_F_OWNS_PORT | UDP_CONN_F_LISTEN;
lcl_ext = (transport_endpoint_cfg_t *) lcl;
if (lcl_ext->transport_flags & TRANSPORT_CFG_F_CONNECTED)
@@ -409,10 +410,14 @@
udp_session_send_params (transport_connection_t * tconn,
transport_send_params_t * sp)
{
+ udp_connection_t *uc;
+
+ uc = udp_get_connection_from_transport (tconn);
+
/* No constraint on TX window */
sp->snd_space = ~0;
/* TODO figure out MTU of output interface */
- sp->snd_mss = 1460;
+ sp->snd_mss = uc->mss;
sp->tx_offset = 0;
sp->flags = 0;
return 0;
@@ -423,6 +428,7 @@
{
vlib_main_t *vm = vlib_get_main ();
u32 thread_index = vm->thread_index;
+ udp_main_t *um = &udp_main;
ip46_address_t lcl_addr;
udp_connection_t *uc;
u16 lcl_port;
@@ -483,6 +489,7 @@
uc->c_is_ip4 = rmt->is_ip4;
uc->c_proto = TRANSPORT_PROTO_UDP;
uc->c_fib_index = rmt->fib_index;
+ uc->mss = rmt->mss ? rmt->mss : (um->default_mtu - sizeof (udp_header_t));
uc->flags |= UDP_CONN_F_OWNS_PORT;
if (rmt->transport_flags & TRANSPORT_CFG_F_CONNECTED)
uc->flags |= UDP_CONN_F_CONNECTED;
@@ -639,6 +646,8 @@
vlib_node_add_next (vm, udp4_local_node.index, udp4_input_node.index);
um->local_to_input_edge[UDP_IP6] =
vlib_node_add_next (vm, udp6_local_node.index, udp6_input_node.index);
+
+ um->default_mtu = 1500;
return 0;
}
@@ -650,6 +659,24 @@
};
/* *INDENT-ON* */
+/**
+ * Parse the "udp" section of the startup configuration.
+ *
+ * Recognised options:
+ *   mtu <n>  - value stored in udp_main.default_mtu
+ *
+ * default_mtu is a u16 and connection mss values are computed as
+ * default_mtu - sizeof (udp_header_t), so <n> must fit in 16 bits and be
+ * larger than the udp header. Anything else is rejected instead of being
+ * silently truncated or wrapping the derived mss.
+ *
+ * @param vm     vlib main (not used by this function)
+ * @param input  unformat input holding the section contents
+ * @return 0 on success, a clib error on unknown or out of range input
+ */
+static clib_error_t *
+udp_config_fn (vlib_main_t * vm, unformat_input_t * input)
+{
+  udp_main_t *um = &udp_main;
+  u32 tmp;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "mtu %u", &tmp))
+        {
+          /* Parsed as u32 but stored as u16: validate before narrowing */
+          if (tmp <= sizeof (udp_header_t) || tmp > 0xffff)
+            return clib_error_return (0, "invalid mtu %u", tmp);
+          um->default_mtu = tmp;
+        }
+      else
+        return clib_error_return (0, "unknown input `%U'",
+                                  format_unformat_error, input);
+    }
+  return 0;
+}
+
+/* Hook udp_config_fn up as the config handler named "udp" */
+VLIB_CONFIG_FUNCTION (udp_config_fn, "udp");
static clib_error_t *
show_udp_punt_fn (vlib_main_t * vm, unformat_input_t * input,
diff --git a/src/vnet/udp/udp.h b/src/vnet/udp/udp.h
index 9e1aad6..f7985c9 100644
--- a/src/vnet/udp/udp.h
+++ b/src/vnet/udp/udp.h
@@ -63,6 +63,7 @@
transport_connection_t connection; /**< must be first */
clib_spinlock_t rx_lock; /**< rx fifo lock */
u8 flags; /**< connection flags */
+ u16 mss; /**< connection mss */
} udp_connection_t;
#define foreach_udp4_dst_port \
@@ -171,6 +172,7 @@
clib_spinlock_t *peekers_write_locks;
udp_connection_t *listener_pool;
+ u16 default_mtu;
} udp_main_t;
extern udp_main_t udp_main;
diff --git a/src/vnet/udp/udp_input.c b/src/vnet/udp/udp_input.c
index 4b22cbe..1e1c60b 100644
--- a/src/vnet/udp/udp_input.c
+++ b/src/vnet/udp/udp_input.c
@@ -208,6 +208,7 @@
child0->c_rmt_port = udp0->src_port;
child0->c_is_ip4 = is_ip4;
child0->c_fib_index = tc0->fib_index;
+ child0->mss = uc0->mss;
child0->flags |= UDP_CONN_F_CONNECTED;
if (session_stream_accept (&child0->connection,
@@ -238,7 +239,14 @@
error0 = UDP_ERROR_FIFO_FULL;
goto trace0;
}
- hdr0.data_length = b0->current_length = data_len;
+
+ hdr0.data_length = data_len;
+ if (PREDICT_TRUE (!(b0->flags & VLIB_BUFFER_NEXT_PRESENT)))
+ b0->current_length = data_len;
+ else
+ b0->total_length_not_including_first_buffer = data_len
+ - b0->current_length;
+
hdr0.data_offset = 0;
ip_set (&hdr0.lcl_ip, lcl_addr, is_ip4);
ip_set (&hdr0.rmt_ip, rmt_addr, is_ip4);