Clean up URI code and fix TCP bugs
- Add CLI/API to enable session layer, by default it's disabled
- Improve rcv wnd computation
- Improvements to tx path
- URI code cleanup
- Builtin test tcp server
- Improve src port allocation
Change-Id: I2ace498e76a0771d4c31a8075cc14fe33d7dfa38
Signed-off-by: Florin Coras <fcoras@cisco.com>
diff --git a/src/vnet/api_errno.h b/src/vnet/api_errno.h
index 5e65ac7..74d39bd 100644
--- a/src/vnet/api_errno.h
+++ b/src/vnet/api_errno.h
@@ -103,7 +103,8 @@
_(BFD_EAGAIN, -111, "BFD object cannot be manipulated at this time") \
_(INVALID_GPE_MODE, -112, "Invalid GPE mode") \
_(LISP_GPE_ENTRIES_PRESENT, -113, "LISP GPE entries are present") \
-_(ADDRESS_FOUND_FOR_INTERFACE, -114, "Address found for interface")
+_(ADDRESS_FOUND_FOR_INTERFACE, -114, "Address found for interface") \
+_(SESSION_CONNECT_FAIL, -115, "Session failed to connect")
typedef enum
{
diff --git a/src/vnet/session/application.c b/src/vnet/session/application.c
index a561e7d..a542eeb 100644
--- a/src/vnet/session/application.c
+++ b/src/vnet/session/application.c
@@ -154,6 +154,15 @@
return pool_elt_at_index (app_pool, index);
}
+/**
+ * Look up an application by pool index without assuming the slot is in use.
+ *
+ * @param index - index into app_pool
+ * @return the application, or 0 if pool_is_free_index reports the slot free
+ */
+application_t *
+application_get_if_valid (u32 index)
+{
+  return pool_is_free_index (app_pool, index) ?
+    0 : pool_elt_at_index (app_pool, index);
+}
+
u32
application_get_index (application_t * app)
{
@@ -209,7 +218,7 @@
regp = vl_api_client_index_to_registration (srv->api_client_index);
if (!regp)
- server_name = format (0, "%s%c", regp->name, 0);
+ server_name = format (0, "builtin-%d%c", srv->index, 0);
else
server_name = regp->name;
@@ -269,11 +278,17 @@
show_app_command_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_command_t * cmd)
{
+ session_manager_main_t *smm = &session_manager_main;
application_t *app;
int do_server = 0;
int do_client = 0;
int verbose = 0;
+  /* Return the error: without the return the error object is built and
+   * dropped, and the command carries on as if the layer were enabled. */
+  if (!smm->is_enabled)
+    {
+      return clib_error_return (0, "session layer is not enabled");
+    }
+
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
if (unformat (input, "server"))
@@ -323,16 +338,20 @@
/* *INDENT-ON* */
}
else
- vlib_cli_output (vm, "No active server bindings");
+ vlib_cli_output (vm, "No active client bindings");
}
return 0;
}
+/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_app_command, static) =
{
-.path = "show app",.short_help =
- "show app [server|client] [verbose]",.function = show_app_command_fn,};
+ .path = "show app",
+ .short_help = "show app [server|client] [verbose]",
+ .function = show_app_command_fn,
+};
+/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/session/application.h b/src/vnet/session/application.h
index 027d696..480828f 100644
--- a/src/vnet/session/application.h
+++ b/src/vnet/session/application.h
@@ -100,6 +100,7 @@
session_cb_vft_t * cb_fns);
void application_del (application_t * app);
application_t *application_get (u32 index);
+application_t *application_get_if_valid (u32 index);
application_t *application_lookup (u32 api_client_index);
u32 application_get_index (application_t * app);
diff --git a/src/vnet/session/application_interface.c b/src/vnet/session/application_interface.c
index 0ea77fd..6ddfb70 100644
--- a/src/vnet/session/application_interface.c
+++ b/src/vnet/session/application_interface.c
@@ -51,7 +51,7 @@
prefix.fp_proto = FIB_PROTOCOL_IP6;
}
- clib_memcpy (&prefix.fp_addr, ip46_address, sizeof (ip46_address));
+ clib_memcpy (&prefix.fp_addr, ip46_address, sizeof (ip46_address_t));
fei = fib_table_lookup (0, &prefix);
flags = fib_entry_get_flags (fei);
@@ -186,9 +186,7 @@
/*
* Not connecting to a local server. Create regular session
*/
- stream_session_open (sst, ip46, port, app->index);
-
- return 0;
+ return stream_session_open (sst, ip46, port, app->index);
}
/**
diff --git a/src/vnet/session/node.c b/src/vnet/session/node.c
index e467f4e..399077d 100644
--- a/src/vnet/session/node.c
+++ b/src/vnet/session/node.c
@@ -104,9 +104,13 @@
snd_space0 = transport_vft->send_space (tc0);
snd_mss0 = transport_vft->send_mss (tc0);
+ /* Can't make any progress */
if (snd_space0 == 0 || svm_fifo_max_dequeue (s0->server_tx_fifo) == 0
|| snd_mss0 == 0)
- return 0;
+ {
+ vec_add1 (smm->evts_partially_read[thread_index], *e0);
+ return 0;
+ }
ASSERT (e0->enqueue_length > 0);
@@ -143,7 +147,12 @@
if (PREDICT_FALSE (n_bufs < 0.9 * VLIB_FRAME_SIZE))
{
/* Keep track of how much we've dequeued and exit */
- e0->enqueue_length -= max_len_to_snd0 - left_to_snd0;
+ if (left_to_snd0 != max_len_to_snd0)
+ {
+ e0->enqueue_length -= max_len_to_snd0 - left_to_snd0;
+ vec_add1 (smm->evts_partially_read[thread_index], *e0);
+ }
+
return -1;
}
@@ -185,12 +194,13 @@
t0->server_thread_index = s0->thread_index;
}
+ /* *INDENT-OFF* */
if (1)
{
- ELOG_TYPE_DECLARE (e) =
- {
- .format = "evt-dequeue: id %d length %d",.format_args =
- "i4i4",};
+ ELOG_TYPE_DECLARE (e) = {
+ .format = "evt-dequeue: id %d length %d",
+ .format_args = "i4i4",
+ };
struct
{
u32 data[2];
@@ -199,6 +209,7 @@
ed->data[0] = e0->event_id;
ed->data[1] = e0->enqueue_length;
}
+ /* *INDENT-ON* */
len_to_deq0 = (left_to_snd0 < snd_mss0) ? left_to_snd0 : snd_mss0;
@@ -289,7 +300,7 @@
{
session_manager_main_t *smm = vnet_get_session_manager_main ();
session_fifo_event_t *my_fifo_events, *e;
- u32 n_to_dequeue;
+ u32 n_to_dequeue, n_events;
unix_shared_memory_queue_t *q;
int n_tx_packets = 0;
u32 my_thread_index = vm->cpu_index;
@@ -309,14 +320,16 @@
/* min number of events we can dequeue without blocking */
n_to_dequeue = q->cursize;
- if (n_to_dequeue == 0)
- return 0;
-
my_fifo_events = smm->fifo_events[my_thread_index];
- /* If we didn't manage to process previous events try going
+ if (n_to_dequeue == 0 && vec_len (my_fifo_events) == 0)
+ return 0;
+
+ /*
+ * If we didn't manage to process previous events try going
* over them again without dequeuing new ones.
- * XXX: Block senders to sessions that can't keep up */
+ */
+ /* XXX: Block senders to sessions that can't keep up */
if (vec_len (my_fifo_events) >= 100)
goto skip_dequeue;
@@ -338,8 +351,8 @@
smm->fifo_events[my_thread_index] = my_fifo_events;
skip_dequeue:
-
- for (i = 0; i < n_to_dequeue; i++)
+ n_events = vec_len (my_fifo_events);
+ for (i = 0; i < n_events; i++)
{
svm_fifo_t *f0; /* $$$ prefetch 1 ahead maybe */
stream_session_t *s0;
@@ -354,8 +367,13 @@
/* $$$ add multiple event queues, per vpp worker thread */
ASSERT (server_thread_index0 == my_thread_index);
- s0 = pool_elt_at_index (smm->sessions[my_thread_index],
- server_session_index0);
+ s0 = stream_session_get_if_valid (server_session_index0,
+ my_thread_index);
+ if (!s0)
+ {
+ clib_warning ("It's dead Jim!");
+ continue;
+ }
ASSERT (s0->thread_index == my_thread_index);
@@ -380,11 +398,11 @@
done:
/* Couldn't process all events. Probably out of buffers */
- if (PREDICT_FALSE (i < n_to_dequeue))
+ if (PREDICT_FALSE (i < n_events))
{
session_fifo_event_t *partially_read =
smm->evts_partially_read[my_thread_index];
- vec_add (partially_read, &my_fifo_events[i], n_to_dequeue - i);
+ vec_add (partially_read, &my_fifo_events[i], n_events - i);
vec_free (my_fifo_events);
smm->fifo_events[my_thread_index] = partially_read;
smm->evts_partially_read[my_thread_index] = 0;
@@ -413,8 +431,7 @@
.n_errors = ARRAY_LEN (session_queue_error_strings),
.error_strings = session_queue_error_strings,
.n_next_nodes = SESSION_QUEUE_N_NEXT,
- /* .state = VLIB_NODE_STATE_DISABLED, enable on-demand? */
- /* edit / add dispositions here */
+ .state = VLIB_NODE_STATE_DISABLED,
.next_nodes =
{
[SESSION_QUEUE_NEXT_DROP] = "error-drop",
diff --git a/src/vnet/session/session.api b/src/vnet/session/session.api
index a7b28c1..582765b 100644
--- a/src/vnet/session/session.api
+++ b/src/vnet/session/session.api
@@ -422,6 +422,28 @@
i32 retval;
u64 handle;
};
+
+/** \brief enable/disable session layer
+ @param client_index - opaque cookie to identify the sender
+ client to vpp direction only
+ @param context - sender context, to match reply w/ request
+ @param is_enable - disable session layer if 0, enable otherwise
+*/
+define session_enable_disable {
+ u32 client_index;
+ u32 context;
+ u8 is_enable;
+};
+
+/** \brief Reply for session enable/disable
+ @param context - returned sender context, to match reply w/ request
+ @param retval - return code
+*/
+define session_enable_disable_reply {
+ u32 context;
+ i32 retval;
+};
+
/*
* Local Variables:
* eval: (c-set-style "gnu")
diff --git a/src/vnet/session/session.c b/src/vnet/session/session.c
index 539da61..422527e 100644
--- a/src/vnet/session/session.c
+++ b/src/vnet/session/session.c
@@ -311,11 +311,11 @@
}
transport_connection_t *
-stream_session_lookup_transport4 (session_manager_main_t * smm,
- ip4_address_t * lcl, ip4_address_t * rmt,
+stream_session_lookup_transport4 (ip4_address_t * lcl, ip4_address_t * rmt,
u16 lcl_port, u16 rmt_port, u8 proto,
u32 my_thread_index)
{
+ session_manager_main_t *smm = &session_manager_main;
session_kv4_t kv4;
stream_session_t *s;
int rv;
@@ -345,11 +345,11 @@
}
transport_connection_t *
-stream_session_lookup_transport6 (session_manager_main_t * smm,
- ip6_address_t * lcl, ip6_address_t * rmt,
+stream_session_lookup_transport6 (ip6_address_t * lcl, ip6_address_t * rmt,
u16 lcl_port, u16 rmt_port, u8 proto,
u32 my_thread_index)
{
+ session_manager_main_t *smm = &session_manager_main;
stream_session_t *s;
session_kv6_t kv6;
int rv;
@@ -554,7 +554,7 @@
u8 * added_a_segment)
{
svm_fifo_segment_private_t *fifo_segment;
- u32 fifo_size, default_fifo_size = 8192 /* TODO config */ ;
+ u32 fifo_size, default_fifo_size = 128 << 10; /* TODO config */
int i;
*added_a_segment = 0;
@@ -948,7 +948,7 @@
connects_session_manager_init (session_manager_main_t * smm, u8 session_type)
{
session_manager_t *sm;
- u32 connect_fifo_size = 8 << 10; /* Config? */
+ u32 connect_fifo_size = 256 << 10; /* Config? */
u32 default_segment_size = 1 << 20;
pool_get (smm->session_managers, sm);
@@ -1055,10 +1055,15 @@
svm_fifo_segment_free_fifo (fifo_segment, s->server_rx_fifo);
svm_fifo_segment_free_fifo (fifo_segment, s->server_tx_fifo);
- /* Cleanup app if client */
- app = application_get (s->app_index);
+ app = application_get_if_valid (s->app_index);
+
+ /* No app. A possibility: after disconnect application called unbind */
+ if (!app)
+ return;
+
if (app->mode == APP_CLIENT)
{
+ /* Cleanup app if client */
application_del (app);
}
else if (app->mode == APP_SERVER)
@@ -1068,6 +1073,7 @@
svm_fifo_t **fifos;
u32 fifo_index;
+ /* For server, see if any segments can be removed */
sm = session_manager_get (app->session_manager_index);
/* Delete fifo */
@@ -1096,10 +1102,10 @@
{
stream_session_t *s;
+ /* App might've been removed already */
s = stream_session_get_if_valid (tc->s_index, tc->thread_index);
if (!s)
{
- clib_warning ("Surprised!");
return;
}
stream_session_delete (s);
@@ -1151,16 +1157,24 @@
return 0;
}
-void
+int
stream_session_open (u8 sst, ip46_address_t * addr, u16 port_host_byte_order,
u32 app_index)
{
transport_connection_t *tc;
u32 tci;
u64 value;
+ int rv;
/* Ask transport to open connection */
- tci = tp_vfts[sst].open (addr, port_host_byte_order);
+ rv = tp_vfts[sst].open (addr, port_host_byte_order);
+ if (rv < 0)
+ {
+ clib_warning ("Transport failed to open connection.");
+ return VNET_API_ERROR_SESSION_CONNECT_FAIL;
+ }
+
+ tci = rv;
/* Get transport connection */
tc = tp_vfts[sst].get_half_open (tci);
@@ -1170,6 +1184,8 @@
/* Add to the half-open lookup table */
stream_session_half_open_table_add (sst, tc, value);
+
+ return 0;
}
/**
@@ -1216,16 +1232,13 @@
}
static clib_error_t *
-session_manager_main_init (vlib_main_t * vm)
+session_manager_main_enable (vlib_main_t * vm)
{
- u32 num_threads;
- vlib_thread_main_t *vtm = vlib_get_thread_main ();
session_manager_main_t *smm = &session_manager_main;
+ vlib_thread_main_t *vtm = vlib_get_thread_main ();
+ u32 num_threads;
int i;
- smm->vlib_main = vm;
- smm->vnet_main = vnet_get_main ();
-
num_threads = 1 /* main thread */ + vtm->n_threads;
if (num_threads < 1)
@@ -1272,11 +1285,48 @@
for (i = 0; i < SESSION_N_TYPES; i++)
smm->connect_manager_index[i] = INVALID_INDEX;
+ smm->is_enabled = 1;
+
return 0;
}
-VLIB_INIT_FUNCTION (session_manager_main_init);
+/**
+ * Enable or disable the session layer.
+ *
+ * Enabling an already enabled layer is a no-op. Disabling clears the
+ * is_enabled flag and stops the session queue node.
+ * NOTE(review): nothing visible here releases what
+ * session_manager_main_enable set up, so a disable/enable cycle runs the
+ * enable path again - confirm that is intended.
+ *
+ * @param vm - vlib main
+ * @param is_en - 0 to disable, anything else to enable
+ * @return 0 on success, otherwise the error from session_manager_main_enable
+ */
+clib_error_t *
+vnet_session_enable_disable (vlib_main_t * vm, u8 is_en)
+{
+  clib_error_t *error;
+
+  if (!is_en)
+    {
+      session_manager_main.is_enabled = 0;
+      vlib_node_set_state (vm, session_queue_node.index,
+                           VLIB_NODE_STATE_DISABLED);
+      return 0;
+    }
+
+  if (session_manager_main.is_enabled)
+    return 0;
+
+  /* Bring the layer up first and only start polling once that succeeded,
+   * so a failed enable does not leave the queue node running. */
+  error = session_manager_main_enable (vm);
+  if (error)
+    return error;
+
+  vlib_node_set_state (vm, session_queue_node.index,
+                       VLIB_NODE_STATE_POLLING);
+  return 0;
+}
+
+
+/**
+ * Init-time hook: record the convenience pointers and leave the session
+ * layer disabled.
+ *
+ * @param vm - vlib main
+ * @return always 0
+ */
+clib_error_t *
+session_manager_main_init (vlib_main_t * vm)
+{
+  session_manager_main.vlib_main = vm;
+  session_manager_main.vnet_main = vnet_get_main ();
+  session_manager_main.is_enabled = 0;
+
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (session_manager_main_init)
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/vnet/session/session.h b/src/vnet/session/session.h
index cf14cca..46e5ce2 100644
--- a/src/vnet/session/session.h
+++ b/src/vnet/session/session.h
@@ -213,12 +213,15 @@
/** Per transport rx function that can either dequeue or peek */
session_fifo_rx_fn *session_rx_fns[SESSION_N_TYPES];
+ u8 is_enabled;
+
/* Convenience */
vlib_main_t *vlib_main;
vnet_main_t *vnet_main;
};
extern session_manager_main_t session_manager_main;
+extern vlib_node_registration_t session_queue_node;
/*
* Session manager function
@@ -276,14 +279,12 @@
ip6_address_t * rmt, u16 lcl_port,
u16 rmt_port, u8, u32 thread_index);
transport_connection_t
- * stream_session_lookup_transport4 (session_manager_main_t * smm,
- ip4_address_t * lcl,
+ * stream_session_lookup_transport4 (ip4_address_t * lcl,
ip4_address_t * rmt, u16 lcl_port,
u16 rmt_port, u8 proto,
u32 thread_index);
transport_connection_t
- * stream_session_lookup_transport6 (session_manager_main_t * smm,
- ip6_address_t * lcl,
+ * stream_session_lookup_transport6 (ip6_address_t * lcl,
ip6_address_t * rmt, u16 lcl_port,
u16 rmt_port, u8 proto,
u32 thread_index);
@@ -338,6 +339,14 @@
return svm_fifo_max_enqueue (s->server_rx_fifo);
}
+/**
+ * Report the nitems field of the rx fifo that belongs to the session
+ * behind a transport connection.
+ *
+ * @param tc - transport connection; its s_index and thread_index select
+ *             the session
+ * @return server_rx_fifo->nitems of that session
+ */
+always_inline u32
+stream_session_fifo_size (transport_connection_t * tc)
+{
+  stream_session_t *session;
+
+  session = stream_session_get (tc->s_index, tc->thread_index);
+  return session->server_rx_fifo->nitems;
+}
+
+
int
stream_session_enqueue_data (transport_connection_t * tc, u8 * data, u16 len,
u8 queue_event);
@@ -356,8 +365,8 @@
int
stream_session_accept (transport_connection_t * tc, u32 listener_index,
u8 sst, u8 notify);
-void stream_session_open (u8 sst, ip46_address_t * addr,
- u16 port_host_byte_order, u32 api_client_index);
+int stream_session_open (u8 sst, ip46_address_t * addr,
+ u16 port_host_byte_order, u32 api_client_index);
void stream_session_disconnect (stream_session_t * s);
void stream_session_cleanup (stream_session_t * s);
int
@@ -369,6 +378,8 @@
void session_register_transport (u8 type, const transport_proto_vft_t * vft);
transport_proto_vft_t *session_get_transport_vft (u8 type);
+clib_error_t *vnet_session_enable_disable (vlib_main_t * vm, u8 is_en);
+
#endif /* __included_session_h__ */
/*
diff --git a/src/vnet/session/session_api.c b/src/vnet/session/session_api.c
index 9d06868..8852fc6 100644
--- a/src/vnet/session/session_api.c
+++ b/src/vnet/session/session_api.c
@@ -52,6 +52,8 @@
_(DISCONNECT_SOCK_REPLY, disconnect_sock_reply) \
_(ACCEPT_SOCK_REPLY, accept_sock_reply) \
_(RESET_SOCK_REPLY, reset_sock_reply) \
+_(SESSION_ENABLE_DISABLE, session_enable_disable) \
+
static int
send_add_segment_callback (u32 api_client_index, const u8 * segment_name,
@@ -146,7 +148,6 @@
mp = vl_msg_api_alloc (sizeof (*mp));
mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_CONNECT_URI_REPLY);
mp->context = app->api_context;
- mp->retval = is_fail;
if (!is_fail)
{
vpp_queue = session_manager_get_vpp_event_queue (s->thread_index);
@@ -157,6 +158,7 @@
mp->session_type = s->session_type;
mp->vpp_event_queue_address = (u64) vpp_queue;
mp->client_event_queue_address = (u64) app->event_queue;
+ mp->retval = 0;
session_manager_get_segment_info (s->server_segment_index, &seg_name,
&mp->segment_size);
@@ -164,12 +166,22 @@
if (mp->segment_name_length)
clib_memcpy (mp->segment_name, seg_name, mp->segment_name_length);
}
+ else
+ {
+ mp->retval = VNET_API_ERROR_SESSION_CONNECT_FAIL;
+ }
vl_msg_api_send_shmem (q, (u8 *) & mp);
/* Remove client if connect failed */
if (is_fail)
- application_del (app);
+ {
+ application_del (app);
+ }
+ else
+ {
+ s->session_state = SESSION_STATE_READY;
+ }
return 0;
}
@@ -432,6 +444,17 @@
}
static void
+vl_api_session_enable_disable_t_handler (vl_api_session_enable_disable_t * mp)
+{
+  /* API handler for session_enable_disable: applies mp->is_enable and
+   * replies with the outcome in retval. */
+  vl_api_session_enable_disable_reply_t *rmp;
+  vlib_main_t *vm = vlib_get_main ();
+  clib_error_t *error;
+  int rv = 0;
+
+  error = vnet_session_enable_disable (vm, mp->is_enable);
+  if (error)
+    {
+      /* Report and release the error, and make sure the client does not
+       * get a success reply for a request that failed. */
+      clib_error_report (error);
+      rv = VNET_API_ERROR_UNSPECIFIED;
+    }
+
+  REPLY_MACRO (VL_API_SESSION_ENABLE_DISABLE_REPLY);
+}
+
+static void
vl_api_bind_uri_t_handler (vl_api_bind_uri_t * mp)
{
vl_api_bind_uri_reply_t *rmp;
@@ -476,7 +499,6 @@
}
}));
/* *INDENT-ON* */
-
}
static void
@@ -493,7 +515,9 @@
static void
vl_api_connect_uri_t_handler (vl_api_connect_uri_t * mp)
{
+ vl_api_connect_uri_reply_t *rmp;
vnet_connect_args_t _a, *a = &_a;
+ int rv;
a->uri = (char *) mp->uri;
a->api_client_index = mp->client_index;
@@ -501,7 +525,19 @@
a->options = mp->options;
a->session_cb_vft = &uri_session_cb_vft;
a->mp = mp;
- vnet_connect_uri (a);
+
+ rv = vnet_connect_uri (a);
+
+ if (rv == 0 || rv == VNET_CONNECT_REDIRECTED)
+ return;
+
+ /* Got some error, relay it */
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO2 (VL_API_CONNECT_URI_REPLY, ({
+ rmp->retval = rv;
+ }));
+ /* *INDENT-ON* */
}
static void
@@ -662,7 +698,9 @@
static void
vl_api_connect_sock_t_handler (vl_api_connect_sock_t * mp)
{
+ vl_api_connect_sock_reply_t *rmp;
vnet_connect_args_t _a, *a = &_a;
+ int rv;
clib_memcpy (&a->tep.ip, mp->ip,
(mp->is_ip4 ? sizeof (ip4_address_t) :
@@ -675,7 +713,18 @@
a->api_context = mp->context;
a->mp = mp;
- vnet_connect (a);
+ rv = vnet_connect (a);
+
+ if (rv == 0 || rv == VNET_CONNECT_REDIRECTED)
+ return;
+
+ /* Got some error, relay it */
+
+  /* rmp is a connect_sock reply, so it must carry the connect_sock reply
+   * id; the connect_uri id would mislabel the message for the client. */
+  /* *INDENT-OFF* */
+  REPLY_MACRO2 (VL_API_CONNECT_SOCK_REPLY, ({
+    rmp->retval = rv;
+  }));
+  /* *INDENT-ON* */
}
static void
diff --git a/src/vnet/session/session_cli.c b/src/vnet/session/session_cli.c
index b2943a1..b029ee6 100644
--- a/src/vnet/session/session_cli.c
+++ b/src/vnet/session/session_cli.c
@@ -60,7 +60,7 @@
}
else
{
- clib_warning ("Session in unknown state!");
+ clib_warning ("Session in state: %d!", ss->session_state);
}
vec_free (str);
@@ -78,6 +78,11 @@
stream_session_t *s;
u8 *str = 0;
+  /* Return the error: without the return the error object is built and
+   * dropped, and the command carries on as if the layer were enabled. */
+  if (!smm->is_enabled)
+    {
+      return clib_error_return (0, "session layer is not enabled");
+    }
+
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
if (unformat (input, "verbose"))
@@ -126,11 +131,14 @@
return 0;
}
-VLIB_CLI_COMMAND (show_uri_command, static) =
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_session_command, static) =
{
-.path = "show session",.short_help = "show session [verbose]",.function =
- show_session_command_fn,};
-
+ .path = "show session",
+ .short_help = "show session [verbose]",
+ .function = show_session_command_fn,
+};
+/* *INDENT-ON* */
static clib_error_t *
clear_session_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -142,6 +150,11 @@
stream_session_t *pool, *session;
application_t *server;
+  /* Return the error: without the return the error object is built and
+   * dropped, and the command carries on as if the layer were enabled. */
+  if (!smm->is_enabled)
+    {
+      return clib_error_return (0, "session layer is not enabled");
+    }
+
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
if (unformat (input, "thread %d", &thread_index))
@@ -174,11 +187,43 @@
return 0;
}
-VLIB_CLI_COMMAND (clear_uri_session_command, static) =
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (clear_session_command, static) =
{
-.path = "clear session",.short_help =
- "clear session thread <thread> session <index>",.function =
- clear_session_command_fn,};
+ .path = "clear session",
+ .short_help = "clear session thread <thread> session <index>",
+ .function = clear_session_command_fn,
+};
+/* *INDENT-ON* */
+
+/**
+ * CLI handler for "session [enable|disable]".
+ *
+ * Consumes "enable"/"disable" tokens until the input is exhausted; the last
+ * one seen wins and no token at all means enable. Any other token aborts
+ * with an "unknown input" error.
+ *
+ * @return whatever vnet_session_enable_disable returns, or the parse error
+ */
+static clib_error_t *
+session_enable_disable_fn (vlib_main_t * vm, unformat_input_t * input,
+                           vlib_cli_command_t * cmd)
+{
+  u8 enable = 1;
+
+  for (;;)
+    {
+      if (unformat_check_input (input) == UNFORMAT_END_OF_INPUT)
+        break;
+
+      if (unformat (input, "enable"))
+        {
+          enable = 1;
+          continue;
+        }
+      if (unformat (input, "disable"))
+        {
+          enable = 0;
+          continue;
+        }
+
+      return clib_error_return (0, "unknown input `%U'",
+                                format_unformat_error, input);
+    }
+
+  return vnet_session_enable_disable (vm, enable);
+}
+
+/* Registers the "session" CLI command, handled by session_enable_disable_fn */
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (session_enable_disable_command, static) =
+{
+ .path = "session",
+ .short_help = "session [enable|disable]",
+ .function = session_enable_disable_fn,
+};
+/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/tcp/builtin_server.c b/src/vnet/tcp/builtin_server.c
new file mode 100644
index 0000000..be65642
--- /dev/null
+++ b/src/vnet/tcp/builtin_server.c
@@ -0,0 +1,135 @@
+/*
+* Copyright (c) 2015-2017 Cisco and/or its affiliates.
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at:
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+#include <vnet/session/application.h>
+#include <vnet/session/application_interface.h>
+
+/* Accept callback: logs the call, marks the session ready and returns 0 */
+int
+builtin_session_accept_callback (stream_session_t * s)
+{
+ clib_warning ("called...");
+ s->session_state = SESSION_STATE_READY;
+ return 0;
+}
+
+/* Disconnect callback: only logs the call; the session is not touched here */
+void
+builtin_session_disconnect_callback (stream_session_t * s)
+{
+ clib_warning ("called...");
+}
+
+/* Connected callback: logs the call and always returns -1.
+ * NOTE(review): presumably a stub because this app only binds - confirm. */
+int
+builtin_session_connected_callback (u32 client_index,
+ stream_session_t * s, u8 is_fail)
+{
+ clib_warning ("called...");
+ return -1;
+}
+
+/* Add-segment callback: logs the call, ignores its arguments, returns -1 */
+int
+builtin_add_segment_callback (u32 client_index,
+ const u8 * seg_name, u32 seg_size)
+{
+ clib_warning ("called...");
+ return -1;
+}
+
+/* Redirect-connect callback: logs the call, ignores mp, returns -1 */
+int
+builtin_redirect_connect_callback (u32 client_index, void *mp)
+{
+ clib_warning ("called...");
+ return -1;
+}
+
+/* Rx callback: logs the call and returns 0; no data is read from the
+ * session here */
+int
+builtin_server_rx_callback (stream_session_t * s)
+{
+ clib_warning ("called...");
+ return 0;
+}
+
+/* Callback table handed to the session layer by server_create; every slot
+ * points at one of the builtin_* callbacks defined in this file */
+static session_cb_vft_t builtin_session_cb_vft = {
+ .session_accept_callback = builtin_session_accept_callback,
+ .session_disconnect_callback = builtin_session_disconnect_callback,
+ .session_connected_callback = builtin_session_connected_callback,
+ .add_segment_callback = builtin_add_segment_callback,
+ .redirect_connect_callback = builtin_redirect_connect_callback,
+ .builtin_server_rx_callback = builtin_server_rx_callback
+};
+
+/**
+ * Bind the builtin test server to tcp://0.0.0.0/80.
+ *
+ * Uses the builtin callback table, a 256kB segment and 64kB rx/tx fifos,
+ * and passes a 128 byte local buffer as the segment name.
+ *
+ * @param vm - vlib main (unused)
+ * @return the result of vnet_bind_uri
+ */
+static int
+server_create (vlib_main_t * vm)
+{
+  u64 opts[SESSION_OPTIONS_N_OPTIONS];
+  char seg_name[128];
+  vnet_bind_args_t bind_args;
+
+  memset (opts, 0, sizeof (opts));
+  opts[SESSION_OPTIONS_SEGMENT_SIZE] = 256 << 10;
+  opts[SESSION_OPTIONS_RX_FIFO_SIZE] = 64 << 10;
+  opts[SESSION_OPTIONS_TX_FIFO_SIZE] = 64 << 10;
+
+  memset (&bind_args, 0, sizeof (bind_args));
+  bind_args.uri = "tcp://0.0.0.0/80";
+  bind_args.api_client_index = ~0;
+  bind_args.session_cb_vft = &builtin_session_cb_vft;
+  bind_args.options = opts;
+  bind_args.segment_name = seg_name;
+  bind_args.segment_name_length = ARRAY_LEN (seg_name);
+
+  return vnet_bind_uri (&bind_args);
+}
+
+/**
+ * CLI handler for "test server": creates the builtin server.
+ *
+ * The command takes no arguments; input is left untouched.
+ *
+ * @return 0 on success, otherwise an error carrying server_create's code
+ */
+static clib_error_t *
+server_create_command_fn (vlib_main_t * vm,
+                          unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  int rv = server_create (vm);
+
+  if (rv != 0)
+    return clib_error_return (0, "server_create returned %d", rv);
+
+  return 0;
+}
+
+/* Registers the "test server" CLI command */
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (server_create_command, static) =
+{
+  .path = "test server",
+  .short_help = "test server",
+  .function = server_create_command_fn,
+};
+/* *INDENT-ON* */
+
+/*
+* fd.io coding-style-patch-verification: ON
+*
+* Local Variables:
+* eval: (c-set-style "gnu")
+* End:
+*/
diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c
index 0f9b709..e5feaeb 100644
--- a/src/vnet/tcp/tcp.c
+++ b/src/vnet/tcp/tcp.c
@@ -217,6 +217,7 @@
return 0;
}
+#define PORT_MASK ((1 << 16)- 1)
/**
* Allocate local port and add if successful add entry to local endpoint
* table to mark the pair as used.
@@ -224,7 +225,6 @@
u16
tcp_allocate_local_port (tcp_main_t * tm, ip46_address_t * ip)
{
- u8 unique = 0;
transport_endpoint_t *tep;
u32 time_now, tei;
u16 min = 1024, max = 65535, tries; /* XXX configurable ? */
@@ -235,37 +235,34 @@
/* Start at random point or max */
pool_get (tm->local_endpoints, tep);
clib_memcpy (&tep->ip, ip, sizeof (*ip));
- tep->port = random_u32 (&time_now) << 16;
- tep->port = tep->port < min ? max : tep->port;
/* Search for first free slot */
- while (tries)
+  /* tries is a u16, so "tries >= 0" can never be false; stop at zero so the
+   * search is bounded and the no-free-port path below is reachable. */
+  for (; tries > 0; tries--)
{
- tei = transport_endpoint_lookup (&tm->local_endpoints_table, &tep->ip,
- tep->port);
- if (tei == TRANSPORT_ENDPOINT_INVALID_INDEX)
+ u16 port = 0;
+
+ /* Find a port in the specified range */
+ while (1)
{
- unique = 1;
- break;
+ port = random_u32 (&time_now) & PORT_MASK;
+ if (PREDICT_TRUE (port >= min && port < max))
+ break;
}
- tep->port--;
+ tep->port = port;
- if (tep->port < min)
- tep->port = max;
-
- tries--;
+ /* Look it up */
+ tei = transport_endpoint_lookup (&tm->local_endpoints_table, &tep->ip,
+ tep->port);
+ /* If not found, we're done */
+ if (tei == TRANSPORT_ENDPOINT_INVALID_INDEX)
+ {
+ transport_endpoint_table_add (&tm->local_endpoints_table, tep,
+ tep - tm->local_endpoints);
+ return tep->port;
+ }
}
-
- if (unique)
- {
- transport_endpoint_table_add (&tm->local_endpoints_table, tep,
- tep - tm->local_endpoints);
-
- return tep->port;
- }
-
- /* Failed */
+ /* No free ports */
pool_put (tm->local_endpoints, tep);
return -1;
}
@@ -360,7 +357,10 @@
/* Allocate source port */
lcl_port = tcp_allocate_local_port (tm, &lcl_addr);
if (lcl_port < 1)
- return -1;
+ {
+ clib_warning ("Failed to allocate src port");
+ return -1;
+ }
/*
* Create connection and send SYN
diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h
index 22f00a6..3560509 100644
--- a/src/vnet/tcp/tcp.h
+++ b/src/vnet/tcp/tcp.h
@@ -30,7 +30,8 @@
#define TCP_MAX_OPTION_SPACE 40
#define TCP_DUPACK_THRESHOLD 3
-#define TCP_DEFAULT_RX_FIFO_SIZE 64 << 10
+/* Parenthesized so the shift stays one value inside larger expressions */
+#define TCP_MAX_RX_FIFO_SIZE (2 << 20)
+#define TCP_IW_N_SEGMENTS 10
/** TCP FSM state definitions as per RFC793. */
#define foreach_tcp_fsm_state \
@@ -590,7 +591,6 @@
/**
* Push TCP header to buffer
*
- * @param vm - vlib_main
* @param b - buffer to write the header to
* @param sp_net - source port net order
* @param dp_net - destination port net order
diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c
index daa0683..0a907d0 100644
--- a/src/vnet/tcp/tcp_input.c
+++ b/src/vnet/tcp/tcp_input.c
@@ -711,7 +711,7 @@
if (tcp_opts_sack_permitted (&tc->opt))
tcp_rcv_sacks (tc, vnet_buffer (b)->tcp.ack_number);
- new_snd_wnd = clib_net_to_host_u32 (th->window) << tc->snd_wscale;
+ new_snd_wnd = clib_net_to_host_u16 (th->window) << tc->snd_wscale;
if (tcp_ack_is_dupack (tc, b, new_snd_wnd))
{
@@ -1320,7 +1320,6 @@
/* Parse options */
tcp_options_parse (tcp0, &new_tc0->opt);
- tcp_connection_init_vars (new_tc0);
if (tcp_opts_tstamp (&new_tc0->opt))
{
@@ -1331,11 +1330,13 @@
if (tcp_opts_wscale (&new_tc0->opt))
new_tc0->snd_wscale = new_tc0->opt.wscale;
- new_tc0->snd_wnd = clib_net_to_host_u32 (tcp0->window)
- << new_tc0->snd_wscale;
+ /* No scaling */
+ new_tc0->snd_wnd = clib_net_to_host_u16 (tcp0->window);
new_tc0->snd_wl1 = seq0;
new_tc0->snd_wl2 = ack0;
+ tcp_connection_init_vars (new_tc0);
+
/* SYN-ACK: See if we can switch to ESTABLISHED state */
if (tcp_ack (tcp0))
{
@@ -1345,6 +1346,9 @@
new_tc0->snd_una = ack0;
new_tc0->state = TCP_STATE_ESTABLISHED;
+ /* Make sure las is initialized for the wnd computation */
+ new_tc0->rcv_las = new_tc0->rcv_nxt;
+
/* Notify app that we have connection */
stream_session_connect_notify (&new_tc0->connection, sst, 0);
@@ -1575,7 +1579,7 @@
/* Initialize session variables */
tc0->snd_una = vnet_buffer (b0)->tcp.ack_number;
- tc0->snd_wnd = clib_net_to_host_u32 (tcp0->window)
+ tc0->snd_wnd = clib_net_to_host_u16 (tcp0->window)
<< tc0->opt.wscale;
tc0->snd_wl1 = vnet_buffer (b0)->tcp.seq_number;
tc0->snd_wl2 = vnet_buffer (b0)->tcp.ack_number;
@@ -1899,7 +1903,6 @@
}
tcp_options_parse (th0, &child0->opt);
- tcp_connection_init_vars (child0);
child0->irs = vnet_buffer (b0)->tcp.seq_number;
child0->rcv_nxt = vnet_buffer (b0)->tcp.seq_number + 1;
@@ -1913,6 +1916,16 @@
child0->tsval_recent_age = tcp_time_now ();
}
+ if (tcp_opts_wscale (&child0->opt))
+ child0->snd_wscale = child0->opt.wscale;
+
+ /* No scaling */
+ child0->snd_wnd = clib_net_to_host_u16 (th0->window);
+ child0->snd_wl1 = vnet_buffer (b0)->tcp.seq_number;
+ child0->snd_wl2 = vnet_buffer (b0)->tcp.ack_number;
+
+ tcp_connection_init_vars (child0);
+
/* Reuse buffer to make syn-ack and send */
tcp_make_synack (child0, b0);
next0 = tcp_next_output (is_ip4);
@@ -1923,7 +1936,7 @@
}
- b0->error = error0 ? node->errors[error0] : 0;
+ b0->error = node->errors[error0];
vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
n_left_to_next, bi0, next0);
@@ -2069,7 +2082,6 @@
u32 n_left_from, next_index, *from, *to_next;
u32 my_thread_index = vm->cpu_index;
tcp_main_t *tm = vnet_get_tcp_main ();
- session_manager_main_t *ssm = vnet_get_session_manager_main ();
from = vlib_frame_vector_args (from_frame);
n_left_from = from_frame->n_vectors;
@@ -2109,26 +2121,26 @@
/* lookup session */
tc0 =
- (tcp_connection_t *) stream_session_lookup_transport4 (ssm,
- &ip40->dst_address,
- &ip40->src_address,
- tcp0->dst_port,
- tcp0->src_port,
- SESSION_TYPE_IP4_TCP,
- my_thread_index);
+ (tcp_connection_t *)
+ stream_session_lookup_transport4 (&ip40->dst_address,
+ &ip40->src_address,
+ tcp0->dst_port,
+ tcp0->src_port,
+ SESSION_TYPE_IP4_TCP,
+ my_thread_index);
}
else
{
ip60 = vlib_buffer_get_current (b0);
tcp0 = ip6_next_header (ip60);
tc0 =
- (tcp_connection_t *) stream_session_lookup_transport6 (ssm,
- &ip60->src_address,
- &ip60->dst_address,
- tcp0->src_port,
- tcp0->dst_port,
- SESSION_TYPE_IP6_TCP,
- my_thread_index);
+ (tcp_connection_t *)
+ stream_session_lookup_transport6 (&ip60->src_address,
+ &ip60->dst_address,
+ tcp0->src_port,
+ tcp0->dst_port,
+ SESSION_TYPE_IP6_TCP,
+ my_thread_index);
}
/* Session exists */
diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c
index dbcf1f7..7e431cd 100644
--- a/src/vnet/tcp/tcp_output.c
+++ b/src/vnet/tcp/tcp_output.c
@@ -91,24 +91,30 @@
}
/**
+ * TCP's IW as recommended by RFC6928
+ */
+always_inline u32
+tcp_initial_wnd_unscaled (tcp_connection_t * tc)
+{
+ /* TCP_IW_N_SEGMENTS segments of dummy_mtu bytes each; tc is not consulted.
+  * NOTE(review): RFC6928 expresses the initial window in terms of MSS -
+  * confirm dummy_mtu is the intended stand-in. */
+ return TCP_IW_N_SEGMENTS * dummy_mtu;
+}
+
+/**
* Compute initial window and scale factor. As per RFC1323, window field in
* SYN and SYN-ACK segments is never scaled.
*/
u32
tcp_initial_window_to_advertise (tcp_connection_t * tc)
{
- u32 available_space;
+ u32 max_fifo;
/* Initial wnd for SYN. Fifos are not allocated yet.
- * Use some predefined value */
- if (tc->state != TCP_STATE_SYN_RCVD)
- {
- return TCP_DEFAULT_RX_FIFO_SIZE;
- }
+ * Use some predefined value. For SYN-ACK we still want the
+ * scale to be computed in the same way */
+ max_fifo = TCP_MAX_RX_FIFO_SIZE;
- available_space = stream_session_max_enqueue (&tc->connection);
- tc->rcv_wscale = tcp_window_compute_scale (available_space);
- tc->rcv_wnd = clib_min (available_space, TCP_WND_MAX << tc->rcv_wscale);
+ tc->rcv_wscale = tcp_window_compute_scale (max_fifo);
+ tc->rcv_wnd = tcp_initial_wnd_unscaled (tc);
return clib_min (tc->rcv_wnd, TCP_WND_MAX);
}
@@ -119,23 +125,43 @@
u32
tcp_window_to_advertise (tcp_connection_t * tc, tcp_state_t state)
{
- u32 available_space, wnd, scaled_space;
+ u32 available_space, max_fifo, observed_wnd;
- if (state != TCP_STATE_ESTABLISHED)
+ if (state < TCP_STATE_ESTABLISHED)
return tcp_initial_window_to_advertise (tc);
+ /*
+ * Figure out how much space we have available
+ */
available_space = stream_session_max_enqueue (&tc->connection);
- scaled_space = available_space >> tc->rcv_wscale;
+ max_fifo = stream_session_fifo_size (&tc->connection);
- /* Need to update scale */
- if (PREDICT_FALSE ((scaled_space == 0 && available_space != 0))
- || (scaled_space >= TCP_WND_MAX))
- tc->rcv_wscale = tcp_window_compute_scale (available_space);
+ ASSERT (tc->opt.mss < max_fifo);
- wnd = clib_min (available_space, TCP_WND_MAX << tc->rcv_wscale);
- tc->rcv_wnd = wnd;
+ if (available_space < tc->opt.mss && available_space < max_fifo / 8)
+ available_space = 0;
- return wnd >> tc->rcv_wscale;
+ /*
+ * Use the above and what we know about what we've previously advertised
+ * to compute the new window
+ */
+ observed_wnd = tc->rcv_wnd - (tc->rcv_nxt - tc->rcv_las);
+
+ /* Bad. Thou shalt not shrink */
+ if (available_space < observed_wnd)
+ {
+ if (available_space == 0)
+ clib_warning ("Didn't shrink rcv window despite not having space");
+ }
+
+ tc->rcv_wnd = clib_min (available_space, TCP_WND_MAX << tc->rcv_wscale);
+
+ if (tc->rcv_wnd == 0)
+ {
+ tc->flags |= TCP_CONN_SENT_RCV_WND0;
+ }
+
+ return tc->rcv_wnd >> tc->rcv_wscale;
}
/**
@@ -225,7 +251,7 @@
}
always_inline int
-tcp_make_syn_options (tcp_options_t * opts, u32 initial_wnd)
+tcp_make_syn_options (tcp_options_t * opts, u8 wnd_scale)
{
u8 len = 0;
@@ -234,7 +260,7 @@
len += TCP_OPTION_LEN_MSS;
opts->flags |= TCP_OPTS_FLAG_WSCALE;
- opts->wscale = tcp_window_compute_scale (initial_wnd);
+ opts->wscale = wnd_scale;
len += TCP_OPTION_LEN_WINDOW_SCALE;
opts->flags |= TCP_OPTS_FLAG_TSTAMP;
@@ -327,8 +353,7 @@
case TCP_STATE_SYN_RCVD:
return tcp_make_synack_options (tc, opts);
case TCP_STATE_SYN_SENT:
- return tcp_make_syn_options (opts,
- tcp_initial_window_to_advertise (tc));
+ return tcp_make_syn_options (opts, tc->rcv_wscale);
default:
clib_warning ("Not handled!");
return 0;
@@ -732,7 +757,7 @@
/* Make and write options */
memset (&snd_opts, 0, sizeof (snd_opts));
- tcp_opts_len = tcp_make_syn_options (&snd_opts, initial_wnd);
+ tcp_opts_len = tcp_make_syn_options (&snd_opts, tc->rcv_wscale);
tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t);
th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->iss,
@@ -900,7 +925,7 @@
tcp_reuse_buffer (vm, b);
- ASSERT (tc->state == TCP_STATE_ESTABLISHED);
+ ASSERT (tc->state >= TCP_STATE_ESTABLISHED);
ASSERT (max_bytes != 0);
if (tcp_opts_sack_permitted (&tc->opt))
@@ -929,7 +954,6 @@
max_bytes);
ASSERT (n_bytes != 0);
- tc->snd_nxt += n_bytes;
tcp_push_hdr_i (tc, b, tc->state);
return n_bytes;
@@ -967,7 +991,7 @@
tcp_get_free_buffer_index (tm, &bi);
b = vlib_get_buffer (vm, bi);
- if (tc->state == TCP_STATE_ESTABLISHED)
+ if (tc->state >= TCP_STATE_ESTABLISHED)
{
tcp_fastrecovery_off (tc);
@@ -977,6 +1001,12 @@
/* Figure out what and how many bytes we can send */
snd_space = tcp_available_snd_space (tc);
max_bytes = clib_min (tc->snd_mss, snd_space);
+
+ if (max_bytes == 0)
+ {
+ clib_warning ("no wnd to retransmit");
+ return;
+ }
tcp_prepare_retransmit_segment (tc, b, max_bytes);
tc->rtx_bytes += max_bytes;
@@ -996,7 +1026,11 @@
tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
vlib_buffer_make_headroom (b, MAX_HDRS_LEN);
+
tcp_push_hdr_i (tc, b, tc->state);
+
+ /* Account for the SYN */
+ tc->snd_nxt += 1;
}
if (!is_syn)
@@ -1163,8 +1197,8 @@
if (PREDICT_FALSE
(vnet_buffer (b0)->tcp.flags & TCP_BUF_FLAG_DUPACK))
{
+ ASSERT (tc0->snt_dupacks > 0);
tc0->snt_dupacks--;
- ASSERT (tc0->snt_dupacks >= 0);
if (!tcp_session_has_ooo_data (tc0))
{
error0 = TCP_ERROR_FILTERED_DUPACKS;