VPP-659 TCP improvements
- builtin test echo server
- fix SYN-ACK retransmit canceling
- avoid sending spurious ACK if in LAST_ACK
- improved client dummy test app
- renamed tx fifo dequeuing and sending functions to avoid confusion
- improved RST handling
Change-Id: Ia14aad3df319540dcf6e6a4e18a9f8d423a4b83b
Signed-off-by: Florin Coras <fcoras@cisco.com>
Signed-off-by: Dave Barach <dave@barachs.net>
diff --git a/src/vnet/session/application.c b/src/vnet/session/application.c
index a542eeb..513e5fa 100644
--- a/src/vnet/session/application.c
+++ b/src/vnet/session/application.c
@@ -92,6 +92,19 @@
pool_put (app_pool, app);
}
+static void
+application_verify_cb_fns (application_type_t type, session_cb_vft_t * cb_fns)
+{
+ if (type == APP_SERVER && cb_fns->session_accept_callback == 0)
+ clib_warning ("No accept callback function provided");
+ if (type == APP_CLIENT && cb_fns->session_connected_callback == 0)
+ clib_warning ("No session connected callback function provided");
+ if (cb_fns->session_disconnect_callback == 0)
+ clib_warning ("No session disconnect callback function provided");
+ if (cb_fns->session_reset_callback == 0)
+ clib_warning ("No session reset callback function provided");
+}
+
application_t *
application_new (application_type_t type, session_type_t sst,
u32 api_client_index, u32 flags, session_cb_vft_t * cb_fns)
@@ -142,6 +155,9 @@
app->flags = flags;
app->cb_fns = *cb_fns;
+ /* Check that the obvious things are properly set up */
+ application_verify_cb_fns (type, cb_fns);
+
/* Add app to lookup by api_client_index table */
application_table_add (app);
diff --git a/src/vnet/session/application.h b/src/vnet/session/application.h
index 480828f..a60a8b8 100644
--- a/src/vnet/session/application.h
+++ b/src/vnet/session/application.h
@@ -45,7 +45,8 @@
void (*session_reset_callback) (stream_session_t * s);
/* Direct RX callback, for built-in servers */
- int (*builtin_server_rx_callback) (stream_session_t * session);
+ int (*builtin_server_rx_callback) (stream_session_t * session,
+ session_fifo_event_t * ep);
/* Redirect connection to local server */
int (*redirect_connect_callback) (u32 api_client_index, void *mp);
diff --git a/src/vnet/session/application_interface.c b/src/vnet/session/application_interface.c
index 6ddfb70..4b30bd8 100644
--- a/src/vnet/session/application_interface.c
+++ b/src/vnet/session/application_interface.c
@@ -98,7 +98,7 @@
if (application_lookup (api_client_index))
{
- clib_warning ("Only one bind supported for now");
+ clib_warning ("Only one connection supported for now");
return VNET_API_ERROR_ADDRESS_IN_USE;
}
@@ -364,8 +364,7 @@
}
int
-vnet_disconnect_session (u32 client_index, u32 session_index,
- u32 thread_index)
+vnet_disconnect_session (u32 session_index, u32 thread_index)
{
stream_session_t *session;
diff --git a/src/vnet/session/application_interface.h b/src/vnet/session/application_interface.h
index 8d87c06..a5f2b9a 100644
--- a/src/vnet/session/application_interface.h
+++ b/src/vnet/session/application_interface.h
@@ -112,9 +112,7 @@
int vnet_bind_uri (vnet_bind_args_t *);
int vnet_unbind_uri (char *uri, u32 api_client_index);
int vnet_connect_uri (vnet_connect_args_t * a);
-int
-vnet_disconnect_session (u32 client_index, u32 session_index,
- u32 thread_index);
+int vnet_disconnect_session (u32 session_index, u32 thread_index);
int vnet_bind (vnet_bind_args_t * a);
int vnet_connect (vnet_connect_args_t * a);
diff --git a/src/vnet/session/node.c b/src/vnet/session/node.c
index 399077d..7fd7e0b 100644
--- a/src/vnet/session/node.c
+++ b/src/vnet/session/node.c
@@ -78,10 +78,11 @@
};
always_inline int
-session_fifo_rx_i (vlib_main_t * vm, vlib_node_runtime_t * node,
- session_manager_main_t * smm, session_fifo_event_t * e0,
- stream_session_t * s0, u32 thread_index, int *n_tx_packets,
- u8 peek_data)
+session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node,
+ session_manager_main_t * smm,
+ session_fifo_event_t * e0,
+ stream_session_t * s0, u32 thread_index,
+ int *n_tx_packets, u8 peek_data)
{
u32 n_trace = vlib_get_trace_count (vm, node);
u32 left_to_snd0, max_len_to_snd0, len_to_deq0, n_bufs, snd_space0;
@@ -120,7 +121,7 @@
if (peek_data)
{
/* Offset in rx fifo from where to peek data */
- rx_offset = transport_vft->rx_fifo_offset (tc0);
+ rx_offset = transport_vft->tx_fifo_offset (tc0);
}
/* TODO check if transport is willing to send len_to_snd0
@@ -194,25 +195,27 @@
t0->server_thread_index = s0->thread_index;
}
+ len_to_deq0 = (left_to_snd0 < snd_mss0) ? left_to_snd0 : snd_mss0;
+
/* *INDENT-OFF* */
if (1)
{
ELOG_TYPE_DECLARE (e) = {
- .format = "evt-dequeue: id %d length %d",
- .format_args = "i4i4",
+ .format = "evt-deq: id %d len %d rd %d wnd %d",
+ .format_args = "i4i4i4i4",
};
struct
{
- u32 data[2];
+ u32 data[4];
} *ed;
ed = ELOG_DATA (&vm->elog_main, e);
ed->data[0] = e0->event_id;
ed->data[1] = e0->enqueue_length;
+ ed->data[2] = len_to_deq0;
+ ed->data[3] = left_to_snd0;
}
/* *INDENT-ON* */
- len_to_deq0 = (left_to_snd0 < snd_mss0) ? left_to_snd0 : snd_mss0;
-
/* Make room for headers */
data0 = vlib_buffer_make_headroom (b0, MAX_HDRS_LEN);
@@ -276,22 +279,25 @@
}
int
-session_fifo_rx_peek (vlib_main_t * vm, vlib_node_runtime_t * node,
- session_manager_main_t * smm, session_fifo_event_t * e0,
- stream_session_t * s0, u32 thread_index, int *n_tx_pkts)
+session_tx_fifo_peek_and_snd (vlib_main_t * vm, vlib_node_runtime_t * node,
+ session_manager_main_t * smm,
+ session_fifo_event_t * e0,
+ stream_session_t * s0, u32 thread_index,
+ int *n_tx_pkts)
{
- return session_fifo_rx_i (vm, node, smm, e0, s0, thread_index, n_tx_pkts,
- 1);
+ return session_tx_fifo_read_and_snd_i (vm, node, smm, e0, s0, thread_index,
+ n_tx_pkts, 1);
}
int
-session_fifo_rx_dequeue (vlib_main_t * vm, vlib_node_runtime_t * node,
- session_manager_main_t * smm,
- session_fifo_event_t * e0, stream_session_t * s0,
- u32 thread_index, int *n_tx_pkts)
+session_tx_fifo_dequeue_and_snd (vlib_main_t * vm, vlib_node_runtime_t * node,
+ session_manager_main_t * smm,
+ session_fifo_event_t * e0,
+ stream_session_t * s0, u32 thread_index,
+ int *n_tx_pkts)
{
- return session_fifo_rx_i (vm, node, smm, e0, s0, thread_index, n_tx_pkts,
- 0);
+ return session_tx_fifo_read_and_snd_i (vm, node, smm, e0, s0, thread_index,
+ n_tx_pkts, 0);
}
static uword
@@ -369,12 +375,16 @@
s0 = stream_session_get_if_valid (server_session_index0,
my_thread_index);
- if (!s0)
+
+ if (CLIB_DEBUG && !s0)
{
- clib_warning ("It's dead Jim!");
+ clib_warning ("It's dead, Jim!");
continue;
}
+ if (PREDICT_FALSE (s0->session_state == SESSION_STATE_CLOSED))
+ continue;
+
ASSERT (s0->thread_index == my_thread_index);
switch (e0->event_type)
diff --git a/src/vnet/session/session.c b/src/vnet/session/session.c
index b5a168c..8867e79 100644
--- a/src/vnet/session/session.c
+++ b/src/vnet/session/session.c
@@ -373,7 +373,7 @@
/* Finally, try half-open connections */
rv = clib_bihash_search_inline_48_8 (&smm->v6_half_open_hash, &kv6);
if (rv == 0)
- return tp_vfts[s->session_type].get_half_open (kv6.value & 0xFFFFFFFF);
+ return tp_vfts[proto].get_half_open (kv6.value & 0xFFFFFFFF);
return 0;
}
@@ -617,7 +617,10 @@
goto again;
}
else
- return SESSION_ERROR_NO_SPACE;
+ {
+ clib_warning ("No space to allocate fifos!");
+ return SESSION_ERROR_NO_SPACE;
+ }
}
return 0;
}
@@ -806,6 +809,10 @@
evt.event_id = serial_number++;
evt.enqueue_length = svm_fifo_max_dequeue (s->server_rx_fifo);
+ /* Built-in server? Hand event to the callback... */
+ if (app->cb_fns.builtin_server_rx_callback)
+ return app->cb_fns.builtin_server_rx_callback (s, &evt);
+
/* Add event to server's event queue */
q = app->event_queue;
@@ -1043,13 +1050,9 @@
session_manager_main_t *smm = vnet_get_session_manager_main ();
svm_fifo_segment_private_t *fifo_segment;
application_t *app;
- int rv;
- /* delete from the main lookup table */
- rv = stream_session_table_del (smm, s);
-
- if (rv)
- clib_warning ("hash delete error, rv %d", rv);
+ /* Delete from the main lookup table. */
+ stream_session_table_del (smm, s);
/* Cleanup fifo segments */
fifo_segment = svm_fifo_get_segment (s->server_segment_index);
@@ -1197,18 +1200,30 @@
void
stream_session_disconnect (stream_session_t * s)
{
- tp_vfts[s->session_type].close (s->connection_index, s->thread_index);
s->session_state = SESSION_STATE_CLOSED;
+ tp_vfts[s->session_type].close (s->connection_index, s->thread_index);
}
/**
* Cleanup transport and session state.
+ *
+ * Notify transport of the cleanup, wait for a delete notify to actually
+ * remove the session state.
*/
void
stream_session_cleanup (stream_session_t * s)
{
+ session_manager_main_t *smm = &session_manager_main;
+ int rv;
+
+ s->session_state = SESSION_STATE_CLOSED;
+
+ /* Delete from the main lookup table to avoid more enqueues */
+ rv = stream_session_table_del (smm, s);
+ if (rv)
+ clib_warning ("hash delete error, rv %d", rv);
+
tp_vfts[s->session_type].cleanup (s->connection_index, s->thread_index);
- stream_session_delete (s);
}
void
@@ -1221,7 +1236,8 @@
/* If an offset function is provided, then peek instead of dequeue */
smm->session_rx_fns[type] =
- (vft->rx_fifo_offset) ? session_fifo_rx_peek : session_fifo_rx_dequeue;
+ (vft->tx_fifo_offset) ? session_tx_fifo_peek_and_snd :
+ session_tx_fifo_dequeue_and_snd;
}
transport_proto_vft_t *
diff --git a/src/vnet/session/session.h b/src/vnet/session/session.h
index 46e5ce2..1b712e2 100644
--- a/src/vnet/session/session.h
+++ b/src/vnet/session/session.h
@@ -102,33 +102,33 @@
typedef struct _stream_session_t
{
+ /** fifo pointers. Once allocated, these do not move */
+ svm_fifo_t *server_rx_fifo;
+ svm_fifo_t *server_tx_fifo;
+
/** Type */
u8 session_type;
/** State */
u8 session_state;
+ u8 thread_index;
+
+ /** used during unbind processing */
+ u8 is_deleted;
+
+ /** To avoid n**2 "one event per frame" check */
+ u8 enqueue_epoch;
+
/** Session index in per_thread pool */
u32 session_index;
/** Transport specific */
u32 connection_index;
- u8 thread_index;
-
/** Application specific */
u32 pid;
- /** fifo pointers. Once allocated, these do not move */
- svm_fifo_t *server_rx_fifo;
- svm_fifo_t *server_tx_fifo;
-
- /** To avoid n**2 "one event per frame" check */
- u8 enqueue_epoch;
-
- /** used during unbind processing */
- u8 is_deleted;
-
/** stream server pool index */
u32 app_index;
@@ -162,8 +162,8 @@
session_fifo_event_t * e0, stream_session_t * s0,
u32 thread_index, int *n_tx_pkts);
-extern session_fifo_rx_fn session_fifo_rx_peek;
-extern session_fifo_rx_fn session_fifo_rx_dequeue;
+extern session_fifo_rx_fn session_tx_fifo_peek_and_snd;
+extern session_fifo_rx_fn session_tx_fifo_dequeue_and_snd;
struct _session_manager_main
{
diff --git a/src/vnet/session/session_api.c b/src/vnet/session/session_api.c
index 8852fc6..9c38428 100644
--- a/src/vnet/session/session_api.c
+++ b/src/vnet/session/session_api.c
@@ -130,6 +130,27 @@
vl_msg_api_send_shmem (q, (u8 *) & mp);
}
+static void
+send_session_reset_uri_callback (stream_session_t * s)
+{
+ vl_api_reset_session_t *mp;
+ unix_shared_memory_queue_t *q;
+ application_t *app = application_get (s->app_index);
+
+ q = vl_api_client_index_to_input_queue (app->api_client_index);
+
+ if (!q)
+ return;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_RESET_SESSION);
+
+ mp->session_thread_index = s->thread_index;
+ mp->session_index = s->session_index;
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
static int
send_session_connected_uri_callback (u32 api_client_index,
stream_session_t * s, u8 is_fail)
@@ -347,6 +368,26 @@
vl_msg_api_send_shmem (q, (u8 *) & mp);
}
+static void
+send_session_reset_callback (stream_session_t * s)
+{
+ vl_api_reset_sock_t *mp;
+ unix_shared_memory_queue_t *q;
+ application_t *app = application_get (s->app_index);
+
+ q = vl_api_client_index_to_input_queue (app->api_client_index);
+
+ if (!q)
+ return;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_RESET_SOCK);
+
+ mp->handle = make_session_handle (s);
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
/**
* Redirect a connect_uri message to the indicated server.
* Only sent if the server has bound the related port with
@@ -414,6 +455,7 @@
.session_accept_callback = send_session_accept_uri_callback,
.session_disconnect_callback = send_session_disconnect_uri_callback,
.session_connected_callback = send_session_connected_uri_callback,
+ .session_reset_callback = send_session_reset_uri_callback,
.add_segment_callback = send_add_segment_callback,
.redirect_connect_callback = redirect_connect_uri_callback
};
@@ -422,6 +464,7 @@
.session_accept_callback = send_session_accept_callback,
.session_disconnect_callback = send_session_disconnect_callback,
.session_connected_callback = send_session_connected_callback,
+ .session_reset_callback = send_session_reset_callback,
.add_segment_callback = send_add_segment_callback,
.redirect_connect_callback = redirect_connect_callback
};
@@ -548,8 +591,8 @@
rv = api_session_not_valid (mp->session_index, mp->session_thread_index);
if (!rv)
- rv = vnet_disconnect_session (mp->client_index, mp->session_index,
- mp->session_thread_index);
+ rv =
+ vnet_disconnect_session (mp->session_index, mp->session_thread_index);
REPLY_MACRO (VL_API_DISCONNECT_SESSION_REPLY);
}
@@ -572,8 +615,7 @@
}
/* Disconnect has been confirmed. Confirm close to transport */
- vnet_disconnect_session (mp->client_index, mp->session_index,
- mp->session_thread_index);
+ vnet_disconnect_session (mp->session_index, mp->session_thread_index);
}
static void
diff --git a/src/vnet/session/transport.h b/src/vnet/session/transport.h
index f486dbb..0da3026 100644
--- a/src/vnet/session/transport.h
+++ b/src/vnet/session/transport.h
@@ -74,7 +74,7 @@
u32 (*push_header) (transport_connection_t * tconn, vlib_buffer_t * b);
u16 (*send_mss) (transport_connection_t * tc);
u32 (*send_space) (transport_connection_t * tc);
- u32 (*rx_fifo_offset) (transport_connection_t * tc);
+ u32 (*tx_fifo_offset) (transport_connection_t * tc);
/*
* Connection retrieval
@@ -92,39 +92,39 @@
} transport_proto_vft_t;
+/* *INDENT-OFF* */
/* 16 octets */
-typedef CLIB_PACKED (struct
- {
- union
- {
- struct
- {
- ip4_address_t src; ip4_address_t dst;
- u16 src_port;
- u16 dst_port;
- /* align by making this 4 octets even though its a 1-bit field
- * NOTE: avoid key overlap with other transports that use 5 tuples for
- * session identification.
- */
- u32 proto;
- };
- u64 as_u64[2];
- };
- }) v4_connection_key_t;
+typedef CLIB_PACKED (struct {
+ union
+ {
+ struct
+ {
+ ip4_address_t src; ip4_address_t dst;
+ u16 src_port;
+ u16 dst_port;
+ /* align by making this 4 octets even though its a 1-bit field
+ * NOTE: avoid key overlap with other transports that use 5 tuples for
+ * session identification.
+ */
+ u32 proto;
+ };
+ u64 as_u64[2];
+ };
+}) v4_connection_key_t;
-typedef CLIB_PACKED (struct
- {
- union
- {
- struct
- {
- /* 48 octets */
- ip6_address_t src; ip6_address_t dst;
- u16 src_port;
- u16 dst_port; u32 proto; u8 unused_for_now[8];
- }; u64 as_u64[6];
- };
- }) v6_connection_key_t;
+typedef CLIB_PACKED (struct {
+ union
+ {
+ struct
+ {
+ /* 48 octets */
+ ip6_address_t src; ip6_address_t dst;
+ u16 src_port;
+ u16 dst_port; u32 proto; u8 unused_for_now[8];
+ }; u64 as_u64[6];
+ };
+}) v6_connection_key_t;
+/* *INDENT-ON* */
typedef clib_bihash_kv_16_8_t session_kv4_t;
typedef clib_bihash_kv_48_8_t session_kv6_t;
diff --git a/src/vnet/tcp/builtin_server.c b/src/vnet/tcp/builtin_server.c
index be65642..9b697a0 100644
--- a/src/vnet/tcp/builtin_server.c
+++ b/src/vnet/tcp/builtin_server.c
@@ -18,10 +18,24 @@
#include <vnet/session/application.h>
#include <vnet/session/application_interface.h>
+typedef struct
+{
+ u8 *rx_buf;
+ unix_shared_memory_queue_t **vpp_queue;
+ vlib_main_t *vlib_main;
+} builtin_server_main_t;
+
+builtin_server_main_t builtin_server_main;
+
+
int
builtin_session_accept_callback (stream_session_t * s)
{
+ builtin_server_main_t *bsm = &builtin_server_main;
clib_warning ("called...");
+
+ bsm->vpp_queue[s->thread_index] =
+ session_manager_get_vpp_event_queue (s->thread_index);
s->session_state = SESSION_STATE_READY;
return 0;
}
@@ -30,8 +44,19 @@
builtin_session_disconnect_callback (stream_session_t * s)
{
clib_warning ("called...");
+
+ vnet_disconnect_session (s->session_index, s->thread_index);
}
+void
+builtin_session_reset_callback (stream_session_t * s)
+{
+ clib_warning ("called.. ");
+
+ stream_session_cleanup (s);
+}
+
+
int
builtin_session_connected_callback (u32 client_index,
stream_session_t * s, u8 is_fail)
@@ -56,9 +81,57 @@
}
int
-builtin_server_rx_callback (stream_session_t * s)
+builtin_server_rx_callback (stream_session_t * s, session_fifo_event_t * e)
{
- clib_warning ("called...");
+ int n_written, bytes, total_copy_bytes;
+ int n_read;
+ svm_fifo_t *tx_fifo;
+ builtin_server_main_t *bsm = &builtin_server_main;
+ session_fifo_event_t evt;
+ static int serial_number = 0;
+
+ bytes = e->enqueue_length;
+ if (PREDICT_FALSE (bytes <= 0))
+ {
+ clib_warning ("bizarre rx callback: bytes %d", bytes);
+ return 0;
+ }
+
+ tx_fifo = s->server_tx_fifo;
+
+ /* Number of bytes we're going to copy */
+ total_copy_bytes = (bytes < (tx_fifo->nitems - tx_fifo->cursize)) ? bytes :
+ tx_fifo->nitems - tx_fifo->cursize;
+
+ if (PREDICT_FALSE (total_copy_bytes <= 0))
+ {
+ clib_warning ("no space in tx fifo, event had %d bytes", bytes);
+ return 0;
+ }
+
+ vec_validate (bsm->rx_buf, total_copy_bytes - 1);
+ _vec_len (bsm->rx_buf) = total_copy_bytes;
+
+ n_read = svm_fifo_dequeue_nowait (s->server_rx_fifo, 0, total_copy_bytes,
+ bsm->rx_buf);
+ ASSERT (n_read == total_copy_bytes);
+
+ /*
+ * Echo back
+ */
+
+ n_written = svm_fifo_enqueue_nowait (tx_fifo, 0, n_read, bsm->rx_buf);
+ ASSERT (n_written == total_copy_bytes);
+
+ /* Fabricate TX event, send to vpp */
+ evt.fifo = tx_fifo;
+ evt.event_type = FIFO_EVENT_SERVER_TX;
+ evt.enqueue_length = total_copy_bytes;
+ evt.event_id = serial_number++;
+
+ unix_shared_memory_queue_add (bsm->vpp_queue[s->thread_index], (u8 *) & evt,
+ 0 /* do wait for mutex */ );
+
return 0;
}
@@ -68,7 +141,8 @@
.session_connected_callback = builtin_session_connected_callback,
.add_segment_callback = builtin_add_segment_callback,
.redirect_connect_callback = builtin_redirect_connect_callback,
- .builtin_server_rx_callback = builtin_server_rx_callback
+ .builtin_server_rx_callback = builtin_server_rx_callback,
+ .session_reset_callback = builtin_session_reset_callback
};
static int
@@ -77,6 +151,11 @@
vnet_bind_args_t _a, *a = &_a;
u64 options[SESSION_OPTIONS_N_OPTIONS];
char segment_name[128];
+ u32 num_threads;
+ vlib_thread_main_t *vtm = vlib_get_thread_main ();
+
+ num_threads = 1 /* main thread */ + vtm->n_threads;
+ vec_validate (builtin_server_main.vpp_queue, num_threads - 1);
memset (a, 0, sizeof (*a));
memset (options, 0, sizeof (options));
@@ -110,6 +189,7 @@
}
#endif
+ vnet_session_enable_disable (vm, 1 /* turn on TCP, etc. */ );
rv = server_create (vm);
switch (rv)
{
@@ -121,10 +201,14 @@
return 0;
}
+/* *INDENT-OFF* */
VLIB_CLI_COMMAND (server_create_command, static) =
{
-.path = "test server",.short_help = "test server",.function =
- server_create_command_fn,};
+ .path = "test server",
+ .short_help = "test server",
+ .function = server_create_command_fn,
+};
+/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c
index 69433e2..d2df5c3 100644
--- a/src/vnet/tcp/tcp.c
+++ b/src/vnet/tcp/tcp.c
@@ -139,6 +139,20 @@
tcp_connection_cleanup (tc);
}
+/** Notify session that connection has been reset.
+ *
+ * Switch state to closed and wait for session to call cleanup.
+ */
+void
+tcp_connection_reset (tcp_connection_t * tc)
+{
+ if (tc->state == TCP_STATE_CLOSED)
+ return;
+
+ tc->state = TCP_STATE_CLOSED;
+ stream_session_reset_notify (&tc->connection);
+}
+
/**
* Begin connection closing procedure.
*
@@ -149,6 +163,8 @@
* calls cleanup.
* 2) TIME_WAIT (active close) whereby after 2MSL the 2MSL timer triggers
* and cleanup is called.
+ *
+ * N.B. Half-close connections are not supported
*/
void
tcp_connection_close (tcp_connection_t * tc)
@@ -166,9 +182,9 @@
else if (tc->state == TCP_STATE_CLOSE_WAIT)
tc->state = TCP_STATE_LAST_ACK;
- /* Half-close connections are not supported XXX */
-
- if (tc->state == TCP_STATE_CLOSED)
+ /* If in CLOSED and WAITCLOSE timer is not set, delete connection now */
+ if (tc->timers[TCP_TIMER_WAITCLOSE] == TCP_TIMER_HANDLE_INVALID
+ && tc->state == TCP_STATE_CLOSED)
tcp_connection_del (tc);
}
@@ -185,7 +201,10 @@
{
tcp_connection_t *tc;
tc = tcp_connection_get (conn_index, thread_index);
- tcp_connection_cleanup (tc);
+
+ /* Wait for the session tx events to clear */
+ tc->state = TCP_STATE_CLOSED;
+ tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME);
}
void *
@@ -227,7 +246,8 @@
{
transport_endpoint_t *tep;
u32 time_now, tei;
- u16 min = 1024, max = 65535, tries; /* XXX configurable ? */
+ u16 min = 1024, max = 65535; /* XXX configurable ? */
+ int tries;
tries = max - min;
time_now = tcp_time_now ();
@@ -505,10 +525,10 @@
}
u32
-tcp_session_rx_fifo_offset (transport_connection_t * trans_conn)
+tcp_session_tx_fifo_offset (transport_connection_t * trans_conn)
{
tcp_connection_t *tc = (tcp_connection_t *) trans_conn;
- return (tc->snd_una_max - tc->snd_una);
+ return (tc->snd_nxt - tc->snd_una);
}
/* *INDENT-OFF* */
@@ -524,7 +544,7 @@
.cleanup = tcp_session_cleanup,
.send_mss = tcp_session_send_mss,
.send_space = tcp_session_send_space,
- .rx_fifo_offset = tcp_session_rx_fifo_offset,
+ .tx_fifo_offset = tcp_session_tx_fifo_offset,
.format_connection = format_tcp_session_ip4,
.format_listener = format_tcp_listener_session_ip4,
.format_half_open = format_tcp_half_open_session_ip4
@@ -542,7 +562,7 @@
.cleanup = tcp_session_cleanup,
.send_mss = tcp_session_send_mss,
.send_space = tcp_session_send_space,
- .rx_fifo_offset = tcp_session_rx_fifo_offset,
+ .tx_fifo_offset = tcp_session_tx_fifo_offset,
.format_connection = format_tcp_session_ip6,
.format_listener = format_tcp_listener_session_ip6,
.format_half_open = format_tcp_half_open_session_ip6
@@ -579,13 +599,32 @@
}
void
-tcp_timer_2msl_handler (u32 conn_index)
+tcp_timer_waitclose_handler (u32 conn_index)
{
u32 cpu_index = os_get_cpu_number ();
tcp_connection_t *tc;
tc = tcp_connection_get (conn_index, cpu_index);
- tc->timers[TCP_TIMER_2MSL] = TCP_TIMER_HANDLE_INVALID;
+ tc->timers[TCP_TIMER_WAITCLOSE] = TCP_TIMER_HANDLE_INVALID;
+
+ /* Session didn't come back with a close(). Send FIN either way
+ * and switch to LAST_ACK. */
+ if (tc->state == TCP_STATE_CLOSE_WAIT)
+ {
+ if (tc->flags & TCP_CONN_FINSNT)
+ {
+ clib_warning ("FIN was sent and still in CLOSE WAIT. Weird!");
+ }
+
+ tcp_send_fin (tc);
+ tc->state = TCP_STATE_LAST_ACK;
+
+ /* Make sure we don't wait in LAST ACK forever */
+ tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME);
+
+ /* Don't delete the connection yet */
+ return;
+ }
tcp_connection_del (tc);
}
@@ -597,7 +636,7 @@
tcp_timer_delack_handler,
0,
tcp_timer_keep_handler,
- tcp_timer_2msl_handler,
+ tcp_timer_waitclose_handler,
tcp_timer_retransmit_syn_handler,
tcp_timer_establish_handler
};
diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h
index 7d44343..3b3d8fc 100644
--- a/src/vnet/tcp/tcp.h
+++ b/src/vnet/tcp/tcp.h
@@ -63,8 +63,8 @@
_(DELACK, "DELAYED ACK") \
_(PERSIST, "PERSIST") \
_(KEEP, "KEEP") \
- _(2MSL, "2MSL") \
- _(RETRANSMIT_SYN, "RETRANSMIT_SYN") \
+ _(WAITCLOSE, "WAIT CLOSE") \
+ _(RETRANSMIT_SYN, "RETRANSMIT SYN") \
_(ESTABLISH, "ESTABLISH")
typedef enum _tcp_timers
@@ -89,6 +89,8 @@
#define TCP_DELACK_TIME 1 /* 0.1s */
#define TCP_ESTABLISH_TIME 750 /* 75s */
#define TCP_2MSL_TIME 300 /* 30s */
+#define TCP_CLOSEWAIT_TIME 1 /* 0.1s */
+#define TCP_CLEANUP_TIME 5 /* 0.5s Time to wait before cleanup */
#define TCP_RTO_MAX 60 * THZ /* Min max RTO (60s) as per RFC6298 */
#define TCP_RTT_MAX 30 * THZ /* 30s (probably too much) */
@@ -102,6 +104,7 @@
_(DELACK, "Delay ACK") \
_(SNDACK, "Send ACK") \
_(BURSTACK, "Burst ACK set") \
+ _(FINSNT, "FIN sent") \
_(SENT_RCV_WND0, "Sent 0 receive window") \
_(RECOVERY, "Recovery on") \
_(FAST_RECOVERY, "Fast Recovery on")
@@ -331,6 +334,8 @@
always_inline tcp_connection_t *
tcp_connection_get (u32 conn_index, u32 thread_index)
{
+ if (pool_is_free_index (tcp_main.connections[thread_index], conn_index))
+ return 0;
return pool_elt_at_index (tcp_main.connections[thread_index], conn_index);
}
@@ -347,6 +352,7 @@
void tcp_connection_close (tcp_connection_t * tc);
void tcp_connection_cleanup (tcp_connection_t * tc);
void tcp_connection_del (tcp_connection_t * tc);
+void tcp_connection_reset (tcp_connection_t * tc);
always_inline tcp_connection_t *
tcp_listener_get (u32 tli)
@@ -361,7 +367,7 @@
}
void tcp_make_ack (tcp_connection_t * ts, vlib_buffer_t * b);
-void tcp_make_finack (tcp_connection_t * tc, vlib_buffer_t * b);
+void tcp_make_fin (tcp_connection_t * tc, vlib_buffer_t * b);
void tcp_make_synack (tcp_connection_t * ts, vlib_buffer_t * b);
void tcp_send_reset (vlib_buffer_t * pkt, u8 is_ip4);
void tcp_send_syn (tcp_connection_t * tc);
@@ -467,7 +473,7 @@
}
always_inline void
-tcp_retransmit_timer_set (tcp_main_t * tm, tcp_connection_t * tc)
+tcp_retransmit_timer_set (tcp_connection_t * tc)
{
/* XXX Switch to faster TW */
tcp_timer_set (tc, TCP_TIMER_RETRANSMIT,
diff --git a/src/vnet/tcp/tcp_error.def b/src/vnet/tcp/tcp_error.def
index cff5ec1..2dbdd9b 100644
--- a/src/vnet/tcp/tcp_error.def
+++ b/src/vnet/tcp/tcp_error.def
@@ -17,13 +17,13 @@
tcp_error (NO_LISTENER, "no listener for dst port")
tcp_error (LOOKUP_DROPS, "lookup drops")
tcp_error (DISPATCH, "Dispatch error")
-tcp_error (ENQUEUED, "Packets pushed into rx fifo")
+tcp_error (ENQUEUED, "Packets pushed into rx fifo")
tcp_error (PURE_ACK, "Pure acks")
tcp_error (SYNS_RCVD, "SYNs received")
tcp_error (SYN_ACKS_RCVD, "SYN-ACKs received")
-tcp_error (NOT_READY, "Session not ready for packets")
-tcp_error (FIFO_FULL, "Packets dropped for lack of rx fifo space")
-tcp_error (EVENT_FIFO_FULL, "Events not sent for lack of event fifo space")
+tcp_error (NOT_READY, "Session not ready for packets")
+tcp_error (FIFO_FULL, "Packets dropped for lack of rx fifo space")
+tcp_error (EVENT_FIFO_FULL, "Events not sent for lack of event fifo space")
tcp_error (API_QUEUE_FULL, "Sessions not created for lack of API queue space")
tcp_error (CREATE_SESSION_FAIL, "Sessions couldn't be allocated")
tcp_error (SEGMENT_INVALID, "Invalid segment")
@@ -32,4 +32,5 @@
tcp_error (ACK_OLD, "Old ACK")
tcp_error (PKTS_SENT, "Packets sent")
tcp_error (FILTERED_DUPACKS, "Filtered duplicate ACKs")
-tcp_error (RST_SENT, "Resets sent")
\ No newline at end of file
+tcp_error (RST_SENT, "Resets sent")
+tcp_error (INVALID_CONNECTION, "Invalid connection")
diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c
index 0a907d0..f19fbf8 100644
--- a/src/vnet/tcp/tcp_input.c
+++ b/src/vnet/tcp/tcp_input.c
@@ -274,10 +274,7 @@
/* 2nd: check the RST bit */
if (tcp_rst (th0))
{
- /* Notify session that connection has been reset. Switch
- * state to closed and await for session to do the cleanup. */
- stream_session_reset_notify (&tc0->connection);
- tc0->state = TCP_STATE_CLOSED;
+ tcp_connection_reset (tc0);
return -1;
}
@@ -1023,6 +1020,12 @@
tc0 = tcp_connection_get (vnet_buffer (b0)->tcp.connection_index,
my_thread_index);
+ if (PREDICT_FALSE (tc0 == 0))
+ {
+ error0 = TCP_ERROR_INVALID_CONNECTION;
+ goto drop;
+ }
+
/* Checksum computed by ipx_local no need to compute again */
if (is_ip4)
@@ -1072,12 +1075,12 @@
/* 8: check the FIN bit */
if (tcp_fin (th0))
{
- /* Send ACK and enter CLOSE-WAIT */
- tcp_make_ack (tc0, b0);
- tcp_connection_force_ack (tc0, b0);
- next0 = tcp_next_output (tc0->c_is_ip4);
+ /* Enter CLOSE-WAIT and notify session. Don't send ACK, instead
+ * wait for session to call close. To avoid lingering
+ * in CLOSE-WAIT, set timer (reuse WAITCLOSE). */
tc0->state = TCP_STATE_CLOSE_WAIT;
stream_session_disconnect_notify (&tc0->connection);
+ tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, TCP_CLOSEWAIT_TIME);
}
drop:
@@ -1468,7 +1471,7 @@
VLIB_NODE_FUNCTION_MULTIARCH (tcp6_syn_sent_node, tcp6_syn_sent_rcv);
/**
- * Handles reception for all states except LISTEN, SYN-SEND and ESTABLISHED
+ * Handles reception for all states except LISTEN, SYN-SENT and ESTABLISHED
* as per RFC793 p. 64
*/
always_inline uword
@@ -1511,6 +1514,11 @@
b0 = vlib_get_buffer (vm, bi0);
tc0 = tcp_connection_get (vnet_buffer (b0)->tcp.connection_index,
my_thread_index);
+ if (PREDICT_FALSE (tc0 == 0))
+ {
+ error0 = TCP_ERROR_INVALID_CONNECTION;
+ goto drop;
+ }
/* Checksum computed by ipx_local no need to compute again */
@@ -1587,7 +1595,8 @@
/* Shoulder tap the server */
stream_session_accept_notify (&tc0->connection);
- tcp_timer_reset (tc0, TCP_TIMER_RETRANSMIT_SYN);
+ /* Reset SYN-ACK retransmit timer */
+ tcp_timer_reset (tc0, TCP_TIMER_RETRANSMIT);
break;
case TCP_STATE_ESTABLISHED:
/* We can get packets in established state here because they
@@ -1602,9 +1611,14 @@
* continue processing in that state. */
if (tcp_rcv_ack (tc0, b0, tcp0, &next0, &error0))
goto drop;
- tc0->state = TCP_STATE_FIN_WAIT_2;
- /* Stop all timers, 2MSL will be set lower */
- tcp_connection_timers_reset (tc0);
+
+ /* If FIN is ACKed */
+ if (tc0->snd_una == tc0->snd_una_max)
+ {
+ tc0->state = TCP_STATE_FIN_WAIT_2;
+ /* Stop all timers, 2MSL will be set lower */
+ tcp_connection_timers_reset (tc0);
+ }
break;
case TCP_STATE_FIN_WAIT_2:
/* In addition to the processing for the ESTABLISHED state, if
@@ -1639,7 +1653,17 @@
if (!tcp_rcv_ack_is_acceptable (tc0, b0))
goto drop;
- tcp_connection_del (tc0);
+ tc0->state = TCP_STATE_CLOSED;
+
+ /* Don't delete the connection/session yet. Instead, wait a
+ * reasonable amount of time until the pipes are cleared. In
+ * particular, this makes sure that we won't have dead sessions
+ * when processing events on the tx path */
+ tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME);
+
+ /* Stop retransmit */
+ tcp_timer_reset (tc0, TCP_TIMER_RETRANSMIT);
+
goto drop;
break;
@@ -1684,7 +1708,7 @@
case TCP_STATE_SYN_RCVD:
/* Send FIN-ACK notify app and enter CLOSE-WAIT */
tcp_connection_timers_reset (tc0);
- tcp_make_finack (tc0, b0);
+ tcp_make_fin (tc0, b0);
next0 = tcp_next_output (tc0->c_is_ip4);
stream_session_disconnect_notify (&tc0->connection);
tc0->state = TCP_STATE_CLOSE_WAIT;
@@ -1697,12 +1721,12 @@
case TCP_STATE_FIN_WAIT_1:
tc0->state = TCP_STATE_TIME_WAIT;
tcp_connection_timers_reset (tc0);
- tcp_timer_set (tc0, TCP_TIMER_2MSL, TCP_2MSL_TIME);
+ tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME);
break;
case TCP_STATE_FIN_WAIT_2:
/* Got FIN, send ACK! */
tc0->state = TCP_STATE_TIME_WAIT;
- tcp_timer_set (tc0, TCP_TIMER_2MSL, TCP_2MSL_TIME);
+ tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME);
tcp_make_ack (tc0, b0);
next0 = tcp_next_output (is_ip4);
break;
@@ -1710,7 +1734,7 @@
/* Remain in the TIME-WAIT state. Restart the 2 MSL time-wait
* timeout.
*/
- tcp_timer_update (tc0, TCP_TIMER_2MSL, TCP_2MSL_TIME);
+ tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME);
break;
}
@@ -2113,6 +2137,7 @@
n_left_to_next -= 1;
b0 = vlib_get_buffer (vm, bi0);
+ vnet_buffer (b0)->tcp.flags = 0;
if (is_ip4)
{
@@ -2168,7 +2193,6 @@
/* Send reset */
next0 = TCP_INPUT_NEXT_RESET;
error0 = TCP_ERROR_NO_LISTENER;
- vnet_buffer (b0)->tcp.flags = 0;
}
b0->error = error0 ? node->errors[error0] : 0;
@@ -2288,6 +2312,7 @@
_(ESTABLISHED, TCP_FLAG_FIN, TCP_INPUT_NEXT_ESTABLISHED, TCP_ERROR_NONE);
_(ESTABLISHED, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_ESTABLISHED,
TCP_ERROR_NONE);
+ _(ESTABLISHED, TCP_FLAG_RST, TCP_INPUT_NEXT_ESTABLISHED, TCP_ERROR_NONE);
/* ACK or FIN-ACK to our FIN */
_(FIN_WAIT_1, TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
_(FIN_WAIT_1, TCP_FLAG_ACK | TCP_FLAG_FIN, TCP_INPUT_NEXT_RCV_PROCESS,
diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c
index 7e431cd..aa43e9f 100644
--- a/src/vnet/tcp/tcp_output.c
+++ b/src/vnet/tcp/tcp_output.c
@@ -396,6 +396,7 @@
/* Leave enough space for headers */
vlib_buffer_make_headroom (b, MAX_HDRS_LEN);
+ vnet_buffer (b)->tcp.flags = 0;
}
/**
@@ -443,16 +444,22 @@
* Convert buffer to FIN-ACK
*/
void
-tcp_make_finack (tcp_connection_t * tc, vlib_buffer_t * b)
+tcp_make_fin (tcp_connection_t * tc, vlib_buffer_t * b)
{
tcp_main_t *tm = vnet_get_tcp_main ();
vlib_main_t *vm = tm->vlib_main;
+ u8 flags = 0;
tcp_reuse_buffer (vm, b);
- tcp_make_ack_i (tc, b, TCP_STATE_ESTABLISHED, TCP_FLAG_ACK | TCP_FLAG_FIN);
+
+ if (tc->rcv_las == tc->rcv_nxt)
+ flags = TCP_FLAG_FIN;
+ else
+ flags = TCP_FLAG_FIN | TCP_FLAG_ACK;
+
+ tcp_make_ack_i (tc, b, TCP_STATE_ESTABLISHED, flags);
/* Reset flags, make sure ack is sent */
- tc->flags = TCP_CONN_SNDACK;
vnet_buffer (b)->tcp.flags &= ~TCP_BUF_FLAG_DUPACK;
tc->snd_nxt += 1;
@@ -500,7 +507,7 @@
vnet_buffer (b)->tcp.flags = TCP_BUF_FLAG_ACK;
/* Init retransmit timer */
- tcp_retransmit_timer_set (tm, tc);
+ tcp_retransmit_timer_set (tc);
}
always_inline void
@@ -818,9 +825,9 @@
/* Leave enough space for headers */
vlib_buffer_make_headroom (b, MAX_HDRS_LEN);
- tcp_make_finack (tc, b);
-
+ tcp_make_fin (tc, b);
tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4);
+ tc->flags |= TCP_CONN_FINSNT;
}
always_inline u8
@@ -1038,7 +1045,7 @@
tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4);
/* Re-enable retransmit timer */
- tcp_retransmit_timer_set (tm, tc);
+ tcp_retransmit_timer_set (tc);
}
else
{
@@ -1139,7 +1146,6 @@
vlib_node_runtime_t * node,
vlib_frame_t * from_frame, int is_ip4)
{
- tcp_main_t *tm = vnet_get_tcp_main ();
u32 n_left_from, next_index, *from, *to_next;
u32 my_thread_index = vm->cpu_index;
@@ -1172,6 +1178,13 @@
b0 = vlib_get_buffer (vm, bi0);
tc0 = tcp_connection_get (vnet_buffer (b0)->tcp.connection_index,
my_thread_index);
+ if (PREDICT_FALSE (tc0 == 0 || tc0->state == TCP_STATE_CLOSED))
+ {
+ error0 = TCP_ERROR_INVALID_CONNECTION;
+ next0 = TCP_OUTPUT_NEXT_DROP;
+ goto done;
+ }
+
th0 = vlib_buffer_get_current (b0);
if (is_ip4)
@@ -1229,6 +1242,22 @@
tc0->rtt_ts = tcp_time_now ();
tc0->rtt_seq = tc0->snd_nxt;
}
+
+ if (1)
+ {
+ ELOG_TYPE_DECLARE (e) =
+ {
+ .format =
+ "output: snd_una %u snd_una_max %u",.format_args =
+ "i4i4",};
+ struct
+ {
+ u32 data[2];
+ } *ed;
+ ed = ELOG_DATA (&vm->elog_main, e);
+ ed->data[0] = tc0->snd_una - tc0->iss;
+ ed->data[1] = tc0->snd_una_max - tc0->iss;
+ }
}
/* Set the retransmit timer if not set already and not
@@ -1236,7 +1265,7 @@
if (!tcp_timer_is_active (tc0, TCP_TIMER_RETRANSMIT)
&& tc0->snd_nxt != tc0->snd_una)
{
- tcp_retransmit_timer_set (tm, tc0);
+ tcp_retransmit_timer_set (tc0);
tc0->rto_boff = 0;
}
diff --git a/src/vnet/udp/builtin_server.c b/src/vnet/udp/builtin_server.c
index afa66ba..46c8e73 100644
--- a/src/vnet/udp/builtin_server.c
+++ b/src/vnet/udp/builtin_server.c
@@ -39,7 +39,7 @@
}
static int
-builtin_server_rx_callback (stream_session_t * s)
+builtin_server_rx_callback (stream_session_t * s, session_fifo_event_t * ep)
{
svm_fifo_t *rx_fifo, *tx_fifo;
u32 this_transfer;
diff --git a/src/vnet/udp/udp_input.c b/src/vnet/udp/udp_input.c
index 4d50933..8827873 100644
--- a/src/vnet/udp/udp_input.c
+++ b/src/vnet/udp/udp_input.c
@@ -244,19 +244,19 @@
/* Get session's server */
server0 = application_get (s0->app_index);
- /* Built-in server? Deliver the goods... */
- if (server0->cb_fns.builtin_server_rx_callback)
- {
- server0->cb_fns.builtin_server_rx_callback (s0);
- continue;
- }
-
/* Fabricate event */
evt.fifo = s0->server_rx_fifo;
evt.event_type = FIFO_EVENT_SERVER_RX;
evt.event_id = serial_number++;
evt.enqueue_length = svm_fifo_max_dequeue (s0->server_rx_fifo);
+ /* Built-in server? Deliver the goods... */
+ if (server0->cb_fns.builtin_server_rx_callback)
+ {
+ server0->cb_fns.builtin_server_rx_callback (s0, &evt);
+ continue;
+ }
+
/* Add event to server's event queue */
q = server0->event_queue;
diff --git a/src/vnet/unix/tapcli.c b/src/vnet/unix/tapcli.c
index 496f388..fb1a8ba 100644
--- a/src/vnet/unix/tapcli.c
+++ b/src/vnet/unix/tapcli.c
@@ -1435,7 +1435,8 @@
VLIB_CLI_COMMAND (tap_connect_command, static) = {
.path = "tap connect",
- .short_help = "tap connect <intfc-name> [hwaddr <addr>]",
+ .short_help =
+ "tap connect <intfc-name> [address <ip-addr>/mw] [hwaddr <addr>]",
.function = tap_connect_command_fn,
};