tcp: pace timer handling
Type: improvement
Signed-off-by: Florin Coras <fcoras@cisco.com>
Change-Id: I93067054631d6ae2411a7b08d7b681aed7a121b2
diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c
index e34f773..f24ddb3 100644
--- a/src/vnet/tcp/tcp.c
+++ b/src/vnet/tcp/tcp.c
@@ -1288,12 +1288,155 @@
}
static void
+tcp_timer_waitclose_handler (tcp_connection_t * tc)
+{ /* Waitclose timer expiration, handled according to tc's current state.
+   * CLOSE_WAIT with a FIN pending moves to LAST_ACK and rearms the timer
+   * with tcp_cfg.lastack_time. Every other close-path state handled below
+   * ends up CLOSED with the timer rearmed to tcp_cfg.cleanup_time. Any
+   * state not listed deletes the connection. */
+ switch (tc->state)
+ {
+ case TCP_STATE_CLOSE_WAIT:
+ tcp_connection_timers_reset (tc);
+ session_transport_closed_notify (&tc->connection);
+ /* No FIN pending: the app never returned with a close. Mark the
+  * connection closed and arm the cleanup timer. */
+ if (!(tc->flags & TCP_CONN_FINPNDG))
+ {
+ tcp_connection_set_state (tc, TCP_STATE_CLOSED);
+ tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
+ tcp_worker_stats_inc (tc->c_thread_index, to_closewait, 1);
+ break;
+ }
+
+ /* FIN still pending: send FIN either way and switch to LAST_ACK. */
+ tcp_cong_recovery_off (tc);
+ /* Make sure we don't try to send unsent data */
+ tc->snd_nxt = tc->snd_una;
+ tcp_send_fin (tc);
+ tcp_connection_set_state (tc, TCP_STATE_LAST_ACK);
+
+ /* Make sure we don't wait in LAST ACK forever */
+ tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.lastack_time);
+ tcp_worker_stats_inc (tc->c_thread_index, to_closewait2, 1);
+
+ /* Don't delete the connection yet */
+ break;
+ case TCP_STATE_FIN_WAIT_1:
+ tcp_connection_timers_reset (tc);
+ session_transport_closed_notify (&tc->connection);
+ if (tc->flags & TCP_CONN_FINPNDG)
+ {
+ /* If FIN pending, we haven't sent everything, but we did try.
+ * Session layer was already notified above, so send a reset. */
+ tcp_connection_set_state (tc, TCP_STATE_CLOSED);
+ tcp_send_reset (tc);
+ tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
+ }
+ else
+ {
+ /* We've sent the fin but made no progress. Close the connection
+ * and, to make sure everything is flushed, set up a cleanup timer */
+ tcp_connection_set_state (tc, TCP_STATE_CLOSED);
+ tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
+ }
+ tcp_worker_stats_inc (tc->c_thread_index, to_finwait1, 1);
+ break;
+ case TCP_STATE_LAST_ACK:
+ tcp_connection_timers_reset (tc);
+ tcp_connection_set_state (tc, TCP_STATE_CLOSED);
+ tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
+ session_transport_closed_notify (&tc->connection);
+ tcp_worker_stats_inc (tc->c_thread_index, to_lastack, 1);
+ break;
+ case TCP_STATE_CLOSING:
+ tcp_connection_timers_reset (tc);
+ tcp_connection_set_state (tc, TCP_STATE_CLOSED);
+ tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
+ session_transport_closed_notify (&tc->connection);
+ tcp_worker_stats_inc (tc->c_thread_index, to_closing, 1);
+ break;
+ case TCP_STATE_FIN_WAIT_2:
+ tcp_send_reset (tc);
+ tcp_connection_timers_reset (tc);
+ tcp_connection_set_state (tc, TCP_STATE_CLOSED);
+ session_transport_closed_notify (&tc->connection);
+ tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
+ tcp_worker_stats_inc (tc->c_thread_index, to_finwait2, 1);
+ break;
+ default: /* NOTE(review): presumably CLOSED after the cleanup timer pops */
+ tcp_connection_del (tc);
+ break;
+ }
+}
+/* Expiration handlers, looked up by timer id when a pending timer is
+ * dispatched. NOTE(review): entry order presumably has to match
+ * tcp_timers_e - confirm against tcp.h. */
+/* *INDENT-OFF* */
+static timer_expiration_handler *timer_expiration_handlers[TCP_N_TIMERS] =
+{
+ tcp_timer_retransmit_handler,
+ tcp_timer_delack_handler,
+ tcp_timer_persist_handler,
+ tcp_timer_waitclose_handler,
+ tcp_timer_retransmit_syn_handler,
+};
+/* *INDENT-ON* */
+/**
+ * Run expiration handlers for timers queued on the worker's pending fifo
+ *
+ * At most 32 handles are consumed per call; whatever is left stays in
+ * wrk->pending_timers. A handle is dropped without calling its handler if
+ * the connection can no longer be found or if the timer was set again
+ * after being queued.
+ *
+ * @param wrk	worker whose pending_timers fifo is drained
+ */
+static void
+tcp_dispatch_pending_timers (tcp_worker_ctx_t * wrk)
+{
+ u32 n_timers, connection_index, timer_id, thread_index, timer_handle;
+ tcp_connection_t *tc;
+ int i;
+
+ if (!(n_timers = clib_fifo_elts (wrk->pending_timers)))
+ return;
+
+ thread_index = wrk->vm->thread_index;
+ for (i = 0; i < clib_min (n_timers, 32); i++)
+ {
+ clib_fifo_sub1 (wrk->pending_timers, timer_handle);
+ connection_index = timer_handle & 0x0FFFFFFF; /* low 28 bits */
+ timer_id = timer_handle >> 28; /* top 4 bits */
+
+ /* SYN retransmit timers are looked up as half-open connections */
+ if (PREDICT_TRUE (timer_id != TCP_TIMER_RETRANSMIT_SYN))
+ tc = tcp_connection_get (connection_index, thread_index);
+ else
+ tc = tcp_half_open_connection_get (connection_index);
+
+ if (PREDICT_FALSE (!tc))
+ continue;
+
+ /* Skip timer if it was rearmed while pending dispatch. Handles are
+  * invalidated before being queued, so a valid one means a new timer */
+ if (PREDICT_FALSE (tc->timers[timer_id] != TCP_TIMER_HANDLE_INVALID))
+ continue;
+
+ (*timer_expiration_handlers[timer_id]) (tc);
+ }
+}
+
+/**
+ * Flush ip lookup tx frames populated by timer pops
+ *
+ * Slot 0 of wrk->ip_lookup_tx_frames is handed to the ip4 lookup node and
+ * slot 1 to the ip6 lookup node. A slot is cleared once its frame has been
+ * put to the node; empty slots are left alone.
+ *
+ * @param wrk	worker whose cached lookup frames are flushed
+ */
+static void
+tcp_flush_frames_to_output (tcp_worker_ctx_t * wrk)
+{
+ if (wrk->ip_lookup_tx_frames[0])
+ {
+ vlib_put_frame_to_node (wrk->vm, ip4_lookup_node.index,
+ wrk->ip_lookup_tx_frames[0]);
+ wrk->ip_lookup_tx_frames[0] = 0;
+ }
+ if (wrk->ip_lookup_tx_frames[1])
+ {
+ vlib_put_frame_to_node (wrk->vm, ip6_lookup_node.index,
+ wrk->ip_lookup_tx_frames[1]);
+ wrk->ip_lookup_tx_frames[1] = 0;
+ }
+}
+/**
+ * Time update for one worker thread
+ *
+ * In order: updates the worker's time via tcp_set_time_now, expires timer
+ * wheel entries up to @a now, runs handlers for timers queued on the
+ * worker's pending fifo and flushes the ip lookup frames.
+ * NOTE(review): the wheel's expiration callback is presumably what queues
+ * handles on the pending fifo - confirm against the wheel setup code.
+ */
+static void
 tcp_update_time (f64 now, u8 thread_index)
 {
 tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index);
 tcp_set_time_now (wrk);
 tw_timer_expire_timers_16t_2w_512sl (&wrk->timer_wheel, now);
+ tcp_dispatch_pending_timers (wrk);
 tcp_flush_frames_to_output (wrk);
 }
@@ -1361,111 +1504,17 @@
}
static void
-tcp_timer_waitclose_handler (u32 conn_index, u32 thread_index)
-{
- tcp_connection_t *tc;
-
- tc = tcp_connection_get (conn_index, thread_index);
- if (!tc)
- return;
-
- switch (tc->state)
- {
- case TCP_STATE_CLOSE_WAIT:
- tcp_connection_timers_reset (tc);
- session_transport_closed_notify (&tc->connection);
-
- if (!(tc->flags & TCP_CONN_FINPNDG))
- {
- clib_warning ("close-wait with fin sent");
- tcp_connection_set_state (tc, TCP_STATE_CLOSED);
- tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
- break;
- }
-
- /* Session didn't come back with a close. Send FIN either way
- * and switch to LAST_ACK. */
- tcp_cong_recovery_off (tc);
- /* Make sure we don't try to send unsent data */
- tc->snd_nxt = tc->snd_una;
- tcp_send_fin (tc);
- tcp_connection_set_state (tc, TCP_STATE_LAST_ACK);
-
- /* Make sure we don't wait in LAST ACK forever */
- tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.lastack_time);
- tcp_worker_stats_inc (thread_index, to_closewait, 1);
-
- /* Don't delete the connection yet */
- break;
- case TCP_STATE_FIN_WAIT_1:
- tcp_connection_timers_reset (tc);
- session_transport_closed_notify (&tc->connection);
- if (tc->flags & TCP_CONN_FINPNDG)
- {
- /* If FIN pending, we haven't sent everything, but we did try.
- * Notify session layer that transport is closed. */
- tcp_connection_set_state (tc, TCP_STATE_CLOSED);
- tcp_send_reset (tc);
- tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
- }
- else
- {
- /* We've sent the fin but no progress. Close the connection and
- * to make sure everything is flushed, setup a cleanup timer */
- tcp_connection_set_state (tc, TCP_STATE_CLOSED);
- tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
- }
- tcp_worker_stats_inc (thread_index, to_finwait1, 1);
- break;
- case TCP_STATE_LAST_ACK:
- tcp_connection_timers_reset (tc);
- tcp_connection_set_state (tc, TCP_STATE_CLOSED);
- tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
- session_transport_closed_notify (&tc->connection);
- tcp_worker_stats_inc (thread_index, to_lastack, 1);
- break;
- case TCP_STATE_CLOSING:
- tcp_connection_timers_reset (tc);
- tcp_connection_set_state (tc, TCP_STATE_CLOSED);
- tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
- session_transport_closed_notify (&tc->connection);
- tcp_worker_stats_inc (thread_index, to_closing, 1);
- break;
- case TCP_STATE_FIN_WAIT_2:
- tcp_send_reset (tc);
- tcp_connection_timers_reset (tc);
- tcp_connection_set_state (tc, TCP_STATE_CLOSED);
- session_transport_closed_notify (&tc->connection);
- tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
- tcp_worker_stats_inc (thread_index, to_finwait2, 1);
- break;
- default:
- tcp_connection_del (tc);
- break;
- }
-}
-
-/* *INDENT-OFF* */
-static timer_expiration_handler *timer_expiration_handlers[TCP_N_TIMERS] =
-{
- tcp_timer_retransmit_handler,
- tcp_timer_delack_handler,
- tcp_timer_persist_handler,
- tcp_timer_waitclose_handler,
- tcp_timer_retransmit_syn_handler,
-};
-/* *INDENT-ON* */
-
-static void
tcp_expired_timers_dispatch (u32 * expired_timers)
{
u32 thread_index = vlib_get_thread_index ();
u32 connection_index, timer_id, n_expired;
+ tcp_worker_ctx_t *wrk;
tcp_connection_t *tc;
int i;
+ wrk = tcp_get_worker (thread_index);
n_expired = vec_len (expired_timers);
- tcp_worker_stats_inc (thread_index, timer_expirations, n_expired);
+ tcp_workerp_stats_inc (wrk, timer_expirations, n_expired);
/*
* Invalidate all timer handles before dispatching. This avoids dangling
@@ -1486,15 +1535,7 @@
tc->timers[timer_id] = TCP_TIMER_HANDLE_INVALID;
}
- /*
- * Dispatch expired timers
- */
- for (i = 0; i < n_expired; i++)
- {
- connection_index = expired_timers[i] & 0x0FFFFFFF;
- timer_id = expired_timers[i] >> 28;
- (*timer_expiration_handlers[timer_id]) (connection_index, thread_index);
- }
+ clib_fifo_add (wrk->pending_timers, expired_timers, n_expired);
}
static void
@@ -2297,14 +2338,19 @@
for (thread = 0; thread < vec_len (tm->wrk_ctx); thread++)
{
wrk = tcp_get_worker (thread);
- vlib_cli_output (vm, "Thread %d:\n", thread);
+ vlib_cli_output (vm, "Thread %u:\n", thread);
+
+ if (clib_fifo_elts (wrk->pending_timers))
+ vlib_cli_output (vm, " %lu pending timers",
+ clib_fifo_elts (wrk->pending_timers));
#define _(name,type,str) \
if (wrk->stats.name) \
- vlib_cli_output (vm, " %ld %s", wrk->stats.name, str);
+ vlib_cli_output (vm, " %lu %s", wrk->stats.name, str);
foreach_tcp_wrk_stat
#undef _
}
+
return 0;
}
diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h
index a60e105..e7b53b8 100644
--- a/src/vnet/tcp/tcp.h
+++ b/src/vnet/tcp/tcp.h
@@ -82,13 +82,6 @@
TCP_N_TIMERS
} tcp_timers_e;
-typedef void (timer_expiration_handler) (u32 index, u32 thread_index);
-
-extern timer_expiration_handler tcp_timer_delack_handler;
-extern timer_expiration_handler tcp_timer_retransmit_handler;
-extern timer_expiration_handler tcp_timer_persist_handler;
-extern timer_expiration_handler tcp_timer_retransmit_syn_handler;
-
#define TCP_TIMER_HANDLE_INVALID ((u32) ~0)
#define TCP_TIMER_TICK 0.1 /**< Timer tick in seconds */
@@ -478,6 +471,13 @@
#define tcp_csum_offload(tc) (!((tc)->cfg_flags & TCP_CFG_F_NO_CSUM_OFFLOAD))
+typedef void (timer_expiration_handler) (tcp_connection_t * tc);
+
+extern timer_expiration_handler tcp_timer_delack_handler;
+extern timer_expiration_handler tcp_timer_retransmit_handler;
+extern timer_expiration_handler tcp_timer_persist_handler;
+extern timer_expiration_handler tcp_timer_retransmit_syn_handler;
+
always_inline void
tcp_cong_recovery_off (tcp_connection_t * tc)
{
@@ -507,6 +507,7 @@
_(rxt_segs, u64, "segments retransmitted") \
_(tr_events, u32, "timer retransmit events") \
_(to_closewait, u32, "timeout close-wait") \
+ _(to_closewait2, u32, "timeout close-wait w/data") \
_(to_finwait1, u32, "timeout fin-wait-1") \
_(to_finwait2, u32, "timeout fin-wait-2") \
_(to_lastack, u32, "timeout last-ack") \
@@ -554,6 +555,9 @@
/** tx buffer free list */
u32 *tx_buffers;
+ /** Fifo of pending timer expirations */
+ u32 *pending_timers;
+
/** worker timer wheel */
tw_timer_wheel_16t_2w_512sl_t timer_wheel;
@@ -831,7 +835,6 @@
void tcp_send_ack (tcp_connection_t * tc);
void tcp_update_burst_snd_vars (tcp_connection_t * tc);
void tcp_update_rto (tcp_connection_t * tc);
-void tcp_flush_frames_to_output (tcp_worker_ctx_t * wrk);
void tcp_send_window_update_ack (tcp_connection_t * tc);
void tcp_program_ack (tcp_connection_t * tc);
diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c
index 69b34cc..5eccda6 100644
--- a/src/vnet/tcp/tcp_output.c
+++ b/src/vnet/tcp/tcp_output.c
@@ -973,32 +973,6 @@
}
/**
- * Flush ip lookup tx frames populated by timer pops
- */
-static void
-tcp_flush_frame_to_ip_lookup (tcp_worker_ctx_t * wrk, u8 is_ip4)
-{
- if (wrk->ip_lookup_tx_frames[!is_ip4])
- {
- u32 next_index;
- next_index = is_ip4 ? ip4_lookup_node.index : ip6_lookup_node.index;
- vlib_put_frame_to_node (wrk->vm, next_index,
- wrk->ip_lookup_tx_frames[!is_ip4]);
- wrk->ip_lookup_tx_frames[!is_ip4] = 0;
- }
-}
-
-/**
- * Flush v4 and v6 tcp and ip-lookup tx frames for thread index
- */
-void
-tcp_flush_frames_to_output (tcp_worker_ctx_t * wrk)
-{
- tcp_flush_frame_to_ip_lookup (wrk, 1);
- tcp_flush_frame_to_ip_lookup (wrk, 0);
-}
-
-/**
* Send FIN
*/
void
@@ -1208,11 +1182,8 @@
* Sends delayed ACK when timer expires
*/
void
-tcp_timer_delack_handler (u32 index, u32 thread_index)
+tcp_timer_delack_handler (tcp_connection_t * tc)
{
- tcp_connection_t *tc;
-
- tc = tcp_connection_get (index, thread_index);
tcp_send_ack (tc);
}
@@ -1443,19 +1414,17 @@
}
void
-tcp_timer_retransmit_handler (u32 tc_index, u32 thread_index)
+tcp_timer_retransmit_handler (tcp_connection_t * tc)
{
- tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index);
+ tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
vlib_main_t *vm = wrk->vm;
- tcp_connection_t *tc;
vlib_buffer_t *b = 0;
u32 bi, n_bytes;
tcp_workerp_stats_inc (wrk, tr_events, 1);
- tc = tcp_connection_get (tc_index, thread_index);
- /* Note: the connection may have been closed and pool_put */
- if (PREDICT_FALSE (tc == 0 || tc->state == TCP_STATE_SYN_SENT))
+ /* Should be handled by a different handler */
+ if (PREDICT_FALSE (tc->state == TCP_STATE_SYN_SENT))
return;
/* Wait-close and retransmit could pop at the same time */
@@ -1592,18 +1561,15 @@
* SYN retransmit timer handler. Active open only.
*/
void
-tcp_timer_retransmit_syn_handler (u32 tc_index, u32 thread_index)
+tcp_timer_retransmit_syn_handler (tcp_connection_t * tc)
{
- tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index);
+ tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
vlib_main_t *vm = wrk->vm;
- tcp_connection_t *tc;
vlib_buffer_t *b = 0;
u32 bi;
- tc = tcp_half_open_connection_get (tc_index);
-
/* Note: the connection may have transitioned to ESTABLISHED... */
- if (PREDICT_FALSE (tc == 0 || tc->state != TCP_STATE_SYN_SENT))
+ if (PREDICT_FALSE (tc->state != TCP_STATE_SYN_SENT))
return;
/* Half-open connection actually moved to established but we were
@@ -1658,21 +1624,16 @@
*
*/
void
-tcp_timer_persist_handler (u32 index, u32 thread_index)
+tcp_timer_persist_handler (tcp_connection_t * tc)
{
- tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index);
+ tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
u32 bi, max_snd_bytes, available_bytes, offset;
tcp_main_t *tm = vnet_get_tcp_main ();
vlib_main_t *vm = wrk->vm;
- tcp_connection_t *tc;
vlib_buffer_t *b;
int n_bytes = 0;
u8 *data;
- tc = tcp_connection_get_if_valid (index, thread_index);
- if (!tc)
- return;
-
/* Problem already solved or worse */
if (tc->state == TCP_STATE_CLOSED || tc->snd_wnd > tc->snd_mss
|| (tc->flags & TCP_CONN_FINSNT))