tcp: handle cleanups without timers

Type: improvement

Signed-off-by: Florin Coras <fcoras@cisco.com>
Change-Id: Ida896e9fbe83b4c76578b089bb3fd2bef5e07fd2
diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c
index e243f5a..2ada8b8 100644
--- a/src/vnet/tcp/tcp.c
+++ b/src/vnet/tcp/tcp.c
@@ -327,6 +327,18 @@
   pool_put (wrk->connections, tc);
 }
 
+/* Queue tc's cleanup, due tcp_cfg.cleanup_time after current time */
+void
+tcp_program_cleanup (tcp_worker_ctx_t * wrk, tcp_connection_t * tc)
+{
+  tcp_cleanup_req_t *cleanup_req;
+
+  clib_fifo_add2 (wrk->pending_cleanups, cleanup_req);
+  cleanup_req->connection_index = tc->c_c_index;
+  cleanup_req->free_time =
+    transport_time_now (tc->c_thread_index) + tcp_cfg.cleanup_time;
+}
+
 /**
  * Begin connection closing procedure.
  *
@@ -366,8 +378,8 @@
 	  tcp_send_reset (tc);
 	  tcp_connection_timers_reset (tc);
 	  tcp_connection_set_state (tc, TCP_STATE_CLOSED);
-	  tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.closewait_time);
 	  session_transport_closed_notify (&tc->connection);
+	  tcp_program_cleanup (tcp_get_worker (tc->c_thread_index), tc);
 	  tcp_worker_stats_inc (tc->c_thread_index, rst_unread, 1);
 	  break;
 	}
@@ -396,11 +408,7 @@
       tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.finwait1_time);
       break;
     case TCP_STATE_CLOSED:
-      tcp_connection_timers_reset (tc);
-      /* Delete connection but instead of doing it now wait until next
-       * dispatch cycle to give the session layer a chance to clear
-       * unhandled events */
-      tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
+      /* Cleanup should've been programmed already */
       break;
     default:
       TCP_DBG ("state: %u", tc->state);
@@ -431,12 +439,12 @@
 {
   tcp_connection_t *tc;
   tc = tcp_connection_get (conn_index, thread_index);
-  session_transport_closed_notify (&tc->connection);
   tcp_send_reset (tc);
   tcp_connection_timers_reset (tc);
   tcp_cong_recovery_off (tc);
   tcp_connection_set_state (tc, TCP_STATE_CLOSED);
-  tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
+  session_transport_closed_notify (&tc->connection);
+  tcp_program_cleanup (tcp_get_worker (thread_index), tc);
 }
 
 /**
@@ -1295,17 +1303,19 @@
 static void
 tcp_timer_waitclose_handler (tcp_connection_t * tc)
 {
+  tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
+
   switch (tc->state)
     {
     case TCP_STATE_CLOSE_WAIT:
       tcp_connection_timers_reset (tc);
-      session_transport_closed_notify (&tc->connection);
       /* App never returned with a close */
       if (!(tc->flags & TCP_CONN_FINPNDG))
 	{
 	  tcp_connection_set_state (tc, TCP_STATE_CLOSED);
-	  tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
-	  tcp_worker_stats_inc (tc->c_thread_index, to_closewait, 1);
+	  session_transport_closed_notify (&tc->connection);
+	  tcp_program_cleanup (wrk, tc);
+	  tcp_workerp_stats_inc (wrk, to_closewait, 1);
 	  break;
 	}
 
@@ -1315,57 +1325,62 @@
       tc->snd_nxt = tc->snd_una;
       tcp_send_fin (tc);
       tcp_connection_set_state (tc, TCP_STATE_LAST_ACK);
+      session_transport_closed_notify (&tc->connection);
 
       /* Make sure we don't wait in LAST ACK forever */
       tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.lastack_time);
-      tcp_worker_stats_inc (tc->c_thread_index, to_closewait2, 1);
+      tcp_workerp_stats_inc (wrk, to_closewait2, 1);
 
       /* Don't delete the connection yet */
       break;
     case TCP_STATE_FIN_WAIT_1:
       tcp_connection_timers_reset (tc);
-      session_transport_closed_notify (&tc->connection);
       if (tc->flags & TCP_CONN_FINPNDG)
 	{
 	  /* If FIN pending, we haven't sent everything, but we did try.
 	   * Notify session layer that transport is closed. */
 	  tcp_connection_set_state (tc, TCP_STATE_CLOSED);
 	  tcp_send_reset (tc);
-	  tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
+	  tcp_program_cleanup (wrk, tc);
 	}
       else
 	{
 	  /* We've sent the fin but no progress. Close the connection and
 	   * to make sure everything is flushed, setup a cleanup timer */
 	  tcp_connection_set_state (tc, TCP_STATE_CLOSED);
-	  tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
+	  tcp_program_cleanup (wrk, tc);
 	}
-      tcp_worker_stats_inc (tc->c_thread_index, to_finwait1, 1);
+      session_transport_closed_notify (&tc->connection);
+      tcp_workerp_stats_inc (wrk, to_finwait1, 1);
       break;
     case TCP_STATE_LAST_ACK:
       tcp_connection_timers_reset (tc);
       tcp_connection_set_state (tc, TCP_STATE_CLOSED);
-      tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
       session_transport_closed_notify (&tc->connection);
-      tcp_worker_stats_inc (tc->c_thread_index, to_lastack, 1);
+      tcp_program_cleanup (wrk, tc);
+      tcp_workerp_stats_inc (wrk, to_lastack, 1);
       break;
     case TCP_STATE_CLOSING:
       tcp_connection_timers_reset (tc);
       tcp_connection_set_state (tc, TCP_STATE_CLOSED);
-      tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
       session_transport_closed_notify (&tc->connection);
-      tcp_worker_stats_inc (tc->c_thread_index, to_closing, 1);
+      tcp_program_cleanup (wrk, tc);
+      tcp_workerp_stats_inc (wrk, to_closing, 1);
       break;
     case TCP_STATE_FIN_WAIT_2:
       tcp_send_reset (tc);
       tcp_connection_timers_reset (tc);
       tcp_connection_set_state (tc, TCP_STATE_CLOSED);
       session_transport_closed_notify (&tc->connection);
-      tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
-      tcp_worker_stats_inc (tc->c_thread_index, to_finwait2, 1);
+      tcp_program_cleanup (wrk, tc);
+      tcp_workerp_stats_inc (wrk, to_finwait2, 1);
+      break;
+    case TCP_STATE_TIME_WAIT:
+      tcp_connection_set_state (tc, TCP_STATE_CLOSED);
+      tcp_program_cleanup (wrk, tc);
       break;
     default:
-      tcp_connection_del (tc);
+      clib_warning ("waitclose in state: %U", format_tcp_state, tc->state);
       break;
     }
 }
@@ -1439,11 +1454,45 @@
 }
 
+/**
+ * Process pending connection cleanup requests.
+ *
+ * Requests are taken from the head of the worker's pending_cleanups fifo
+ * for as long as their free_time is not after @c now. For each of them the
+ * session layer is notified of the delete and the connection is cleaned up.
+ *
+ * @param wrk	tcp worker whose pending_cleanups fifo is processed
+ * @param now	current time, compared against each request's free_time
+ */
 static void
+tcp_handle_cleanups (tcp_worker_ctx_t * wrk, clib_time_type_t now)
+{
+  u32 thread_index = wrk->vm->thread_index;
+  tcp_cleanup_req_t *req;
+  tcp_connection_t *tc;
+
+  while (clib_fifo_elts (wrk->pending_cleanups))
+    {
+      req = clib_fifo_head (wrk->pending_cleanups);
+      /* Head request is not due yet, stop processing */
+      if (req->free_time > now)
+	break;
+      clib_fifo_sub2 (wrk->pending_cleanups, req);
+      tc = tcp_connection_get (req->connection_index, thread_index);
+      /* Connection may be gone by now, never dereference a null tc */
+      if (!tc)
+	continue;
+      session_transport_delete_notify (&tc->connection);
+      tcp_connection_cleanup (tc);
+    }
+}
+
+static void
 tcp_update_time (f64 now, u8 thread_index)
 {
   tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index);
 
   tcp_set_time_now (wrk);
+  tcp_handle_cleanups (wrk, now);
   tw_timer_expire_timers_16t_2w_512sl (&wrk->timer_wheel, now);
   tcp_dispatch_pending_timers (wrk);
   tcp_flush_frames_to_output (wrk);
@@ -1718,7 +1753,7 @@
   tcp_cfg.lastack_time = 300;	/* 30s */
   tcp_cfg.finwait2_time = 300;	/* 30s */
   tcp_cfg.closing_time = 300;	/* 30s */
-  tcp_cfg.cleanup_time = 1;	/* 0.1s */
+  tcp_cfg.cleanup_time = 0.1;	/* 100ms */
 }
 
 static clib_error_t *
@@ -1868,7 +1903,7 @@
       else if (unformat (input, "closing-time %u", &tmp_time))
 	tcp_cfg.closing_time = tmp_time / TCP_TIMER_TICK;
       else if (unformat (input, "cleanup-time %u", &tmp_time))
-	tcp_cfg.cleanup_time = tmp_time / TCP_TIMER_TICK;
+	tcp_cfg.cleanup_time = tmp_time / 1000.0;
       else
 	return clib_error_return (0, "unknown input `%U'",
 				  format_unformat_error, input);
diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h
index bef7edb..6094ac5 100644
--- a/src/vnet/tcp/tcp.h
+++ b/src/vnet/tcp/tcp.h
@@ -522,6 +522,12 @@
 #undef _
 } tcp_wrk_stats_t;
 
+typedef struct tcp_free_req_
+{
+  clib_time_type_t free_time;	/**< earliest time to run the cleanup */
+  u32 connection_index;		/**< index of connection to clean up */
+} tcp_cleanup_req_t;
+
 typedef struct tcp_worker_ctx_
 {
   CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
@@ -561,6 +567,9 @@
   /* Fifo of pending timer expirations */
   u32 *pending_timers;
 
+  /* Fifo of pending connection cleanup requests */
+  tcp_cleanup_req_t *pending_cleanups;
+
   /** worker timer wheel */
   tw_timer_wheel_16t_2w_512sl_t timer_wheel;
 
@@ -634,8 +643,8 @@
   /** Timer ticks to wait in closing for fin ack */
   u16 closing_time;
 
-  /** Timer ticks to wait before cleaning up the connection */
-  u16 cleanup_time;
+  /** Time to wait (sec) before cleaning up the connection */
+  f32 cleanup_time;
 
   /** Number of preallocated connections */
   u32 preallocated_connections;
@@ -1090,6 +1099,7 @@
 void tcp_connection_tx_pacer_update (tcp_connection_t * tc);
 void tcp_connection_tx_pacer_reset (tcp_connection_t * tc, u32 window,
 				    u32 start_bucket);
+void tcp_program_cleanup (tcp_worker_ctx_t * wrk, tcp_connection_t * tc);
 
 always_inline void
 tcp_cc_rcv_ack (tcp_connection_t * tc, tcp_rate_sample_t * rs)
diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c
index adbfe7a..e93e8ba 100755
--- a/src/vnet/tcp/tcp_input.c
+++ b/src/vnet/tcp/tcp_input.c
@@ -332,12 +332,10 @@
       break;
     case TCP_STATE_ESTABLISHED:
       tcp_connection_timers_reset (tc);
-      /* Set the cleanup timer, in case the session layer/app don't
-       * cleanly close the connection */
-      tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.closewait_time);
       tcp_cong_recovery_off (tc);
       tcp_program_reset_ntf (wrk, tc);
       tcp_connection_set_state (tc, TCP_STATE_CLOSED);
+      tcp_program_cleanup (wrk, tc);
       break;
     case TCP_STATE_CLOSE_WAIT:
     case TCP_STATE_FIN_WAIT_1:
@@ -345,12 +343,12 @@
     case TCP_STATE_CLOSING:
     case TCP_STATE_LAST_ACK:
       tcp_connection_timers_reset (tc);
-      tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.closewait_time);
       tcp_cong_recovery_off (tc);
       tcp_program_reset_ntf (wrk, tc);
       /* Make sure we mark the session as closed. In some states we may
        * be still trying to send data */
       tcp_connection_set_state (tc, TCP_STATE_CLOSED);
+      tcp_program_cleanup (wrk, tc);
       break;
     case TCP_STATE_CLOSED:
     case TCP_STATE_TIME_WAIT:
@@ -3029,9 +3027,8 @@
 	      if (tc0->flags & TCP_CONN_FINRCVD)
 		{
 		  tcp_connection_set_state (tc0, TCP_STATE_CLOSED);
-		  tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE,
-				 tcp_cfg.cleanup_time);
 		  session_transport_closed_notify (&tc0->connection);
+		  tcp_program_cleanup (wrk, tc0);
 		  goto drop;
 		}
 
@@ -3112,7 +3109,7 @@
 	   * we can't ensure that we have no packets already enqueued
 	   * to output. Rely instead on the waitclose timer */
 	  tcp_connection_timers_reset (tc0);
-	  tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
+	  tcp_program_cleanup (tcp_get_worker (tc0->c_thread_index), tc0);
 
 	  goto drop;
 
diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c
index 5eccda6..91021e8 100644
--- a/src/vnet/tcp/tcp_output.c
+++ b/src/vnet/tcp/tcp_output.c
@@ -1473,7 +1473,7 @@
 	  session_transport_closing_notify (&tc->connection);
 	  session_transport_closed_notify (&tc->connection);
 	  tcp_connection_timers_reset (tc);
-	  tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.closewait_time);
+	  tcp_program_cleanup (wrk, tc);
 	  tcp_workerp_stats_inc (wrk, tr_abort, 1);
 	  return;
 	}
@@ -1525,7 +1525,7 @@
 	{
 	  tcp_connection_set_state (tc, TCP_STATE_CLOSED);
 	  tcp_connection_timers_reset (tc);
-	  tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
+	  tcp_program_cleanup (wrk, tc);
 	  tcp_workerp_stats_inc (wrk, tr_abort, 1);
 	  return;
 	}