tcp: handle disconnects after enq notifications

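Make sure that we notify the app of the data enqueued in the burst
before notifying it of the disconnect. On FIN, the disconnect
notification is now queued on the worker and delivered once the whole
frame has been processed. The FR_1_SMSS connection flag and its helper
macros are removed and the new DCNT_PENDING flag takes its place.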

Change-Id: I7747a5cbb4c6bc9132007f849c24ce04b7841273
Signed-off-by: Florin Coras <fcoras@cisco.com>
diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c
index 695f614..2511a17 100644
--- a/src/vnet/tcp/tcp.c
+++ b/src/vnet/tcp/tcp.c
@@ -1332,11 +1332,13 @@
       vec_validate (tm->wrk_ctx[thread].postponed_fast_rxt, 255);
       vec_validate (tm->wrk_ctx[thread].pending_deq_acked, 255);
       vec_validate (tm->wrk_ctx[thread].pending_acks, 255);
+      vec_validate (tm->wrk_ctx[thread].pending_disconnects, 255);
       vec_reset_length (tm->wrk_ctx[thread].pending_fast_rxt);
       vec_reset_length (tm->wrk_ctx[thread].ongoing_fast_rxt);
       vec_reset_length (tm->wrk_ctx[thread].postponed_fast_rxt);
       vec_reset_length (tm->wrk_ctx[thread].pending_deq_acked);
       vec_reset_length (tm->wrk_ctx[thread].pending_acks);
+      vec_reset_length (tm->wrk_ctx[thread].pending_disconnects);
       tm->wrk_ctx[thread].vm = vlib_mains[thread];
 
       /*
diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h
index 4626297..d4bebeb 100644
--- a/src/vnet/tcp/tcp.h
+++ b/src/vnet/tcp/tcp.h
@@ -118,7 +118,7 @@
   _(SENT_RCV_WND0, "Sent 0 rcv_wnd")     	\
   _(RECOVERY, "Recovery")                    	\
   _(FAST_RECOVERY, "Fast Recovery")		\
-  _(FR_1_SMSS, "Sent 1 SMSS")			\
+  _(DCNT_PENDING, "Disconnect pending")		\
   _(HALF_OPEN_DONE, "Half-open completed")	\
   _(FINPNDG, "FIN pending")			\
   _(FRXT_PENDING, "Fast-retransmit pending")	\
@@ -352,9 +352,9 @@
 #define tcp_in_fastrecovery(tc) ((tc)->flags & TCP_CONN_FAST_RECOVERY)
 #define tcp_in_recovery(tc) ((tc)->flags & (TCP_CONN_RECOVERY))
 #define tcp_in_slowstart(tc) (tc->cwnd < tc->ssthresh)
-#define tcp_fastrecovery_sent_1_smss(tc) ((tc)->flags & TCP_CONN_FR_1_SMSS)
-#define tcp_fastrecovery_1_smss_on(tc) ((tc)->flags |= TCP_CONN_FR_1_SMSS)
-#define tcp_fastrecovery_1_smss_off(tc) ((tc)->flags &= ~TCP_CONN_FR_1_SMSS)
+#define tcp_disconnect_pending(tc) ((tc)->flags & TCP_CONN_DCNT_PENDING)
+#define tcp_disconnect_pending_on(tc) ((tc)->flags |= TCP_CONN_DCNT_PENDING)
+#define tcp_disconnect_pending_off(tc) ((tc)->flags &= ~TCP_CONN_DCNT_PENDING)
 #define tcp_fastrecovery_first(tc) ((tc)->flags & TCP_CONN_FRXT_FIRST)
 #define tcp_fastrecovery_first_on(tc) ((tc)->flags |= TCP_CONN_FRXT_FIRST)
 #define tcp_fastrecovery_first_off(tc) ((tc)->flags &= ~TCP_CONN_FRXT_FIRST)
@@ -366,7 +366,6 @@
 tcp_cong_recovery_off (tcp_connection_t * tc)
 {
   tc->flags &= ~(TCP_CONN_FAST_RECOVERY | TCP_CONN_RECOVERY);
-  tcp_fastrecovery_1_smss_off (tc);
   tcp_fastrecovery_first_off (tc);
 }
 
@@ -386,26 +385,46 @@
 typedef struct tcp_worker_ctx_
 {
   CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
-  u32 time_now;					/**< worker time */
-  tw_timer_wheel_16t_2w_512sl_t timer_wheel;	/**< worker timer wheel */
-  u32 *tx_buffers;				/**< tx buffer free list */
-  vlib_frame_t *tx_frames[2];			/**< tx frames for tcp 4/6
-						     output nodes */
-  vlib_frame_t *ip_lookup_tx_frames[2];		/**< tx frames for ip 4/6
-						     lookup nodes */
-  u32 *pending_fast_rxt;			/**< vector of connections
-						     needing fast rxt */
-  u32 *ongoing_fast_rxt;			/**< vector of connections
-						     now doing fast rxt */
-  u32 *postponed_fast_rxt;			/**< vector of connections
-						     that will do fast rxt */
+  /** worker time */
+  u32 time_now;
+
+  /** worker timer wheel */
+  tw_timer_wheel_16t_2w_512sl_t timer_wheel;
+
+  /** tx buffer free list */
+  u32 *tx_buffers;
+
+  /** tx frames for tcp 4/6 output nodes */
+  vlib_frame_t *tx_frames[2];
+
+  /** tx frames for ip 4/6 lookup nodes */
+  vlib_frame_t *ip_lookup_tx_frames[2];
+
+  /** vector of connections needing fast rxt */
+  u32 *pending_fast_rxt;
+
+  /** vector of connections now doing fast rxt */
+  u32 *ongoing_fast_rxt;
+
+  /** vector of connections that will do fast rxt */
+  u32 *postponed_fast_rxt;
+
+  /** vector of pending ack dequeues */
   u32 *pending_deq_acked;
+
+  /** vector of pending acks */
   u32 *pending_acks;
-  vlib_main_t *vm;				/**< pointer to vm */
+
+  /** vector of connection indices awaiting disconnect notification */
+  u32 *pending_disconnects;
+
+  /** convenience pointer to this thread's vlib main */
+  vlib_main_t *vm;
 
     CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
-  u8 cached_opts[40];				/**< cached 'on the wire'
-						     options for bursts */
+
+  /** cached 'on the wire' options for bursts */
+  u8 cached_opts[40];
 
 } tcp_worker_ctx_t;
 
diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c
index 2578b7d..0c87065 100644
--- a/src/vnet/tcp/tcp_input.c
+++ b/src/vnet/tcp/tcp_input.c
@@ -1131,7 +1131,6 @@
   tc->rtt_ts = 0;
 
   tcp_fastrecovery_off (tc);
-  tcp_fastrecovery_1_smss_off (tc);
   tcp_fastrecovery_first_off (tc);
 
   TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 3);
@@ -1600,6 +1599,54 @@
   return 0;
 }
 
+static void /* queue tc's disconnect notification instead of sending it now */
+tcp_program_disconnect (tcp_worker_ctx_t * wrk, tcp_connection_t * tc)
+{
+  if (!tcp_disconnect_pending (tc)) /* skip if tc is already queued */
+    {
+      vec_add1 (wrk->pending_disconnects, tc->c_c_index);
+      tcp_disconnect_pending_on (tc);
+    }
+}
+
+static void /* deliver every disconnect notification queued on this worker */
+tcp_handle_disconnects (tcp_worker_ctx_t * wrk)
+{
+  u32 thread_index, *pending_disconnects;
+  tcp_connection_t *tc;
+  int i;
+
+  if (!vec_len (wrk->pending_disconnects)) /* nothing queued */
+    return;
+
+  thread_index = wrk->vm->thread_index;
+  pending_disconnects = wrk->pending_disconnects;
+  for (i = 0; i < vec_len (pending_disconnects); i++)
+    { /* NOTE(review): tc is used unchecked - confirm the lookup cannot fail */
+      tc = tcp_connection_get (pending_disconnects[i], thread_index);
+      tcp_disconnect_pending_off (tc); /* allow tc to be queued again */
+      stream_session_disconnect_notify (&tc->connection);
+    }
+  _vec_len (wrk->pending_disconnects) = 0; /* mark the queue empty */
+}
+
+static void /* process a FIN received on tc; result code goes to *error */
+tcp_rcv_fin (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, vlib_buffer_t * b,
+	     u32 * error)
+{
+  /* Enter CLOSE-WAIT and queue the session disconnect notification. To
+   * avoid lingering in CLOSE-WAIT, set timer (reuse WAITCLOSE). */
+  /* Account for the FIN if nothing else was received */
+  if (vnet_buffer (b)->tcp.data_len == 0)
+    tc->rcv_nxt += 1;
+  tcp_program_ack (wrk, tc);
+  tc->state = TCP_STATE_CLOSE_WAIT;
+  tcp_program_disconnect (wrk, tc);
+  tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_CLOSEWAIT_TIME);
+  TCP_EVT_DBG (TCP_EVT_FIN_RCVD, tc);
+  *error = TCP_ERROR_FIN_RCVD;
+}
+
 static u8
 tcp_sack_vector_is_sane (sack_block_t * sacks)
 {
@@ -2099,19 +2146,7 @@
 
       /* 8: check the FIN bit */
       if (PREDICT_FALSE (is_fin))
-	{
-	  /* Enter CLOSE-WAIT and notify session. To avoid lingering
-	   * in CLOSE-WAIT, set timer (reuse WAITCLOSE). */
-	  /* Account for the FIN if nothing else was received */
-	  if (vnet_buffer (b0)->tcp.data_len == 0)
-	    tc0->rcv_nxt += 1;
-	  tcp_program_ack (wrk, tc0);
-	  tc0->state = TCP_STATE_CLOSE_WAIT;
-	  stream_session_disconnect_notify (&tc0->connection);
-	  tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, TCP_CLOSEWAIT_TIME);
-	  TCP_EVT_DBG (TCP_EVT_FIN_RCVD, tc0);
-	  error0 = TCP_ERROR_FIN_RCVD;
-	}
+	tcp_rcv_fin (wrk, tc0, b0, &error0);
 
     done:
       tcp_inc_err_counter (err_counters, error0, 1);
@@ -2122,6 +2157,7 @@
   err_counters[TCP_ERROR_EVENT_FIFO_FULL] = errors;
   tcp_store_err_counters (established, err_counters);
   tcp_handle_postponed_dequeues (wrk);
+  tcp_handle_disconnects (wrk);
   vlib_buffer_free (vm, first_buffer, frame->n_vectors);
 
   return frame->n_vectors;
diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c
index e16095b..4915636 100644
--- a/src/vnet/tcp/tcp_output.c
+++ b/src/vnet/tcp/tcp_output.c
@@ -1201,8 +1201,7 @@
   tcp_push_hdr_i (tc, b, TCP_STATE_ESTABLISHED, /* compute opts */ 0,
 		  /* burst */ 1);
   tc->snd_una_max = tc->snd_nxt;
-  ASSERT (seq_leq (tc->snd_una_max, tc->snd_una + tc->snd_wnd
-		   + tcp_fastrecovery_sent_1_smss (tc) * tc->snd_mss));
+  ASSERT (seq_leq (tc->snd_una_max, tc->snd_una + tc->snd_wnd));
   tcp_validate_txf_size (tc, tc->snd_una_max - tc->snd_una);
   /* If not tracking an ACK, start tracking */
   if (tc->rtt_ts == 0 && !tcp_in_cong_recovery (tc))