tcp: handle disconnects after enq notifications
Make sure that we notify the app of the data enqueued in the burst
before notifying of disconnect.
Change-Id: I7747a5cbb4c6bc9132007f849c24ce04b7841273
Signed-off-by: Florin Coras <fcoras@cisco.com>
diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c
index 695f614..2511a17 100644
--- a/src/vnet/tcp/tcp.c
+++ b/src/vnet/tcp/tcp.c
@@ -1332,11 +1332,13 @@
vec_validate (tm->wrk_ctx[thread].postponed_fast_rxt, 255);
vec_validate (tm->wrk_ctx[thread].pending_deq_acked, 255);
vec_validate (tm->wrk_ctx[thread].pending_acks, 255);
+ vec_validate (tm->wrk_ctx[thread].pending_disconnects, 255);
vec_reset_length (tm->wrk_ctx[thread].pending_fast_rxt);
vec_reset_length (tm->wrk_ctx[thread].ongoing_fast_rxt);
vec_reset_length (tm->wrk_ctx[thread].postponed_fast_rxt);
vec_reset_length (tm->wrk_ctx[thread].pending_deq_acked);
vec_reset_length (tm->wrk_ctx[thread].pending_acks);
+ vec_reset_length (tm->wrk_ctx[thread].pending_disconnects);
tm->wrk_ctx[thread].vm = vlib_mains[thread];
/*
diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h
index 4626297..d4bebeb 100644
--- a/src/vnet/tcp/tcp.h
+++ b/src/vnet/tcp/tcp.h
@@ -118,7 +118,7 @@
_(SENT_RCV_WND0, "Sent 0 rcv_wnd") \
_(RECOVERY, "Recovery") \
_(FAST_RECOVERY, "Fast Recovery") \
- _(FR_1_SMSS, "Sent 1 SMSS") \
+ _(DCNT_PENDING, "Disconnect pending") \
_(HALF_OPEN_DONE, "Half-open completed") \
_(FINPNDG, "FIN pending") \
_(FRXT_PENDING, "Fast-retransmit pending") \
@@ -352,9 +352,9 @@
#define tcp_in_fastrecovery(tc) ((tc)->flags & TCP_CONN_FAST_RECOVERY)
#define tcp_in_recovery(tc) ((tc)->flags & (TCP_CONN_RECOVERY))
#define tcp_in_slowstart(tc) (tc->cwnd < tc->ssthresh)
-#define tcp_fastrecovery_sent_1_smss(tc) ((tc)->flags & TCP_CONN_FR_1_SMSS)
-#define tcp_fastrecovery_1_smss_on(tc) ((tc)->flags |= TCP_CONN_FR_1_SMSS)
-#define tcp_fastrecovery_1_smss_off(tc) ((tc)->flags &= ~TCP_CONN_FR_1_SMSS)
+#define tcp_disconnect_pending(tc) ((tc)->flags & TCP_CONN_DCNT_PENDING)
+#define tcp_disconnect_pending_on(tc) ((tc)->flags |= TCP_CONN_DCNT_PENDING)
+#define tcp_disconnect_pending_off(tc) ((tc)->flags &= ~TCP_CONN_DCNT_PENDING)
#define tcp_fastrecovery_first(tc) ((tc)->flags & TCP_CONN_FRXT_FIRST)
#define tcp_fastrecovery_first_on(tc) ((tc)->flags |= TCP_CONN_FRXT_FIRST)
#define tcp_fastrecovery_first_off(tc) ((tc)->flags &= ~TCP_CONN_FRXT_FIRST)
@@ -366,7 +366,6 @@
tcp_cong_recovery_off (tcp_connection_t * tc)
{
tc->flags &= ~(TCP_CONN_FAST_RECOVERY | TCP_CONN_RECOVERY);
- tcp_fastrecovery_1_smss_off (tc);
tcp_fastrecovery_first_off (tc);
}
@@ -386,26 +385,46 @@
typedef struct tcp_worker_ctx_
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
- u32 time_now; /**< worker time */
- tw_timer_wheel_16t_2w_512sl_t timer_wheel; /**< worker timer wheel */
- u32 *tx_buffers; /**< tx buffer free list */
- vlib_frame_t *tx_frames[2]; /**< tx frames for tcp 4/6
- output nodes */
- vlib_frame_t *ip_lookup_tx_frames[2]; /**< tx frames for ip 4/6
- lookup nodes */
- u32 *pending_fast_rxt; /**< vector of connections
- needing fast rxt */
- u32 *ongoing_fast_rxt; /**< vector of connections
- now doing fast rxt */
- u32 *postponed_fast_rxt; /**< vector of connections
- that will do fast rxt */
+ /** worker time */
+ u32 time_now;
+
+ /** worker timer wheel */
+ tw_timer_wheel_16t_2w_512sl_t timer_wheel;
+
+ /** tx buffer free list */
+ u32 *tx_buffers;
+
+ /** tx frames for tcp 4/6 output nodes */
+ vlib_frame_t *tx_frames[2];
+
+ /** tx frames for ip 4/6 lookup nodes */
+ vlib_frame_t *ip_lookup_tx_frames[2];
+
+ /** vector of connections needing fast rxt */
+ u32 *pending_fast_rxt;
+
+ /** vector of connections now doing fast rxt */
+ u32 *ongoing_fast_rxt;
+
+ /** vector of connections that will do fast rxt */
+ u32 *postponed_fast_rxt;
+
+ /** vector of pending ack dequeues */
u32 *pending_deq_acked;
+
+ /** vector of pending acks */
u32 *pending_acks;
- vlib_main_t *vm; /**< pointer to vm */
+
+ /** vector of pending disconnect notifications */
+ u32 *pending_disconnects;
+
+ /** convenience pointer to this thread's vlib main */
+ vlib_main_t *vm;
CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
- u8 cached_opts[40]; /**< cached 'on the wire'
- options for bursts */
+
+ /** cached 'on the wire' options for bursts */
+ u8 cached_opts[40];
} tcp_worker_ctx_t;
diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c
index 2578b7d..0c87065 100644
--- a/src/vnet/tcp/tcp_input.c
+++ b/src/vnet/tcp/tcp_input.c
@@ -1131,7 +1131,6 @@
tc->rtt_ts = 0;
tcp_fastrecovery_off (tc);
- tcp_fastrecovery_1_smss_off (tc);
tcp_fastrecovery_first_off (tc);
TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 3);
@@ -1600,6 +1599,54 @@
return 0;
}
+static void
+tcp_program_disconnect (tcp_worker_ctx_t * wrk, tcp_connection_t * tc)
+{
+ if (!tcp_disconnect_pending (tc))
+ {
+ vec_add1 (wrk->pending_disconnects, tc->c_c_index);
+ tcp_disconnect_pending_on (tc);
+ }
+}
+
+static void
+tcp_handle_disconnects (tcp_worker_ctx_t * wrk)
+{
+ u32 thread_index, *pending_disconnects;
+ tcp_connection_t *tc;
+ int i;
+
+ if (!vec_len (wrk->pending_disconnects))
+ return;
+
+ thread_index = wrk->vm->thread_index;
+ pending_disconnects = wrk->pending_disconnects;
+ for (i = 0; i < vec_len (pending_disconnects); i++)
+ {
+ tc = tcp_connection_get (pending_disconnects[i], thread_index);
+ tcp_disconnect_pending_off (tc);
+ stream_session_disconnect_notify (&tc->connection);
+ }
+ _vec_len (wrk->pending_disconnects) = 0;
+}
+
+static void
+tcp_rcv_fin (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, vlib_buffer_t * b,
+ u32 * error)
+{
+ /* Enter CLOSE-WAIT and notify session. To avoid lingering
+ * in CLOSE-WAIT, set timer (reuse WAITCLOSE). */
+ /* Account for the FIN if nothing else was received */
+ if (vnet_buffer (b)->tcp.data_len == 0)
+ tc->rcv_nxt += 1;
+ tcp_program_ack (wrk, tc);
+ tc->state = TCP_STATE_CLOSE_WAIT;
+ tcp_program_disconnect (wrk, tc);
+ tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_CLOSEWAIT_TIME);
+ TCP_EVT_DBG (TCP_EVT_FIN_RCVD, tc);
+ *error = TCP_ERROR_FIN_RCVD;
+}
+
static u8
tcp_sack_vector_is_sane (sack_block_t * sacks)
{
@@ -2099,19 +2146,7 @@
/* 8: check the FIN bit */
if (PREDICT_FALSE (is_fin))
- {
- /* Enter CLOSE-WAIT and notify session. To avoid lingering
- * in CLOSE-WAIT, set timer (reuse WAITCLOSE). */
- /* Account for the FIN if nothing else was received */
- if (vnet_buffer (b0)->tcp.data_len == 0)
- tc0->rcv_nxt += 1;
- tcp_program_ack (wrk, tc0);
- tc0->state = TCP_STATE_CLOSE_WAIT;
- stream_session_disconnect_notify (&tc0->connection);
- tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, TCP_CLOSEWAIT_TIME);
- TCP_EVT_DBG (TCP_EVT_FIN_RCVD, tc0);
- error0 = TCP_ERROR_FIN_RCVD;
- }
+ tcp_rcv_fin (wrk, tc0, b0, &error0);
done:
tcp_inc_err_counter (err_counters, error0, 1);
@@ -2122,6 +2157,7 @@
err_counters[TCP_ERROR_EVENT_FIFO_FULL] = errors;
tcp_store_err_counters (established, err_counters);
tcp_handle_postponed_dequeues (wrk);
+ tcp_handle_disconnects (wrk);
vlib_buffer_free (vm, first_buffer, frame->n_vectors);
return frame->n_vectors;
diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c
index e16095b..4915636 100644
--- a/src/vnet/tcp/tcp_output.c
+++ b/src/vnet/tcp/tcp_output.c
@@ -1201,8 +1201,7 @@
tcp_push_hdr_i (tc, b, TCP_STATE_ESTABLISHED, /* compute opts */ 0,
/* burst */ 1);
tc->snd_una_max = tc->snd_nxt;
- ASSERT (seq_leq (tc->snd_una_max, tc->snd_una + tc->snd_wnd
- + tcp_fastrecovery_sent_1_smss (tc) * tc->snd_mss));
+ ASSERT (seq_leq (tc->snd_una_max, tc->snd_una + tc->snd_wnd));
tcp_validate_txf_size (tc, tc->snd_una_max - tc->snd_una);
/* If not tracking an ACK, start tracking */
if (tc->rtt_ts == 0 && !tcp_in_cong_recovery (tc))