Implement sack based tcp loss recovery (RFC 6675)
- refactor existing congestion control code (RFC 6582/5681). Handling of ack
feedback now consists of: ack parsing, cc event detection, event handling,
congestion control update
- extend sack scoreboard to support sack based retransmissions
- basic implementation of Eifel detection algorithm (RFC 3522) for
detecting spurious retransmissions
- actually initialize the per-thread frame freelist hash tables
- increase worker stack size to 2mb
- fix session queue node out-of-buffer handling
- ensure that the local buffer cache vec_len matches reality
- avoid 2x spurious event requeues when short of buffers
- count out-of-buffer events
- make the builtin server thread-safe
- fix bihash template threading issue: need to paint -1 across uninitialized
working_copy_length vector elements (via rebase from master)
Change-Id: I646cb9f1add9a67d08f4a87badbcb117980ebfc4
Signed-off-by: Florin Coras <fcoras@cisco.com>
Signed-off-by: Dave Barach <dbarach@cisco.com>
diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c
index 9b7b2f6..e0b67a8 100644
--- a/src/vnet/tcp/tcp.c
+++ b/src/vnet/tcp/tcp.c
@@ -195,8 +195,8 @@
TCP_EVT_DBG (TCP_EVT_CLOSE, tc);
/* Send FIN if needed */
- if (tc->state == TCP_STATE_ESTABLISHED || tc->state == TCP_STATE_SYN_RCVD
- || tc->state == TCP_STATE_CLOSE_WAIT)
+ if (tc->state == TCP_STATE_ESTABLISHED
+ || tc->state == TCP_STATE_SYN_RCVD || tc->state == TCP_STATE_CLOSE_WAIT)
tcp_send_fin (tc);
/* Switch state */
@@ -480,7 +480,7 @@
format_tcp_timers (u8 * s, va_list * args)
{
tcp_connection_t *tc = va_arg (*args, tcp_connection_t *);
- int i, last = 0;
+ int i, last = -1;
for (i = 0; i < TCP_N_TIMERS; i++)
if (tc->timers[i] != TCP_TIMER_HANDLE_INVALID)
@@ -493,7 +493,7 @@
s = format (s, "%s,", tcp_conn_timers[i]);
}
- if (last > 0)
+ if (last >= 0)
s = format (s, "%s]", tcp_conn_timers[i]);
else
s = format (s, "]");
@@ -526,19 +526,19 @@
s = format (s, " snd_wnd %u rcv_wnd %u snd_wl1 %u snd_wl2 %u\n",
tc->snd_wnd, tc->rcv_wnd, tc->snd_wl1 - tc->irs,
tc->snd_wl2 - tc->iss);
- s = format (s, " flight size %u send space %u rcv_wnd available %d\n",
- tcp_flight_size (tc), tcp_snd_space (tc),
- tc->rcv_wnd - (tc->rcv_nxt - tc->rcv_las));
+ s = format (s, " flight size %u send space %u rcv_wnd_av %d\n",
+ tcp_flight_size (tc), tcp_available_snd_space (tc),
+ tcp_rcv_wnd_available (tc));
s = format (s, " cong %U ", format_tcp_congestion_status, tc);
s = format (s, "cwnd %u ssthresh %u rtx_bytes %u bytes_acked %u\n",
- tc->cwnd, tc->ssthresh, tc->rtx_bytes, tc->bytes_acked);
- s = format (s, " prev_ssthresh %u snd_congestion %u\n", tc->prev_ssthresh,
- tc->snd_congestion - tc->iss);
+ tc->cwnd, tc->ssthresh, tc->snd_rxt_bytes, tc->bytes_acked);
+ s = format (s, " prev_ssthresh %u snd_congestion %u dupack %u\n",
+ tc->prev_ssthresh, tc->snd_congestion - tc->iss,
+ tc->rcv_dupacks);
s = format (s, " rto %u rto_boff %u srtt %u rttvar %u rtt_ts %u ", tc->rto,
tc->rto_boff, tc->srtt, tc->rttvar, tc->rtt_ts);
s = format (s, "rtt_seq %u\n", tc->rtt_seq);
- if (scoreboard_first_hole (&tc->sack_sb))
- s = format (s, " scoreboard: %U\n", format_tcp_scoreboard, &tc->sack_sb);
+ s = format (s, " scoreboard: %U\n", format_tcp_scoreboard, &tc->sack_sb);
if (vec_len (tc->snd_sacks))
s = format (s, " sacks tx: %U\n", format_tcp_sacks, tc);
@@ -595,9 +595,10 @@
tc = tcp_connection_get (tci, thread_index);
if (tc)
- return format (s, "%U", format_tcp_connection, tc, verbose);
+ s = format (s, "%U", format_tcp_connection, tc, verbose);
else
- return format (s, "empty");
+ s = format (s, "empty");
+ return s;
}
u8 *
@@ -643,13 +644,17 @@
{
sack_scoreboard_t *sb = va_arg (*args, sack_scoreboard_t *);
sack_scoreboard_hole_t *hole;
- s = format (s, "head %u tail %u snd_una_adv %u\n", sb->head, sb->tail,
- sb->snd_una_adv);
- s = format (s, "sacked_bytes %u last_sacked_bytes %u", sb->sacked_bytes,
- sb->last_sacked_bytes);
- s = format (s, " max_byte_sacked %u\n", sb->max_byte_sacked);
- s = format (s, "holes:\n");
+ s = format (s, "sacked_bytes %u last_sacked_bytes %u lost_bytes %u\n",
+ sb->sacked_bytes, sb->last_sacked_bytes, sb->lost_bytes);
+ s = format (s, " last_bytes_delivered %u high_sacked %u snd_una_adv %u\n",
+ sb->last_bytes_delivered, sb->high_sacked, sb->snd_una_adv);
+ s = format (s, " cur_rxt_hole %u high_rxt %u rescue_rxt %u",
+ sb->cur_rxt_hole, sb->high_rxt, sb->rescue_rxt);
+
hole = scoreboard_first_hole (sb);
+ if (hole)
+ s = format (s, "\n head %u tail %u holes:\n", sb->head, sb->tail);
+
while (hole)
{
s = format (s, "%U", format_tcp_sack_hole, hole);
@@ -736,7 +741,7 @@
if (tcp_in_recovery (tc))
{
tc->snd_nxt = tc->snd_una_max;
- snd_space = tcp_available_wnd (tc) - tc->rtx_bytes
+ snd_space = tcp_available_wnd (tc) - tc->snd_rxt_bytes
- (tc->snd_una_max - tc->snd_congestion);
if (snd_space <= 0 || (tc->snd_una_max - tc->snd_una) >= tc->snd_wnd)
return 0;
@@ -744,8 +749,8 @@
}
/* If in fast recovery, send 1 SMSS if wnd allows */
- if (tcp_in_fastrecovery (tc) && tcp_available_snd_space (tc)
- && tcp_fastrecovery_sent_1_smss (tc))
+ if (tcp_in_fastrecovery (tc)
+ && tcp_available_snd_space (tc) && !tcp_fastrecovery_sent_1_smss (tc))
{
tcp_fastrecovery_1_smss_on (tc);
return tc->snd_mss;
@@ -761,6 +766,12 @@
return tcp_snd_space (tc);
}
+i32
+tcp_rcv_wnd_available (tcp_connection_t * tc)
+{
+ return (i32) tc->rcv_wnd - (tc->rcv_nxt - tc->rcv_las);
+}
+
u32
tcp_session_tx_fifo_offset (transport_connection_t * trans_conn)
{