tcp: improve lost rxt heuristic
Type: feature
- retransmit the first unacked segment if newer retransmitted packets
  are sacked while snd_una does not advance (its retransmit was likely lost)
- avoid spurious retransmits if recovery ends with sacked bytes
Change-Id: Ic1b56d22e025822edb7609afb136e47440ea6032
Signed-off-by: Florin Coras <fcoras@cisco.com>
diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h
index f403887..7dd88bf 100644
--- a/src/vnet/tcp/tcp.h
+++ b/src/vnet/tcp/tcp.h
@@ -1172,8 +1172,14 @@
always_inline void
tcp_persist_timer_update (tcp_connection_t * tc)
{
- tcp_timer_update (tc, TCP_TIMER_PERSIST,
- clib_max (tc->rto * TCP_TO_TIMER_TICK, 1));
+ u32 interval;
+
+ if (seq_leq (tc->snd_una, tc->snd_congestion + tc->burst_acked))
+ interval = 1;
+ else
+ interval = clib_max (tc->rto * TCP_TO_TIMER_TICK, 1);
+
+ tcp_timer_update (tc, TCP_TIMER_PERSIST, interval);
}
always_inline void
diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c
index bc78b39..172dcd2 100755
--- a/src/vnet/tcp/tcp_input.c
+++ b/src/vnet/tcp/tcp_input.c
@@ -578,10 +578,16 @@
always_inline u8
tcp_recovery_no_snd_space (tcp_connection_t * tc)
{
- return (tcp_in_fastrecovery (tc)
- && tcp_fastrecovery_prr_snd_space (tc) < tc->snd_mss)
- || (tcp_in_recovery (tc)
- && tcp_available_output_snd_space (tc) < tc->snd_mss);
+ u32 space;
+
+ ASSERT (tcp_in_cong_recovery (tc));
+
+ if (tcp_in_recovery (tc))
+ space = tcp_available_output_snd_space (tc);
+ else
+ space = tcp_fastrecovery_prr_snd_space (tc);
+
+ return (space < tc->snd_mss + tc->burst_acked);
}
/**
@@ -608,7 +614,6 @@
{
/* Dequeue the newly ACKed bytes */
session_tx_fifo_dequeue_drop (&tc->connection, tc->burst_acked);
- tc->burst_acked = 0;
tcp_validate_txf_size (tc, tc->snd_una_max - tc->snd_una);
if (PREDICT_FALSE (tc->flags & TCP_CONN_PSH_PENDING))
@@ -628,9 +633,11 @@
/* Reset the pacer if we've been idle, i.e., no data sent or if
* we're in recovery and snd space constrained */
if (tc->data_segs_out == tc->prev_dsegs_out
- || tcp_recovery_no_snd_space (tc))
+ || (tcp_in_cong_recovery (tc) && tcp_recovery_no_snd_space (tc)))
transport_connection_tx_pacer_reset_bucket (&tc->connection);
+
tc->prev_dsegs_out = tc->data_segs_out;
+ tc->burst_acked = 0;
}
_vec_len (wrk->pending_deq_acked) = 0;
}
@@ -1348,28 +1355,25 @@
is_spurious = 1;
}
- tc->rcv_dupacks = 0;
- tc->prr_delivered = 0;
- tc->rxt_delivered = 0;
- tc->snd_rxt_bytes = 0;
- tc->snd_rxt_ts = 0;
- tc->rtt_ts = 0;
- tc->flags &= ~TCP_CONN_RXT_PENDING;
-
tcp_connection_tx_pacer_reset (tc, tc->cwnd, 0 /* start bucket */ );
+ tc->rcv_dupacks = 0;
/* Previous recovery left us congested. Continue sending as part
* of the current recovery event with an updated snd_congestion */
if (tc->sack_sb.sacked_bytes)
{
tc->snd_congestion = tc->snd_nxt;
- tc->snd_rxt_ts = tcp_tstamp (tc);
- tc->prr_start = tc->snd_una;
- scoreboard_init_rxt (&tc->sack_sb, tc->snd_una);
tcp_program_retransmit (tc);
return is_spurious;
}
+ tc->rxt_delivered = 0;
+ tc->snd_rxt_bytes = 0;
+ tc->snd_rxt_ts = 0;
+ tc->prr_delivered = 0;
+ tc->rtt_ts = 0;
+ tc->flags &= ~TCP_CONN_RXT_PENDING;
+
hole = scoreboard_first_hole (&tc->sack_sb);
if (hole && hole->start == tc->snd_una && hole->end == tc->snd_nxt)
scoreboard_clear (&tc->sack_sb);
@@ -1444,29 +1448,18 @@
}
/*
- * Already in recovery. See if we can exit and stop retransmitting
+ * Already in recovery
*/
- if (seq_geq (tc->snd_una, tc->snd_congestion))
- {
- /* If spurious return, we've already updated everything */
- if (tcp_cc_recover (tc))
- {
- tc->tsecr_last_ack = tc->rcv_opts.tsecr;
- return;
- }
-
- /* Treat as congestion avoidance ack */
- tcp_cc_rcv_ack (tc, rs);
- return;
- }
-
/*
* Process (re)transmit feedback. Output path uses this to decide how much
* more data to release into the network
*/
if (has_sack)
{
+ if (!tc->bytes_acked && tc->sack_sb.rxt_sacked)
+ tcp_fastrecovery_first_on (tc);
+
tc->rxt_delivered += tc->sack_sb.rxt_sacked;
tc->prr_delivered += tc->bytes_acked + tc->sack_sb.last_sacked_bytes
- tc->sack_sb.last_bytes_delivered;
@@ -1498,6 +1491,23 @@
}
/*
+ * See if we can exit and stop retransmitting
+ */
+ if (seq_geq (tc->snd_una, tc->snd_congestion))
+ {
+ /* If spurious return, we've already updated everything */
+ if (tcp_cc_recover (tc))
+ {
+ tc->tsecr_last_ack = tc->rcv_opts.tsecr;
+ return;
+ }
+
+ /* Treat as congestion avoidance ack */
+ tcp_cc_rcv_ack (tc, rs);
+ return;
+ }
+
+ /*
* Notify cc of the event
*/
diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c
index 4298611..7be3de8 100644
--- a/src/vnet/tcp/tcp_output.c
+++ b/src/vnet/tcp/tcp_output.c
@@ -1850,6 +1850,9 @@
u32 tx_adv_sack = sb->high_sacked - tc->snd_congestion;
f64 rr = (f64) tc->ssthresh / tc->prev_cwnd;
+ if (tcp_fastrecovery_first (tc))
+ return 1;
+
return (tx_adv_sack > (tc->snd_una - tc->prr_start) * rr);
}
@@ -1928,6 +1931,8 @@
ASSERT (tc->rxt_delivered <= tc->snd_rxt_bytes);
}
+ tcp_fastrecovery_first_off (tc);
+
TCP_EVT (TCP_EVT_CC_EVT, tc, 0);
hole = scoreboard_get_hole (sb, sb->cur_rxt_hole);