TCP/session improvements
- Added svm fifo flag for tracking fifo dequeue events (replaces event
length). Updated all code to switch to the new scheme.
- More session debugging
- Fix peek index wrap
- Add a trivial socket test client
- Fast retransmit/cc fixes
- tx and rx SACK fixes and unit testing
- SRTT computation fix
- Remove dupack/ack burst filters
- Improve ack rx
- Improve segment rx
- Builtin client test code
Change-Id: Ic4eb2d5ca446eb2260ccd3ccbcdaa73c64e7f4e1
Signed-off-by: Florin Coras <fcoras@cisco.com>
Signed-off-by: Dave Barach <dbarach@cisco.com>
diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h
index 082ab1d..b4286bc 100644
--- a/src/vnet/tcp/tcp.h
+++ b/src/vnet/tcp/tcp.h
@@ -30,9 +30,10 @@
#define TCP_PAWS_IDLE 24 * 24 * 60 * 60 * THZ /**< 24 days */
#define TCP_MAX_OPTION_SPACE 40
-#define TCP_DUPACK_THRESHOLD 3
-#define TCP_MAX_RX_FIFO_SIZE 2 << 20
-#define TCP_IW_N_SEGMENTS 10
+#define TCP_DUPACK_THRESHOLD 3
+#define TCP_MAX_RX_FIFO_SIZE (2 << 20) /**< 2 MB; parenthesized so the shift stays intact inside larger expressions */
+#define TCP_IW_N_SEGMENTS 10
+#define TCP_ALWAYS_ACK 0 /**< If on, we always ack */
/** TCP FSM state definitions as per RFC793. */
#define foreach_tcp_fsm_state \
@@ -102,13 +103,12 @@
/** TCP connection flags */
#define foreach_tcp_connection_flag \
- _(DELACK, "Delay ACK") \
_(SNDACK, "Send ACK") \
- _(BURSTACK, "Burst ACK set") \
_(FINSNT, "FIN sent") \
_(SENT_RCV_WND0, "Sent 0 receive window") \
_(RECOVERY, "Recovery on") \
- _(FAST_RECOVERY, "Fast Recovery on")
+ _(FAST_RECOVERY, "Fast Recovery on") \
+ _(FR_1_SMSS, "Sent 1 SMSS")
typedef enum _tcp_connection_flag_bits
{
@@ -160,8 +160,12 @@
typedef struct _sack_scoreboard
{
sack_scoreboard_hole_t *holes; /**< Pool of holes */
- u32 head; /**< Index to first entry */
+ u32 head; /**< Index of first entry */
+ u32 tail; /**< Index of last entry */
u32 sacked_bytes; /**< Number of bytes sacked in sb */
+ u32 last_sacked_bytes; /**< Number of bytes last sacked */
+ u32 snd_una_adv; /**< Bytes to add to snd_una */
+ u32 max_byte_sacked; /**< Highest byte sacked */
} sack_scoreboard_t;
typedef enum _tcp_cc_algorithm_type
@@ -214,7 +218,7 @@
sack_block_t *snd_sacks; /**< Vector of SACKs to send. XXX Fixed size? */
sack_scoreboard_t sack_sb; /**< SACK "scoreboard" that tracks holes */
- u8 rcv_dupacks; /**< Number of DUPACKs received */
+ u16 rcv_dupacks; /**< Number of DUPACKs received */
u8 snt_dupacks; /**< Number of DUPACKs sent in a burst */
/* Congestion control */
@@ -224,6 +228,7 @@
u32 bytes_acked; /**< Bytes acknowledged by current segment */
u32 rtx_bytes; /**< Retransmitted bytes */
u32 tsecr_last_ack; /**< Timestamp echoed to us in last healthy ACK */
+ u32 snd_congestion; /**< snd_una_max when congestion is detected */
tcp_cc_algorithm_t *cc_algo; /**< Congestion control algorithm */
/* RTT and RTO */
@@ -250,8 +255,10 @@
#define tcp_fastrecovery_off(tc) (tc)->flags &= ~TCP_CONN_FAST_RECOVERY
#define tcp_in_fastrecovery(tc) ((tc)->flags & TCP_CONN_FAST_RECOVERY)
#define tcp_in_recovery(tc) ((tc)->flags & (TCP_CONN_FAST_RECOVERY | TCP_CONN_RECOVERY))
-#define tcp_recovery_off(tc) ((tc)->flags &= ~(TCP_CONN_FAST_RECOVERY | TCP_CONN_RECOVERY))
#define tcp_in_slowstart(tc) (tc->cwnd < tc->ssthresh)
+#define tcp_fastrecovery_sent_1_smss(tc) ((tc)->flags & TCP_CONN_FR_1_SMSS)
+#define tcp_fastrecovery_1_smss_on(tc) ((tc)->flags |= TCP_CONN_FR_1_SMSS)
+#define tcp_fastrecovery_1_smss_off(tc) ((tc)->flags &= ~TCP_CONN_FR_1_SMSS)
typedef enum
{
@@ -293,8 +300,8 @@
/* Per worker-thread timer wheel for connections timers */
tw_timer_wheel_16t_2w_512sl_t *timer_wheels;
- /* Convenience per worker-thread vector of connections to DELACK */
- u32 **delack_connections;
+// /* Convenience per worker-thread vector of connections to DELACK */
+// u32 **delack_connections;
/* Pool of half-open connections on which we've sent a SYN */
tcp_connection_t *half_open_connections;
@@ -397,8 +404,16 @@
always_inline u32
tcp_flight_size (const tcp_connection_t * tc)
{
- return tc->snd_una_max - tc->snd_una - tc->sack_sb.sacked_bytes
- + tc->rtx_bytes;
+ int flight_size;
+
+ flight_size = (int) ((tc->snd_una_max - tc->snd_una) + tc->rtx_bytes)
+ - (tc->rcv_dupacks * tc->snd_mss) /* - tc->sack_sb.sacked_bytes */ ;
+
+ /* Happens if we don't clear sacked bytes */
+ if (flight_size < 0)
+ return 0;
+
+ return flight_size;
}
/**
@@ -439,9 +454,13 @@
return available_wnd - flight_size;
}
+void tcp_update_rcv_wnd (tcp_connection_t * tc);
+
void tcp_retransmit_first_unacked (tcp_connection_t * tc);
void tcp_fast_retransmit (tcp_connection_t * tc);
+void tcp_cc_congestion (tcp_connection_t * tc);
+void tcp_cc_recover (tcp_connection_t * tc);
always_inline u32
tcp_time_now (void)
@@ -453,7 +472,7 @@
u32
tcp_prepare_retransmit_segment (tcp_connection_t * tc, vlib_buffer_t * b,
- u32 max_bytes);
+ u32 offset, u32 max_bytes);
void tcp_connection_timers_init (tcp_connection_t * tc);
void tcp_connection_timers_reset (tcp_connection_t * tc);
@@ -477,14 +496,6 @@
}
always_inline void
-tcp_retransmit_timer_set (tcp_connection_t * tc)
-{
- /* XXX Switch to faster TW */
- tcp_timer_set (tc, TCP_TIMER_RETRANSMIT,
- clib_max (tc->rto * TCP_TO_TIMER_TICK, 1));
-}
-
-always_inline void
tcp_timer_reset (tcp_connection_t * tc, u8 timer_id)
{
if (tc->timers[timer_id] == TCP_TIMER_HANDLE_INVALID)
@@ -506,6 +517,27 @@
tc->c_c_index, timer_id, interval);
}
+/* XXX Switch retransmit to faster TW */
+always_inline void
+tcp_retransmit_timer_set (tcp_connection_t * tc)
+{
+ tcp_timer_set (tc, TCP_TIMER_RETRANSMIT,
+ clib_max (tc->rto * TCP_TO_TIMER_TICK, 1));
+}
+
+always_inline void
+tcp_retransmit_timer_update (tcp_connection_t * tc)
+{
+ tcp_timer_update (tc, TCP_TIMER_RETRANSMIT,
+ clib_max (tc->rto * TCP_TO_TIMER_TICK, 1));
+}
+
+always_inline void
+tcp_retransmit_timer_reset (tcp_connection_t * tc)
+{
+ tcp_timer_reset (tc, TCP_TIMER_RETRANSMIT);
+}
+
always_inline u8
tcp_timer_is_active (tcp_connection_t * tc, tcp_timers_e timer)
{
@@ -517,6 +549,14 @@
sack_scoreboard_hole_t * hole);
always_inline sack_scoreboard_hole_t *
+scoreboard_get_hole (sack_scoreboard_t * sb, u32 index)
+{
+ if (index != TCP_INVALID_SACK_HOLE_INDEX)
+ return pool_elt_at_index (sb->holes, index);
+ return 0;
+}
+
+always_inline sack_scoreboard_hole_t *
scoreboard_next_hole (sack_scoreboard_t * sb, sack_scoreboard_hole_t * hole)
{
if (hole->next != TCP_INVALID_SACK_HOLE_INDEX)
@@ -532,6 +572,14 @@
return 0;
}
+always_inline sack_scoreboard_hole_t *
+scoreboard_last_hole (sack_scoreboard_t * sb)
+{
+ if (sb->tail != TCP_INVALID_SACK_HOLE_INDEX)
+ return pool_elt_at_index (sb->holes, sb->tail);
+ return 0;
+}
+
always_inline void
scoreboard_clear (sack_scoreboard_t * sb)
{
@@ -540,6 +588,10 @@
{
scoreboard_remove_hole (sb, hole);
}
+ sb->sacked_bytes = 0;
+ sb->last_sacked_bytes = 0;
+ sb->snd_una_adv = 0;
+ sb->max_byte_sacked = 0;
}
always_inline u32
@@ -548,6 +600,21 @@
return hole->end - hole->start;
}
+always_inline u32
+scoreboard_hole_index (sack_scoreboard_t * sb, sack_scoreboard_hole_t * hole)
+{
+ return hole - sb->holes;
+}
+
+always_inline void
+scoreboard_init (sack_scoreboard_t * sb)
+{
+ sb->head = TCP_INVALID_SACK_HOLE_INDEX;
+ sb->tail = TCP_INVALID_SACK_HOLE_INDEX;
+}
+
+void tcp_rcv_sacks (tcp_connection_t * tc, u32 ack);
+
always_inline void
tcp_cc_algo_register (tcp_cc_algorithm_type_e type,
const tcp_cc_algorithm_t * vft)