tcp: loss recovery improvements/fixes

- fix newreno cwnd computation
- reset snd_una_max on entering recovery
- accept acks beyond snd_nxt but not beyond snd_congestion when in
  recovery
- avoid entering fast recovery multiple times when using sacks
- avoid sending small segments, as much as possible, when doing fast
  retransmit
- more event logging

Change-Id: I19dd151d7704e39d4eae06de3a26f5e124875366
Signed-off-by: Florin Coras <fcoras@cisco.com>
diff --git a/src/vnet/session/session.c b/src/vnet/session/session.c
index 2697c26..c016325 100644
--- a/src/vnet/session/session.c
+++ b/src/vnet/session/session.c
@@ -774,7 +774,7 @@
   stream_session_t *s;
   application_t *app;
   s = session_get (tc->s_index, tc->thread_index);
-
+  s->session_state = SESSION_STATE_CLOSED;
   app = application_get (s->app_index);
   app->cb_fns.session_reset_callback (s);
 }
diff --git a/src/vnet/session/session_node.c b/src/vnet/session/session_node.c
index 46fc4dc..e046efb 100644
--- a/src/vnet/session/session_node.c
+++ b/src/vnet/session/session_node.c
@@ -260,6 +260,8 @@
        * session is not ready or closed */
       if (s->session_state < SESSION_STATE_READY)
 	return 1;
+      if (s->session_state == SESSION_STATE_CLOSED)
+	return 2;
     }
   return 0;
 }
@@ -364,11 +366,12 @@
   session_tx_context_t *ctx = &smm->ctx[thread_index];
   transport_proto_t tp;
   vlib_buffer_t *pb;
-  u16 n_bufs;
+  u16 n_bufs, rv;
 
-  if (PREDICT_FALSE (session_tx_not_ready (s, peek_data)))
+  if (PREDICT_FALSE ((rv = session_tx_not_ready (s, peek_data))))
     {
-      vec_add1 (smm->pending_event_vector[thread_index], *e);
+      if (rv < 2)
+	vec_add1 (smm->pending_event_vector[thread_index], *e);
       return 0;
     }
 
diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c
index 25292d1..15ac7d3 100644
--- a/src/vnet/tcp/tcp.c
+++ b/src/vnet/tcp/tcp.c
@@ -734,9 +734,9 @@
   s = format (s, " snd_wnd %u rcv_wnd %u snd_wl1 %u snd_wl2 %u\n",
 	      tc->snd_wnd, tc->rcv_wnd, tc->snd_wl1 - tc->irs,
 	      tc->snd_wl2 - tc->iss);
-  s = format (s, " flight size %u send space %u rcv_wnd_av %d\n",
+  s = format (s, " flight size %u out space %u cc space %u rcv_wnd_av %u\n",
 	      tcp_flight_size (tc), tcp_available_output_snd_space (tc),
-	      tcp_rcv_wnd_available (tc));
+	      tcp_available_cc_snd_space (tc), tcp_rcv_wnd_available (tc));
   s = format (s, " cong %U ", format_tcp_congestion_status, tc);
   s = format (s, "cwnd %u ssthresh %u rtx_bytes %u bytes_acked %u\n",
 	      tc->cwnd, tc->ssthresh, tc->snd_rxt_bytes, tc->bytes_acked);
@@ -1022,7 +1022,7 @@
    * bytes of previously unsent data. */
   if (tcp_in_fastrecovery (tc) && !tcp_fastrecovery_sent_1_smss (tc))
     {
-      if (tcp_available_output_snd_space (tc) < tc->snd_mss)
+      if (tcp_available_cc_snd_space (tc) < tc->snd_mss)
 	return 0;
       tcp_fastrecovery_1_smss_on (tc);
       return tc->snd_mss;
diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h
index 837b5b4..10aa721 100644
--- a/src/vnet/tcp/tcp.h
+++ b/src/vnet/tcp/tcp.h
@@ -119,7 +119,7 @@
   _(FAST_RECOVERY, "Fast Recovery")		\
   _(FR_1_SMSS, "Sent 1 SMSS")			\
   _(HALF_OPEN_DONE, "Half-open completed")	\
-  _(FINPNDG, "FIN pending")
+  _(FINPNDG, "FIN pending")			\
 
 typedef enum _tcp_connection_flag_bits
 {
@@ -617,7 +617,7 @@
  * Estimate of how many bytes we can still push into the network
  */
 always_inline u32
-tcp_available_snd_space (const tcp_connection_t * tc)
+tcp_available_cc_snd_space (const tcp_connection_t * tc)
 {
   u32 available_wnd = tcp_available_snd_wnd (tc);
   u32 flight_size = tcp_flight_size (tc);
@@ -652,6 +652,7 @@
 
 /* Made public for unit testing only */
 void tcp_update_sack_list (tcp_connection_t * tc, u32 start, u32 end);
+u32 tcp_sack_list_bytes (tcp_connection_t * tc);
 
 always_inline u32
 tcp_time_now (void)
@@ -791,7 +792,6 @@
 void
 scoreboard_remove_hole (sack_scoreboard_t * sb,
 			sack_scoreboard_hole_t * hole);
-void scoreboard_update_lost (tcp_connection_t * tc, sack_scoreboard_t * sb);
 sack_scoreboard_hole_t *scoreboard_insert_hole (sack_scoreboard_t * sb,
 						u32 prev_index, u32 start,
 						u32 end);
diff --git a/src/vnet/tcp/tcp_debug.h b/src/vnet/tcp/tcp_debug.h
index 4af4f2e..a52efe0 100755
--- a/src/vnet/tcp/tcp_debug.h
+++ b/src/vnet/tcp/tcp_debug.h
@@ -56,6 +56,9 @@
   _(CC_PACK, "cc partial ack")		\
   _(CC_STAT, "cc stats")		\
   _(CC_RTO_STAT, "cc rto stats")	\
+  _(CC_SCOREBOARD, "scoreboard stats")	\
+  _(CC_SACKS, "snd sacks stats")	\
+  _(CC_INPUT, "ooo data delivered")	\
   _(SEG_INVALID, "invalid segment")	\
   _(PAWS_FAIL, "failed paws check")	\
   _(ACK_RCV_ERR, "invalid ack")		\
@@ -192,7 +195,7 @@
   ed->data[0] = _tc->c_c_index;						\
 }
 
-#define TCP_EVT_SYN_RCVD_HANDLER(_tc,_init, ...)				\
+#define TCP_EVT_SYN_RCVD_HANDLER(_tc,_init, ...)			\
 {									\
   if (_init)								\
     TCP_EVT_INIT_HANDLER(_tc, 0);					\
@@ -277,9 +280,9 @@
   };									\
   DECLARE_ETD(_tc, _e, 4);						\
   ed->data[0] = _tc->iss;						\
-  ed->data[1] = _tc->snd_una - _tc->iss;					\
+  ed->data[1] = _tc->snd_una - _tc->iss;				\
   ed->data[2] = _tc->snd_una_max - _tc->iss;				\
-  ed->data[3] = _tc->snd_nxt - _tc->iss;					\
+  ed->data[3] = _tc->snd_nxt - _tc->iss;				\
   TCP_EVT_STATE_CHANGE_HANDLER(_tc);					\
 }
 
@@ -288,14 +291,14 @@
   ELOG_TYPE_DECLARE (_e) =						\
   {									\
     .format = "synack-tx: iss %u irs %u snd_una %u snd_nxt %u rcv_nxt %u",\
-    .format_args = "i4i4i4i4i4",						\
+    .format_args = "i4i4i4i4i4",					\
   };									\
   DECLARE_ETD(_tc, _e, 5);						\
   ed->data[0] = _tc->iss;						\
   ed->data[1] = _tc->irs;						\
-  ed->data[2] = _tc->snd_una - _tc->iss;					\
-  ed->data[3] = _tc->snd_nxt - _tc->iss;					\
-  ed->data[4] = _tc->rcv_nxt - _tc->irs;					\
+  ed->data[2] = _tc->snd_una - _tc->iss;				\
+  ed->data[3] = _tc->snd_nxt - _tc->iss;				\
+  ed->data[4] = _tc->rcv_nxt - _tc->irs;				\
 }
 
 #define TCP_EVT_SYNACK_RCVD_HANDLER(_tc, ...)				\
@@ -303,14 +306,14 @@
   ELOG_TYPE_DECLARE (_e) =						\
   {									\
     .format = "synack-rx: iss %u irs %u snd_una %u snd_nxt %u rcv_nxt %u",\
-    .format_args = "i4i4i4i4i4",						\
+    .format_args = "i4i4i4i4i4",					\
   };									\
   DECLARE_ETD(_tc, _e, 5);						\
   ed->data[0] = _tc->iss;						\
   ed->data[1] = _tc->irs;						\
-  ed->data[2] = _tc->snd_una - _tc->iss;					\
-  ed->data[3] = _tc->snd_nxt - _tc->iss;					\
-  ed->data[4] = _tc->rcv_nxt - _tc->irs;					\
+  ed->data[2] = _tc->snd_una - _tc->iss;				\
+  ed->data[3] = _tc->snd_nxt - _tc->iss;				\
+  ed->data[4] = _tc->rcv_nxt - _tc->irs;				\
   TCP_EVT_STATE_CHANGE_HANDLER(_tc);					\
 }
 
@@ -371,7 +374,7 @@
   ELOG_TYPE_DECLARE (_e) =						\
   {									\
     .format = "%s-rxt: iss %u irs %u snd_nxt %u rcv_nxt %u",		\
-    .format_args = "t4i4i4i4i4",						\
+    .format_args = "t4i4i4i4i4",					\
     .n_enum_strings = 2,						\
     .enum_strings = {                                           	\
 	"syn",	                                             		\
@@ -382,10 +385,9 @@
   ed->data[0] = _type;							\
   ed->data[1] = _tc->iss;						\
   ed->data[2] = _tc->irs;						\
-  ed->data[3] = _tc->snd_nxt - _tc->iss;					\
-  ed->data[4] = _tc->rcv_nxt - _tc->irs;					\
+  ed->data[3] = _tc->snd_nxt - _tc->iss;				\
+  ed->data[4] = _tc->rcv_nxt - _tc->irs;				\
 }
-
 #else
 #define TCP_EVT_SYN_SENT_HANDLER(_tc, ...)
 #define TCP_EVT_SYNACK_SENT_HANDLER(_tc, ...)
@@ -399,6 +401,81 @@
 #endif
 
 #if TCP_DEBUG_SM > 1
+#define TCP_EVT_SEG_INVALID_HANDLER(_tc, _btcp, ...)			\
+{									\
+  ELOG_TYPE_DECLARE (_e) =						\
+  {									\
+    .format = "seg-inv: seq %u end %u rcv_las %u rcv_nxt %u rcv_wnd %u",\
+    .format_args = "i4i4i4i4i4",					\
+  };									\
+  DECLARE_ETD(_tc, _e, 5);						\
+  ed->data[0] = _btcp.seq_number - _tc->irs;				\
+  ed->data[1] = _btcp.seq_end - _tc->irs;				\
+  ed->data[2] = _tc->rcv_las - _tc->irs;				\
+  ed->data[3] = _tc->rcv_nxt - _tc->irs;				\
+  ed->data[4] = _tc->rcv_wnd;						\
+}
+
+#define TCP_EVT_PAWS_FAIL_HANDLER(_tc, _seq, _end, ...)			\
+{									\
+  ELOG_TYPE_DECLARE (_e) =						\
+  {									\
+    .format = "paws-err: seq %u end %u tsval %u tsval_recent %u",	\
+    .format_args = "i4i4i4i4",						\
+  };									\
+  DECLARE_ETD(_tc, _e, 4);						\
+  ed->data[0] = _seq - _tc->irs;					\
+  ed->data[1] = _end - _tc->irs;					\
+  ed->data[2] = _tc->rcv_opts.tsval;					\
+  ed->data[3] = _tc->tsval_recent;					\
+}
+
+#define TCP_EVT_ACK_RCV_ERR_HANDLER(_tc, _type, _ack, ...)		\
+{									\
+  ELOG_TYPE_DECLARE (_e) =						\
+  {									\
+    .format = "ack-err: %s ack %u snd_una %u snd_nxt %u una_max %u",	\
+    .format_args = "t4i4i4i4i4",					\
+    .n_enum_strings = 3,						\
+    .enum_strings = {                                           	\
+      "invalid",                                                 	\
+      "old",                                                 		\
+      "future",								\
+    }, 									\
+  };									\
+  DECLARE_ETD(_tc, _e, 5);						\
+  ed->data[0] = _type;							\
+  ed->data[1] = _ack - _tc->iss;					\
+  ed->data[2] = _tc->snd_una - _tc->iss;				\
+  ed->data[3] = _tc->snd_nxt - _tc->iss;				\
+  ed->data[4] = _tc->snd_una_max - _tc->iss;				\
+}
+
+#define TCP_EVT_RCV_WND_SHRUNK_HANDLER(_tc, _obs, _av, ...)		\
+{									\
+if (_av > 0) 								\
+{									\
+  ELOG_TYPE_DECLARE (_e) =						\
+  {									\
+    .format = "huh?: rcv_wnd %u obsd %u av %u rcv_nxt %u rcv_las %u",	\
+    .format_args = "i4i4i4i4i4",					\
+  };									\
+  DECLARE_ETD(_tc, _e, 5);						\
+  ed->data[0] = _tc->rcv_wnd;						\
+  ed->data[1] = _obs;							\
+  ed->data[2] = _av;							\
+  ed->data[3] = _tc->rcv_nxt - _tc->irs;				\
+  ed->data[4] = _tc->rcv_las - _tc->irs;				\
+}									\
+}
+#else
+#define TCP_EVT_SEG_INVALID_HANDLER(_tc, _btcp, ...)
+#define TCP_EVT_PAWS_FAIL_HANDLER(_tc, _seq, _end, ...)
+#define TCP_EVT_ACK_RCV_ERR_HANDLER(_tc, _type, _ack, ...)
+#define TCP_EVT_RCV_WND_SHRUNK_HANDLER(_tc, _obs, _av, ...)
+#endif
+
+#if TCP_DEBUG_SM > 2
 
 #define TCP_EVT_ACK_SENT_HANDLER(_tc, ...)				\
 {									\
@@ -505,90 +582,18 @@
 		    _tc_index);						\
     }									\
 }
-
-#define TCP_EVT_SEG_INVALID_HANDLER(_tc, _seq, _end, ...)		\
-{									\
-  ELOG_TYPE_DECLARE (_e) =						\
-  {									\
-    .format = "seg-inv: seq %u end %u rcv_las %u rcv_nxt %u rcv_wnd %u",\
-    .format_args = "i4i4i4i4i4",					\
-  };									\
-  DECLARE_ETD(_tc, _e, 5);						\
-  ed->data[0] = _seq - _tc->irs;					\
-  ed->data[1] = _end - _tc->irs;					\
-  ed->data[2] = _tc->rcv_las - _tc->irs;				\
-  ed->data[3] = _tc->rcv_nxt - _tc->irs;				\
-  ed->data[4] = _tc->rcv_wnd;						\
-}
-
-#define TCP_EVT_PAWS_FAIL_HANDLER(_tc, _seq, _end, ...)			\
-{									\
-  ELOG_TYPE_DECLARE (_e) =						\
-  {									\
-    .format = "paws-err: seq %u end %u tsval %u tsval_recent %u",	\
-    .format_args = "i4i4i4i4",						\
-  };									\
-  DECLARE_ETD(_tc, _e, 4);						\
-  ed->data[0] = _seq - _tc->irs;					\
-  ed->data[1] = _end - _tc->irs;					\
-  ed->data[2] = _tc->rcv_opts.tsval;					\
-  ed->data[3] = _tc->tsval_recent;					\
-}
-
-#define TCP_EVT_ACK_RCV_ERR_HANDLER(_tc, _type, _ack, ...)		\
-{									\
-  ELOG_TYPE_DECLARE (_e) =						\
-  {									\
-    .format = "ack-err: %s ack %u snd_una %u snd_nxt %u una_max %u",	\
-    .format_args = "t4i4i4i4i4",					\
-    .n_enum_strings = 3,						\
-    .enum_strings = {                                           	\
-      "invalid",                                                 	\
-      "old",                                                 		\
-      "future",								\
-    }, 									\
-  };									\
-  DECLARE_ETD(_tc, _e, 5);						\
-  ed->data[0] = _type;							\
-  ed->data[1] = _ack - _tc->iss;					\
-  ed->data[2] = _tc->snd_una - _tc->iss;				\
-  ed->data[3] = _tc->snd_nxt - _tc->iss;				\
-  ed->data[4] = _tc->snd_una_max - _tc->iss;				\
-}
-
-#define TCP_EVT_RCV_WND_SHRUNK_HANDLER(_tc, _obs, _av, ...)		\
-{									\
-if (_av > 0) 								\
-{									\
-  ELOG_TYPE_DECLARE (_e) =						\
-  {									\
-    .format = "huh?: rcv_wnd %u obsd %u av %u rcv_nxt %u rcv_las %u",	\
-    .format_args = "i4i4i4i4i4",					\
-  };									\
-  DECLARE_ETD(_tc, _e, 5);						\
-  ed->data[0] = _tc->rcv_wnd;						\
-  ed->data[1] = _obs;							\
-  ed->data[2] = _av;							\
-  ed->data[3] = _tc->rcv_nxt - _tc->irs;				\
-  ed->data[4] = _tc->rcv_las - _tc->irs;				\
-}									\
-}
 #else
 #define TCP_EVT_ACK_SENT_HANDLER(_tc, ...)
 #define TCP_EVT_ACK_RCVD_HANDLER(_tc, ...)
 #define TCP_EVT_PKTIZE_HANDLER(_tc, ...)
 #define TCP_EVT_INPUT_HANDLER(_tc, _type, _len, _written, ...)
 #define TCP_EVT_TIMER_POP_HANDLER(_tc_index, _timer_id, ...)
-#define TCP_EVT_SEG_INVALID_HANDLER(_tc, _seq, _end, ...)
-#define TCP_EVT_PAWS_FAIL_HANDLER(_tc, _seq, _end, ...)
-#define TCP_EVT_ACK_RCV_ERR_HANDLER(_tc, _type, _ack, ...)
-#define TCP_EVT_RCV_WND_SHRUNK_HANDLER(_tc, _obs, _av, ...)
 #endif
 
 /*
  * State machine verbose
  */
-#if TCP_DEBUG_SM > 2
+#if TCP_DEBUG_SM > 3
 #define TCP_EVT_SND_WND_HANDLER(_tc, ...)				\
 {									\
   ELOG_TYPE_DECLARE (_e) =						\
@@ -626,9 +631,9 @@
 {									\
   ELOG_TYPE_DECLARE (_e) =						\
   {									\
-    .format = "cc: %s snd_space %u snd_cong %u rxt_bytes %u",		\
-    .format_args = "t4i4i4i4",						\
-    .n_enum_strings = 6,						\
+    .format = "cc: %s snd_space %u snd_una %u out %u flight %u",	\
+    .format_args = "t4i4i4i4i4",					\
+    .n_enum_strings = 7,						\
     .enum_strings = {                                           	\
       "fast-rxt",	                                             	\
       "rxt-timeout",                                                 	\
@@ -636,13 +641,15 @@
       "recovered",							\
       "congestion",							\
       "undo",								\
+      "recovery",							\
     },  								\
   };									\
-  DECLARE_ETD(_tc, _e, 4);						\
+  DECLARE_ETD(_tc, _e, 5);						\
   ed->data[0] = _sub_evt;						\
-  ed->data[1] = tcp_available_snd_space (_tc);				\
-  ed->data[2] = _tc->snd_congestion - _tc->iss;				\
-  ed->data[3] = _tc->snd_rxt_bytes;					\
+  ed->data[1] = tcp_available_cc_snd_space (_tc);			\
+  ed->data[2] = _tc->snd_una - _tc->iss;				\
+  ed->data[3] = tcp_bytes_out(_tc);					\
+  ed->data[4] = tcp_flight_size (_tc);					\
 }
 
 #define TCP_EVT_CC_RTX_HANDLER(_tc, offset, n_bytes, ...)		\
@@ -659,19 +666,19 @@
   ed->data[3] = _tc->snd_rxt_bytes;					\
 }
 
-#define TCP_EVT_DUPACK_SENT_HANDLER(_tc, ...)				\
+#define TCP_EVT_DUPACK_SENT_HANDLER(_tc, _btcp, ...)			\
 {									\
   ELOG_TYPE_DECLARE (_e) =						\
   {									\
-    .format = "dack-tx: rcv_nxt %u rcv_wnd %u snd_nxt %u av_wnd %u snd_wnd %u",\
+    .format = "dack-tx: rcv_nxt %u seq %u rcv_wnd %u snd_nxt %u av_wnd %u",\
     .format_args = "i4i4i4i4i4",					\
   };									\
   DECLARE_ETD(_tc, _e, 5);						\
   ed->data[0] = _tc->rcv_nxt - _tc->irs;				\
-  ed->data[1] = _tc->rcv_wnd;						\
-  ed->data[2] = _tc->snd_nxt - _tc->iss;				\
-  ed->data[3] = tcp_available_snd_wnd(_tc);				\
-  ed->data[4] = _tc->snd_wnd;						\
+  ed->data[1] = _btcp.seq_number - _tc->irs;				\
+  ed->data[2] = _tc->rcv_wnd;						\
+  ed->data[3] = _tc->snd_nxt - _tc->iss;				\
+  ed->data[4] = tcp_available_snd_wnd(_tc);				\
 }
 
 #define TCP_EVT_DUPACK_RCVD_HANDLER(_tc, ...)				\
@@ -700,12 +707,75 @@
   ed->data[0] = _tc->snd_una - _tc->iss;				\
   ed->data[1] = _tc->snd_una_max - _tc->iss;				\
 }
+#define TCP_EVT_CC_SCOREBOARD_HANDLER(_tc, ...)				\
+{									\
+if (TCP_DEBUG_CC > 1 && _tc->sack_sb.last_sacked_bytes)			\
+  {									\
+    ELOG_TYPE_DECLARE (_e) =						\
+    {									\
+      .format = "sb1: holes %u lost %u sacked %u high %u highrxt %u",	\
+      .format_args = "i4i4i4i4i4",					\
+    };									\
+    DECLARE_ETD(_tc, _e, 5);						\
+    ed->data[0] = pool_elts(_tc->sack_sb.holes);			\
+    ed->data[1] = _tc->sack_sb.lost_bytes;				\
+    ed->data[2] = _tc->sack_sb.sacked_bytes;				\
+    ed->data[3] = _tc->sack_sb.high_sacked - _tc->iss;			\
+    ed->data[4] = _tc->sack_sb.high_rxt - _tc->iss;			\
+  }									\
+if (TCP_DEBUG_CC > 1 && _tc->sack_sb.last_sacked_bytes)			\
+  {									\
+    sack_scoreboard_hole_t *hole;					\
+    hole = scoreboard_first_hole (&_tc->sack_sb);			\
+    ELOG_TYPE_DECLARE (_e) =						\
+    {									\
+      .format = "sb2: first start: %u end %u last start %u end %u",	\
+      .format_args = "i4i4i4i4",					\
+    };									\
+    DECLARE_ETD(_tc, _e, 4);						\
+    ed->data[0] = hole ? hole->start - _tc->iss : 0;			\
+    ed->data[1] = hole ? hole->end - _tc->iss : 0;			\
+    hole = scoreboard_last_hole (&_tc->sack_sb);			\
+    ed->data[2] = hole ? hole->start - _tc->iss : 0;			\
+    ed->data[3] = hole ? hole->end - _tc->iss : 0;			\
+  }									\
+}
+#define TCP_EVT_CC_SACKS_HANDLER(_tc, ...)				\
+{									\
+if (TCP_DEBUG_CC > 1)							\
+  {									\
+    ELOG_TYPE_DECLARE (_e) =						\
+    {									\
+      .format = "sacks: blocks %u bytes %u",				\
+      .format_args = "i4i4",						\
+    };									\
+    DECLARE_ETD(_tc, _e, 2);						\
+    ed->data[0] = vec_len (_tc->snd_sacks);				\
+    ed->data[1] = tcp_sack_list_bytes (_tc);				\
+  }									\
+}
+#define TCP_EVT_CC_INPUT_HANDLER(_tc, _len, _written, ...)		\
+{									\
+  ELOG_TYPE_DECLARE (_e) =						\
+  {									\
+    .format = "cc input: len %u written %d rcv_nxt %u rcv_wnd(o) %d",	\
+    .format_args = "i4i4i4i4",						\
+  };									\
+  DECLARE_ETD(_tc, _e, 4);						\
+  ed->data[0] = _len;							\
+  ed->data[1] = _written;						\
+  ed->data[2] = _tc->rcv_nxt - _tc->irs;				\
+  ed->data[3] = _tc->rcv_wnd - (_tc->rcv_nxt - _tc->rcv_las);		\
+}
 #else
 #define TCP_EVT_CC_RTX_HANDLER(_tc, offset, n_bytes, ...)
-#define TCP_EVT_DUPACK_SENT_HANDLER(_tc, ...)
+#define TCP_EVT_DUPACK_SENT_HANDLER(_tc, _btcp, ...)
 #define TCP_EVT_DUPACK_RCVD_HANDLER(_tc, ...)
 #define TCP_EVT_CC_PACK_HANDLER(_tc, ...)
 #define TCP_EVT_CC_EVT_HANDLER(_tc, _sub_evt, ...)
+#define TCP_EVT_CC_SCOREBOARD_HANDLER(_tc, ...)
+#define TCP_EVT_CC_SACKS_HANDLER(_tc, ...)
+#define TCP_EVT_CC_INPUT_HANDLER(_tc, _len, _written, ...)
 #endif
 
 /*
diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c
index c864320..19ecc7d 100644
--- a/src/vnet/tcp/tcp_input.c
+++ b/src/vnet/tcp/tcp_input.c
@@ -275,6 +275,14 @@
 		      vlib_buffer_t * b0, tcp_header_t * th0,
 		      u32 * next0, u32 * error0)
 {
+  /* We could get a burst of RSTs interleaved with acks */
+  if (PREDICT_FALSE (tc0->state == TCP_STATE_CLOSED))
+    {
+      tcp_send_reset (tc0);
+      *error0 = TCP_ERROR_CONNECTION_CLOSED;
+      goto drop;
+    }
+
   if (PREDICT_FALSE (!tcp_ack (th0) && !tcp_rst (th0) && !tcp_syn (th0)))
     {
       *error0 = TCP_ERROR_SEGMENT_INVALID;
@@ -292,13 +300,7 @@
     {
       *error0 = TCP_ERROR_PAWS;
       if (CLIB_DEBUG > 2)
-	{
-	  clib_warning ("paws failed\n%U", format_tcp_connection, tc0, 2);
-	  clib_warning ("seq %u seq_end %u ack %u",
-			vnet_buffer (b0)->tcp.seq_number - tc0->irs,
-			vnet_buffer (b0)->tcp.seq_end - tc0->irs,
-			vnet_buffer (b0)->tcp.ack_number - tc0->iss);
-	}
+	clib_warning ("paws failed\n%U", format_tcp_connection, tc0, 2);
       TCP_EVT_DBG (TCP_EVT_PAWS_FAIL, tc0, vnet_buffer (b0)->tcp.seq_number,
 		   vnet_buffer (b0)->tcp.seq_end);
 
@@ -317,7 +319,7 @@
 	  if (!tcp_rst (th0))
 	    {
 	      tcp_make_ack (tc0, b0);
-	      TCP_EVT_DBG (TCP_EVT_DUPACK_SENT, tc0);
+	      TCP_EVT_DBG (TCP_EVT_DUPACK_SENT, tc0, vnet_buffer (b0)->tcp);
 	      goto error;
 	    }
 	}
@@ -329,7 +331,6 @@
 			       vnet_buffer (b0)->tcp.seq_end))
     {
       *error0 = TCP_ERROR_RCV_WND;
-
       /* If our window is 0 and the packet is in sequence, let it pass
        * through for ack processing. It should be dropped later. */
       if (!(tc0->rcv_wnd == 0
@@ -339,7 +340,7 @@
 	  if (!tcp_rst (th0))
 	    {
 	      tcp_make_ack (tc0, b0);
-	      TCP_EVT_DBG (TCP_EVT_DUPACK_SENT, tc0);
+	      TCP_EVT_DBG (TCP_EVT_DUPACK_SENT, tc0, vnet_buffer (b0)->tcp);
 	      goto error;
 	    }
 	  goto drop;
@@ -889,13 +890,14 @@
   scoreboard_update_bytes (tc, sb);
   sb->last_sacked_bytes = sb->sacked_bytes
     - (old_sacked_bytes - sb->last_bytes_delivered);
-  ASSERT (sb->last_sacked_bytes <= sb->sacked_bytes);
+  ASSERT (sb->last_sacked_bytes <= sb->sacked_bytes || tcp_in_recovery (tc));
   ASSERT (sb->sacked_bytes == 0
 	  || sb->sacked_bytes < tc->snd_una_max - seq_max (tc->snd_una, ack));
   ASSERT (sb->last_sacked_bytes + sb->lost_bytes <= tc->snd_una_max
 	  - seq_max (tc->snd_una, ack));
   ASSERT (sb->head == TCP_INVALID_SACK_HOLE_INDEX || tcp_in_recovery (tc)
 	  || sb->holes[sb->head].start == ack + sb->snd_una_adv);
+  TCP_EVT_DBG (TCP_EVT_CC_SCOREBOARD, tc);
 }
 
 /**
@@ -1063,11 +1065,18 @@
 {
   u32 rxt_delivered;
 
+  if (tcp_in_fastrecovery (tc) && tcp_opts_sack_permitted (&tc->rcv_opts))
+    {
+      if (tc->bytes_acked)
+	goto partial_ack;
+      tcp_fast_retransmit (tc);
+      return;
+    }
   /*
    * Duplicate ACK. Check if we should enter fast recovery, or if already in
    * it account for the bytes that left the network.
    */
-  if (is_dack && !tcp_in_recovery (tc))
+  else if (is_dack && !tcp_in_recovery (tc))
     {
       TCP_EVT_DBG (TCP_EVT_DUPACK_RCVD, tc, 1);
       ASSERT (tc->snd_una != tc->snd_una_max
@@ -1128,7 +1137,6 @@
 	    {
 	      tcp_fast_retransmit_no_sack (tc);
 	    }
-
 	  return;
 	}
       else if (!tc->bytes_acked
@@ -1237,6 +1245,16 @@
   /* If the ACK acks something not yet sent (SEG.ACK > SND.NXT) */
   if (PREDICT_FALSE (seq_gt (vnet_buffer (b)->tcp.ack_number, tc->snd_nxt)))
     {
+      /* When we entered recovery, we reset snd_nxt to snd_una. Seems peer
+       * still has the data so accept the ack */
+      if (tcp_in_recovery (tc)
+	  && seq_leq (vnet_buffer (b)->tcp.ack_number, tc->snd_congestion)
+	  && seq_geq (vnet_buffer (b)->tcp.ack_number, tc->snd_una))
+	{
+	  tc->snd_una_max = tc->snd_nxt = vnet_buffer (b)->tcp.ack_number;
+	  goto process_ack;
+	}
+
       /* If we have outstanding data and this is within the window, accept it,
        * probably retransmit has timed out. Otherwise ACK segment and then
        * drop it */
@@ -1264,9 +1282,7 @@
       TCP_EVT_DBG (TCP_EVT_ACK_RCV_ERR, tc, 1,
 		   vnet_buffer (b)->tcp.ack_number);
       if (tcp_in_fastrecovery (tc) && tc->rcv_dupacks == TCP_DUPACK_THRESHOLD)
-	{
-	  tcp_cc_handle_event (tc, 1);
-	}
+	tcp_cc_handle_event (tc, 1);
       /* Don't drop yet */
       return 0;
     }
@@ -1274,7 +1290,7 @@
   /*
    * Looks okay, process feedback
    */
-
+process_ack:
   if (tcp_opts_sack_permitted (&tc->rcv_opts))
     tcp_rcv_sacks (tc, vnet_buffer (b)->tcp.ack_number);
 
@@ -1390,6 +1406,15 @@
   ASSERT (tcp_sack_vector_is_sane (tc->snd_sacks));
 }
 
+u32
+tcp_sack_list_bytes (tcp_connection_t * tc)
+{
+  u32 bytes = 0, i;
+  for (i = 0; i < vec_len (tc->snd_sacks); i++)
+    bytes += tc->snd_sacks[i].end - tc->snd_sacks[i].start;
+  return bytes;
+}
+
 /** Enqueue data for delivery to application */
 always_inline int
 tcp_session_enqueue_data (tcp_connection_t * tc, vlib_buffer_t * b,
@@ -1416,6 +1441,7 @@
 
       /* Send ACK confirming the update */
       tc->flags |= TCP_CONN_SNDACK;
+      TCP_EVT_DBG (TCP_EVT_CC_INPUT, tc, data_len, written);
     }
   else if (written > 0)
     {
@@ -1488,6 +1514,7 @@
 	  end = start + ooo_segment_length (s0->server_rx_fifo, newest);
 	  tcp_update_sack_list (tc, start, end);
 	  svm_fifo_newest_ooo_segment_reset (s0->server_rx_fifo);
+	  TCP_EVT_DBG (TCP_EVT_CC_SACKS, tc);
 	}
     }
 
@@ -1508,7 +1535,7 @@
       /* constrained to send ack */
       || (tc->flags & TCP_CONN_SNDACK) != 0
       /* we're almost out of tx wnd */
-      || tcp_available_snd_space (tc) < 4 * tc->snd_mss)
+      || tcp_available_cc_snd_space (tc) < 4 * tc->snd_mss)
     return 0;
 
   return 1;
@@ -1592,7 +1619,7 @@
       *next0 = tcp_next_output (tc->c_is_ip4);
       tcp_make_ack (tc, b);
       vnet_buffer (b)->tcp.flags = TCP_BUF_FLAG_DUPACK;
-      TCP_EVT_DBG (TCP_EVT_DUPACK_SENT, tc);
+      TCP_EVT_DBG (TCP_EVT_DUPACK_SENT, tc, vnet_buffer (b)->tcp);
       goto done;
     }
 
@@ -1773,9 +1800,7 @@
 						   &error0)))
 	    {
 	      tcp_maybe_inc_err_counter (err_counters, error0);
-	      TCP_EVT_DBG (TCP_EVT_SEG_INVALID, tc0,
-			   vnet_buffer (b0)->tcp.seq_number,
-			   vnet_buffer (b0)->tcp.seq_end);
+	      TCP_EVT_DBG (TCP_EVT_SEG_INVALID, tc0, vnet_buffer (b0)->tcp);
 	      goto done;
 	    }
 
diff --git a/src/vnet/tcp/tcp_newreno.c b/src/vnet/tcp/tcp_newreno.c
index 0f43d21..a9ec58c 100644
--- a/src/vnet/tcp/tcp_newreno.c
+++ b/src/vnet/tcp/tcp_newreno.c
@@ -41,8 +41,8 @@
       if (tc->cwnd_acc_bytes >= tc->cwnd)
 	{
 	  u32 inc = tc->cwnd_acc_bytes / tc->cwnd;
-	  tc->cwnd += inc * tc->snd_mss;
 	  tc->cwnd_acc_bytes -= inc * tc->cwnd;
+	  tc->cwnd += inc * tc->snd_mss;
 	}
       tc->cwnd = clib_min (tc->cwnd,
 			   transport_tx_fifo_size (&tc->connection));
diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c
index 2745065..a036287 100644
--- a/src/vnet/tcp/tcp_output.c
+++ b/src/vnet/tcp/tcp_output.c
@@ -389,6 +389,7 @@
     {
     case TCP_STATE_ESTABLISHED:
     case TCP_STATE_FIN_WAIT_1:
+    case TCP_STATE_CLOSED:
       return tcp_make_established_options (tc, opts);
     case TCP_STATE_SYN_RCVD:
       return tcp_make_synack_options (tc, opts);
@@ -1337,8 +1338,9 @@
  * Reset congestion control, switch cwnd to loss window and try again.
  */
 static void
-tcp_rtx_timeout_cc (tcp_connection_t * tc)
+tcp_rxt_timeout_cc (tcp_connection_t * tc)
 {
+  TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 6);
   tc->prev_ssthresh = tc->ssthresh;
   tc->prev_cwnd = tc->cwnd;
 
@@ -1383,6 +1385,8 @@
       tc->timers[TCP_TIMER_RETRANSMIT] = TCP_TIMER_HANDLE_INVALID;
     }
 
+  TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 1);
+
   if (tc->state >= TCP_STATE_ESTABLISHED)
     {
       /* Lost FIN, retransmit and return */
@@ -1414,13 +1418,11 @@
 
       /* First retransmit timeout */
       if (tc->rto_boff == 1)
-	tcp_rtx_timeout_cc (tc);
+	tcp_rxt_timeout_cc (tc);
 
       tc->snd_una_max = tc->snd_nxt = tc->snd_una;
       tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
 
-      TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 1);
-
       /* Send one segment. Note that n_bytes may be zero due to buffer shortfall  */
       n_bytes = tcp_prepare_retransmit_segment (tc, 0, tc->snd_mss, &b);
 
@@ -1627,7 +1629,7 @@
 tcp_fast_retransmit_sack (tcp_connection_t * tc)
 {
   vlib_main_t *vm = vlib_get_main ();
-  u32 n_written = 0, offset, max_bytes;
+  u32 n_written = 0, offset, max_bytes, n_segs = 0;
   vlib_buffer_t *b = 0;
   sack_scoreboard_hole_t *hole;
   sack_scoreboard_t *sb;
@@ -1636,14 +1638,17 @@
   u8 snd_limited = 0, can_rescue = 0;
 
   ASSERT (tcp_in_fastrecovery (tc));
-  TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 0);
 
   old_snd_nxt = tc->snd_nxt;
   sb = &tc->sack_sb;
-  snd_space = tcp_available_snd_space (tc);
+  snd_space = tcp_available_cc_snd_space (tc);
 
+  if (snd_space < tc->snd_mss)
+    goto done;
+
+  TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 0);
   hole = scoreboard_get_hole (sb, sb->cur_rxt_hole);
-  while (hole && snd_space > 0)
+  while (hole && snd_space > 0 && n_segs++ < VLIB_FRAME_SIZE)
     {
       hole = scoreboard_next_rxt_hole (sb, hole,
 				       tcp_fastrecovery_sent_1_smss (tc),
@@ -1717,7 +1722,7 @@
   /* Start resending from first un-acked segment */
   old_snd_nxt = tc->snd_nxt;
   tc->snd_nxt = tc->snd_una;
-  snd_space = tcp_available_snd_space (tc);
+  snd_space = tcp_available_cc_snd_space (tc);
 
   while (snd_space > 0)
     {
@@ -1743,8 +1748,7 @@
 void
 tcp_fast_retransmit (tcp_connection_t * tc)
 {
-  if (tcp_opts_sack_permitted (&tc->rcv_opts)
-      && scoreboard_first_hole (&tc->sack_sb))
+  if (tcp_opts_sack_permitted (&tc->rcv_opts))
     tcp_fast_retransmit_sack (tc);
   else
     tcp_fast_retransmit_no_sack (tc);