tcp: loss recovery improvements/fixes

- fix NewReno cwnd computation
- reset snd_una_max on entering recovery
- accept ACKs beyond snd_nxt but less than snd_congestion when in
  recovery
- avoid entering fast recovery multiple times when using SACKs
- avoid sending small segments as much as possible when doing fast
  retransmit
- more event logging

Change-Id: I19dd151d7704e39d4eae06de3a26f5e124875366
Signed-off-by: Florin Coras <fcoras@cisco.com>
diff --git a/src/vnet/tcp/tcp_debug.h b/src/vnet/tcp/tcp_debug.h
index 4af4f2e..a52efe0 100755
--- a/src/vnet/tcp/tcp_debug.h
+++ b/src/vnet/tcp/tcp_debug.h
@@ -56,6 +56,9 @@
   _(CC_PACK, "cc partial ack")		\
   _(CC_STAT, "cc stats")		\
   _(CC_RTO_STAT, "cc rto stats")	\
+  _(CC_SCOREBOARD, "scoreboard stats")	\
+  _(CC_SACKS, "snd sacks stats")	\
+  _(CC_INPUT, "ooo data delivered")	\
   _(SEG_INVALID, "invalid segment")	\
   _(PAWS_FAIL, "failed paws check")	\
   _(ACK_RCV_ERR, "invalid ack")		\
@@ -192,7 +195,7 @@
   ed->data[0] = _tc->c_c_index;						\
 }
 
-#define TCP_EVT_SYN_RCVD_HANDLER(_tc,_init, ...)				\
+#define TCP_EVT_SYN_RCVD_HANDLER(_tc,_init, ...)			\
 {									\
   if (_init)								\
     TCP_EVT_INIT_HANDLER(_tc, 0);					\
@@ -277,9 +280,9 @@
   };									\
   DECLARE_ETD(_tc, _e, 4);						\
   ed->data[0] = _tc->iss;						\
-  ed->data[1] = _tc->snd_una - _tc->iss;					\
+  ed->data[1] = _tc->snd_una - _tc->iss;				\
   ed->data[2] = _tc->snd_una_max - _tc->iss;				\
-  ed->data[3] = _tc->snd_nxt - _tc->iss;					\
+  ed->data[3] = _tc->snd_nxt - _tc->iss;				\
   TCP_EVT_STATE_CHANGE_HANDLER(_tc);					\
 }
 
@@ -288,14 +291,14 @@
   ELOG_TYPE_DECLARE (_e) =						\
   {									\
     .format = "synack-tx: iss %u irs %u snd_una %u snd_nxt %u rcv_nxt %u",\
-    .format_args = "i4i4i4i4i4",						\
+    .format_args = "i4i4i4i4i4",					\
   };									\
   DECLARE_ETD(_tc, _e, 5);						\
   ed->data[0] = _tc->iss;						\
   ed->data[1] = _tc->irs;						\
-  ed->data[2] = _tc->snd_una - _tc->iss;					\
-  ed->data[3] = _tc->snd_nxt - _tc->iss;					\
-  ed->data[4] = _tc->rcv_nxt - _tc->irs;					\
+  ed->data[2] = _tc->snd_una - _tc->iss;				\
+  ed->data[3] = _tc->snd_nxt - _tc->iss;				\
+  ed->data[4] = _tc->rcv_nxt - _tc->irs;				\
 }
 
 #define TCP_EVT_SYNACK_RCVD_HANDLER(_tc, ...)				\
@@ -303,14 +306,14 @@
   ELOG_TYPE_DECLARE (_e) =						\
   {									\
     .format = "synack-rx: iss %u irs %u snd_una %u snd_nxt %u rcv_nxt %u",\
-    .format_args = "i4i4i4i4i4",						\
+    .format_args = "i4i4i4i4i4",					\
   };									\
   DECLARE_ETD(_tc, _e, 5);						\
   ed->data[0] = _tc->iss;						\
   ed->data[1] = _tc->irs;						\
-  ed->data[2] = _tc->snd_una - _tc->iss;					\
-  ed->data[3] = _tc->snd_nxt - _tc->iss;					\
-  ed->data[4] = _tc->rcv_nxt - _tc->irs;					\
+  ed->data[2] = _tc->snd_una - _tc->iss;				\
+  ed->data[3] = _tc->snd_nxt - _tc->iss;				\
+  ed->data[4] = _tc->rcv_nxt - _tc->irs;				\
   TCP_EVT_STATE_CHANGE_HANDLER(_tc);					\
 }
 
@@ -371,7 +374,7 @@
   ELOG_TYPE_DECLARE (_e) =						\
   {									\
     .format = "%s-rxt: iss %u irs %u snd_nxt %u rcv_nxt %u",		\
-    .format_args = "t4i4i4i4i4",						\
+    .format_args = "t4i4i4i4i4",					\
     .n_enum_strings = 2,						\
     .enum_strings = {                                           	\
 	"syn",	                                             		\
@@ -382,10 +385,9 @@
   ed->data[0] = _type;							\
   ed->data[1] = _tc->iss;						\
   ed->data[2] = _tc->irs;						\
-  ed->data[3] = _tc->snd_nxt - _tc->iss;					\
-  ed->data[4] = _tc->rcv_nxt - _tc->irs;					\
+  ed->data[3] = _tc->snd_nxt - _tc->iss;				\
+  ed->data[4] = _tc->rcv_nxt - _tc->irs;				\
 }
-
 #else
 #define TCP_EVT_SYN_SENT_HANDLER(_tc, ...)
 #define TCP_EVT_SYNACK_SENT_HANDLER(_tc, ...)
@@ -399,6 +401,81 @@
 #endif
 
 #if TCP_DEBUG_SM > 1
+#define TCP_EVT_SEG_INVALID_HANDLER(_tc, _btcp, ...)			\
+{									\
+  ELOG_TYPE_DECLARE (_e) =						\
+  {									\
+    .format = "seg-inv: seq %u end %u rcv_las %u rcv_nxt %u rcv_wnd %u",\
+    .format_args = "i4i4i4i4i4",					\
+  };									\
+  DECLARE_ETD(_tc, _e, 5);						\
+  ed->data[0] = _btcp.seq_number - _tc->irs;				\
+  ed->data[1] = _btcp.seq_end - _tc->irs;				\
+  ed->data[2] = _tc->rcv_las - _tc->irs;				\
+  ed->data[3] = _tc->rcv_nxt - _tc->irs;				\
+  ed->data[4] = _tc->rcv_wnd;						\
+}
+
+#define TCP_EVT_PAWS_FAIL_HANDLER(_tc, _seq, _end, ...)			\
+{									\
+  ELOG_TYPE_DECLARE (_e) =						\
+  {									\
+    .format = "paws-err: seq %u end %u tsval %u tsval_recent %u",	\
+    .format_args = "i4i4i4i4",						\
+  };									\
+  DECLARE_ETD(_tc, _e, 4);						\
+  ed->data[0] = _seq - _tc->irs;					\
+  ed->data[1] = _end - _tc->irs;					\
+  ed->data[2] = _tc->rcv_opts.tsval;					\
+  ed->data[3] = _tc->tsval_recent;					\
+}
+
+#define TCP_EVT_ACK_RCV_ERR_HANDLER(_tc, _type, _ack, ...)		\
+{									\
+  ELOG_TYPE_DECLARE (_e) =						\
+  {									\
+    .format = "ack-err: %s ack %u snd_una %u snd_nxt %u una_max %u",	\
+    .format_args = "t4i4i4i4i4",					\
+    .n_enum_strings = 3,						\
+    .enum_strings = {                                           	\
+      "invalid",                                                 	\
+      "old",                                                 		\
+      "future",								\
+    }, 									\
+  };									\
+  DECLARE_ETD(_tc, _e, 5);						\
+  ed->data[0] = _type;							\
+  ed->data[1] = _ack - _tc->iss;					\
+  ed->data[2] = _tc->snd_una - _tc->iss;				\
+  ed->data[3] = _tc->snd_nxt - _tc->iss;				\
+  ed->data[4] = _tc->snd_una_max - _tc->iss;				\
+}
+
+#define TCP_EVT_RCV_WND_SHRUNK_HANDLER(_tc, _obs, _av, ...)		\
+{									\
+if (_av > 0) 								\
+{									\
+  ELOG_TYPE_DECLARE (_e) =						\
+  {									\
+    .format = "huh?: rcv_wnd %u obsd %u av %u rcv_nxt %u rcv_las %u",	\
+    .format_args = "i4i4i4i4i4",					\
+  };									\
+  DECLARE_ETD(_tc, _e, 5);						\
+  ed->data[0] = _tc->rcv_wnd;						\
+  ed->data[1] = _obs;							\
+  ed->data[2] = _av;							\
+  ed->data[3] = _tc->rcv_nxt - _tc->irs;				\
+  ed->data[4] = _tc->rcv_las - _tc->irs;				\
+}									\
+}
+#else
+#define TCP_EVT_SEG_INVALID_HANDLER(_tc, _btcp, ...)
+#define TCP_EVT_PAWS_FAIL_HANDLER(_tc, _seq, _end, ...)
+#define TCP_EVT_ACK_RCV_ERR_HANDLER(_tc, _type, _ack, ...)
+#define TCP_EVT_RCV_WND_SHRUNK_HANDLER(_tc, _obs, _av, ...)
+#endif
+
+#if TCP_DEBUG_SM > 2
 
 #define TCP_EVT_ACK_SENT_HANDLER(_tc, ...)				\
 {									\
@@ -505,90 +582,18 @@
 		    _tc_index);						\
     }									\
 }
-
-#define TCP_EVT_SEG_INVALID_HANDLER(_tc, _seq, _end, ...)		\
-{									\
-  ELOG_TYPE_DECLARE (_e) =						\
-  {									\
-    .format = "seg-inv: seq %u end %u rcv_las %u rcv_nxt %u rcv_wnd %u",\
-    .format_args = "i4i4i4i4i4",					\
-  };									\
-  DECLARE_ETD(_tc, _e, 5);						\
-  ed->data[0] = _seq - _tc->irs;					\
-  ed->data[1] = _end - _tc->irs;					\
-  ed->data[2] = _tc->rcv_las - _tc->irs;				\
-  ed->data[3] = _tc->rcv_nxt - _tc->irs;				\
-  ed->data[4] = _tc->rcv_wnd;						\
-}
-
-#define TCP_EVT_PAWS_FAIL_HANDLER(_tc, _seq, _end, ...)			\
-{									\
-  ELOG_TYPE_DECLARE (_e) =						\
-  {									\
-    .format = "paws-err: seq %u end %u tsval %u tsval_recent %u",	\
-    .format_args = "i4i4i4i4",						\
-  };									\
-  DECLARE_ETD(_tc, _e, 4);						\
-  ed->data[0] = _seq - _tc->irs;					\
-  ed->data[1] = _end - _tc->irs;					\
-  ed->data[2] = _tc->rcv_opts.tsval;					\
-  ed->data[3] = _tc->tsval_recent;					\
-}
-
-#define TCP_EVT_ACK_RCV_ERR_HANDLER(_tc, _type, _ack, ...)		\
-{									\
-  ELOG_TYPE_DECLARE (_e) =						\
-  {									\
-    .format = "ack-err: %s ack %u snd_una %u snd_nxt %u una_max %u",	\
-    .format_args = "t4i4i4i4i4",					\
-    .n_enum_strings = 3,						\
-    .enum_strings = {                                           	\
-      "invalid",                                                 	\
-      "old",                                                 		\
-      "future",								\
-    }, 									\
-  };									\
-  DECLARE_ETD(_tc, _e, 5);						\
-  ed->data[0] = _type;							\
-  ed->data[1] = _ack - _tc->iss;					\
-  ed->data[2] = _tc->snd_una - _tc->iss;				\
-  ed->data[3] = _tc->snd_nxt - _tc->iss;				\
-  ed->data[4] = _tc->snd_una_max - _tc->iss;				\
-}
-
-#define TCP_EVT_RCV_WND_SHRUNK_HANDLER(_tc, _obs, _av, ...)		\
-{									\
-if (_av > 0) 								\
-{									\
-  ELOG_TYPE_DECLARE (_e) =						\
-  {									\
-    .format = "huh?: rcv_wnd %u obsd %u av %u rcv_nxt %u rcv_las %u",	\
-    .format_args = "i4i4i4i4i4",					\
-  };									\
-  DECLARE_ETD(_tc, _e, 5);						\
-  ed->data[0] = _tc->rcv_wnd;						\
-  ed->data[1] = _obs;							\
-  ed->data[2] = _av;							\
-  ed->data[3] = _tc->rcv_nxt - _tc->irs;				\
-  ed->data[4] = _tc->rcv_las - _tc->irs;				\
-}									\
-}
 #else
 #define TCP_EVT_ACK_SENT_HANDLER(_tc, ...)
 #define TCP_EVT_ACK_RCVD_HANDLER(_tc, ...)
 #define TCP_EVT_PKTIZE_HANDLER(_tc, ...)
 #define TCP_EVT_INPUT_HANDLER(_tc, _type, _len, _written, ...)
 #define TCP_EVT_TIMER_POP_HANDLER(_tc_index, _timer_id, ...)
-#define TCP_EVT_SEG_INVALID_HANDLER(_tc, _seq, _end, ...)
-#define TCP_EVT_PAWS_FAIL_HANDLER(_tc, _seq, _end, ...)
-#define TCP_EVT_ACK_RCV_ERR_HANDLER(_tc, _type, _ack, ...)
-#define TCP_EVT_RCV_WND_SHRUNK_HANDLER(_tc, _obs, _av, ...)
 #endif
 
 /*
  * State machine verbose
  */
-#if TCP_DEBUG_SM > 2
+#if TCP_DEBUG_SM > 3
 #define TCP_EVT_SND_WND_HANDLER(_tc, ...)				\
 {									\
   ELOG_TYPE_DECLARE (_e) =						\
@@ -626,9 +631,9 @@
 {									\
   ELOG_TYPE_DECLARE (_e) =						\
   {									\
-    .format = "cc: %s snd_space %u snd_cong %u rxt_bytes %u",		\
-    .format_args = "t4i4i4i4",						\
-    .n_enum_strings = 6,						\
+    .format = "cc: %s snd_space %u snd_una %u out %u flight %u",	\
+    .format_args = "t4i4i4i4i4",					\
+    .n_enum_strings = 7,						\
     .enum_strings = {                                           	\
       "fast-rxt",	                                             	\
       "rxt-timeout",                                                 	\
@@ -636,13 +641,15 @@
       "recovered",							\
       "congestion",							\
       "undo",								\
+      "recovery",							\
     },  								\
   };									\
-  DECLARE_ETD(_tc, _e, 4);						\
+  DECLARE_ETD(_tc, _e, 5);						\
   ed->data[0] = _sub_evt;						\
-  ed->data[1] = tcp_available_snd_space (_tc);				\
-  ed->data[2] = _tc->snd_congestion - _tc->iss;				\
-  ed->data[3] = _tc->snd_rxt_bytes;					\
+  ed->data[1] = tcp_available_cc_snd_space (_tc);			\
+  ed->data[2] = _tc->snd_una - _tc->iss;				\
+  ed->data[3] = tcp_bytes_out(_tc);					\
+  ed->data[4] = tcp_flight_size (_tc);					\
 }
 
 #define TCP_EVT_CC_RTX_HANDLER(_tc, offset, n_bytes, ...)		\
@@ -659,19 +666,19 @@
   ed->data[3] = _tc->snd_rxt_bytes;					\
 }
 
-#define TCP_EVT_DUPACK_SENT_HANDLER(_tc, ...)				\
+#define TCP_EVT_DUPACK_SENT_HANDLER(_tc, _btcp, ...)			\
 {									\
   ELOG_TYPE_DECLARE (_e) =						\
   {									\
-    .format = "dack-tx: rcv_nxt %u rcv_wnd %u snd_nxt %u av_wnd %u snd_wnd %u",\
+    .format = "dack-tx: rcv_nxt %u seq %u rcv_wnd %u snd_nxt %u av_wnd %u",\
     .format_args = "i4i4i4i4i4",					\
   };									\
   DECLARE_ETD(_tc, _e, 5);						\
   ed->data[0] = _tc->rcv_nxt - _tc->irs;				\
-  ed->data[1] = _tc->rcv_wnd;						\
-  ed->data[2] = _tc->snd_nxt - _tc->iss;				\
-  ed->data[3] = tcp_available_snd_wnd(_tc);				\
-  ed->data[4] = _tc->snd_wnd;						\
+  ed->data[1] = _btcp.seq_number - _tc->irs;				\
+  ed->data[2] = _tc->rcv_wnd;						\
+  ed->data[3] = _tc->snd_nxt - _tc->iss;				\
+  ed->data[4] = tcp_available_snd_wnd(_tc);				\
 }
 
 #define TCP_EVT_DUPACK_RCVD_HANDLER(_tc, ...)				\
@@ -700,12 +707,75 @@
   ed->data[0] = _tc->snd_una - _tc->iss;				\
   ed->data[1] = _tc->snd_una_max - _tc->iss;				\
 }
+#define TCP_EVT_CC_SCOREBOARD_HANDLER(_tc, ...)				\
+{									\
+if (TCP_DEBUG_CC > 1 && _tc->sack_sb.last_sacked_bytes)			\
+  {									\
+    ELOG_TYPE_DECLARE (_e) =						\
+    {									\
+      .format = "sb1: holes %u lost %u sacked %u high %u highrxt %u",	\
+      .format_args = "i4i4i4i4i4",					\
+    };									\
+    DECLARE_ETD(_tc, _e, 5);						\
+    ed->data[0] = pool_elts(_tc->sack_sb.holes);			\
+    ed->data[1] = _tc->sack_sb.lost_bytes;				\
+    ed->data[2] = _tc->sack_sb.sacked_bytes;				\
+    ed->data[3] = _tc->sack_sb.high_sacked - _tc->iss;			\
+    ed->data[4] = _tc->sack_sb.high_rxt - _tc->iss;			\
+  }									\
+if (TCP_DEBUG_CC > 1 && _tc->sack_sb.last_sacked_bytes)			\
+  {									\
+    sack_scoreboard_hole_t *hole;					\
+    hole = scoreboard_first_hole (&_tc->sack_sb);			\
+    ELOG_TYPE_DECLARE (_e) =						\
+    {									\
+      .format = "sb2: first start: %u end %u last start %u end %u",	\
+      .format_args = "i4i4i4i4",					\
+    };									\
+    DECLARE_ETD(_tc, _e, 4);						\
+    ed->data[0] = hole ? hole->start - _tc->iss : 0;			\
+    ed->data[1] = hole ? hole->end - _tc->iss : 0;			\
+    hole = scoreboard_last_hole (&_tc->sack_sb);			\
+    ed->data[2] = hole ? hole->start - _tc->iss : 0;			\
+    ed->data[3] = hole ? hole->end - _tc->iss : 0;			\
+  }									\
+}
+#define TCP_EVT_CC_SACKS_HANDLER(_tc, ...)				\
+{									\
+if (TCP_DEBUG_CC > 1)							\
+  {									\
+    ELOG_TYPE_DECLARE (_e) =						\
+    {									\
+      .format = "sacks: blocks %u bytes %u",				\
+      .format_args = "i4i4",						\
+    };									\
+    DECLARE_ETD(_tc, _e, 2);						\
+    ed->data[0] = vec_len (_tc->snd_sacks);				\
+    ed->data[1] = tcp_sack_list_bytes (_tc);				\
+  }									\
+}
+#define TCP_EVT_CC_INPUT_HANDLER(_tc, _len, _written, ...)		\
+{									\
+  ELOG_TYPE_DECLARE (_e) =						\
+  {									\
+    .format = "cc input: len %u written %d rcv_nxt %u rcv_wnd(o) %d",	\
+    .format_args = "i4i4i4i4",						\
+  };									\
+  DECLARE_ETD(_tc, _e, 4);						\
+  ed->data[0] = _len;							\
+  ed->data[1] = _written;						\
+  ed->data[2] = _tc->rcv_nxt - _tc->irs;				\
+  ed->data[3] = _tc->rcv_wnd - (_tc->rcv_nxt - _tc->rcv_las);		\
+}
 #else
 #define TCP_EVT_CC_RTX_HANDLER(_tc, offset, n_bytes, ...)
-#define TCP_EVT_DUPACK_SENT_HANDLER(_tc, ...)
+#define TCP_EVT_DUPACK_SENT_HANDLER(_tc, _btcp, ...)
 #define TCP_EVT_DUPACK_RCVD_HANDLER(_tc, ...)
 #define TCP_EVT_CC_PACK_HANDLER(_tc, ...)
 #define TCP_EVT_CC_EVT_HANDLER(_tc, _sub_evt, ...)
+#define TCP_EVT_CC_SCOREBOARD_HANDLER(_tc, ...)
+#define TCP_EVT_CC_SACKS_HANDLER(_tc, ...)
+#define TCP_EVT_CC_INPUT_HANDLER(_tc, _len, _written, ...)
 #endif
 
 /*