tcp: consume incoming buffers instead of reusing

Instead of reusing incoming buffers for acking, consume all of them and
program the output to generate (dup)acks. This implicitly fixes the
drop counters, which were artificially inflated by both data and
feedback traffic.

Moreover, the patch significantly reduces ack traffic, as we now
generate only one ack per frame unless duplicate acks need to be sent.

Because of the reduced feedback traffic, a sender's rx path and a
receiver's tx path are now significantly less loaded. As a result, a
sender can now overwhelm a 40Gbps NIC and generate bursts of tx drops
at low RTTs. Consequently, tx pacing is now enforced by default.

Change-Id: I619c29a8945bf26c093f8f9e197e3c6d5d43868e
Signed-off-by: Florin Coras <fcoras@cisco.com>
diff --git a/src/vnet/session/transport.c b/src/vnet/session/transport.c
index 42149eb..06c541a 100644
--- a/src/vnet/session/transport.c
+++ b/src/vnet/session/transport.c
@@ -47,7 +47,8 @@
  */
 static double transport_pacer_period;
 
-#define TRANSPORT_PACER_MIN_MSS 1460
+#define TRANSPORT_PACER_MIN_MSS 	1460
+#define TRANSPORT_PACER_MIN_BURST 	TRANSPORT_PACER_MIN_MSS
 
 u8 *
 format_transport_proto (u8 * s, va_list * args)
@@ -518,7 +519,8 @@
 static inline void
 spacer_update_max_burst_size (spacer_t * pacer, u32 max_burst_bytes)
 {
-  pacer->max_burst_size = clib_max (max_burst_bytes, TRANSPORT_PACER_MIN_MSS);
+  pacer->max_burst_size = clib_max (max_burst_bytes,
+				    TRANSPORT_PACER_MIN_BURST);
 }
 
 static inline void
@@ -561,9 +563,8 @@
 transport_connection_tx_pacer_update (transport_connection_t * tc,
 				      u64 bytes_per_sec)
 {
-  u32 burst_size;
-
-  burst_size = bytes_per_sec * transport_dispatch_period (tc->thread_index);
+  f64 dispatch_period = transport_dispatch_period (tc->thread_index);
+  u32 burst_size = 1.1 * bytes_per_sec * dispatch_period;
   spacer_set_pace_rate (&tc->pacer, bytes_per_sec);
   spacer_update_max_burst_size (&tc->pacer, burst_size);
 }